1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
  28  * Portions Copyright 2011 Martin Matuska
  29  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
  30  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
  31  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  32  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  33  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  34  * Copyright (c) 2014 Integros [integros.com]
  35  * Copyright 2018 Nexenta Systems, Inc.
  36  * Copyright 2016 Toomas Soome <tsoome@me.com>
  37  * Copyright 2017 RackTop Systems.
  38  * Copyright (c) 2017 Datto Inc.
  39  */
  40 
  41 /*
  42  * ZFS ioctls.
  43  *
  44  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
  45  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
  46  *
  47  * There are two ways that we handle ioctls: the legacy way where almost
  48  * all of the logic is in the ioctl callback, and the new way where most
  49  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
  50  *
  51  * Non-legacy ioctls should be registered by calling
  52  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
  53  * from userland by lzc_ioctl().
  54  *
  55  * The registration arguments are as follows:
  56  *
  57  * const char *name
  58  *   The name of the ioctl.  This is used for history logging.  If the
  59  *   ioctl returns successfully (the callback returns 0), and allow_log
  60  *   is true, then a history log entry will be recorded with the input &
  61  *   output nvlists.  The log entry can be printed with "zpool history -i".
  62  *
  63  * zfs_ioc_t ioc
  64  *   The ioctl request number, which userland will pass to ioctl(2).
  65  *   The ioctl numbers can change from release to release, because
  66  *   the caller (libzfs) must be matched to the kernel.
  67  *
  68  * zfs_secpolicy_func_t *secpolicy
  69  *   This function will be called before the zfs_ioc_func_t, to
  70  *   determine if this operation is permitted.  It should return EPERM
  71  *   on failure, and 0 on success.  Checks include determining if the
  72  *   dataset is visible in this zone, and if the user has either all
  73  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
  74  *   to do this operation on this dataset with "zfs allow".
  75  *
  76  * zfs_ioc_namecheck_t namecheck
  77  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
  78  *   name, a dataset name, or nothing.  If the name is not well-formed,
  79  *   the ioctl will fail and the callback will not be called.
  80  *   Therefore, the callback can assume that the name is well-formed
  81  *   (e.g. is null-terminated, doesn't have more than one '@' character,
  82  *   doesn't have invalid characters).
  83  *
  84  * zfs_ioc_poolcheck_t pool_check
  85  *   This specifies requirements on the pool state.  If the pool does
  86  *   not meet them (is suspended or is readonly), the ioctl will fail
  87  *   and the callback will not be called.  If any checks are specified
  88  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
  89  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
  90  *   POOL_CHECK_READONLY).
  91  *
  92  * boolean_t smush_outnvlist
  93  *   If smush_outnvlist is true, then the output is presumed to be a
  94  *   list of errors, and it will be "smushed" down to fit into the
  95  *   caller's buffer, by removing some entries and replacing them with a
  96  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
  97  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
  98  *   outnvlist does not fit into the userland-provided buffer, then the
  99  *   ioctl will fail with ENOMEM.
 100  *
 101  * zfs_ioc_func_t *func
 102  *   The callback function that will perform the operation.
 103  *
 104  *   The callback should return 0 on success, or an error number on
 105  *   failure.  If the function fails, the userland ioctl will return -1,
 106  *   and errno will be set to the callback's return value.  The callback
 107  *   will be called with the following arguments:
 108  *
 109  *   const char *name
 110  *     The name of the pool or dataset to operate on, from
 111  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
 112  *     expected type (pool, dataset, or none).
 113  *
 114  *   nvlist_t *innvl
 115  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
 116  *     NULL if no input nvlist was provided.  Changes to this nvlist are
 117  *     ignored.  If the input nvlist could not be deserialized, the
 118  *     ioctl will fail and the callback will not be called.
 119  *
 120  *   nvlist_t *outnvl
 121  *     The output nvlist, initially empty.  The callback can fill it in,
 122  *     and it will be returned to userland by serializing it into
 123  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
 124  *     fails (e.g. because the caller didn't supply a large enough
 125  *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
 127  *
 128  *     There are two typical uses of the output nvlist:
 129  *       - To return state, e.g. property values.  In this case,
 130  *         smush_outnvlist should be false.  If the buffer was not large
 131  *         enough, the caller will reallocate a larger buffer and try
 132  *         the ioctl again.
 133  *
 134  *       - To return multiple errors from an ioctl which makes on-disk
 135  *         changes.  In this case, smush_outnvlist should be true.
 136  *         Ioctls which make on-disk modifications should generally not
 137  *         use the outnvl if they succeed, because the caller can not
 138  *         distinguish between the operation failing, and
 139  *         deserialization failing.
 140  */
 141 
 142 #include <sys/types.h>
 143 #include <sys/param.h>
 144 #include <sys/errno.h>
 145 #include <sys/uio.h>
 146 #include <sys/buf.h>
 147 #include <sys/modctl.h>
 148 #include <sys/open.h>
 149 #include <sys/file.h>
 150 #include <sys/kmem.h>
 151 #include <sys/conf.h>
 152 #include <sys/cmn_err.h>
 153 #include <sys/stat.h>
 154 #include <sys/zfs_ioctl.h>
 155 #include <sys/zfs_vfsops.h>
 156 #include <sys/zfs_znode.h>
 157 #include <sys/zap.h>
 158 #include <sys/spa.h>
 159 #include <sys/spa_impl.h>
 160 #include <sys/vdev.h>
 161 #include <sys/priv_impl.h>
 162 #include <sys/autosnap.h>
 163 #include <sys/dmu.h>
 164 #include <sys/dsl_dir.h>
 165 #include <sys/dsl_dataset.h>
 166 #include <sys/dsl_prop.h>
 167 #include <sys/dsl_deleg.h>
 168 #include <sys/dsl_synctask.h>
 169 #include <sys/dmu_objset.h>
 170 #include <sys/dmu_impl.h>
 171 #include <sys/dmu_tx.h>
 172 #include <sys/ddi.h>
 173 #include <sys/sunddi.h>
 174 #include <sys/sunldi.h>
 175 #include <sys/policy.h>
 176 #include <sys/zone.h>
 177 #include <sys/nvpair.h>
 178 #include <sys/pathname.h>
 179 #include <sys/mount.h>
 180 #include <sys/sdt.h>
 181 #include <sys/fs/zfs.h>
 182 #include <sys/zfs_ctldir.h>
 183 #include <sys/zfs_dir.h>
 184 #include <sys/zfs_onexit.h>
 185 #include <sys/zvol.h>
 186 #include <sys/dsl_scan.h>
 187 #include <sharefs/share.h>
 188 #include <sys/dmu_objset.h>
 189 #include <sys/dmu_send.h>
 190 #include <sys/dsl_destroy.h>
 191 #include <sys/dsl_bookmark.h>
 192 #include <sys/dsl_userhold.h>
 193 #include <sys/zfeature.h>
 194 #include <sys/cos.h>
 195 #include <sys/cos_impl.h>
 196 #include <sys/zfeature.h>
 197 #include <sys/sysevent.h>
 198 #include <sys/sysevent_impl.h>
 199 #include <sys/zcp.h>
 200 #include <sys/zio_checksum.h>
 201 
 202 #include "zfs_namecheck.h"
 203 #include "zfs_prop.h"
 204 #include "zfs_deleg.h"
 205 #include "zfs_comutil.h"
 206 #include "zfs_errno.h"
 207 
 208 #include "lua.h"
 209 #include "lauxlib.h"
 210 
/* Defined outside this file; only referenced here. */
extern struct modlfs zfs_modlfs;

/* ZFS init/fini entry points, defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * LDI identifier and devinfo node for the ZFS driver.
 * NOTE(review): presumably filled in when the driver attaches -- confirm
 * against the attach/detach code, which is not part of this section.
 */
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;

/*
 * Key variables.  NOTE(review): these look like thread-specific-data keys
 * (rrw_tsd_key's name suggests so) -- confirm where they are created.
 * zfs_allow_log_key is private to this file; rrw_tsd_key is defined
 * elsewhere.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
static uint_t zfs_allow_log_key;
 222 
/*
 * Callback signatures used by the ioctl machinery (see the comment at the
 * top of this file for the full contract):
 *
 * zfs_ioc_legacy_func_t
 *   Legacy-style handler; receives the zfs_cmd_t and does its own work.
 *
 * zfs_ioc_func_t
 *   New-style handler; receives the name from zfs_cmd_t:zc_name, the input
 *   nvlist and the output nvlist, and returns 0 or an error number.
 *
 * zfs_secpolicy_func_t
 *   Permission check invoked before the handler; receives the zfs_cmd_t,
 *   the input nvlist and the caller's credential, and returns 0 when the
 *   operation is permitted.
 */
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
 226 
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name: nothing, a pool name,
 * or a dataset name.  Per the registration notes at the top of this file, an
 * ioctl whose name is not well-formed fails before its callback is called.
 */
typedef enum {
        NO_NAME,        /* zc_name is not used */
        POOL_NAME,      /* zc_name holds a pool name */
        DATASET_NAME    /* zc_name holds a dataset name */
} zfs_ioc_namecheck_t;
 232 
/*
 * Requirements on the pool state for an ioctl.  Each value is a distinct
 * bit, so checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
 * POOL_CHECK_READONLY).  Per the registration notes at the top of this
 * file, the ioctl fails without calling its callback when the pool does not
 * meet the requested checks (is suspended or is readonly).
 */
typedef enum {
        POOL_CHECK_NONE         = 1 << 0,       /* no pool state check */
        POOL_CHECK_SUSPENDED    = 1 << 1,       /* suspended-pool check */
        POOL_CHECK_READONLY     = 1 << 2,       /* readonly-pool check */
} zfs_ioc_poolcheck_t;
 238 
/*
 * Per-ioctl description record.  The fields mirror the registration
 * arguments documented at the top of this file.
 */
typedef struct zfs_ioc_vec {
        /* legacy-style handler, takes the raw zfs_cmd_t */
        zfs_ioc_legacy_func_t   *zvec_legacy_func;
        /* new-style handler, takes (name, innvl, outnvl) */
        zfs_ioc_func_t          *zvec_func;
        /* permission check called before the handler */
        zfs_secpolicy_func_t    *zvec_secpolicy;
        /* what to expect in zfs_cmd_t:zc_name */
        zfs_ioc_namecheck_t     zvec_namecheck;
        /* whether a successful call may be recorded in the history log */
        boolean_t               zvec_allow_log;
        /* required pool state, or-ed POOL_CHECK_* bits */
        zfs_ioc_poolcheck_t     zvec_pool_check;
        /* treat the output nvlist as an error list that may be "smushed" */
        boolean_t               zvec_smush_outnvlist;
        /* ioctl name, used for history logging */
        const char              *zvec_name;
} zfs_ioc_vec_t;
 249 
/*
 * Delegated-permission names for the user/group space properties.
 * This array is indexed by zfs_userquota_prop_t, so the order of the
 * entries must match that enumeration.
 */
static const char *userquota_perms[] = {
        ZFS_DELEG_PERM_USERUSED,
        ZFS_DELEG_PERM_USERQUOTA,
        ZFS_DELEG_PERM_GROUPUSED,
        ZFS_DELEG_PERM_GROUPQUOTA,
};
 257 
/*
 * Forward declarations.  The static ones are private to this file;
 * zfs_set_prop_nvlist() is declared without 'static' and so has external
 * linkage.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
 269 
 270 static int
 271 zfs_is_wormed_ds(dsl_dataset_t *ds)
 272 {
 273         char worminfo[13] = {0};
 274 
 275         if (dsl_prop_get_ds(ds, "nms:worm", 1, 12, &worminfo, NULL) == 0 &&
 276             worminfo[0] && strcmp(worminfo, "0") != 0 &&
 277             strcmp(worminfo, "off") != 0 && strcmp(worminfo, "-") != 0) {
 278                 return (1);
 279         }
 280         return (0);
 281 }
 282 
 283 static int
 284 zfs_is_wormed(const char *name)
 285 {
 286         char worminfo[13] = {0};
 287         char cname[MAXNAMELEN];
 288         char *end;
 289 
 290         (void) strlcpy(cname, name, MAXNAMELEN);
 291         end = strchr(cname, '@');
 292         if (end)
 293                 *end = 0;
 294 
 295         if (dsl_prop_get(cname, "nms:worm", 1, 12, &worminfo, NULL) == 0 &&
 296             worminfo[0] && strcmp(worminfo, "0") != 0 &&
 297             strcmp(worminfo, "off") != 0 && strcmp(worminfo, "-") != 0) {
 298                 return (1);
 299         }
 300         return (0);
 301 }
 302 
 303 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 304 void
 305 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 306 {
 307         const char *newfile;
 308         char buf[512];
 309         va_list adx;
 310 
 311         /*
 312          * Get rid of annoying "../common/" prefix to filename.
 313          */
 314         newfile = strrchr(file, '/');
 315         if (newfile != NULL) {
 316                 newfile = newfile + 1; /* Get rid of leading / */
 317         } else {
 318                 newfile = file;
 319         }
 320 
 321         va_start(adx, fmt);
 322         (void) vsnprintf(buf, sizeof (buf), fmt, adx);
 323         va_end(adx);
 324 
 325         /*
 326          * To get this data, use the zfs-dprintf probe as so:
 327          * dtrace -q -n 'zfs-dprintf \
 328          *      /stringof(arg0) == "dbuf.c"/ \
 329          *      {printf("%s: %s", stringof(arg1), stringof(arg3))}'
 330          * arg0 = file name
 331          * arg1 = function name
 332          * arg2 = line number
 333          * arg3 = message
 334          */
 335         DTRACE_PROBE4(zfs__dprintf,
 336             char *, newfile, char *, func, int, line, char *, buf);
 337 }
 338 
/*
 * Release a history string buffer.  The buffer is freed with a size of
 * HIS_MAX_RECORD_LEN, so 'buf' must have been allocated with exactly that
 * size (as history_str_get() does).
 */
static void
history_str_free(char *buf)
{
        kmem_free(buf, HIS_MAX_RECORD_LEN);
}
 344 
 345 static char *
 346 history_str_get(zfs_cmd_t *zc)
 347 {
 348         char *buf;
 349 
 350         if (zc->zc_history == NULL)
 351                 return (NULL);
 352 
 353         buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 354         if (copyinstr((void *)(uintptr_t)zc->zc_history,
 355             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
 356                 history_str_free(buf);
 357                 return (NULL);
 358         }
 359 
 360         buf[HIS_MAX_RECORD_LEN -1] = '\0';
 361 
 362         return (buf);
 363 }
 364 
 365 /*
 366  * Check to see if the named dataset is currently defined as bootable
 367  */
 368 static boolean_t
 369 zfs_is_bootfs(const char *name)
 370 {
 371         objset_t *os;
 372 
 373         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 374                 boolean_t ret;
 375                 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
 376                 dmu_objset_rele(os, FTAG);
 377                 return (ret);
 378         }
 379         return (B_FALSE);
 380 }
 381 
 382 /*
 383  * Return non-zero if the spa version is less than requested version.
 384  */
 385 static int
 386 zfs_earlier_version(const char *name, int version)
 387 {
 388         spa_t *spa;
 389 
 390         if (spa_open(name, &spa, FTAG) == 0) {
 391                 if (spa_version(spa) < version) {
 392                         spa_close(spa, FTAG);
 393                         return (1);
 394                 }
 395                 spa_close(spa, FTAG);
 396         }
 397         return (0);
 398 }
 399 
 400 /*
 401  * Return TRUE if the ZPL version is less than requested version.
 402  */
 403 static boolean_t
 404 zpl_earlier_version(const char *name, int version)
 405 {
 406         objset_t *os;
 407         boolean_t rc = B_TRUE;
 408 
 409         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 410                 uint64_t zplversion;
 411 
 412                 if (dmu_objset_type(os) != DMU_OST_ZFS) {
 413                         dmu_objset_rele(os, FTAG);
 414                         return (B_TRUE);
 415                 }
 416                 /* XXX reading from non-owned objset */
 417                 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 418                         rc = zplversion < version;
 419                 dmu_objset_rele(os, FTAG);
 420         }
 421         return (rc);
 422 }
 423 
 424 static void
 425 zfs_log_history(zfs_cmd_t *zc)
 426 {
 427         spa_t *spa;
 428         char *buf;
 429 
 430         if ((buf = history_str_get(zc)) == NULL)
 431                 return;
 432 
 433         if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 434                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 435                         (void) spa_history_log(spa, buf);
 436                 spa_close(spa, FTAG);
 437         }
 438         history_str_free(buf);
 439 }
 440 
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * All three arguments are ignored; the result is always 0 (permitted).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (0);
}
 451 
 452 /*
 453  * Policy for dataset read operations (list children, get statistics).  Requires
 454  * no privileges, but must be visible in the local zone.
 455  */
 456 /* ARGSUSED */
 457 static int
 458 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 459 {
 460         if (INGLOBALZONE(curproc) ||
 461             zone_dataset_visible(zc->zc_name, NULL))
 462                 return (0);
 463 
 464         return (SET_ERROR(ENOENT));
 465 }
 466 
 467 static int
 468 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 469 {
 470         int writable = 1;
 471 
 472         /*
 473          * The dataset must be visible by this zone -- check this first
 474          * so they don't see EPERM on something they shouldn't know about.
 475          */
 476         if (!INGLOBALZONE(curproc) &&
 477             !zone_dataset_visible(dataset, &writable))
 478                 return (SET_ERROR(ENOENT));
 479 
 480         if (INGLOBALZONE(curproc)) {
 481                 /*
 482                  * If the fs is zoned, only root can access it from the
 483                  * global zone.
 484                  */
 485                 if (secpolicy_zfs(cr) && zoned)
 486                         return (SET_ERROR(EPERM));
 487         } else {
 488                 /*
 489                  * If we are in a local zone, the 'zoned' property must be set.
 490                  */
 491                 if (!zoned)
 492                         return (SET_ERROR(EPERM));
 493 
 494                 /* must be writable by this zone */
 495                 if (!writable)
 496                         return (SET_ERROR(EPERM));
 497         }
 498         return (0);
 499 }
 500 
 501 static int
 502 zfs_dozonecheck(const char *dataset, cred_t *cr)
 503 {
 504         uint64_t zoned;
 505 
 506         if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
 507                 return (SET_ERROR(ENOENT));
 508 
 509         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 510 }
 511 
 512 static int
 513 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 514 {
 515         uint64_t zoned;
 516 
 517         if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
 518                 return (SET_ERROR(ENOENT));
 519 
 520         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 521 }
 522 
 523 static int
 524 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
 525     const char *perm, cred_t *cr)
 526 {
 527         int error;
 528 
 529         error = zfs_dozonecheck_ds(name, ds, cr);
 530         if (error == 0) {
 531                 error = secpolicy_zfs(cr);
 532                 if (error != 0)
 533                         error = dsl_deleg_access_impl(ds, perm, cr);
 534         }
 535         return (error);
 536 }
 537 
 538 static int
 539 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 540 {
 541         int error;
 542         dsl_dataset_t *ds;
 543         dsl_pool_t *dp;
 544 
 545         /*
 546          * First do a quick check for root in the global zone, which
 547          * is allowed to do all write_perms.  This ensures that zfs_ioc_*
 548          * will get to handle nonexistent datasets.
 549          */
 550         if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
 551                 return (0);
 552 
 553         error = dsl_pool_hold(name, FTAG, &dp);
 554         if (error != 0)
 555                 return (error);
 556 
 557         error = dsl_dataset_hold(dp, name, FTAG, &ds);
 558         if (error != 0) {
 559                 dsl_pool_rele(dp, FTAG);
 560                 return (error);
 561         }
 562 
 563         error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
 564 
 565         dsl_dataset_rele(ds, FTAG);
 566         dsl_pool_rele(dp, FTAG);
 567         return (error);
 568 }
 569 
 570 /*
 571  * Policy for setting the security label property.
 572  *
 573  * Returns 0 for success, non-zero for access and other errors.
 574  */
 575 static int
 576 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 577 {
 578         char            ds_hexsl[MAXNAMELEN];
 579         bslabel_t       ds_sl, new_sl;
 580         boolean_t       new_default = FALSE;
 581         uint64_t        zoned;
 582         int             needed_priv = -1;
 583         int             error;
 584 
 585         /* First get the existing dataset label. */
 586         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 587             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 588         if (error != 0)
 589                 return (SET_ERROR(EPERM));
 590 
 591         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 592                 new_default = TRUE;
 593 
 594         /* The label must be translatable */
 595         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 596                 return (SET_ERROR(EINVAL));
 597 
 598         /*
 599          * In a non-global zone, disallow attempts to set a label that
 600          * doesn't match that of the zone; otherwise no other checks
 601          * are needed.
 602          */
 603         if (!INGLOBALZONE(curproc)) {
 604                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 605                         return (SET_ERROR(EPERM));
 606                 return (0);
 607         }
 608 
 609         /*
 610          * For global-zone datasets (i.e., those whose zoned property is
 611          * "off", verify that the specified new label is valid for the
 612          * global zone.
 613          */
 614         if (dsl_prop_get_integer(name,
 615             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 616                 return (SET_ERROR(EPERM));
 617         if (!zoned) {
 618                 if (zfs_check_global_label(name, strval) != 0)
 619                         return (SET_ERROR(EPERM));
 620         }
 621 
 622         /*
 623          * If the existing dataset label is nondefault, check if the
 624          * dataset is mounted (label cannot be changed while mounted).
 625          * Get the zfsvfs; if there isn't one, then the dataset isn't
 626          * mounted (or isn't a dataset, doesn't exist, ...).
 627          */
 628         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 629                 objset_t *os;
 630                 static char *setsl_tag = "setsl_tag";
 631 
 632                 /*
 633                  * Try to own the dataset; abort if there is any error,
 634                  * (e.g., already mounted, in use, or other error).
 635                  */
 636                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
 637                     setsl_tag, &os);
 638                 if (error != 0)
 639                         return (SET_ERROR(EPERM));
 640 
 641                 dmu_objset_disown(os, setsl_tag);
 642 
 643                 if (new_default) {
 644                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 645                         goto out_check;
 646                 }
 647 
 648                 if (hexstr_to_label(strval, &new_sl) != 0)
 649                         return (SET_ERROR(EPERM));
 650 
 651                 if (blstrictdom(&ds_sl, &new_sl))
 652                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 653                 else if (blstrictdom(&new_sl, &ds_sl))
 654                         needed_priv = PRIV_FILE_UPGRADE_SL;
 655         } else {
 656                 /* dataset currently has a default label */
 657                 if (!new_default)
 658                         needed_priv = PRIV_FILE_UPGRADE_SL;
 659         }
 660 
 661 out_check:
 662         if (needed_priv != -1)
 663                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 664         return (0);
 665 }
 666 
 667 static int
 668 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
 669     cred_t *cr)
 670 {
 671         char *strval;
 672 
 673         /*
 674          * Check permissions for special properties.
 675          */
 676         switch (prop) {
 677         case ZFS_PROP_ZONED:
 678                 /*
 679                  * Disallow setting of 'zoned' from within a local zone.
 680                  */
 681                 if (!INGLOBALZONE(curproc))
 682                         return (SET_ERROR(EPERM));
 683                 break;
 684 
 685         case ZFS_PROP_QUOTA:
 686         case ZFS_PROP_FILESYSTEM_LIMIT:
 687         case ZFS_PROP_SNAPSHOT_LIMIT:
 688                 if (!INGLOBALZONE(curproc)) {
 689                         uint64_t zoned;
 690                         char setpoint[ZFS_MAX_DATASET_NAME_LEN];
 691                         /*
 692                          * Unprivileged users are allowed to modify the
 693                          * limit on things *under* (ie. contained by)
 694                          * the thing they own.
 695                          */
 696                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
 697                             setpoint))
 698                                 return (SET_ERROR(EPERM));
 699                         if (!zoned || strlen(dsname) <= strlen(setpoint))
 700                                 return (SET_ERROR(EPERM));
 701                 }
 702                 break;
 703 
 704         case ZFS_PROP_MLSLABEL:
 705                 if (!is_system_labeled())
 706                         return (SET_ERROR(EPERM));
 707 
 708                 if (nvpair_value_string(propval, &strval) == 0) {
 709                         int err;
 710 
 711                         err = zfs_set_slabel_policy(dsname, strval, CRED());
 712                         if (err != 0)
 713                                 return (err);
 714                 }
 715                 break;
 716         }
 717 
 718         return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 719 }
 720 
 721 /* ARGSUSED */
 722 static int
 723 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 724 {
 725         int error;
 726 
 727         error = zfs_dozonecheck(zc->zc_name, cr);
 728         if (error != 0)
 729                 return (error);
 730 
 731         /*
 732          * permission to set permissions will be evaluated later in
 733          * dsl_deleg_can_allow()
 734          */
 735         return (0);
 736 }
 737 
/*
 * Policy for rollback: the caller needs write permission
 * ZFS_DELEG_PERM_ROLLBACK on the dataset named by zc->zc_name.
 * 'innvl' is not used.
 */
/* ARGSUSED */
static int
zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (zfs_secpolicy_write_perms(zc->zc_name,
            ZFS_DELEG_PERM_ROLLBACK, cr));
}
 745 
 746 /* ARGSUSED */
 747 static int
 748 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 749 {
 750         dsl_pool_t *dp;
 751         dsl_dataset_t *ds;
 752         char *cp;
 753         int error;
 754 
 755         /*
 756          * Generate the current snapshot name from the given objsetid, then
 757          * use that name for the secpolicy/zone checks.
 758          */
 759         cp = strchr(zc->zc_name, '@');
 760         if (cp == NULL)
 761                 return (SET_ERROR(EINVAL));
 762         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 763         if (error != 0)
 764                 return (error);
 765 
 766         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 767         if (error != 0) {
 768                 dsl_pool_rele(dp, FTAG);
 769                 return (error);
 770         }
 771 
 772         dsl_dataset_name(ds, zc->zc_name);
 773 
 774         error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 775             ZFS_DELEG_PERM_SEND, cr);
 776         dsl_dataset_rele(ds, FTAG);
 777         dsl_pool_rele(dp, FTAG);
 778 
 779         return (error);
 780 }
 781 
/*
 * Policy for send, by name: the caller needs write permission
 * ZFS_DELEG_PERM_SEND on the dataset named by zc->zc_name.
 * 'innvl' is not used.
 */
/* ARGSUSED */
static int
zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (zfs_secpolicy_write_perms(zc->zc_name,
            ZFS_DELEG_PERM_SEND, cr));
}
 789 
 790 /* ARGSUSED */
 791 static int
 792 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 793 {
 794         vnode_t *vp;
 795         int error;
 796 
 797         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
 798             NO_FOLLOW, NULL, &vp)) != 0)
 799                 return (error);
 800 
 801         /* Now make sure mntpnt and dataset are ZFS */
 802 
 803         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
 804             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 805             zc->zc_name) != 0)) {
 806                 VN_RELE(vp);
 807                 return (SET_ERROR(EPERM));
 808         }
 809 
 810         VN_RELE(vp);
 811         return (dsl_deleg_access(zc->zc_name,
 812             ZFS_DELEG_PERM_SHARE, cr));
 813 }
 814 
 815 int
 816 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 817 {
 818         if (secpolicy_nfs(cr) == 0) {
 819                 return (0);
 820         } else {
 821                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 822         }
 823 }
 824 
 825 int
 826 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 827 {
 828         if (secpolicy_smb(cr) == 0) {
 829                 return (0);
 830         } else {
 831                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 832         }
 833 }
 834 
 835 static int
 836 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 837 {
 838         char *cp;
 839 
 840         /*
 841          * Remove the @bla or /bla from the end of the name to get the parent.
 842          */
 843         (void) strncpy(parent, datasetname, parentsize);
 844         cp = strrchr(parent, '@');
 845         if (cp != NULL) {
 846                 cp[0] = '\0';
 847         } else {
 848                 cp = strrchr(parent, '/');
 849                 if (cp == NULL)
 850                         return (SET_ERROR(ENOENT));
 851                 cp[0] = '\0';
 852         }
 853 
 854         return (0);
 855 }
 856 
 857 int
 858 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 859 {
 860         int error;
 861 
 862         if ((error = zfs_secpolicy_write_perms(name,
 863             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 864                 return (error);
 865 
 866         return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
 867 }
 868 
/*
 * Policy for destroy: applies zfs_secpolicy_destroy_perms() to the dataset
 * named by zc->zc_name.  'innvl' is not used.
 */
/* ARGSUSED */
static int
zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
}
 875 
 876 /*
 877  * Destroying snapshots with delegated permissions requires
 878  * descendant mount and destroy permissions.
 879  */
 880 /* ARGSUSED */
 881 static int
 882 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 883 {
 884         nvlist_t *snaps;
 885         nvpair_t *pair, *nextpair;
 886         int error = 0;
 887 
 888         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 889                 return (SET_ERROR(EINVAL));
 890         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 891             pair = nextpair) {
 892                 nextpair = nvlist_next_nvpair(snaps, pair);
 893                 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
 894                 if (error == ENOENT) {
 895                         /*
 896                          * Ignore any snapshots that don't exist (we consider
 897                          * them "already destroyed").  Remove the name from the
 898                          * nvl here in case the snapshot is created between
 899                          * now and when we try to destroy it (in which case
 900                          * we don't want to destroy it since we haven't
 901                          * checked for permission).
 902                          */
 903                         fnvlist_remove_nvpair(snaps, pair);
 904                         error = 0;
 905                 }
 906                 if (error != 0)
 907                         break;
 908         }
 909 
 910         return (error);
 911 }
 912 
 913 int
 914 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 915 {
 916         char    parentname[ZFS_MAX_DATASET_NAME_LEN];
 917         int     error;
 918 
 919         if ((error = zfs_secpolicy_write_perms(from,
 920             ZFS_DELEG_PERM_RENAME, cr)) != 0)
 921                 return (error);
 922 
 923         if ((error = zfs_secpolicy_write_perms(from,
 924             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 925                 return (error);
 926 
 927         if ((error = zfs_get_parent(to, parentname,
 928             sizeof (parentname))) != 0)
 929                 return (error);
 930 
 931         if ((error = zfs_secpolicy_write_perms(parentname,
 932             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 933                 return (error);
 934 
 935         if ((error = zfs_secpolicy_write_perms(parentname,
 936             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 937                 return (error);
 938 
 939         return (error);
 940 }
 941 
 942 /* ARGSUSED */
 943 static int
 944 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 945 {
 946         return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
 947 }
 948 
 949 /* ARGSUSED */
 950 static int
 951 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 952 {
 953         dsl_pool_t *dp;
 954         dsl_dataset_t *clone;
 955         int error;
 956 
 957         error = zfs_secpolicy_write_perms(zc->zc_name,
 958             ZFS_DELEG_PERM_PROMOTE, cr);
 959         if (error != 0)
 960                 return (error);
 961 
 962         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 963         if (error != 0)
 964                 return (error);
 965 
 966         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
 967 
 968         if (error == 0) {
 969                 char parentname[ZFS_MAX_DATASET_NAME_LEN];
 970                 dsl_dataset_t *origin = NULL;
 971                 dsl_dir_t *dd;
 972                 dd = clone->ds_dir;
 973 
 974                 error = dsl_dataset_hold_obj(dd->dd_pool,
 975                     dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
 976                 if (error != 0) {
 977                         dsl_dataset_rele(clone, FTAG);
 978                         dsl_pool_rele(dp, FTAG);
 979                         return (error);
 980                 }
 981 
 982                 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
 983                     ZFS_DELEG_PERM_MOUNT, cr);
 984 
 985                 dsl_dataset_name(origin, parentname);
 986                 if (error == 0) {
 987                         error = zfs_secpolicy_write_perms_ds(parentname, origin,
 988                             ZFS_DELEG_PERM_PROMOTE, cr);
 989                 }
 990                 dsl_dataset_rele(clone, FTAG);
 991                 dsl_dataset_rele(origin, FTAG);
 992         }
 993         dsl_pool_rele(dp, FTAG);
 994         return (error);
 995 }
 996 
 997 /* ARGSUSED */
 998 static int
 999 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1000 {
1001         int error;
1002 
1003         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1004             ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
1005                 return (error);
1006 
1007         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1008             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1009                 return (error);
1010 
1011         return (zfs_secpolicy_write_perms(zc->zc_name,
1012             ZFS_DELEG_PERM_CREATE, cr));
1013 }
1014 
1015 int
1016 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1017 {
1018         return (zfs_secpolicy_write_perms(name,
1019             ZFS_DELEG_PERM_SNAPSHOT, cr));
1020 }
1021 
1022 /*
1023  * Check for permission to create each snapshot in the nvlist.
1024  */
1025 /* ARGSUSED */
1026 static int
1027 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1028 {
1029         nvlist_t *snaps;
1030         int error = 0;
1031         nvpair_t *pair;
1032 
1033         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1034                 return (SET_ERROR(EINVAL));
1035         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1036             pair = nvlist_next_nvpair(snaps, pair)) {
1037                 char *name = nvpair_name(pair);
1038                 char *atp = strchr(name, '@');
1039 
1040                 if (atp == NULL) {
1041                         error = SET_ERROR(EINVAL);
1042                         break;
1043                 }
1044                 *atp = '\0';
1045                 error = zfs_secpolicy_snapshot_perms(name, cr);
1046                 *atp = '@';
1047                 if (error != 0)
1048                         break;
1049         }
1050         return (error);
1051 }
1052 
1053 /*
1054  * Check for permission to create each snapshot in the nvlist.
1055  */
1056 /* ARGSUSED */
1057 static int
1058 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1059 {
1060         int error = 0;
1061 
1062         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1063             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1064                 char *name = nvpair_name(pair);
1065                 char *hashp = strchr(name, '#');
1066 
1067                 if (hashp == NULL) {
1068                         error = SET_ERROR(EINVAL);
1069                         break;
1070                 }
1071                 *hashp = '\0';
1072                 error = zfs_secpolicy_write_perms(name,
1073                     ZFS_DELEG_PERM_BOOKMARK, cr);
1074                 *hashp = '#';
1075                 if (error != 0)
1076                         break;
1077         }
1078         return (error);
1079 }
1080 
1081 /* ARGSUSED */
1082 static int
1083 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1084 {
1085         nvpair_t *pair, *nextpair;
1086         int error = 0;
1087 
1088         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1089             pair = nextpair) {
1090                 char *name = nvpair_name(pair);
1091                 char *hashp = strchr(name, '#');
1092                 nextpair = nvlist_next_nvpair(innvl, pair);
1093 
1094                 if (hashp == NULL) {
1095                         error = SET_ERROR(EINVAL);
1096                         break;
1097                 }
1098 
1099                 *hashp = '\0';
1100                 error = zfs_secpolicy_write_perms(name,
1101                     ZFS_DELEG_PERM_DESTROY, cr);
1102                 *hashp = '#';
1103                 if (error == ENOENT) {
1104                         /*
1105                          * Ignore any filesystems that don't exist (we consider
1106                          * their bookmarks "already destroyed").  Remove
1107                          * the name from the nvl here in case the filesystem
1108                          * is created between now and when we try to destroy
1109                          * the bookmark (in which case we don't want to
1110                          * destroy it since we haven't checked for permission).
1111                          */
1112                         fnvlist_remove_nvpair(innvl, pair);
1113                         error = 0;
1114                 }
1115                 if (error != 0)
1116                         break;
1117         }
1118 
1119         return (error);
1120 }
1121 
1122 /* ARGSUSED */
1123 static int
1124 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1125 {
1126         /*
1127          * Even root must have a proper TSD so that we know what pool
1128          * to log to.
1129          */
1130         if (tsd_get(zfs_allow_log_key) == NULL)
1131                 return (SET_ERROR(EPERM));
1132         return (0);
1133 }
1134 
1135 static int
1136 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1137 {
1138         char    parentname[ZFS_MAX_DATASET_NAME_LEN];
1139         int     error;
1140         char    *origin;
1141 
1142         if ((error = zfs_get_parent(zc->zc_name, parentname,
1143             sizeof (parentname))) != 0)
1144                 return (error);
1145 
1146         if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1147             (error = zfs_secpolicy_write_perms(origin,
1148             ZFS_DELEG_PERM_CLONE, cr)) != 0)
1149                 return (error);
1150 
1151         if ((error = zfs_secpolicy_write_perms(parentname,
1152             ZFS_DELEG_PERM_CREATE, cr)) != 0)
1153                 return (error);
1154 
1155         return (zfs_secpolicy_write_perms(parentname,
1156             ZFS_DELEG_PERM_MOUNT, cr));
1157 }
1158 
1159 /*
1160  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1161  * SYS_CONFIG privilege, which is not available in a local zone.
1162  */
1163 /* ARGSUSED */
1164 static int
1165 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1166 {
1167         if (secpolicy_sys_config(cr, B_FALSE) != 0)
1168                 return (SET_ERROR(EPERM));
1169 
1170         return (0);
1171 }
1172 
1173 /*
1174  * Policy for object to name lookups.
1175  */
1176 /* ARGSUSED */
1177 static int
1178 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1179 {
1180         int error;
1181 
1182         if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1183                 return (0);
1184 
1185         error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1186         return (error);
1187 }
1188 
1189 /*
1190  * Policy for fault injection.  Requires all privileges.
1191  */
1192 /* ARGSUSED */
1193 static int
1194 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1195 {
1196         return (secpolicy_zinject(cr));
1197 }
1198 
1199 /* ARGSUSED */
1200 static int
1201 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1202 {
1203         zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1204 
1205         if (prop == ZPROP_INVAL) {
1206                 if (!zfs_prop_user(zc->zc_value))
1207                         return (SET_ERROR(EINVAL));
1208                 return (zfs_secpolicy_write_perms(zc->zc_name,
1209                     ZFS_DELEG_PERM_USERPROP, cr));
1210         } else {
1211                 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1212                     NULL, cr));
1213         }
1214 }
1215 
1216 static int
1217 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1218 {
1219         int err = zfs_secpolicy_read(zc, innvl, cr);
1220         if (err)
1221                 return (err);
1222 
1223         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1224                 return (SET_ERROR(EINVAL));
1225 
1226         if (zc->zc_value[0] == 0) {
1227                 /*
1228                  * They are asking about a posix uid/gid.  If it's
1229                  * themself, allow it.
1230                  */
1231                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1232                     zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1233                         if (zc->zc_guid == crgetuid(cr))
1234                                 return (0);
1235                 } else {
1236                         if (groupmember(zc->zc_guid, cr))
1237                                 return (0);
1238                 }
1239         }
1240 
1241         return (zfs_secpolicy_write_perms(zc->zc_name,
1242             userquota_perms[zc->zc_objset_type], cr));
1243 }
1244 
1245 static int
1246 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1247 {
1248         int err = zfs_secpolicy_read(zc, innvl, cr);
1249         if (err)
1250                 return (err);
1251 
1252         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1253                 return (SET_ERROR(EINVAL));
1254 
1255         return (zfs_secpolicy_write_perms(zc->zc_name,
1256             userquota_perms[zc->zc_objset_type], cr));
1257 }
1258 
1259 /* ARGSUSED */
1260 static int
1261 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1262 {
1263         return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1264             NULL, cr));
1265 }
1266 
1267 /* ARGSUSED */
1268 static int
1269 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1270 {
1271         nvpair_t *pair;
1272         nvlist_t *holds;
1273         int error;
1274 
1275         error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1276         if (error != 0)
1277                 return (SET_ERROR(EINVAL));
1278 
1279         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1280             pair = nvlist_next_nvpair(holds, pair)) {
1281                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1282                 error = dmu_fsname(nvpair_name(pair), fsname);
1283                 if (error != 0)
1284                         return (error);
1285                 error = zfs_secpolicy_write_perms(fsname,
1286                     ZFS_DELEG_PERM_HOLD, cr);
1287                 if (error != 0)
1288                         return (error);
1289         }
1290         return (0);
1291 }
1292 
1293 /* ARGSUSED */
1294 static int
1295 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1296 {
1297         nvpair_t *pair;
1298         int error;
1299 
1300         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1301             pair = nvlist_next_nvpair(innvl, pair)) {
1302                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1303                 error = dmu_fsname(nvpair_name(pair), fsname);
1304                 if (error != 0)
1305                         return (error);
1306                 error = zfs_secpolicy_write_perms(fsname,
1307                     ZFS_DELEG_PERM_RELEASE, cr);
1308                 if (error != 0)
1309                         return (error);
1310         }
1311         return (0);
1312 }
1313 
1314 /*
1315  * Policy for allowing temporary snapshots to be taken or released
1316  */
1317 static int
1318 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1319 {
1320         /*
1321          * A temporary snapshot is the same as a snapshot,
1322          * hold, destroy and release all rolled into one.
1323          * Delegated diff alone is sufficient that we allow this.
1324          */
1325         int error;
1326 
1327         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1328             ZFS_DELEG_PERM_DIFF, cr)) == 0)
1329                 return (0);
1330 
1331         error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1332         if (error == 0)
1333                 error = zfs_secpolicy_hold(zc, innvl, cr);
1334         if (error == 0)
1335                 error = zfs_secpolicy_release(zc, innvl, cr);
1336         if (error == 0)
1337                 error = zfs_secpolicy_destroy(zc, innvl, cr);
1338         return (error);
1339 }
1340 
1341 /*
1342  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1343  */
1344 static int
1345 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1346 {
1347         char *packed;
1348         int error;
1349         nvlist_t *list = NULL;
1350 
1351         /*
1352          * Read in and unpack the user-supplied nvlist.
1353          */
1354         if (size == 0)
1355                 return (SET_ERROR(EINVAL));
1356 
1357         packed = kmem_alloc(size, KM_SLEEP);
1358 
1359         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1360             iflag)) != 0) {
1361                 kmem_free(packed, size);
1362                 return (SET_ERROR(EFAULT));
1363         }
1364 
1365         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1366                 kmem_free(packed, size);
1367                 return (error);
1368         }
1369 
1370         kmem_free(packed, size);
1371 
1372         *nvp = list;
1373         return (0);
1374 }
1375 
1376 /*
1377  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1378  * Entries will be removed from the end of the nvlist, and one int32 entry
1379  * named "N_MORE_ERRORS" will be added indicating how many entries were
1380  * removed.
1381  */
1382 static int
1383 nvlist_smush(nvlist_t *errors, size_t max)
1384 {
1385         size_t size;
1386 
1387         size = fnvlist_size(errors);
1388 
1389         if (size > max) {
1390                 nvpair_t *more_errors;
1391                 int n = 0;
1392 
1393                 if (max < 1024)
1394                         return (SET_ERROR(ENOMEM));
1395 
1396                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1397                 more_errors = nvlist_prev_nvpair(errors, NULL);
1398 
1399                 do {
1400                         nvpair_t *pair = nvlist_prev_nvpair(errors,
1401                             more_errors);
1402                         fnvlist_remove_nvpair(errors, pair);
1403                         n++;
1404                         size = fnvlist_size(errors);
1405                 } while (size > max);
1406 
1407                 fnvlist_remove_nvpair(errors, more_errors);
1408                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1409                 ASSERT3U(fnvlist_size(errors), <=, max);
1410         }
1411 
1412         return (0);
1413 }
1414 
1415 /*
1416  * Callers will know whether there's anything to unpack based on ret non-0/errno
1417  * set to ENOMEM, but observers (e.g truss) need the message properly marked to
1418  * know if it should be unpacked and displayed. Don't marked as filled unless
1419  * completely successful. If there's a non-empty nvlist, set size to its nvl
1420  * size as resize hint.
1421  */
1422 static int
1423 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1424 {
1425         char *packed = NULL;
1426         int error = 0;
1427         size_t size;
1428 
1429         size = fnvlist_size(nvl);
1430 
1431         zc->zc_nvlist_dst_filled = B_FALSE;
1432         if (size > zc->zc_nvlist_dst_size) {
1433                 error = SET_ERROR(ENOMEM);
1434         } else {
1435                 packed = fnvlist_pack(nvl, &size);
1436                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1437                     size, zc->zc_iflags) != 0)
1438                         error = SET_ERROR(EFAULT);
1439                 else
1440                         zc->zc_nvlist_dst_filled = B_TRUE;
1441                 fnvlist_pack_free(packed, size);
1442         }
1443 
1444         zc->zc_nvlist_dst_size = size;
1445         return (error);
1446 }
1447 
1448 static int
1449 getzfsvfs_from_ds(dsl_dataset_t *ds, zfsvfs_t **zfvp)
1450 {
1451         objset_t *os;
1452         int error;
1453         dsl_pool_t *dp;
1454 
1455         dp = ds->ds_dir->dd_pool;
1456         dsl_pool_config_enter(dp, FTAG);
1457 
1458         /*
1459          * IU:  we probably need to hold dataset here.
1460          *      For now let's assume we do.
1461          *      May need revision later.
1462          */
1463         dsl_dataset_long_hold(ds, FTAG);
1464         error = dmu_objset_from_ds(ds, &os);
1465         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1466                 dsl_dataset_long_rele(ds, FTAG);
1467                 dsl_pool_config_exit(dp, FTAG);
1468                 return (EINVAL);
1469         }
1470 
1471         mutex_enter(&os->os_user_ptr_lock);
1472         *zfvp = dmu_objset_get_user(os);
1473         if (*zfvp) {
1474                 VFS_HOLD((*zfvp)->z_vfs);
1475         } else {
1476                 error = ESRCH;
1477         }
1478         mutex_exit(&os->os_user_ptr_lock);
1479         dsl_dataset_long_rele(ds, FTAG);
1480         dsl_pool_config_exit(dp, FTAG);
1481         return (error);
1482 }
1483 
1484 int
1485 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1486 {
1487         int error = 0;
1488         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1489                 return (SET_ERROR(EINVAL));
1490         }
1491 
1492         mutex_enter(&os->os_user_ptr_lock);
1493         *zfvp = dmu_objset_get_user(os);
1494         if (*zfvp) {
1495                 VFS_HOLD((*zfvp)->z_vfs);
1496         } else {
1497                 error = SET_ERROR(ESRCH);
1498         }
1499         mutex_exit(&os->os_user_ptr_lock);
1500         return (error);
1501 }
1502 
1503 int
1504 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1505 {
1506         objset_t *os;
1507         int error;
1508 
1509         error = dmu_objset_hold(dsname, FTAG, &os);
1510         if (error != 0)
1511                 return (error);
1512 
1513         error = getzfsvfs_impl(os, zfvp);
1514         dmu_objset_rele(os, FTAG);
1515         return (error);
1516 }
1517 
1518 /*
1519  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1520  * case its z_vfs will be NULL, and it will be opened as the owner.
1521  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1522  * which prevents all vnode ops from running.
1523  */
1524 static int
1525 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1526 {
1527         int error = 0;
1528 
1529         if (getzfsvfs(name, zfvp) != 0)
1530                 error = zfsvfs_create(name, zfvp);
1531         if (error == 0) {
1532                 rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1533                     RW_READER, tag);
1534                 if ((*zfvp)->z_unmounted) {
1535                         /*
1536                          * XXX we could probably try again, since the unmounting
1537                          * thread should be just about to disassociate the
1538                          * objset from the zfsvfs.
1539                          */
1540                         rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1541                         return (SET_ERROR(EBUSY));
1542                 }
1543         }
1544         return (error);
1545 }
1546 
1547 static void
1548 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1549 {
1550         rrm_exit(&zfsvfs->z_teardown_lock, tag);
1551 
1552         if (zfsvfs->z_vfs) {
1553                 VFS_RELE(zfsvfs->z_vfs);
1554         } else {
1555                 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1556                 zfsvfs_free(zfsvfs);
1557         }
1558 }
1559 
1560 
1561 /*
1562  * Publish events using GPEC subsystem
1563  */
1564 
1565 static evchan_t *zfs_channel = NULL;
1566 
1567 void
1568 zfs_event_post(const char *subclass, const char *operation, nvlist_t *ev_data)
1569 {
1570 
1571         if (zfs_channel == NULL)
1572                 goto out;
1573 
1574         fnvlist_add_string(ev_data, "operation", operation);
1575 
1576         (void) sysevent_evc_publish(zfs_channel, subclass, operation,
1577             "com.nexenta", "zfs-kernel", ev_data, EVCH_NOSLEEP);
1578 
1579 out:
1580         fnvlist_free(ev_data);
1581 }
1582 
1583 static int
1584 zfs_ioc_pool_create(zfs_cmd_t *zc)
1585 {
1586         int error;
1587         nvlist_t *config, *props = NULL;
1588         nvlist_t *rootprops = NULL;
1589         nvlist_t *zplprops = NULL;
1590         nvlist_t *event;
1591 
1592         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1593             zc->zc_iflags, &config))
1594                 return (error);
1595 
1596         if (zc->zc_nvlist_src_size != 0 && (error =
1597             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1598             zc->zc_iflags, &props))) {
1599                 nvlist_free(config);
1600                 return (error);
1601         }
1602 
1603         if (props) {
1604                 nvlist_t *nvl = NULL;
1605                 uint64_t version = SPA_VERSION;
1606 
1607                 (void) nvlist_lookup_uint64(props,
1608                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1609                 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1610                         error = SET_ERROR(EINVAL);
1611                         goto pool_props_bad;
1612                 }
1613                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1614                 if (nvl) {
1615                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1616                         if (error != 0) {
1617                                 nvlist_free(config);
1618                                 nvlist_free(props);
1619                                 return (error);
1620                         }
1621                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1622                 }
1623                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1624                 error = zfs_fill_zplprops_root(version, rootprops,
1625                     zplprops, NULL);
1626                 if (error != 0)
1627                         goto pool_props_bad;
1628         }
1629 
1630         error = spa_create(zc->zc_name, config, props, zplprops);
1631 
1632         /*
1633          * Set the remaining root properties
1634          */
1635         if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1636             ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1637                 (void) spa_destroy(zc->zc_name);
1638 
1639         if (error == 0) {
1640                 event = fnvlist_alloc();
1641                 fnvlist_add_string(event, "name", zc->zc_name);
1642                 fnvlist_add_nvlist(event, "config", config);
1643                 if (props != NULL)
1644                         fnvlist_add_nvlist(event, "props", props);
1645                 zfs_event_post(ZPOOL_EC_STATUS, "create", event);
1646         }
1647 
1648 pool_props_bad:
1649         nvlist_free(rootprops);
1650         nvlist_free(zplprops);
1651         nvlist_free(config);
1652         nvlist_free(props);
1653 
1654         return (error);
1655 }
1656 
1657 static int
1658 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1659 {
1660         int error;
1661         nvlist_t *event;
1662         zfs_log_history(zc);
1663         error = spa_destroy(zc->zc_name);
1664         if (error == 0) {
1665                 zvol_remove_minors(zc->zc_name);
1666                 event = fnvlist_alloc();
1667                 fnvlist_add_string(event, "pool", zc->zc_name);
1668                 zfs_event_post(ZPOOL_EC_STATUS, "destroy", event);
1669         }
1670         return (error);
1671 }
1672 
1673 static int
1674 zfs_ioc_pool_import(zfs_cmd_t *zc)
1675 {
1676         nvlist_t *config, *props = NULL;
1677         uint64_t guid;
1678         int error;
1679         nvlist_t *event;
1680 
1681         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1682             zc->zc_iflags, &config)) != 0)
1683                 return (error);
1684 
1685         if (zc->zc_nvlist_src_size != 0 && (error =
1686             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1687             zc->zc_iflags, &props))) {
1688                 nvlist_free(config);
1689                 return (error);
1690         }
1691 
1692         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1693             guid != zc->zc_guid)
1694                 error = SET_ERROR(EINVAL);
1695         else
1696                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1697 
1698         if (error == 0) {
1699                 event = fnvlist_alloc();
1700                 fnvlist_add_string(event, "pool", zc->zc_name);
1701                 fnvlist_add_uint64(event, "guid", zc->zc_guid);
1702                 fnvlist_add_nvlist(event, "config", config);
1703                 if (props != NULL)
1704                         fnvlist_add_nvlist(event, "props", props);
1705                 zfs_event_post(ZPOOL_EC_STATUS, "import", event);
1706         }
1707 
1708         if (zc->zc_nvlist_dst != 0) {
1709                 int err;
1710 
1711                 if ((err = put_nvlist(zc, config)) != 0)
1712                         error = err;
1713         }
1714 
1715         nvlist_free(config);
1716 
1717         nvlist_free(props);
1718 
1719         return (error);
1720 }
1721 
1722 static int
1723 zfs_ioc_pool_export(zfs_cmd_t *zc)
1724 {
1725         int error;
1726         boolean_t force = (boolean_t)zc->zc_cookie;
1727         boolean_t hardforce = (boolean_t)zc->zc_guid;
1728         boolean_t saveconfig = (boolean_t)zc->zc_obj;
1729         nvlist_t *event;
1730 
1731         zfs_log_history(zc);
1732         error = spa_export(zc->zc_name, NULL, force, hardforce, saveconfig);
1733         if (error == 0) {
1734                 zvol_remove_minors(zc->zc_name);
1735                 event = fnvlist_alloc();
1736                 fnvlist_add_string(event, "pool", zc->zc_name);
1737                 zfs_event_post(ZPOOL_EC_STATUS, "export", event);
1738         }
1739         return (error);
1740 }
1741 
1742 static int
1743 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1744 {
1745         nvlist_t *configs;
1746         int error;
1747 
1748         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1749                 return (SET_ERROR(EEXIST));
1750 
1751         error = put_nvlist(zc, configs);
1752 
1753         nvlist_free(configs);
1754 
1755         return (error);
1756 }
1757 
1758 /*
1759  * inputs:
1760  * zc_name              name of the pool
1761  *
1762  * outputs:
1763  * zc_cookie            real errno
1764  * zc_nvlist_dst        config nvlist
1765  * zc_nvlist_dst_size   size of config nvlist
1766  */
1767 static int
1768 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1769 {
1770         nvlist_t *config;
1771         int error;
1772         int ret = 0;
1773 
1774         error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1775             sizeof (zc->zc_value));
1776 
1777         if (config != NULL) {
1778                 ret = put_nvlist(zc, config);
1779                 nvlist_free(config);
1780 
1781                 /*
1782                  * The config may be present even if 'error' is non-zero.
1783                  * In this case we return success, and preserve the real errno
1784                  * in 'zc_cookie'.
1785                  */
1786                 zc->zc_cookie = error;
1787         } else {
1788                 ret = error;
1789         }
1790 
1791         return (ret);
1792 }
1793 
1794 /*
1795  * Try to import the given pool, returning pool stats as appropriate so that
1796  * user land knows which devices are available and overall pool health.
1797  */
1798 static int
1799 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1800 {
1801         nvlist_t *tryconfig, *config;
1802         int error;
1803 
1804         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1805             zc->zc_iflags, &tryconfig)) != 0)
1806                 return (error);
1807 
1808         config = spa_tryimport(tryconfig);
1809 
1810         nvlist_free(tryconfig);
1811 
1812         if (config == NULL)
1813                 return (SET_ERROR(EINVAL));
1814 
1815         error = put_nvlist(zc, config);
1816         nvlist_free(config);
1817 
1818         return (error);
1819 }
1820 
1821 /*
1822  * inputs:
1823  * zc_name              name of the pool
1824  * zc_cookie            scan func (pool_scan_func_t)
1825  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1826  */
1827 static int
1828 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1829 {
1830         spa_t *spa;
1831         int error;
1832 
1833         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1834                 return (error);
1835 
1836         if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1837                 return (SET_ERROR(EINVAL));
1838 
1839         if (zc->zc_flags == POOL_SCRUB_PAUSE)
1840                 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1841         else if (zc->zc_cookie == POOL_SCAN_NONE)
1842                 error = spa_scan_stop(spa);
1843         else
1844                 error = spa_scan(spa, zc->zc_cookie);
1845 
1846         spa_close(spa, FTAG);
1847 
1848         return (error);
1849 }
1850 
1851 /*
1852  * inputs:
1853  * zc_name              name of the pool
1854  * zc_cookie            trim_cmd_info_t
1855  */
1856 static int
1857 zfs_ioc_pool_trim(zfs_cmd_t *zc)
1858 {
1859         spa_t *spa;
1860         int error;
1861         trim_cmd_info_t tci;
1862 
1863         if (ddi_copyin((void *)(uintptr_t)zc->zc_cookie, &tci,
1864             sizeof (tci), 0) == -1)
1865                 return (EFAULT);
1866 
1867         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1868                 return (error);
1869 
1870         if (tci.tci_start) {
1871                 spa_man_trim(spa, tci.tci_rate);
1872         } else {
1873                 spa_man_trim_stop(spa);
1874         }
1875 
1876         spa_close(spa, FTAG);
1877 
1878         return (error);
1879 }
1880 
1881 static int
1882 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1883 {
1884         spa_t *spa;
1885         int error;
1886 
1887         error = spa_open(zc->zc_name, &spa, FTAG);
1888         if (error == 0) {
1889                 spa_freeze(spa);
1890                 spa_close(spa, FTAG);
1891         }
1892         return (error);
1893 }
1894 
1895 static int
1896 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1897 {
1898         spa_t *spa;
1899         int error;
1900 
1901         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1902                 return (error);
1903 
1904         if (zc->zc_cookie < spa_version(spa) ||
1905             !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1906                 spa_close(spa, FTAG);
1907                 return (SET_ERROR(EINVAL));
1908         }
1909 
1910         spa_upgrade(spa, zc->zc_cookie);
1911         spa_close(spa, FTAG);
1912 
1913         return (error);
1914 }
1915 
1916 static int
1917 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1918 {
1919         spa_t *spa;
1920         char *hist_buf;
1921         uint64_t size;
1922         int error;
1923 
1924         if ((size = zc->zc_history_len) == 0)
1925                 return (SET_ERROR(EINVAL));
1926 
1927         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1928                 return (error);
1929 
1930         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1931                 spa_close(spa, FTAG);
1932                 return (SET_ERROR(ENOTSUP));
1933         }
1934 
1935         hist_buf = kmem_alloc(size, KM_SLEEP);
1936         if ((error = spa_history_get(spa, &zc->zc_history_offset,
1937             &zc->zc_history_len, hist_buf)) == 0) {
1938                 error = ddi_copyout(hist_buf,
1939                     (void *)(uintptr_t)zc->zc_history,
1940                     zc->zc_history_len, zc->zc_iflags);
1941         }
1942 
1943         spa_close(spa, FTAG);
1944         kmem_free(hist_buf, size);
1945         return (error);
1946 }
1947 
1948 static int
1949 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1950 {
1951         spa_t *spa;
1952         int error;
1953 
1954         error = spa_open(zc->zc_name, &spa, FTAG);
1955         if (error == 0) {
1956                 error = spa_change_guid(spa);
1957                 spa_close(spa, FTAG);
1958         }
1959         return (error);
1960 }
1961 
1962 static int
1963 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1964 {
1965         return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1966 }
1967 
1968 /*
1969  * inputs:
1970  * zc_name              name of filesystem
1971  * zc_obj               object to find
1972  *
1973  * outputs:
1974  * zc_value             name of object
1975  */
1976 static int
1977 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1978 {
1979         objset_t *os;
1980         int error;
1981 
1982         /* XXX reading from objset not owned */
1983         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1984                 return (error);
1985         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1986                 dmu_objset_rele(os, FTAG);
1987                 return (SET_ERROR(EINVAL));
1988         }
1989         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1990             sizeof (zc->zc_value));
1991         dmu_objset_rele(os, FTAG);
1992 
1993         return (error);
1994 }
1995 
1996 /*
1997  * inputs:
1998  * zc_name              name of filesystem
1999  * zc_obj               object to find
2000  *
2001  * outputs:
2002  * zc_stat              stats on object
2003  * zc_value             path to object
2004  */
2005 static int
2006 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
2007 {
2008         objset_t *os;
2009         int error;
2010 
2011         /* XXX reading from objset not owned */
2012         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
2013                 return (error);
2014         if (dmu_objset_type(os) != DMU_OST_ZFS) {
2015                 dmu_objset_rele(os, FTAG);
2016                 return (SET_ERROR(EINVAL));
2017         }
2018         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
2019             sizeof (zc->zc_value));
2020         dmu_objset_rele(os, FTAG);
2021 
2022         return (error);
2023 }
2024 
2025 static int
2026 zfs_ioc_vdev_add(zfs_cmd_t *zc)
2027 {
2028         spa_t *spa;
2029         int error;
2030         nvlist_t *config, **l2cache, **spares;
2031         uint_t nl2cache = 0, nspares = 0;
2032         nvlist_t *event;
2033 
2034         error = spa_open(zc->zc_name, &spa, FTAG);
2035         if (error != 0)
2036                 return (error);
2037 
2038         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2039             zc->zc_iflags, &config);
2040         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
2041             &l2cache, &nl2cache);
2042 
2043         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
2044             &spares, &nspares);
2045 
2046         /*
2047          * A root pool with concatenated devices is not supported.
2048          * Thus, can not add a device to a root pool.
2049          *
2050          * Intent log device can not be added to a rootpool because
2051          * during mountroot, zil is replayed, a seperated log device
2052          * can not be accessed during the mountroot time.
2053          *
2054          * l2cache and spare devices are ok to be added to a rootpool.
2055          */
2056         if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
2057                 nvlist_free(config);
2058                 spa_close(spa, FTAG);
2059                 return (SET_ERROR(EDOM));
2060         }
2061 
2062         if (error == 0) {
2063                 error = spa_vdev_add(spa, config);
2064                 if (error == 0) {
2065                         event = fnvlist_alloc();
2066                         fnvlist_add_string(event, "pool", zc->zc_name);
2067                         fnvlist_add_nvlist(event, "config", config);
2068                         zfs_event_post(ZPOOL_EC_STATUS, "add", event);
2069 
2070                 }
2071                 nvlist_free(config);
2072         }
2073         spa_close(spa, FTAG);
2074         return (error);
2075 }
2076 
2077 /*
2078  * inputs:
2079  * zc_name              name of the pool
2080  * zc_nvlist_conf       nvlist of devices to remove
2081  * zc_cookie            to stop the remove?
2082  */
2083 static int
2084 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
2085 {
2086         spa_t *spa;
2087         int error;
2088         nvlist_t *event;
2089 
2090         error = spa_open(zc->zc_name, &spa, FTAG);
2091         if (error != 0)
2092                 return (error);
2093         error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
2094         if (error == 0) {
2095                 event = fnvlist_alloc();
2096                 fnvlist_add_string(event, "pool", zc->zc_name);
2097                 fnvlist_add_uint64(event, "guid", zc->zc_guid);
2098                 zfs_event_post(ZPOOL_EC_STATUS, "remove", event);
2099         }
2100 
2101         spa_close(spa, FTAG);
2102         return (error);
2103 }
2104 
2105 static int
2106 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2107 {
2108         spa_t *spa;
2109         int error;
2110         vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2111 
2112         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2113                 return (error);
2114         switch (zc->zc_cookie) {
2115         case VDEV_STATE_ONLINE:
2116                 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2117                 break;
2118 
2119         case VDEV_STATE_OFFLINE:
2120                 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2121                 break;
2122 
2123         case VDEV_STATE_FAULTED:
2124                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2125                     zc->zc_obj != VDEV_AUX_EXTERNAL &&
2126                     zc->zc_obj != VDEV_AUX_OPEN_FAILED)
2127                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2128 
2129                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2130                 break;
2131 
2132         case VDEV_STATE_DEGRADED:
2133                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2134                     zc->zc_obj != VDEV_AUX_EXTERNAL)
2135                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2136 
2137                 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2138                 break;
2139 
2140         default:
2141                 error = SET_ERROR(EINVAL);
2142         }
2143         zc->zc_cookie = newstate;
2144         spa_close(spa, FTAG);
2145         return (error);
2146 }
2147 
2148 static int
2149 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2150 {
2151         spa_t *spa;
2152         int replacing = zc->zc_cookie;
2153         nvlist_t *config;
2154         nvlist_t *event;
2155         int error;
2156 
2157         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2158                 return (error);
2159 
2160         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2161             zc->zc_iflags, &config)) == 0) {
2162                 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2163                 if (error == 0) {
2164                         event = fnvlist_alloc();
2165                         fnvlist_add_string(event, "pool", zc->zc_name);
2166                         fnvlist_add_nvlist(event, "config", config);
2167                         fnvlist_add_int32(event, "replacing", replacing);
2168                         zfs_event_post(ZPOOL_EC_STATUS, "attach", event);
2169                 }
2170                 nvlist_free(config);
2171         }
2172 
2173         spa_close(spa, FTAG);
2174         return (error);
2175 }
2176 
2177 static int
2178 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2179 {
2180         spa_t *spa;
2181         int error;
2182         nvlist_t *event;
2183 
2184         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2185                 return (error);
2186 
2187         error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2188         if (error == 0) {
2189                 event = fnvlist_alloc();
2190                 fnvlist_add_string(event, "pool", zc->zc_name);
2191                 fnvlist_add_uint64(event, "guid", zc->zc_guid);
2192                 zfs_event_post(ZPOOL_EC_STATUS, "detach", event);
2193         }
2194         spa_close(spa, FTAG);
2195         return (error);
2196 }
2197 
2198 static int
2199 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2200 {
2201         spa_t *spa;
2202         nvlist_t *config, *props = NULL;
2203         int error;
2204         boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2205 
2206         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2207                 return (error);
2208 
2209         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2210             zc->zc_iflags, &config)) {
2211                 spa_close(spa, FTAG);
2212                 return (error);
2213         }
2214 
2215         if (zc->zc_nvlist_src_size != 0 && (error =
2216             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2217             zc->zc_iflags, &props))) {
2218                 spa_close(spa, FTAG);
2219                 nvlist_free(config);
2220                 return (error);
2221         }
2222 
2223         error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2224 
2225         spa_close(spa, FTAG);
2226 
2227         nvlist_free(config);
2228         nvlist_free(props);
2229 
2230         return (error);
2231 }
2232 
2233 static int
2234 zfs_ioc_vdev_setl2adddt(zfs_cmd_t *zc)
2235 {
2236         spa_t *spa;
2237         int error;
2238         uint64_t guid = zc->zc_guid;
2239         char *l2ad_ddt = zc->zc_value;
2240 
2241         error = spa_open(zc->zc_name, &spa, FTAG);
2242         if (error != 0)
2243                 return (error);
2244 
2245         error = spa_vdev_setl2adddt(spa, guid, l2ad_ddt);
2246         spa_close(spa, FTAG);
2247         return (error);
2248 }
2249 
2250 
2251 static int
2252 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2253 {
2254         spa_t *spa;
2255         char *path = zc->zc_value;
2256         uint64_t guid = zc->zc_guid;
2257         int error;
2258 
2259         error = spa_open(zc->zc_name, &spa, FTAG);
2260         if (error != 0)
2261                 return (error);
2262 
2263         error = spa_vdev_setpath(spa, guid, path);
2264         spa_close(spa, FTAG);
2265         return (error);
2266 }
2267 
2268 static int
2269 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2270 {
2271         spa_t *spa;
2272         char *fru = zc->zc_value;
2273         uint64_t guid = zc->zc_guid;
2274         int error;
2275 
2276         error = spa_open(zc->zc_name, &spa, FTAG);
2277         if (error != 0)
2278                 return (error);
2279 
2280         error = spa_vdev_setfru(spa, guid, fru);
2281         spa_close(spa, FTAG);
2282         return (error);
2283 }
2284 
2285 static int
2286 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2287 {
2288         int error = 0;
2289         nvlist_t *nv;
2290 
2291         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2292 
2293         if (zc->zc_nvlist_dst != 0 &&
2294             (error = dsl_prop_get_all(os, &nv)) == 0) {
2295                 dmu_objset_stats(os, nv);
2296                 /*
2297                  * NB: zvol_get_stats() will read the objset contents,
2298                  * which we aren't supposed to do with a
2299                  * DS_MODE_USER hold, because it could be
2300                  * inconsistent.  So this is a bit of a workaround...
2301                  * XXX reading with out owning
2302                  */
2303                 if (!zc->zc_objset_stats.dds_inconsistent &&
2304                     dmu_objset_type(os) == DMU_OST_ZVOL) {
2305                         error = zvol_get_stats(os, nv);
2306                         if (error == EIO)
2307                                 return (error);
2308                         VERIFY0(error);
2309                 }
2310                 error = put_nvlist(zc, nv);
2311                 nvlist_free(nv);
2312         }
2313 
2314         return (error);
2315 }
2316 
2317 /*
2318  * inputs:
2319  * zc_name              name of filesystem
2320  * zc_nvlist_dst_size   size of buffer for property nvlist
2321  *
2322  * outputs:
2323  * zc_objset_stats      stats
2324  * zc_nvlist_dst        property nvlist
2325  * zc_nvlist_dst_size   size of property nvlist
2326  */
2327 static int
2328 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2329 {
2330         objset_t *os = NULL;
2331         int error;
2332 
2333         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2334         if (error == 0) {
2335                 error = zfs_ioc_objset_stats_impl(zc, os);
2336                 dmu_objset_rele(os, FTAG);
2337         }
2338 
2339         return (error);
2340 }
2341 
2342 /*
2343  * inputs:
2344  * zc_name              name of filesystem
2345  * zc_nvlist_dst_size   size of buffer for property nvlist
2346  *
2347  * outputs:
2348  * zc_nvlist_dst        received property nvlist
2349  * zc_nvlist_dst_size   size of received property nvlist
2350  *
2351  * Gets received properties (distinct from local properties on or after
2352  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2353  * local property values.
2354  */
2355 static int
2356 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2357 {
2358         int error = 0;
2359         nvlist_t *nv;
2360 
2361         /*
2362          * Without this check, we would return local property values if the
2363          * caller has not already received properties on or after
2364          * SPA_VERSION_RECVD_PROPS.
2365          */
2366         if (!dsl_prop_get_hasrecvd(zc->zc_name))
2367                 return (SET_ERROR(ENOTSUP));
2368 
2369         if (zc->zc_nvlist_dst != 0 &&
2370             (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2371                 error = put_nvlist(zc, nv);
2372                 nvlist_free(nv);
2373         }
2374 
2375         return (error);
2376 }
2377 
2378 static int
2379 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2380 {
2381         uint64_t value;
2382         int error;
2383 
2384         /*
2385          * zfs_get_zplprop() will either find a value or give us
2386          * the default value (if there is one).
2387          */
2388         if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2389                 return (error);
2390         VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2391         return (0);
2392 }
2393 
2394 /*
2395  * inputs:
2396  * zc_name              name of filesystem
2397  * zc_nvlist_dst_size   size of buffer for zpl property nvlist
2398  *
2399  * outputs:
2400  * zc_nvlist_dst        zpl property nvlist
2401  * zc_nvlist_dst_size   size of zpl property nvlist
2402  */
2403 static int
2404 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2405 {
2406         objset_t *os;
2407         int err;
2408 
2409         /* XXX reading without owning */
2410         if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2411                 return (err);
2412 
2413         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2414 
2415         /*
2416          * NB: nvl_add_zplprop() will read the objset contents,
2417          * which we aren't supposed to do with a DS_MODE_USER
2418          * hold, because it could be inconsistent.
2419          */
2420         if (zc->zc_nvlist_dst != NULL &&
2421             !zc->zc_objset_stats.dds_inconsistent &&
2422             dmu_objset_type(os) == DMU_OST_ZFS) {
2423                 nvlist_t *nv;
2424 
2425                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2426                 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2427                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2428                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2429                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2430                         err = put_nvlist(zc, nv);
2431                 nvlist_free(nv);
2432         } else {
2433                 err = SET_ERROR(ENOENT);
2434         }
2435         dmu_objset_rele(os, FTAG);
2436         return (err);
2437 }
2438 
2439 /*
2440  * inputs:
2441  * zc_name              name of filesystem
2442  * zc_cookie            zap cursor
2443  * zc_nvlist_dst_size   size of buffer for property nvlist
2444  *
2445  * outputs:
2446  * zc_name              name of next filesystem
2447  * zc_cookie            zap cursor
2448  * zc_objset_stats      stats
2449  * zc_nvlist_dst        property nvlist
2450  * zc_nvlist_dst_size   size of property nvlist
2451  */
2452 static int
2453 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2454 {
2455         objset_t *os;
2456         int error;
2457         char *p;
2458         size_t orig_len = strlen(zc->zc_name);
2459 
2460 top:
2461         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2462                 if (error == ENOENT)
2463                         error = SET_ERROR(ESRCH);
2464                 return (error);
2465         }
2466 
2467         p = strrchr(zc->zc_name, '/');
2468         if (p == NULL || p[1] != '\0')
2469                 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2470         p = zc->zc_name + strlen(zc->zc_name);
2471 
2472         do {
2473                 error = dmu_dir_list_next(os,
2474                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
2475                     NULL, &zc->zc_cookie);
2476                 if (error == ENOENT)
2477                         error = SET_ERROR(ESRCH);
2478         } while (error == 0 && dataset_name_hidden(zc->zc_name));
2479         dmu_objset_rele(os, FTAG);
2480 
2481         /*
2482          * If it's an internal dataset (ie. with a '$' in its name),
2483          * don't try to get stats for it, otherwise we'll return ENOENT.
2484          */
2485         if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2486                 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2487                 if (error == ENOENT) {
2488                         /* We lost a race with destroy, get the next one. */
2489                         zc->zc_name[orig_len] = '\0';
2490                         goto top;
2491                 }
2492         }
2493         return (error);
2494 }
2495 
2496 /*
2497  * inputs:
2498  * zc_name              name of filesystem
2499  * zc_cookie            zap cursor
2500  * zc_nvlist_dst_size   size of buffer for property nvlist
2501  * zc_simple            when set, only name is requested
2502  *
2503  * outputs:
2504  * zc_name              name of next snapshot
2505  * zc_objset_stats      stats
2506  * zc_nvlist_dst        property nvlist
2507  * zc_nvlist_dst_size   size of property nvlist
2508  */
2509 static int
2510 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2511 {
2512         objset_t *os;
2513         int error;
2514 
2515         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2516         if (error != 0) {
2517                 return (error == ENOENT ? ESRCH : error);
2518         }
2519 
2520         /*
2521          * A dataset name of maximum length cannot have any snapshots,
2522          * so exit immediately.
2523          */
2524         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2525             ZFS_MAX_DATASET_NAME_LEN) {
2526                 dmu_objset_rele(os, FTAG);
2527                 return (SET_ERROR(ESRCH));
2528         }
2529 
2530         error = dmu_snapshot_list_next(os,
2531             sizeof (zc->zc_name) - strlen(zc->zc_name),
2532             zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2533             NULL);
2534 
2535         if (error == 0 && !zc->zc_simple) {
2536                 dsl_dataset_t *ds;
2537                 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2538 
2539                 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2540                 if (error == 0) {
2541                         objset_t *ossnap;
2542 
2543                         error = dmu_objset_from_ds(ds, &ossnap);
2544                         if (error == 0)
2545                                 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2546                         dsl_dataset_rele(ds, FTAG);
2547                 }
2548         } else if (error == ENOENT) {
2549                 error = SET_ERROR(ESRCH);
2550         }
2551 
2552         dmu_objset_rele(os, FTAG);
2553         /* if we failed, undo the @ that we tacked on to zc_name */
2554         if (error != 0)
2555                 *strchr(zc->zc_name, '@') = '\0';
2556         return (error);
2557 }
2558 
2559 static int
2560 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2561 {
2562         const char *propname = nvpair_name(pair);
2563         uint64_t *valary;
2564         unsigned int vallen;
2565         const char *domain;
2566         char *dash;
2567         zfs_userquota_prop_t type;
2568         uint64_t rid;
2569         uint64_t quota;
2570         zfsvfs_t *zfsvfs;
2571         int err;
2572 
2573         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2574                 nvlist_t *attrs;
2575                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2576                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2577                     &pair) != 0)
2578                         return (SET_ERROR(EINVAL));
2579         }
2580 
2581         /*
2582          * A correctly constructed propname is encoded as
2583          * userquota@<rid>-<domain>.
2584          */
2585         if ((dash = strchr(propname, '-')) == NULL ||
2586             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2587             vallen != 3)
2588                 return (SET_ERROR(EINVAL));
2589 
2590         domain = dash + 1;
2591         type = valary[0];
2592         rid = valary[1];
2593         quota = valary[2];
2594 
2595         err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2596         if (err == 0) {
2597                 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2598                 zfsvfs_rele(zfsvfs, FTAG);
2599         }
2600 
2601         return (err);
2602 }
2603 
2604 /*
2605  * If the named property is one that has a special function to set its value,
2606  * return 0 on success and a positive error code on failure; otherwise if it is
2607  * not one of the special properties handled by this function, return -1.
2608  *
2609  * XXX: It would be better for callers of the property interface if we handled
2610  * these special cases in dsl_prop.c (in the dsl layer).
2611  */
2612 static int
2613 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2614     nvpair_t *pair)
2615 {
2616         const char *propname = nvpair_name(pair);
2617         zfs_prop_t prop = zfs_name_to_prop(propname);
2618         uint64_t intval;
2619         int err = -1;
2620 
2621         if (prop == ZPROP_INVAL) {
2622                 if (zfs_prop_userquota(propname))
2623                         return (zfs_prop_set_userquota(dsname, pair));
2624                 return (-1);
2625         }
2626 
2627         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2628                 nvlist_t *attrs;
2629                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2630                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2631                     &pair) == 0);
2632         }
2633 
2634         if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2635                 return (-1);
2636 
2637         VERIFY(0 == nvpair_value_uint64(pair, &intval));
2638 
2639         switch (prop) {
2640         case ZFS_PROP_QUOTA:
2641                 err = dsl_dir_set_quota(dsname, source, intval);
2642                 break;
2643         case ZFS_PROP_REFQUOTA:
2644                 err = dsl_dataset_set_refquota(dsname, source, intval);
2645                 break;
2646         case ZFS_PROP_FILESYSTEM_LIMIT:
2647         case ZFS_PROP_SNAPSHOT_LIMIT:
2648                 if (intval == UINT64_MAX) {
2649                         /* clearing the limit, just do it */
2650                         err = 0;
2651                 } else {
2652                         err = dsl_dir_activate_fs_ss_limit(dsname);
2653                 }
2654                 /*
2655                  * Set err to -1 to force the zfs_set_prop_nvlist code down the
2656                  * default path to set the value in the nvlist.
2657                  */
2658                 if (err == 0)
2659                         err = -1;
2660                 break;
2661         case ZFS_PROP_RESERVATION:
2662                 err = dsl_dir_set_reservation(dsname, source, intval);
2663                 break;
2664         case ZFS_PROP_REFRESERVATION:
2665                 err = dsl_dataset_set_refreservation(dsname, source, intval);
2666                 break;
2667         case ZFS_PROP_VOLSIZE:
2668                 err = zvol_set_volsize(dsname, intval);
2669                 break;
2670         case ZFS_PROP_VERSION:
2671         {
2672                 zfsvfs_t *zfsvfs;
2673 
2674                 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2675                         break;
2676 
2677                 err = zfs_set_version(zfsvfs, intval);
2678                 zfsvfs_rele(zfsvfs, FTAG);
2679 
2680                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2681                         zfs_cmd_t *zc;
2682 
2683                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2684                         (void) strcpy(zc->zc_name, dsname);
2685                         (void) zfs_ioc_userspace_upgrade(zc);
2686                         kmem_free(zc, sizeof (zfs_cmd_t));
2687                 }
2688                 break;
2689         }
2690         default:
2691                 err = -1;
2692         }
2693 
2694         return (err);
2695 }
2696 
2697 /*
2698  * This function is best effort. If it fails to set any of the given properties,
2699  * it continues to set as many as it can and returns the last error
2700  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2701  * with the list of names of all the properties that failed along with the
2702  * corresponding error numbers.
2703  *
2704  * If every property is set successfully, zero is returned and errlist is not
2705  * modified.
2706  */
2707 int
2708 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2709     nvlist_t *errlist)
2710 {
2711         spa_t *spa = NULL;
2712         nvpair_t *pair;
2713         nvpair_t *propval;
2714         int rv = 0;
2715         uint64_t intval;
2716         char *strval;
2717         nvlist_t *genericnvl = fnvlist_alloc();
2718         nvlist_t *retrynvl = fnvlist_alloc();
2719         zfsvfs_t *zfsvfs;
2720         boolean_t set_worm = B_FALSE;
2721         boolean_t set_wbc_mode = B_FALSE;
2722         boolean_t wbc_walk_locked = B_FALSE;
2723         boolean_t set_dedup = B_FALSE;
2724 
2725         if ((rv = spa_open(dsname, &spa, FTAG)) != 0)
2726                 return (rv);
2727 
2728 retry:
2729         pair = NULL;
2730         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2731                 const char *propname = nvpair_name(pair);
2732                 zfs_prop_t prop = zfs_name_to_prop(propname);
2733                 int err = 0;
2734 
2735                 if (!set_worm && (strcmp(propname, "nms:worm") == 0)) {
2736                         set_worm = B_TRUE;
2737                 }
2738 
2739                 /*
2740                  * If 'wbc_mode' is going to be changed, then we need to
2741                  * do some actions before 'set'
2742                  */
2743                 if (prop == ZFS_PROP_WBC_MODE)
2744                         set_wbc_mode = B_TRUE;
2745 
2746                 /*
2747                  *
2748                  */
2749                 if (prop == ZFS_PROP_DEDUP)
2750                         set_dedup = B_TRUE;
2751 
2752                 /* decode the property value */
2753                 propval = pair;
2754                 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2755                         nvlist_t *attrs;
2756                         attrs = fnvpair_value_nvlist(pair);
2757                         if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2758                             &propval) != 0)
2759                                 err = SET_ERROR(EINVAL);
2760                 }
2761 
2762                 /* Validate value type */
2763                 if (err == 0 && prop == ZPROP_INVAL) {
2764                         if (zfs_prop_user(propname)) {
2765                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
2766                                         err = SET_ERROR(EINVAL);
2767                         } else if (zfs_prop_userquota(propname)) {
2768                                 if (nvpair_type(propval) !=
2769                                     DATA_TYPE_UINT64_ARRAY)
2770                                         err = SET_ERROR(EINVAL);
2771                         } else {
2772                                 err = SET_ERROR(EINVAL);
2773                         }
2774                 } else if (err == 0) {
2775                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2776                                 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2777                                         err = SET_ERROR(EINVAL);
2778                         } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2779                                 const char *unused;
2780 
2781                                 intval = fnvpair_value_uint64(propval);
2782 
2783                                 switch (zfs_prop_get_type(prop)) {
2784                                 case PROP_TYPE_NUMBER:
2785                                         break;
2786                                 case PROP_TYPE_STRING:
2787                                         err = SET_ERROR(EINVAL);
2788                                         break;
2789                                 case PROP_TYPE_INDEX:
2790                                         if (zfs_prop_index_to_string(prop,
2791                                             intval, &unused) != 0)
2792                                                 err = SET_ERROR(EINVAL);
2793                                         break;
2794                                 default:
2795                                         cmn_err(CE_PANIC,
2796                                             "unknown property type");
2797                                 }
2798                         } else {
2799                                 err = SET_ERROR(EINVAL);
2800                         }
2801                 }
2802 
2803                 /* Validate permissions */
2804                 if (err == 0)
2805                         err = zfs_check_settable(dsname, pair, CRED());
2806 
2807                 if (err == 0) {
2808                         err = zfs_prop_set_special(dsname, source, pair);
2809                         if (err == -1) {
2810                                 /*
2811                                  * For better performance we build up a list of
2812                                  * properties to set in a single transaction.
2813                                  */
2814                                 err = nvlist_add_nvpair(genericnvl, pair);
2815                         } else if (err != 0 && nvl != retrynvl) {
2816                                 /*
2817                                  * This may be a spurious error caused by
2818                                  * receiving quota and reservation out of order.
2819                                  * Try again in a second pass.
2820                                  */
2821                                 err = nvlist_add_nvpair(retrynvl, pair);
2822                         }
2823                 }
2824 
2825                 if (err != 0) {
2826                         if (errlist != NULL)
2827                                 fnvlist_add_int32(errlist, propname, err);
2828                         rv = err;
2829                 }
2830         }
2831 
2832         if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2833                 nvl = retrynvl;
2834                 goto retry;
2835         }
2836 
2837         /*
2838          * Deduplication and WBC cannot be used together
2839          * This code returns error also for case when
2840          * WBC is ON, DEDUP is off and a user tries
2841          * to do DEDUP=off, because in this case the code
2842          * will be more complex, but benefit is too small
2843          */
2844         if (set_wbc_mode && set_dedup) {
2845                 nvlist_free(genericnvl);
2846                 nvlist_free(retrynvl);
2847                 spa_close(spa, FTAG);
2848 
2849                 return (SET_ERROR(EKZFS_WBCCONFLICT));
2850         }
2851 
2852         /*
2853          * Additional actions before set wbc_mode:
2854          * - first need to try to lock WBC-walking, to stop migration and
2855          *   avoid the openning of new migration window
2856          * - second step (from sync-context): if migration window
2857          *   is active it will be purged, to correctly add/remove WBC-instance
2858          */
2859         if (set_wbc_mode && wbc_walk_lock(spa) == 0)
2860                 wbc_walk_locked = B_TRUE;
2861 
2862         if (!nvlist_empty(genericnvl) &&
2863             dsl_props_set(dsname, source, genericnvl) != 0) {
2864                 /*
2865                  * If this fails, we still want to set as many properties as we
2866                  * can, so try setting them individually.
2867                  */
2868                 pair = NULL;
2869                 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2870                         const char *propname = nvpair_name(pair);
2871                         int err = 0;
2872 
2873                         propval = pair;
2874                         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2875                                 nvlist_t *attrs;
2876                                 attrs = fnvpair_value_nvlist(pair);
2877                                 propval = fnvlist_lookup_nvpair(attrs,
2878                                     ZPROP_VALUE);
2879                         }
2880 
2881                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2882                                 strval = fnvpair_value_string(propval);
2883                                 err = dsl_prop_set_string(dsname, propname,
2884                                     source, strval);
2885                         } else {
2886                                 intval = fnvpair_value_uint64(propval);
2887                                 err = dsl_prop_set_int(dsname, propname, source,
2888                                     intval);
2889                         }
2890 
2891                         if (err != 0) {
2892                                 if (errlist != NULL) {
2893                                         fnvlist_add_int32(errlist, propname,
2894                                             err);
2895                                 }
2896                                 rv = err;
2897                         }
2898                 }
2899         }
2900         nvlist_free(genericnvl);
2901         nvlist_free(retrynvl);
2902 
2903         if (wbc_walk_locked)
2904                 wbc_walk_unlock(spa);
2905 
2906         if (set_worm && getzfsvfs(dsname, &zfsvfs) == 0) {
2907                 if (zfs_is_wormed(dsname)) {
2908                         zfsvfs->z_isworm = B_TRUE;
2909                 } else {
2910                         zfsvfs->z_isworm = B_FALSE;
2911                 }
2912                 VFS_RELE(zfsvfs->z_vfs);
2913         }
2914 
2915         if (rv == 0)
2916                 autosnap_force_snap_by_name(dsname, NULL, B_FALSE);
2917 
2918         spa_close(spa, FTAG);
2919 
2920         return (rv);
2921 }
2922 
2923 /*
2924  * Check that all the properties are valid user properties.
2925  */
2926 static int
2927 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2928 {
2929         nvpair_t *pair = NULL;
2930         int error = 0;
2931 
2932         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2933                 const char *propname = nvpair_name(pair);
2934 
2935                 if (!zfs_prop_user(propname) ||
2936                     nvpair_type(pair) != DATA_TYPE_STRING)
2937                         return (SET_ERROR(EINVAL));
2938 
2939                 if (error = zfs_secpolicy_write_perms(fsname,
2940                     ZFS_DELEG_PERM_USERPROP, CRED()))
2941                         return (error);
2942 
2943                 if (strlen(propname) >= ZAP_MAXNAMELEN)
2944                         return (SET_ERROR(ENAMETOOLONG));
2945 
2946                 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2947                         return (SET_ERROR(E2BIG));
2948         }
2949         return (0);
2950 }
2951 
2952 static void
2953 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2954 {
2955         nvpair_t *pair;
2956 
2957         VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2958 
2959         pair = NULL;
2960         while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2961                 if (nvlist_exists(skipped, nvpair_name(pair)))
2962                         continue;
2963 
2964                 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2965         }
2966 }
2967 
2968 static int
2969 clear_received_props(const char *dsname, nvlist_t *props,
2970     nvlist_t *skipped)
2971 {
2972         int err = 0;
2973         nvlist_t *cleared_props = NULL;
2974         props_skip(props, skipped, &cleared_props);
2975         if (!nvlist_empty(cleared_props)) {
2976                 /*
2977                  * Acts on local properties until the dataset has received
2978                  * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2979                  */
2980                 zprop_source_t flags = (ZPROP_SRC_NONE |
2981                     (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2982                 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2983         }
2984         nvlist_free(cleared_props);
2985         return (err);
2986 }
2987 
2988 int
2989 zfs_ioc_set_prop_impl(char *name, nvlist_t *props,
2990     boolean_t received, nvlist_t **out_errors)
2991 {
2992         int error = 0;
2993         nvlist_t *errors, *event;
2994         zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2995             ZPROP_SRC_LOCAL);
2996 
2997         ASSERT(props != NULL);
2998 
2999         if (received) {
3000                 nvlist_t *origprops;
3001 
3002                 if (dsl_prop_get_received(name, &origprops) == 0) {
3003                         (void) clear_received_props(name, origprops, props);
3004                         nvlist_free(origprops);
3005                 }
3006 
3007                 error = dsl_prop_set_hasrecvd(name);
3008         }
3009 
3010         errors = fnvlist_alloc();
3011         if (error == 0)
3012                 error = zfs_set_prop_nvlist(name, source, props, errors);
3013 
3014         event = fnvlist_alloc();
3015         fnvlist_add_string(event, "fsname", name);
3016         fnvlist_add_nvlist(event, "properties", props);
3017         fnvlist_add_nvlist(event, "errors", errors);
3018         zfs_event_post(ZFS_EC_STATUS, "set", event);
3019 
3020         if (out_errors != NULL)
3021                 *out_errors = fnvlist_dup(errors);
3022 
3023         fnvlist_free(errors);
3024 
3025         return (error);
3026 }
3027 
3028 /*
3029  * XXX This functionality will be removed after integration of
3030  * functionality, that does the same via zfs-channel programm.
3031  * The zfs-channel programm implementation is being developed
3032  * by Delphix.
3033  *
3034  * This functions sets provided props for provided datasets
3035  * in one sync-round. There are some requirements:
3036  *  - all datasets should belong to the same pool
3037  *  - only user-properties
3038  *
3039  * This function does all or nothing.
3040  *
3041  * inputs:
3042  * zc_nvlist_src{_size} nvlist of datasets and properties to apply
3043  *
3044  * outputs:
3045  * zc_nvlist_dst{_size} error for each unapplied property
3046  */
3047 /* ARGSUSED */
3048 static int
3049 zfs_ioc_set_prop_mds(const char *pool_name, nvlist_t *dss_props,
3050     nvlist_t *outnvl)
3051 {
3052         int error = 0;
3053         spa_t *spa = NULL;
3054         nvpair_t *pair = NULL;
3055         size_t pool_name_len;
3056         size_t total_num_props = 0;
3057 
3058         ASSERT(dss_props != NULL);
3059 
3060         if (nvlist_empty(dss_props))
3061                 return (SET_ERROR(ENODATA));
3062 
3063         pool_name_len = strlen(pool_name);
3064         while ((pair = nvlist_next_nvpair(dss_props, pair)) != NULL) {
3065                 nvlist_t *props;
3066                 nvpair_t *prop_nvp = NULL;
3067                 const char *ds_name;
3068 
3069                 ds_name = nvpair_name(pair);
3070                 if (strncmp(pool_name, ds_name, pool_name_len) == 0) {
3071                         char c = ds_name[pool_name_len];
3072                         if (c != '\0' && c != '/' && c != '@')
3073                                 return (SET_ERROR(EXDEV));
3074                 }
3075 
3076                 if (nvpair_type(pair) != DATA_TYPE_NVLIST)
3077                         return (SET_ERROR(EINVAL));
3078 
3079                 props = fnvpair_value_nvlist(pair);
3080                 while ((prop_nvp = nvlist_next_nvpair(props,
3081                     prop_nvp)) != NULL) {
3082                         const char *propname = nvpair_name(prop_nvp);
3083                         /* Only user-props */
3084                         if (!zfs_prop_user(propname) ||
3085                             nvpair_type(prop_nvp) != DATA_TYPE_STRING)
3086                                 return (SET_ERROR(EINVAL));
3087 
3088                         /*
3089                          * We count the number to use it
3090                          * later to check for ENOSPC
3091                          */
3092                         total_num_props++;
3093                 }
3094         }
3095 
3096         if ((error = spa_open(pool_name, &spa, FTAG)) != 0)
3097                 return (error);
3098 
3099         error = dsl_props_set_mds(pool_name, dss_props, total_num_props);
3100         spa_close(spa, FTAG);
3101         if (error == 0) {
3102                 nvlist_t *event = fnvlist_alloc();
3103                 fnvlist_add_nvlist(event, "properties", dss_props);
3104                 zfs_event_post(ZFS_EC_STATUS, "set-mds", event);
3105         }
3106 
3107         return (error);
3108 }
3109 
3110 /*
3111  * inputs:
3112  * zc_name              name of filesystem
3113  * zc_value             name of property to set
3114  * zc_nvlist_src{_size} nvlist of properties to apply
3115  * zc_cookie            received properties flag
3116  *
3117  * outputs:
3118  * zc_nvlist_dst{_size} error for each unapplied received property
3119  */
3120 static int
3121 zfs_ioc_set_prop(zfs_cmd_t *zc)
3122 {
3123         nvlist_t *nvl;
3124         boolean_t received = zc->zc_cookie;
3125         nvlist_t *errors = NULL;
3126         int error;
3127 
3128         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3129             zc->zc_iflags, &nvl)) != 0)
3130                 return (error);
3131 
3132         error = zfs_ioc_set_prop_impl(zc->zc_name, nvl, received, &errors);
3133 
3134         if (zc->zc_nvlist_dst != NULL && errors != NULL) {
3135                 (void) put_nvlist(zc, errors);
3136         }
3137 
3138         nvlist_free(errors);
3139         nvlist_free(nvl);
3140         return (error);
3141 }
3142 
3143 /*
3144  * inputs:
3145  * zc_name              name of filesystem
3146  * zc_value             name of property to inherit
3147  * zc_cookie            revert to received value if TRUE
3148  *
3149  * outputs:             none
3150  */
3151 static int
3152 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
3153 {
3154         const char *propname = zc->zc_value;
3155         zfs_prop_t prop = zfs_name_to_prop(propname);
3156         boolean_t received = zc->zc_cookie;
3157         zprop_source_t source = (received
3158             ? ZPROP_SRC_NONE            /* revert to received value, if any */
3159             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
3160 
3161         if (received) {
3162                 nvlist_t *dummy;
3163                 nvpair_t *pair;
3164                 zprop_type_t type;
3165                 int err;
3166 
3167                 /*
3168                  * zfs_prop_set_special() expects properties in the form of an
3169                  * nvpair with type info.
3170                  */
3171                 if (prop == ZPROP_INVAL) {
3172                         if (!zfs_prop_user(propname))
3173                                 return (SET_ERROR(EINVAL));
3174 
3175                         type = PROP_TYPE_STRING;
3176                 } else if (prop == ZFS_PROP_VOLSIZE ||
3177                     prop == ZFS_PROP_VERSION) {
3178                         return (SET_ERROR(EINVAL));
3179                 } else {
3180                         type = zfs_prop_get_type(prop);
3181                 }
3182 
3183                 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3184 
3185                 switch (type) {
3186                 case PROP_TYPE_STRING:
3187                         VERIFY(0 == nvlist_add_string(dummy, propname, ""));
3188                         break;
3189                 case PROP_TYPE_NUMBER:
3190                 case PROP_TYPE_INDEX:
3191                         VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
3192                         break;
3193                 default:
3194                         nvlist_free(dummy);
3195                         return (SET_ERROR(EINVAL));
3196                 }
3197 
3198                 pair = nvlist_next_nvpair(dummy, NULL);
3199                 err = zfs_prop_set_special(zc->zc_name, source, pair);
3200                 nvlist_free(dummy);
3201                 if (err != -1)
3202                         return (err); /* special property already handled */
3203         } else {
3204                 /*
3205                  * Only check this in the non-received case. We want to allow
3206                  * 'inherit -S' to revert non-inheritable properties like quota
3207                  * and reservation to the received or default values even though
3208                  * they are not considered inheritable.
3209                  */
3210                 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
3211                         return (SET_ERROR(EINVAL));
3212         }
3213 
3214         /* property name has been validated by zfs_secpolicy_inherit_prop() */
3215         return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
3216 }
3217 
3218 static int
3219 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
3220 {
3221         nvlist_t *props;
3222         spa_t *spa;
3223         int error;
3224         nvpair_t *pair;
3225         nvlist_t *event;
3226         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3227             zc->zc_iflags, &props))
3228                 return (error);
3229 
3230         /*
3231          * If the only property is the configfile, then just do a spa_lookup()
3232          * to handle the faulted case.
3233          */
3234         pair = nvlist_next_nvpair(props, NULL);
3235         if (pair != NULL && strcmp(nvpair_name(pair),
3236             zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
3237             nvlist_next_nvpair(props, pair) == NULL) {
3238                 mutex_enter(&spa_namespace_lock);
3239                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
3240                         spa_configfile_set(spa, props, B_FALSE);
3241                         spa_config_sync(spa, B_FALSE, B_TRUE);
3242                 }
3243                 mutex_exit(&spa_namespace_lock);
3244                 if (spa != NULL) {
3245                         nvlist_free(props);
3246                         return (0);
3247                 }
3248         }
3249 
3250         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3251                 nvlist_free(props);
3252                 return (error);
3253         }
3254 
3255         error = spa_prop_set(spa, props);
3256 
3257         if (error == 0) {
3258                 event = fnvlist_alloc();
3259                 fnvlist_add_string(event, "pool", zc->zc_name);
3260                 fnvlist_add_nvlist(event, "props", props);
3261                 zfs_event_post(ZPOOL_EC_STATUS, "set", event);
3262         }
3263 
3264         nvlist_free(props);
3265         spa_close(spa, FTAG);
3266 
3267         return (error);
3268 }
3269 
3270 static int
3271 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
3272 {
3273         spa_t *spa;
3274         int error;
3275         nvlist_t *nvp = NULL;
3276 
3277         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3278                 /*
3279                  * If the pool is faulted, there may be properties we can still
3280                  * get (such as altroot and cachefile), so attempt to get them
3281                  * anyway.
3282                  */
3283                 mutex_enter(&spa_namespace_lock);
3284                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
3285                         error = spa_prop_get(spa, &nvp);
3286                 mutex_exit(&spa_namespace_lock);
3287         } else {
3288                 error = spa_prop_get(spa, &nvp);
3289                 spa_close(spa, FTAG);
3290         }
3291 
3292         if (error == 0 && zc->zc_nvlist_dst != NULL)
3293                 error = put_nvlist(zc, nvp);
3294         else
3295                 error = SET_ERROR(EFAULT);
3296 
3297         nvlist_free(nvp);
3298         return (error);
3299 }
3300 
3301 /*
3302  * inputs:
3303  * zc_name              name of filesystem
3304  * zc_nvlist_src{_size} nvlist of delegated permissions
3305  * zc_perm_action       allow/unallow flag
3306  *
3307  * outputs:             none
3308  */
3309 static int
3310 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3311 {
3312         int error;
3313         nvlist_t *fsaclnv = NULL;
3314 
3315         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3316             zc->zc_iflags, &fsaclnv)) != 0)
3317                 return (error);
3318 
3319         /*
3320          * Verify nvlist is constructed correctly
3321          */
3322         if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3323                 nvlist_free(fsaclnv);
3324                 return (SET_ERROR(EINVAL));
3325         }
3326 
3327         /*
3328          * If we don't have PRIV_SYS_MOUNT, then validate
3329          * that user is allowed to hand out each permission in
3330          * the nvlist(s)
3331          */
3332 
3333         error = secpolicy_zfs(CRED());
3334         if (error != 0) {
3335                 if (zc->zc_perm_action == B_FALSE) {
3336                         error = dsl_deleg_can_allow(zc->zc_name,
3337                             fsaclnv, CRED());
3338                 } else {
3339                         error = dsl_deleg_can_unallow(zc->zc_name,
3340                             fsaclnv, CRED());
3341                 }
3342         }
3343 
3344         if (error == 0)
3345                 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3346 
3347         nvlist_free(fsaclnv);
3348         return (error);
3349 }
3350 
3351 /*
3352  * inputs:
3353  * zc_name              name of filesystem
3354  *
3355  * outputs:
3356  * zc_nvlist_src{_size} nvlist of delegated permissions
3357  */
3358 static int
3359 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3360 {
3361         nvlist_t *nvp;
3362         int error;
3363 
3364         if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3365                 error = put_nvlist(zc, nvp);
3366                 nvlist_free(nvp);
3367         }
3368 
3369         return (error);
3370 }
3371 
3372 /* ARGSUSED */
3373 static void
3374 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3375 {
3376         zfs_creat_t *zct = arg;
3377 
3378         zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3379 }
3380 
/*
 * Sentinel for a uint64_t zpl property value that was not supplied; used by
 * zfs_fill_zplprops_impl() to tell "not requested" from a real value.
 */
#define ZFS_PROP_UNDEFINED      ((uint64_t)-1)
3382 
3383 /*
3384  * inputs:
3385  * createprops          list of properties requested by creator
3386  * default_zplver       zpl version to use if unspecified in createprops
3387  * fuids_ok             fuids allowed in this version of the spa?
3388  * os                   parent objset pointer (NULL if root fs)
3389  * fuids_ok             fuids allowed in this version of the spa?
3390  * sa_ok                SAs allowed in this version of the spa?
3391  * createprops          list of properties requested by creator
3392  *
3393  * outputs:
3394  * zplprops     values for the zplprops we attach to the master node object
3395  * is_ci        true if requested file system will be purely case-insensitive
3396  *
3397  * Determine the settings for utf8only, normalization and
3398  * casesensitivity.  Specific values may have been requested by the
3399  * creator and/or we can inherit values from the parent dataset.  If
3400  * the file system is of too early a vintage, a creator can not
3401  * request settings for these properties, even if the requested
3402  * setting is the default value.  We don't actually want to create dsl
3403  * properties for these, so remove them from the source nvlist after
3404  * processing.
3405  */
3406 static int
3407 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3408     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3409     nvlist_t *zplprops, boolean_t *is_ci)
3410 {
3411         uint64_t sense = ZFS_PROP_UNDEFINED;
3412         uint64_t norm = ZFS_PROP_UNDEFINED;
3413         uint64_t u8 = ZFS_PROP_UNDEFINED;
3414 
3415         ASSERT(zplprops != NULL);
3416 
3417         if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3418                 return (SET_ERROR(EINVAL));
3419 
3420         /*
3421          * Pull out creator prop choices, if any.
3422          */
3423         if (createprops) {
3424                 (void) nvlist_lookup_uint64(createprops,
3425                     zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3426                 (void) nvlist_lookup_uint64(createprops,
3427                     zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3428                 (void) nvlist_remove_all(createprops,
3429                     zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3430                 (void) nvlist_lookup_uint64(createprops,
3431                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3432                 (void) nvlist_remove_all(createprops,
3433                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3434                 (void) nvlist_lookup_uint64(createprops,
3435                     zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3436                 (void) nvlist_remove_all(createprops,
3437                     zfs_prop_to_name(ZFS_PROP_CASE));
3438         }
3439 
3440         /*
3441          * If the zpl version requested is whacky or the file system
3442          * or pool is version is too "young" to support normalization
3443          * and the creator tried to set a value for one of the props,
3444          * error out.
3445          */
3446         if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3447             (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3448             (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3449             (zplver < ZPL_VERSION_NORMALIZATION &&
3450             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3451             sense != ZFS_PROP_UNDEFINED)))
3452                 return (SET_ERROR(ENOTSUP));
3453 
3454         /*
3455          * Put the version in the zplprops
3456          */
3457         VERIFY(nvlist_add_uint64(zplprops,
3458             zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3459 
3460         if (norm == ZFS_PROP_UNDEFINED)
3461                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3462         VERIFY(nvlist_add_uint64(zplprops,
3463             zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3464 
3465         if (os) {
3466                 if (zfs_is_wormed_ds(dmu_objset_ds(os)))
3467                         return (SET_ERROR(EPERM));
3468         }
3469 
3470         /*
3471          * If we're normalizing, names must always be valid UTF-8 strings.
3472          */
3473         if (norm)
3474                 u8 = 1;
3475         if (u8 == ZFS_PROP_UNDEFINED)
3476                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3477         VERIFY(nvlist_add_uint64(zplprops,
3478             zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3479 
3480         if (sense == ZFS_PROP_UNDEFINED)
3481                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3482         VERIFY(nvlist_add_uint64(zplprops,
3483             zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3484 
3485         if (is_ci)
3486                 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3487 
3488         return (0);
3489 }
3490 
3491 static int
3492 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3493     nvlist_t *zplprops, boolean_t *is_ci)
3494 {
3495         boolean_t fuids_ok, sa_ok;
3496         uint64_t zplver = ZPL_VERSION;
3497         objset_t *os = NULL;
3498         char parentname[ZFS_MAX_DATASET_NAME_LEN];
3499         char *cp;
3500         spa_t *spa;
3501         uint64_t spa_vers;
3502         int error;
3503 
3504         (void) strlcpy(parentname, dataset, sizeof (parentname));
3505         cp = strrchr(parentname, '/');
3506         ASSERT(cp != NULL);
3507         cp[0] = '\0';
3508 
3509         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3510                 return (error);
3511 
3512         spa_vers = spa_version(spa);
3513         spa_close(spa, FTAG);
3514 
3515         zplver = zfs_zpl_version_map(spa_vers);
3516         fuids_ok = (zplver >= ZPL_VERSION_FUID);
3517         sa_ok = (zplver >= ZPL_VERSION_SA);
3518 
3519         /*
3520          * Open parent object set so we can inherit zplprop values.
3521          */
3522         if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3523                 return (error);
3524 
3525         error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3526             zplprops, is_ci);
3527         dmu_objset_rele(os, FTAG);
3528         return (error);
3529 }
3530 
3531 static int
3532 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3533     nvlist_t *zplprops, boolean_t *is_ci)
3534 {
3535         boolean_t fuids_ok;
3536         boolean_t sa_ok;
3537         uint64_t zplver = ZPL_VERSION;
3538         int error;
3539 
3540         zplver = zfs_zpl_version_map(spa_vers);
3541         fuids_ok = (zplver >= ZPL_VERSION_FUID);
3542         sa_ok = (zplver >= ZPL_VERSION_SA);
3543 
3544         error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3545             createprops, zplprops, is_ci);
3546         return (error);
3547 }
3548 
3549 /*
3550  * innvl: {
3551  *     "type" -> dmu_objset_type_t (int32)
3552  *     (optional) "props" -> { prop -> value }
3553  * }
3554  *
3555  * outnvl: propname -> error code (int32)
3556  */
3557 static int
3558 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3559 {
3560         int error = 0;
3561         zfs_creat_t zct = { 0 };
3562         nvlist_t *nvprops = NULL;
3563         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3564         int32_t type32;
3565         dmu_objset_type_t type;
3566         boolean_t is_insensitive = B_FALSE;
3567         char parent[MAXNAMELEN];
3568         nvlist_t *event;
3569 
3570         if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3571                 return (SET_ERROR(EINVAL));
3572         type = type32;
3573         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3574 
3575         switch (type) {
3576         case DMU_OST_ZFS:
3577                 cbfunc = zfs_create_cb;
3578                 break;
3579 
3580         case DMU_OST_ZVOL:
3581                 cbfunc = zvol_create_cb;
3582                 break;
3583 
3584         default:
3585                 cbfunc = NULL;
3586                 break;
3587         }
3588         if (strchr(fsname, '@') ||
3589             strchr(fsname, '%'))
3590                 return (SET_ERROR(EINVAL));
3591 
3592         zct.zct_props = nvprops;
3593 
3594         if (cbfunc == NULL)
3595                 return (SET_ERROR(EINVAL));
3596 
3597         if (zfs_get_parent(fsname, parent, MAXNAMELEN) == 0 &&
3598             zfs_is_wormed(parent)) {
3599                 return (SET_ERROR(EPERM));
3600         }
3601 
3602         if (type == DMU_OST_ZVOL) {
3603                 uint64_t volsize, volblocksize;
3604 
3605                 if (nvprops == NULL)
3606                         return (SET_ERROR(EINVAL));
3607                 if (nvlist_lookup_uint64(nvprops,
3608                     zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3609                         return (SET_ERROR(EINVAL));
3610 
3611                 if ((error = nvlist_lookup_uint64(nvprops,
3612                     zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3613                     &volblocksize)) != 0 && error != ENOENT)
3614                         return (SET_ERROR(EINVAL));
3615 
3616                 if (error != 0)
3617                         volblocksize = zfs_prop_default_numeric(
3618                             ZFS_PROP_VOLBLOCKSIZE);
3619 
3620                 if ((error = zvol_check_volblocksize(
3621                     volblocksize)) != 0 ||
3622                     (error = zvol_check_volsize(volsize,
3623                     volblocksize)) != 0)
3624                         return (error);
3625         } else if (type == DMU_OST_ZFS) {
3626                 /*
3627                  * We have to have normalization and
3628                  * case-folding flags correct when we do the
3629                  * file system creation, so go figure them out
3630                  * now.
3631                  */
3632                 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3633                     NV_UNIQUE_NAME, KM_SLEEP) == 0);
3634                 error = zfs_fill_zplprops(fsname, nvprops,
3635                     zct.zct_zplprops, &is_insensitive);
3636                 if (error != 0) {
3637                         nvlist_free(zct.zct_zplprops);
3638                         return (error);
3639                 }
3640         }
3641 
3642         error = dmu_objset_create(fsname, type,
3643             is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3644         nvlist_free(zct.zct_zplprops);
3645 
3646         /*
3647          * It would be nice to do this atomically.
3648          */
3649         if (error == 0) {
3650                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3651                     nvprops, outnvl);
3652                 if (error != 0)
3653                         (void) dsl_destroy_head(fsname);
3654         }
3655 
3656         if (error == 0) {
3657                 event = fnvlist_alloc();
3658                 fnvlist_add_string(event, "fsname", fsname);
3659                 fnvlist_add_int32(event, "type", type);
3660                 if (nvprops != NULL)
3661                         fnvlist_add_nvlist(event, "properties", nvprops);
3662                 zfs_event_post(ZFS_EC_STATUS, "create", event);
3663         }
3664 
3665         return (error);
3666 }
3667 
3668 /*
3669  * innvl: {
3670  *     "origin" -> name of origin snapshot
3671  *     (optional) "props" -> { prop -> value }
3672  * }
3673  *
3674  * outnvl: propname -> error code (int32)
3675  */
3676 static int
3677 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3678 {
3679         int error = 0;
3680         nvlist_t *nvprops = NULL;
3681         char *origin_name, *origin_snap;
3682         nvlist_t *event;
3683 
3684         if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3685                 return (SET_ERROR(EINVAL));
3686 
3687         origin_snap = strchr(origin_name, '@');
3688         if (!origin_snap)
3689                 return (SET_ERROR(EINVAL));
3690 
3691         if (autosnap_check_name(origin_snap))
3692                 return (SET_ERROR(EPERM));
3693 
3694         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3695 
3696         if (strchr(fsname, '@') ||
3697             strchr(fsname, '%'))
3698                 return (SET_ERROR(EINVAL));
3699 
3700         if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3701                 return (SET_ERROR(EINVAL));
3702 
3703         error = dmu_objset_clone(fsname, origin_name);
3704         if (error != 0)
3705                 return (error);
3706 
3707         /*
3708          * It would be nice to do this atomically.
3709          */
3710         if (error == 0) {
3711                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3712                     nvprops, outnvl);
3713                 if (error != 0)
3714                         (void) dsl_destroy_head(fsname);
3715         }
3716 
3717         if (error == 0) {
3718                 event = fnvlist_alloc();
3719                 fnvlist_add_string(event, "origin", origin_name);
3720                 fnvlist_add_string(event, "fsname", fsname);
3721                 if (nvprops != NULL)
3722                         fnvlist_add_nvlist(event, "properties", nvprops);
3723                 zfs_event_post(ZFS_EC_STATUS, "clone", event);
3724         }
3725 
3726         return (error);
3727 }
3728 
3729 /*
3730  * innvl: {
3731  *     "snaps" -> { snapshot1, snapshot2 }
3732  *     (optional) "props" -> { prop -> value (string) }
3733  * }
3734  *
3735  * outnvl: snapshot -> error code (int32)
3736  */
3737 static int
3738 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3739 {
3740         nvlist_t *snaps;
3741         nvlist_t *props = NULL;
3742         int error, poollen;
3743         nvpair_t *pair;
3744         nvlist_t *event;
3745 
3746         (void) nvlist_lookup_nvlist(innvl, "props", &props);
3747         if ((error = zfs_check_userprops(poolname, props)) != 0)
3748                 return (error);
3749 
3750         if (!nvlist_empty(props) &&
3751             zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3752                 return (SET_ERROR(ENOTSUP));
3753 
3754         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3755                 return (SET_ERROR(EINVAL));
3756         poollen = strlen(poolname);
3757         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3758             pair = nvlist_next_nvpair(snaps, pair)) {
3759                 const char *name = nvpair_name(pair);
3760                 const char *cp = strchr(name, '@');
3761 
3762                 /*
3763                  * The snap name must contain an @, and the part after it must
3764                  * contain only valid characters.
3765                  */
3766                 if (cp == NULL ||
3767                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3768                         return (SET_ERROR(EINVAL));
3769 
3770                 if (autosnap_check_name(cp))
3771                         return (EINVAL);
3772 
3773                 /*
3774                  * The snap must be in the specified pool.
3775                  */
3776                 if (strncmp(name, poolname, poollen) != 0 ||
3777                     (name[poollen] != '/' && name[poollen] != '@'))
3778                         return (SET_ERROR(EXDEV));
3779 
3780                 /* This must be the only snap of this fs. */
3781                 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3782                     pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3783                         if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3784                             == 0) {
3785                                 return (SET_ERROR(EXDEV));
3786                         }
3787                 }
3788         }
3789 
3790         error = dsl_dataset_snapshot(snaps, props, outnvl);
3791 
3792         event = fnvlist_alloc();
3793         fnvlist_add_nvlist(event, "snaps", snaps);
3794         fnvlist_add_nvlist(event, "errors", outnvl);
3795         fnvlist_add_string(event, "pool", poolname);
3796         zfs_event_post(ZFS_EC_STATUS, "snapshot", event);
3797 
3798         return (error);
3799 }
3800 
3801 /*
3802  * innvl: "message" -> string
3803  */
3804 /* ARGSUSED */
3805 static int
3806 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3807 {
3808         char *message;
3809         spa_t *spa;
3810         int error;
3811         char *poolname;
3812 
3813         /*
3814          * The poolname in the ioctl is not set, we get it from the TSD,
3815          * which was set at the end of the last successful ioctl that allows
3816          * logging.  The secpolicy func already checked that it is set.
3817          * Only one log ioctl is allowed after each successful ioctl, so
3818          * we clear the TSD here.
3819          */
3820         poolname = tsd_get(zfs_allow_log_key);
3821         (void) tsd_set(zfs_allow_log_key, NULL);
3822         error = spa_open(poolname, &spa, FTAG);
3823         strfree(poolname);
3824         if (error != 0)
3825                 return (error);
3826 
3827         if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3828                 spa_close(spa, FTAG);
3829                 return (SET_ERROR(EINVAL));
3830         }
3831 
3832         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3833                 spa_close(spa, FTAG);
3834                 return (SET_ERROR(ENOTSUP));
3835         }
3836 
3837         error = spa_history_log(spa, message);
3838         spa_close(spa, FTAG);
3839         return (error);
3840 }
3841 
3842 /*
3843  * The dp_config_rwlock must not be held when calling this, because the
3844  * unmount may need to write out data.
3845  *
3846  * This function is best-effort.  Callers must deal gracefully if it
3847  * remains mounted (or is remounted after this call).
3848  *
 * Returns nothing.  The call does nothing if the argument is not a snapshot
 * name or no mounted filesystem is found for it; unmount errors are ignored.
3851  */
3852 void
3853 zfs_unmount_snap(const char *snapname)
3854 {
3855         vfs_t *vfsp = NULL;
3856         zfsvfs_t *zfsvfs = NULL;
3857 
3858         if (strchr(snapname, '@') == NULL)
3859                 return;
3860 
3861         int err = getzfsvfs(snapname, &zfsvfs);
3862         if (err != 0) {
3863                 ASSERT3P(zfsvfs, ==, NULL);
3864                 return;
3865         }
3866         vfsp = zfsvfs->z_vfs;
3867 
3868         ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3869 
3870         err = vn_vfswlock(vfsp->vfs_vnodecovered);
3871         VFS_RELE(vfsp);
3872         if (err != 0)
3873                 return;
3874 
3875         /*
3876          * Always force the unmount for snapshots.
3877          */
3878         (void) dounmount(vfsp, MS_FORCE, kcred);
3879 }
3880 
/*
 * Callback-shaped adapter around zfs_unmount_snap().  'arg' is ignored and
 * 0 is always returned, since zfs_unmount_snap() reports no status.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
        zfs_unmount_snap(snapname);

        return (0);
}
3888 
3889 /*
3890  * When a clone is destroyed, its origin may also need to be destroyed,
3891  * in which case it must be unmounted.  This routine will do that unmount
3892  * if necessary.
3893  */
3894 void
3895 zfs_destroy_unmount_origin(const char *fsname)
3896 {
3897         int error;
3898         objset_t *os;
3899         dsl_dataset_t *ds;
3900 
3901         error = dmu_objset_hold(fsname, FTAG, &os);
3902         if (error != 0)
3903                 return;
3904         ds = dmu_objset_ds(os);
3905         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3906                 char originname[ZFS_MAX_DATASET_NAME_LEN];
3907                 dsl_dataset_name(ds->ds_prev, originname);
3908                 dmu_objset_rele(os, FTAG);
3909                 zfs_unmount_snap(originname);
3910         } else {
3911                 dmu_objset_rele(os, FTAG);
3912         }
3913 }
3914 
3915 static int
3916 zfs_destroy_check_autosnap(spa_t *spa, const char *name)
3917 {
3918         const char *snap = strchr(name, '@');
3919 
3920         if (snap == NULL)
3921                 return (EINVAL);
3922 
3923         if (autosnap_check_name(snap)) {
3924                 int err = autosnap_check_for_destroy(
3925                     spa_get_autosnap(spa), name);
3926 
3927                 if (err != 0)
3928                         return (EBUSY);
3929         }
3930 
3931         return (0);
3932 }
3933 
3934 /*
3935  * innvl: {
3936  *     "snaps" -> { snapshot1, snapshot2 }
3937  *     (optional boolean) "defer"
3938  * }
3939  *
3940  * outnvl: snapshot -> error code (int32)
3941  *
3942  */
3943 /* ARGSUSED */
3944 static int
3945 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3946 {
3947         nvlist_t *snaps;
3948         nvpair_t *pair;
3949         boolean_t defer;
3950         int error = 0;
3951         nvlist_t *event;
3952         spa_t *spa;
3953 
3954         if (zfs_is_wormed(poolname))
3955                 return (SET_ERROR(EPERM));
3956 
3957         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3958                 return (SET_ERROR(EINVAL));
3959         defer = nvlist_exists(innvl, "defer");
3960 
3961         error = spa_open(poolname, &spa, FTAG);
3962         if (spa == NULL)
3963                 return (error);
3964 
3965         for (pair = nvlist_next_nvpair(snaps, NULL);
3966             pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
3967                 error = zfs_destroy_check_autosnap(spa, nvpair_name(pair));
3968                 if (error)
3969                         fnvlist_add_int32(outnvl, nvpair_name(pair), error);
3970         }
3971 
3972         spa_close(spa, FTAG);
3973 
3974         if (error)
3975                 return (error);
3976 
3977         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3978             pair = nvlist_next_nvpair(snaps, pair)) {
3979                 zfs_unmount_snap(nvpair_name(pair));
3980         }
3981 
3982         error = dsl_destroy_snapshots_nvl(snaps, defer, outnvl);
3983 
3984         if (error == 0) {
3985                 event = fnvlist_alloc();
3986                 fnvlist_add_nvlist(event, "snaps", snaps);
3987                 fnvlist_add_nvlist(event, "errors", outnvl);
3988                 zfs_event_post(ZFS_EC_STATUS, "destroy_snaps", event);
3989         }
3990 
3991         return (error);
3992 }
3993 
3994 /*
3995  * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3996  * All bookmarks must be in the same pool.
3997  *
3998  * innvl: {
3999  *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
4000  * }
4001  *
4002  * outnvl: bookmark -> error code (int32)
4003  *
4004  */
4005 /* ARGSUSED */
4006 static int
4007 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4008 {
4009         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
4010             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
4011                 char *snap_name;
4012 
4013                 /*
4014                  * Verify the snapshot argument.
4015                  */
4016                 if (nvpair_value_string(pair, &snap_name) != 0)
4017                         return (SET_ERROR(EINVAL));
4018 
4019 
4020                 /* Verify that the keys (bookmarks) are unique */
4021                 for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
4022                     pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
4023                         if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
4024                                 return (SET_ERROR(EINVAL));
4025                 }
4026         }
4027 
4028         return (dsl_bookmark_create(innvl, outnvl));
4029 }
4030 
4031 /*
4032  * innvl: {
4033  *     property 1, property 2, ...
4034  * }
4035  *
4036  * outnvl: {
4037  *     bookmark name 1 -> { property 1, property 2, ... },
4038  *     bookmark name 2 -> { property 1, property 2, ... }
4039  * }
4040  *
4041  */
4042 static int
4043 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4044 {
4045         return (dsl_get_bookmarks(fsname, innvl, outnvl));
4046 }
4047 
4048 /*
4049  * innvl: {
4050  *     bookmark name 1, bookmark name 2
4051  * }
4052  *
4053  * outnvl: bookmark -> error code (int32)
4054  *
4055  */
4056 static int
4057 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
4058     nvlist_t *outnvl)
4059 {
4060         int error, poollen;
4061 
4062         poollen = strlen(poolname);
4063         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
4064             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
4065                 const char *name = nvpair_name(pair);
4066                 const char *cp = strchr(name, '#');
4067 
4068                 /*
4069                  * The bookmark name must contain an #, and the part after it
4070                  * must contain only valid characters.
4071                  */
4072                 if (cp == NULL ||
4073                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4074                         return (SET_ERROR(EINVAL));
4075 
4076                 /*
4077                  * The bookmark must be in the specified pool.
4078                  */
4079                 if (strncmp(name, poolname, poollen) != 0 ||
4080                     (name[poollen] != '/' && name[poollen] != '#'))
4081                         return (SET_ERROR(EXDEV));
4082         }
4083 
4084         error = dsl_bookmark_destroy(innvl, outnvl);
4085         return (error);
4086 }
4087 
4088 static int
4089 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
4090     nvlist_t *outnvl)
4091 {
4092         char *program;
4093         uint64_t instrlimit, memlimit;
4094         boolean_t sync_flag;
4095         nvpair_t *nvarg = NULL;
4096 
4097         if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
4098                 return (EINVAL);
4099         }
4100         if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
4101                 sync_flag = B_TRUE;
4102         }
4103         if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
4104                 instrlimit = ZCP_DEFAULT_INSTRLIMIT;
4105         }
4106         if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
4107                 memlimit = ZCP_DEFAULT_MEMLIMIT;
4108         }
4109         if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
4110                 return (EINVAL);
4111         }
4112 
4113         if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
4114                 return (EINVAL);
4115         if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
4116                 return (EINVAL);
4117 
4118         return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
4119             nvarg, outnvl));
4120 }
4121 
4122 /*
4123  * inputs:
4124  * zc_name              name of dataset to destroy
4125  * zc_objset_type       type of objset
4126  * zc_defer_destroy     mark for deferred destroy
4127  * zc_guid              if set, do atomical recursive destroy
4128  *
4129  * outputs:             none
4130  */
4131 static int
4132 zfs_ioc_destroy(zfs_cmd_t *zc)
4133 {
4134         int err;
4135         nvlist_t *event;
4136 
4137         if (zfs_is_wormed(zc->zc_name))
4138                 return (SET_ERROR(EPERM));
4139 
4140         if (zc->zc_objset_type == DMU_OST_ZFS)
4141                 zfs_unmount_snap(zc->zc_name);
4142 
4143         if (zc->zc_guid) {
4144                 spa_t *spa;
4145 
4146                 if ((err = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4147                         return (err);
4148 
4149                 err = autosnap_lock(spa, RW_WRITER);
4150                 if (err == 0) {
4151                         err = wbc_walk_lock(spa);
4152                         if (err != 0)
4153                                 autosnap_unlock(spa);
4154                 }
4155 
4156                 if (err == 0) {
4157                         err = dsl_destroy_atomically(zc->zc_name,
4158                             zc->zc_defer_destroy);
4159                         wbc_walk_unlock(spa);
4160                         autosnap_unlock(spa);
4161                 }
4162 
4163                 spa_close(spa, FTAG);
4164         } else {
4165                 if (strchr(zc->zc_name, '@')) {
4166                         spa_t *spa = NULL;
4167 
4168                         err = spa_open(zc->zc_name, &spa, FTAG);
4169                         if (err != 0)
4170                                 return (err);
4171 
4172                         err = zfs_destroy_check_autosnap(spa, zc->zc_name);
4173                         if (err == 0) {
4174                                 err = dsl_destroy_snapshot(zc->zc_name,
4175                                     zc->zc_defer_destroy);
4176                         }
4177 
4178                         spa_close(spa, FTAG);
4179                 } else {
4180                         err = dsl_destroy_head(zc->zc_name);
4181                         if (err == EEXIST) {
4182                                 /*
4183                                  * It is possible that the given DS may have
4184                                  * hidden child (%recv) datasets - "leftovers"
4185                                  * resulting from the previously interrupted
4186                                  * 'zfs receive'.
4187                                  */
4188                                 char namebuf[ZFS_MAX_DATASET_NAME_LEN];
4189 
4190                                 if (snprintf(namebuf, sizeof (namebuf),
4191                                     "%s/%%recv", zc->zc_name) >=
4192                                     sizeof (namebuf))
4193                                         return (err);
4194 
4195                                 /* Try to remove the hidden child (%recv) */
4196                                 err = dsl_destroy_head(namebuf);
4197                                 if (err == 0) {
4198                                         /*
4199                                          * Now the given DS should not have
4200                                          * children, so we can try to remove
4201                                          * it again
4202                                          */
4203                                         err = dsl_destroy_head(zc->zc_name);
4204                                 } else if (err == ENOENT) {
4205                                         /*
4206                                          * The hidden child (%recv) does not
4207                                          * exist, so need to restore original
4208                                          * error
4209                                          */
4210                                         err = EEXIST;
4211                                 }
4212 
4213                         }
4214                 }
4215         }
4216         if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
4217                 (void) zvol_remove_minor(zc->zc_name);
4218 
4219         if (err == 0) {
4220                 event = fnvlist_alloc();
4221                 fnvlist_add_string(event, "fsname", zc->zc_name);
4222                 fnvlist_add_int32(event, "type", zc->zc_objset_type);
4223                 zfs_event_post(ZFS_EC_STATUS, "destroy", event);
4224         }
4225 
4226         return (err);
4227 }
4228 
4229 /*
4230  * fsname is name of dataset to rollback (to most recent snapshot)
4231  *
4232  * innvl may contain name of expected target snapshot
4233  *
 * outnvl: {
 *     "target" -> name of most recent snapshot
 * }
4236  */
4237 /* ARGSUSED */
4238 static int
4239 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4240 {
4241         zfsvfs_t *zfsvfs;
4242         char *target = NULL;
4243         int error;
4244         nvlist_t *event;
4245         int resume_err = 0;
4246 
4247         if (zfs_is_wormed(fsname))
4248                 return (SET_ERROR(EPERM));
4249 
4250         (void) nvlist_lookup_string(innvl, "target", &target);
4251         if (target != NULL) {
4252                 const char *cp = strchr(target, '@');
4253 
4254                 /*
4255                  * The snap name must contain an @, and the part after it must
4256                  * contain only valid characters.
4257                  */
4258                 if (cp == NULL ||
4259                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4260                         return (SET_ERROR(EINVAL));
4261         }
4262 
4263         if (getzfsvfs(fsname, &zfsvfs) == 0) {
4264                 dsl_dataset_t *ds;
4265 
4266                 ds = dmu_objset_ds(zfsvfs->z_os);
4267                 error = zfs_suspend_fs(zfsvfs);
4268                 if (error == 0) {
4269                         error = dsl_dataset_rollback(fsname, target, zfsvfs,
4270                             outnvl);
4271                         resume_err = zfs_resume_fs(zfsvfs, ds);
4272                 }
4273                 VFS_RELE(zfsvfs->z_vfs);
4274         } else {
4275                 error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4276         }
4277 
4278         if (error == 0) {
4279                 event = fnvlist_alloc();
4280                 fnvlist_add_string(event, "target", (target != NULL) ? target : "");
4281                 fnvlist_add_string(event, "fsname", fsname);
4282                 fnvlist_add_int32(event, "resume_err", resume_err);
4283                 zfs_event_post(ZFS_EC_STATUS, "rollback", event);
4284         }
4285 
4286         error = (error != 0) ? error : resume_err;
4287         return (error);
4288 }
4289 
4290 static int
4291 recursive_unmount(const char *fsname, void *arg)
4292 {
4293         const char *snapname = arg;
4294         char fullname[ZFS_MAX_DATASET_NAME_LEN];
4295 
4296         (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
4297         zfs_unmount_snap(fullname);
4298 
4299         return (0);
4300 }
4301 
4302 /*
4303  * inputs:
4304  * zc_name      old name of dataset
4305  * zc_value     new name of dataset
4306  * zc_cookie    recursive flag (only valid for snapshots)
4307  *
4308  * outputs:     none
4309  */
4310 static int
4311 zfs_ioc_rename(zfs_cmd_t *zc)
4312 {
4313         boolean_t recursive = zc->zc_cookie & 1;
4314         char *at;
4315         nvlist_t *event;
4316         int error;
4317 
4318         if (zfs_is_wormed(zc->zc_name))
4319                 return (SET_ERROR(EPERM));
4320 
4321         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4322         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4323             strchr(zc->zc_value, '%'))
4324                 return (SET_ERROR(EINVAL));
4325 
4326         at = strchr(zc->zc_name, '@');
4327         if (at != NULL) {
4328                 /* snaps must be in same fs */
4329 
4330                 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4331                         return (SET_ERROR(EXDEV));
4332                 *at = '\0';
4333                 if (zc->zc_objset_type == DMU_OST_ZFS) {
4334                         error = dmu_objset_find(zc->zc_name,
4335                             recursive_unmount, at + 1,
4336                             recursive ? DS_FIND_CHILDREN : 0);
4337                         if (error != 0) {
4338                                 *at = '@';
4339                                 return (error);
4340                         }
4341                 }
4342                 error = dsl_dataset_rename_snapshot(zc->zc_name,
4343                     at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4344                 *at = '@';
4345 
4346         } else {
4347                 if (zc->zc_objset_type == DMU_OST_ZVOL)
4348                         (void) zvol_remove_minor(zc->zc_name);
4349                 error = dsl_dir_rename(zc->zc_name, zc->zc_value);
4350         }
4351 
4352         if (error == 0) {
4353                 event = fnvlist_alloc();
4354                 fnvlist_add_string(event, "origin", zc->zc_name);
4355                 fnvlist_add_string(event, "fsname", zc->zc_value);
4356                 fnvlist_add_int32(event, "type", zc->zc_objset_type);
4357                 zfs_event_post(ZFS_EC_STATUS, "rename", event);
4358         }
4359 
4360         return (error);
4361 }
4362 
4363 static int
4364 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4365 {
4366         const char *propname = nvpair_name(pair);
4367         boolean_t issnap = (strchr(dsname, '@') != NULL);
4368         zfs_prop_t prop = zfs_name_to_prop(propname);
4369         uint64_t intval;
4370         int err;
4371 
4372         if (prop == ZPROP_INVAL) {
4373                 if (zfs_prop_user(propname)) {
4374                         if (err = zfs_secpolicy_write_perms(dsname,
4375                             ZFS_DELEG_PERM_USERPROP, cr))
4376                                 return (err);
4377                         return (0);
4378                 }
4379 
4380                 if (!issnap && zfs_prop_userquota(propname)) {
4381                         const char *perm = NULL;
4382                         const char *uq_prefix =
4383                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4384                         const char *gq_prefix =
4385                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4386 
4387                         if (strncmp(propname, uq_prefix,
4388                             strlen(uq_prefix)) == 0) {
4389                                 perm = ZFS_DELEG_PERM_USERQUOTA;
4390                         } else if (strncmp(propname, gq_prefix,
4391                             strlen(gq_prefix)) == 0) {
4392                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
4393                         } else {
4394                                 /* USERUSED and GROUPUSED are read-only */
4395                                 return (SET_ERROR(EINVAL));
4396                         }
4397 
4398                         if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
4399                                 return (err);
4400                         return (0);
4401                 }
4402 
4403                 return (SET_ERROR(EINVAL));
4404         }
4405 
4406         if (issnap)
4407                 return (SET_ERROR(EINVAL));
4408 
4409         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4410                 /*
4411                  * dsl_prop_get_all_impl() returns properties in this
4412                  * format.
4413                  */
4414                 nvlist_t *attrs;
4415                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4416                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4417                     &pair) == 0);
4418         }
4419 
4420         /*
4421          * Check that this value is valid for this pool version
4422          */
4423         switch (prop) {
4424         case ZFS_PROP_COMPRESSION:
4425                 /*
4426                  * If the user specified gzip compression, make sure
4427                  * the SPA supports it. We ignore any errors here since
4428                  * we'll catch them later.
4429                  */
4430                 if (nvpair_value_uint64(pair, &intval) == 0) {
4431                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
4432                             intval <= ZIO_COMPRESS_GZIP_9 &&
4433                             zfs_earlier_version(dsname,
4434                             SPA_VERSION_GZIP_COMPRESSION)) {
4435                                 return (SET_ERROR(ENOTSUP));
4436                         }
4437 
4438                         if (intval == ZIO_COMPRESS_ZLE &&
4439                             zfs_earlier_version(dsname,
4440                             SPA_VERSION_ZLE_COMPRESSION))
4441                                 return (SET_ERROR(ENOTSUP));
4442 
4443                         if (intval == ZIO_COMPRESS_LZ4) {
4444                                 spa_t *spa;
4445 
4446                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4447                                         return (err);
4448 
4449                                 if (!spa_feature_is_enabled(spa,
4450                                     SPA_FEATURE_LZ4_COMPRESS)) {
4451                                         spa_close(spa, FTAG);
4452                                         return (SET_ERROR(ENOTSUP));
4453                                 }
4454                                 spa_close(spa, FTAG);
4455                         }
4456 
4457                         /*
4458                          * If this is a bootable dataset then
4459                          * verify that the compression algorithm
4460                          * is supported for booting. We must return
4461                          * something other than ENOTSUP since it
4462                          * implies a downrev pool version.
4463                          */
4464                         if (zfs_is_bootfs(dsname) &&
4465                             !BOOTFS_COMPRESS_VALID(intval)) {
4466                                 return (SET_ERROR(ERANGE));
4467                         }
4468                 }
4469                 break;
4470 
4471         case ZFS_PROP_COPIES:
4472                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4473                         return (SET_ERROR(ENOTSUP));
4474                 break;
4475 
4476         case ZFS_PROP_RECORDSIZE:
4477                 /* Record sizes above 128k need the feature to be enabled */
4478                 if (nvpair_value_uint64(pair, &intval) == 0 &&
4479                     intval > SPA_OLD_MAXBLOCKSIZE) {
4480                         spa_t *spa;
4481 
4482                         /*
4483                          * We don't allow setting the property above 1MB,
4484                          * unless the tunable has been changed.
4485                          */
4486                         if (intval > zfs_max_recordsize ||
4487                             intval > SPA_MAXBLOCKSIZE)
4488                                 return (SET_ERROR(ERANGE));
4489 
4490                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4491                                 return (err);
4492 
4493                         if (!spa_feature_is_enabled(spa,
4494                             SPA_FEATURE_LARGE_BLOCKS)) {
4495                                 spa_close(spa, FTAG);
4496                                 return (SET_ERROR(ENOTSUP));
4497                         }
4498                         spa_close(spa, FTAG);
4499                 }
4500                 break;
4501 
4502         case ZFS_PROP_SHARESMB:
4503                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4504                         return (SET_ERROR(ENOTSUP));
4505                 break;
4506 
4507         case ZFS_PROP_ACLINHERIT:
4508                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4509                     nvpair_value_uint64(pair, &intval) == 0) {
4510                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
4511                             zfs_earlier_version(dsname,
4512                             SPA_VERSION_PASSTHROUGH_X))
4513                                 return (SET_ERROR(ENOTSUP));
4514                 }
4515                 break;
4516 
4517         case ZFS_PROP_WBC_MODE:
4518                 {
4519                         spa_t *spa;
4520                         boolean_t wbc_feature_enabled;
4521 
4522                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4523                                 return (err);
4524 
4525                         wbc_feature_enabled =
4526                             spa_feature_is_enabled(spa, SPA_FEATURE_WBC);
4527                         spa_close(spa, FTAG);
4528 
4529                         /* WBC cannot be used without special-vdev */
4530                         if (!wbc_feature_enabled || !spa_has_special(spa))
4531                                 return (SET_ERROR(EKZFS_WBCNOTSUP));
4532 
4533                         /*
4534                          * We do not want to have races, because on
4535                          * import or after reboot WBC does registration
4536                          * asynchronously.
4537                          */
4538                         if (!spa->spa_wbc.wbc_ready_to_use)
4539                                 return (SET_ERROR(EBUSY));
4540                 }
4541                 break;
4542 
4543         case ZFS_PROP_CHECKSUM:
4544         case ZFS_PROP_DEDUP:
4545         {
4546                 spa_feature_t feature;
4547                 spa_t *spa;
4548 
4549                 /* dedup feature version checks */
4550                 if (prop == ZFS_PROP_DEDUP &&
4551                     zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4552                         return (SET_ERROR(ENOTSUP));
4553 
4554                 if (nvpair_value_uint64(pair, &intval) != 0)
4555                         return (SET_ERROR(EINVAL));
4556 
4557                 /* check prop value is enabled in features */
4558                 feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4559                 if (feature == SPA_FEATURE_NONE)
4560                         break;
4561 
4562                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4563                         return (err);
4564                 /*
4565                  * Salted checksums are not supported on root pools.
4566                  */
4567                 if (spa_bootfs(spa) != 0 &&
4568                     intval < ZIO_CHECKSUM_FUNCTIONS &&
4569                     (zio_checksum_table[intval].ci_flags &
4570                     ZCHECKSUM_FLAG_SALTED)) {
4571                         spa_close(spa, FTAG);
4572                         return (SET_ERROR(ERANGE));
4573                 }
4574                 if (!spa_feature_is_enabled(spa, feature)) {
4575                         spa_close(spa, FTAG);
4576                         return (SET_ERROR(ENOTSUP));
4577                 }
4578                 spa_close(spa, FTAG);
4579                 break;
4580         }
4581         }
4582 
4583         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4584 }
4585 
4586 /*
4587  * Checks for a race condition to make sure we don't increment a feature flag
4588  * multiple times.
4589  */
4590 static int
4591 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4592 {
4593         spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4594         spa_feature_t *featurep = arg;
4595 
4596         if (!spa_feature_is_active(spa, *featurep))
4597                 return (0);
4598         else
4599                 return (SET_ERROR(EBUSY));
4600 }
4601 
4602 /*
4603  * The callback invoked on feature activation in the sync task caused by
4604  * zfs_prop_activate_feature.
4605  */
4606 static void
4607 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4608 {
4609         spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4610         spa_feature_t *featurep = arg;
4611 
4612         spa_feature_incr(spa, *featurep, tx);
4613 }
4614 
4615 /*
4616  * Activates a feature on a pool in response to a property setting. This
4617  * creates a new sync task which modifies the pool to reflect the feature
4618  * as being active.
4619  */
4620 static int
4621 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4622 {
4623         int err;
4624 
4625         /* EBUSY here indicates that the feature is already active */
4626         err = dsl_sync_task(spa_name(spa),
4627             zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4628             &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4629 
4630         if (err != 0 && err != EBUSY)
4631                 return (err);
4632         else
4633                 return (0);
4634 }
4635 
4636 /*
4637  * Removes properties from the given props list that fail permission checks
4638  * needed to clear them and to restore them in case of a receive error. For each
4639  * property, make sure we have both set and inherit permissions.
4640  *
4641  * Returns the first error encountered if any permission checks fail. If the
4642  * caller provides a non-NULL errlist, it also gives the complete list of names
4643  * of all the properties that failed a permission check along with the
4644  * corresponding error numbers. The caller is responsible for freeing the
4645  * returned errlist.
4646  *
4647  * If every property checks out successfully, zero is returned and the list
4648  * pointed at by errlist is NULL.
4649  */
4650 static int
4651 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4652 {
4653         zfs_cmd_t *zc;
4654         nvpair_t *pair, *next_pair;
4655         nvlist_t *errors;
4656         int err, rv = 0;
4657 
4658         if (props == NULL)
4659                 return (0);
4660 
4661         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4662 
4663         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4664         (void) strcpy(zc->zc_name, dataset);
4665         pair = nvlist_next_nvpair(props, NULL);
4666         while (pair != NULL) {
4667                 next_pair = nvlist_next_nvpair(props, pair);
4668 
4669                 (void) strcpy(zc->zc_value, nvpair_name(pair));
4670                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4671                     (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4672                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4673                         VERIFY(nvlist_add_int32(errors,
4674                             zc->zc_value, err) == 0);
4675                 }
4676                 pair = next_pair;
4677         }
4678         kmem_free(zc, sizeof (zfs_cmd_t));
4679 
4680         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4681                 nvlist_free(errors);
4682                 errors = NULL;
4683         } else {
4684                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
4685         }
4686 
4687         if (errlist == NULL)
4688                 nvlist_free(errors);
4689         else
4690                 *errlist = errors;
4691 
4692         return (rv);
4693 }
4694 
4695 static boolean_t
4696 propval_equals(nvpair_t *p1, nvpair_t *p2)
4697 {
4698         if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4699                 /* dsl_prop_get_all_impl() format */
4700                 nvlist_t *attrs;
4701                 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4702                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4703                     &p1) == 0);
4704         }
4705 
4706         if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4707                 nvlist_t *attrs;
4708                 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4709                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4710                     &p2) == 0);
4711         }
4712 
4713         if (nvpair_type(p1) != nvpair_type(p2))
4714                 return (B_FALSE);
4715 
4716         if (nvpair_type(p1) == DATA_TYPE_STRING) {
4717                 char *valstr1, *valstr2;
4718 
4719                 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4720                 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4721                 return (strcmp(valstr1, valstr2) == 0);
4722         } else {
4723                 uint64_t intval1, intval2;
4724 
4725                 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4726                 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4727                 return (intval1 == intval2);
4728         }
4729 }
4730 
4731 /*
4732  * Remove properties from props if they are not going to change (as determined
4733  * by comparison with origprops). Remove them from origprops as well, since we
4734  * do not need to clear or restore properties that won't change.
4735  */
4736 static void
4737 props_reduce(nvlist_t *props, nvlist_t *origprops)
4738 {
4739         nvpair_t *pair, *next_pair;
4740 
4741         if (origprops == NULL)
4742                 return; /* all props need to be received */
4743 
4744         pair = nvlist_next_nvpair(props, NULL);
4745         while (pair != NULL) {
4746                 const char *propname = nvpair_name(pair);
4747                 nvpair_t *match;
4748 
4749                 next_pair = nvlist_next_nvpair(props, pair);
4750 
4751                 if ((nvlist_lookup_nvpair(origprops, propname,
4752                     &match) != 0) || !propval_equals(pair, match))
4753                         goto next; /* need to set received value */
4754 
4755                 /* don't clear the existing received value */
4756                 (void) nvlist_remove_nvpair(origprops, match);
4757                 /* don't bother receiving the property */
4758                 (void) nvlist_remove_nvpair(props, pair);
4759 next:
4760                 pair = next_pair;
4761         }
4762 }
4763 
4764 /*
4765  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4766  * For example, refquota cannot be set until after the receipt of a dataset,
4767  * because in replication streams, an older/earlier snapshot may exceed the
4768  * refquota.  We want to receive the older/earlier snapshot, but setting
4769  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4770  * the older/earlier snapshot from being received (with EDQUOT).
4771  *
4772  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4773  *
4774  * libzfs will need to be judicious handling errors encountered by props
4775  * extracted by this function.
4776  */
4777 static nvlist_t *
4778 extract_delay_props(nvlist_t *props)
4779 {
4780         nvlist_t *delayprops;
4781         nvpair_t *nvp, *tmp;
4782         static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4783         int i;
4784 
4785         VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4786 
4787         for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4788             nvp = nvlist_next_nvpair(props, nvp)) {
4789                 /*
4790                  * strcmp() is safe because zfs_prop_to_name() always returns
4791                  * a bounded string.
4792                  */
4793                 for (i = 0; delayable[i] != 0; i++) {
4794                         if (strcmp(zfs_prop_to_name(delayable[i]),
4795                             nvpair_name(nvp)) == 0) {
4796                                 break;
4797                         }
4798                 }
4799                 if (delayable[i] != 0) {
4800                         tmp = nvlist_prev_nvpair(props, nvp);
4801                         VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4802                         VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4803                         nvp = tmp;
4804                 }
4805         }
4806 
4807         if (nvlist_empty(delayprops)) {
4808                 nvlist_free(delayprops);
4809                 delayprops = NULL;
4810         }
4811         return (delayprops);
4812 }
4813 
/*
 * DEBUG-only fault-injection flag.  When it is set, dmu_recv_impl() clears
 * it and forces its result to an error once the receive has finished, so
 * the property-restore path can be exercised.
 */
#ifdef  DEBUG
static boolean_t zfs_ioc_recv_inject_err;
#endif
4817 
4818 int
4819 dmu_recv_impl(int fd, char *tofs, char *tosnap, char *origin,
4820     dmu_replay_record_t *drr_begin, boolean_t is_resumable, nvlist_t *props,
4821     nvlist_t *errors, uint64_t *errf, int cfd, uint64_t *ahdl, uint64_t *sz,
4822     boolean_t force, dmu_krrp_task_t *krrp_task)
4823 {
4824         file_t *fp = getf(fd);
4825         dmu_recv_cookie_t drc;
4826         int error = 0;
4827         int props_error = 0;
4828         offset_t off;
4829         nvlist_t *origprops = NULL; /* existing properties */
4830         nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4831         boolean_t first_recvd_props = B_FALSE;
4832         nvlist_t *event;
4833         boolean_t force_cksum =
4834             !krrp_task || krrp_task->buffer_args.force_cksum;
4835 
4836         ASSERT(fp || krrp_task);
4837 
4838         error = dmu_recv_begin(tofs, tosnap,
4839             drr_begin, force, is_resumable, force_cksum, origin, &drc);
4840 
4841         if (error != 0)
4842                 goto out;
4843 
4844         drc.drc_krrp_task = krrp_task;
4845         /*
4846          * Set properties before we receive the stream so that they are applied
4847          * to the new data. Note that we must call dmu_recv_stream() if
4848          * dmu_recv_begin() succeeds.
4849          */
4850         if (props != NULL && !drc.drc_newfs) {
4851                 if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4852                     SPA_VERSION_RECVD_PROPS &&
4853                     !dsl_prop_get_hasrecvd(tofs))
4854                         first_recvd_props = B_TRUE;
4855 
4856                 /*
4857                  * If new received properties are supplied, they are to
4858                  * completely replace the existing received properties, so stash
4859                  * away the existing ones.
4860                  */
4861                 if (dsl_prop_get_received(tofs, &origprops) == 0) {
4862                         nvlist_t *errlist = NULL;
4863                         /*
4864                          * Don't bother writing a property if its value won't
4865                          * change (and avoid the unnecessary security checks).
4866                          *
4867                          * The first receive after SPA_VERSION_RECVD_PROPS is a
4868                          * special case where we blow away all local properties
4869                          * regardless.
4870                          */
4871                         if (!first_recvd_props)
4872                                 props_reduce(props, origprops);
4873                         if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4874                                 (void) nvlist_merge(errors, errlist, 0);
4875                         nvlist_free(errlist);
4876 
4877                         if (clear_received_props(tofs, origprops,
4878                             first_recvd_props ? NULL : props) != 0)
4879                                 *errf |= ZPROP_ERR_NOCLEAR;
4880                 } else {
4881                         *errf |= ZPROP_ERR_NOCLEAR;
4882                 }
4883         }
4884 
4885         if (props != NULL) {
4886                 props_error = dsl_prop_set_hasrecvd(tofs);
4887 
4888                 if (props_error == 0) {
4889                         delayprops = extract_delay_props(props);
4890                         (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4891                             props, errors);
4892                 }
4893         }
4894 
4895         if (fp) {
4896                 off = fp->f_offset;
4897         } else {
4898                 off = 0;
4899         }
4900         error = dmu_recv_stream(&drc, fp ? fp->f_vnode : NULL,
4901             &off, cfd, ahdl, krrp_task);
4902 
4903         if (error == 0) {
4904                 zfsvfs_t *zfsvfs = NULL;
4905 
4906                 error = getzfsvfs(tofs, &zfsvfs);
4907                 if (error == 0) {
4908                         /* online recv */
4909                         dsl_dataset_t *ds;
4910                         int end_err;
4911 
4912                         ds = dmu_objset_ds(zfsvfs->z_os);
4913                         error = zfs_suspend_fs(zfsvfs);
4914                         /*
4915                          * If the suspend fails, then the recv_end will
4916                          * likely also fail, and clean up after itself.
4917                          */
4918                         end_err = dmu_recv_end(&drc, zfsvfs);
4919                         if (error == 0)
4920                                 error = zfs_resume_fs(zfsvfs, ds);
4921                         error = error ? error : end_err;
4922                         VFS_RELE(zfsvfs->z_vfs);
4923                 } else {
4924                         error = dmu_recv_end(&drc, NULL);
4925                 }
4926 
4927                 /* Set delayed properties now, after we're done receiving. */
4928                 if (delayprops != NULL && error == 0) {
4929                         (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4930                             delayprops, errors);
4931                 }
4932         }
4933 
4934         if (delayprops != NULL) {
4935                 /*
4936                  * Merge delayed props back in with initial props, in case
4937                  * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4938                  * we have to make sure clear_received_props() includes
4939                  * the delayed properties).
4940                  *
4941                  * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4942                  * using ASSERT() will be just like a VERIFY.
4943                  */
4944                 ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4945                 nvlist_free(delayprops);
4946         }
4947 
4948         if (fp) {
4949                 *sz = off - fp->f_offset;
4950                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4951                         fp->f_offset = off;
4952         } else {
4953                 *sz = off;
4954         }
4955         if (error == 0) {
4956                 char val[MAXNAMELEN];
4957 
4958                 (void) strcpy(val, tofs);
4959                 (void) strcat(val, "@");
4960                 (void) strcat(val, tosnap);
4961 
4962                 event = fnvlist_alloc();
4963                 if (props != NULL)
4964                         fnvlist_add_nvlist(event, "props", props);
4965                 fnvlist_add_string(event, "origin", tofs);
4966                 fnvlist_add_string(event, "tosnap", val);
4967                 fnvlist_add_uint64(event, "bytes", *sz);
4968                 fnvlist_add_boolean_value(event, "newds", drc.drc_newfs);
4969                 zfs_event_post(ZFS_EC_STATUS, "recv", event);
4970         }
4971 
4972 #ifdef  DEBUG
4973         if (zfs_ioc_recv_inject_err) {
4974                 zfs_ioc_recv_inject_err = B_FALSE;
4975                 error = 1;
4976         }
4977 #endif
4978         /*
4979          * On error, restore the original props.
4980          */
4981         if (error != 0 && props != NULL && !drc.drc_newfs) {
4982                 if (clear_received_props(tofs, props, NULL) != 0) {
4983                         /*
4984                          * We failed to clear the received properties.
4985                          * Since we may have left a $recvd value on the
4986                          * system, we can't clear the $hasrecvd flag.
4987                          */
4988                         *errf |= ZPROP_ERR_NORESTORE;
4989                 } else if (first_recvd_props) {
4990                         dsl_prop_unset_hasrecvd(tofs);
4991                 }
4992 
4993                 if (origprops == NULL && !drc.drc_newfs) {
4994                         /* We failed to stash the original properties. */
4995                         *errf |= ZPROP_ERR_NORESTORE;
4996                 }
4997 
4998                 /*
4999                  * dsl_props_set() will not convert RECEIVED to LOCAL on or
5000                  * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
5001                  * explictly if we're restoring local properties cleared in the
5002                  * first new-style receive.
5003                  */
5004                 if (origprops != NULL &&
5005                     zfs_set_prop_nvlist(tofs, (first_recvd_props ?
5006                     ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
5007                     origprops, NULL) != 0) {
5008                         /*
5009                          * We stashed the original properties but failed to
5010                          * restore them.
5011                          */
5012                         *errf |= ZPROP_ERR_NORESTORE;
5013                 }
5014         }
5015 out:
5016         nvlist_free(origprops);
5017         if (fp)
5018                 releasef(fd);
5019 
5020         if (error == 0)
5021                 error = props_error;
5022 
5023         return (error);
5024 }
5025 
5026 /*
5027  * inputs:
5028  * zc_name              name of containing filesystem
5029  * zc_nvlist_src{_size} nvlist of properties to apply
5030  * zc_value             name of snapshot to create
5031  * zc_string            name of clone origin (if DRR_FLAG_CLONE)
5032  * zc_cookie            file descriptor to recv from
5033  * zc_begin_record      the BEGIN record of the stream (not byteswapped)
5034  * zc_guid              force flag
5035  * zc_cleanup_fd        cleanup-on-exit file descriptor
5036  * zc_action_handle     handle for this guid/ds mapping (or zero on first call)
5037  * zc_resumable         if data is incomplete assume sender will resume
5038  *
5039  * outputs:
5040  * zc_cookie            number of bytes read
5041  * zc_nvlist_dst{_size} error for each unapplied received property
5042  * zc_obj               zprop_errflags_t
5043  * zc_action_handle     handle for this guid/ds mapping
5044  */
5045 static int
5046 zfs_ioc_recv(zfs_cmd_t *zc)
5047 {
5048         int fd = zc->zc_cookie;
5049         char tofs[ZFS_MAX_DATASET_NAME_LEN];
5050         char *tosnap;
5051         char *origin = NULL;
5052         nvlist_t *errors;
5053         nvlist_t *props = NULL; /* sent properties */
5054         boolean_t force = (boolean_t)zc->zc_guid;
5055         int err;
5056 
5057         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5058             strchr(zc->zc_value, '@') == NULL ||
5059             strchr(zc->zc_value, '%'))
5060                 return (SET_ERROR(EINVAL));
5061 
5062         (void) strcpy(tofs, zc->zc_value);
5063         tosnap = strchr(tofs, '@');
5064         *tosnap++ = '\0';
5065 
5066         if (zc->zc_string[0])
5067                 origin = zc->zc_string;
5068 
5069         if (zc->zc_nvlist_src != NULL &&
5070             (err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5071             zc->zc_iflags, &props)) != 0)
5072                 return (err);
5073 
5074         errors = fnvlist_alloc();
5075 
5076         err = dmu_recv_impl(fd, tofs, tosnap, origin,
5077             &zc->zc_begin_record, zc->zc_resumable, props, errors, &zc->zc_obj,
5078             zc->zc_cleanup_fd, &zc->zc_action_handle, &zc->zc_cookie,
5079             force, NULL);
5080 
5081         /*
5082          * Now that all props, initial and delayed, are set, report the prop
5083          * errors to the caller.
5084          */
5085         if (zc->zc_nvlist_dst_size != 0 &&
5086             (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5087             put_nvlist(zc, errors) != 0)) {
5088                 /*
5089                  * Caller made zc->zc_nvlist_dst less than the minimum expected
5090                  * size or supplied an invalid address.
5091                  */
5092                 err = SET_ERROR(EINVAL);
5093         }
5094 
5095         nvlist_free(errors);
5096         nvlist_free(props);
5097         return (err);
5098 
5099 }
5100 
5101 /*
5102  * inputs:
5103  * zc_name      name of snapshot to send
5104  * zc_cookie    file descriptor to send stream to
5105  * zc_obj       fromorigin flag (mutually exclusive with zc_fromobj)
5106  * zc_sendobj   objsetid of snapshot to send
5107  * zc_fromobj   objsetid of incremental fromsnap (may be zero)
5108  * zc_guid      if set, estimate size of stream only.  zc_cookie is ignored.
5109  *              output size in zc_objset_type.
5110  * zc_flags     lzc_send_flags
5111  *
5112  * outputs:
5113  * zc_objset_type       estimated size, if zc_guid is set
5114  */
5115 static int
5116 zfs_ioc_send(zfs_cmd_t *zc)
5117 {
5118         int error;
5119         offset_t off;
5120         boolean_t estimate = (zc->zc_guid != 0);
5121         boolean_t embedok = (zc->zc_flags & 0x1);
5122         boolean_t large_block_ok = (zc->zc_flags & 0x2);
5123         boolean_t compressok = (zc->zc_flags & 0x4);
5124 
5125         if (zc->zc_obj != 0) {
5126                 dsl_pool_t *dp;
5127                 dsl_dataset_t *tosnap;
5128 
5129                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5130                 if (error != 0)
5131                         return (error);
5132 
5133                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5134                 if (error != 0) {
5135                         dsl_pool_rele(dp, FTAG);
5136                         return (error);
5137                 }
5138 
5139                 if (dsl_dir_is_clone(tosnap->ds_dir))
5140                         zc->zc_fromobj =
5141                             dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5142                 dsl_dataset_rele(tosnap, FTAG);
5143                 dsl_pool_rele(dp, FTAG);
5144         }
5145 
5146         if (estimate) {
5147                 dsl_pool_t *dp;
5148                 dsl_dataset_t *tosnap;
5149                 dsl_dataset_t *fromsnap = NULL;
5150 
5151                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5152                 if (error != 0)
5153                         return (error);
5154 
5155                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5156                 if (error != 0) {
5157                         dsl_pool_rele(dp, FTAG);
5158                         return (error);
5159                 }
5160 
5161                 if (zc->zc_fromobj != 0) {
5162                         error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5163                             FTAG, &fromsnap);
5164                         if (error != 0) {
5165                                 dsl_dataset_rele(tosnap, FTAG);
5166                                 dsl_pool_rele(dp, FTAG);
5167                                 return (error);
5168                         }
5169                 }
5170 
5171                 error = dmu_send_estimate(tosnap, fromsnap, compressok,
5172                     &zc->zc_objset_type);
5173 
5174                 if (fromsnap != NULL)
5175                         dsl_dataset_rele(fromsnap, FTAG);
5176                 dsl_dataset_rele(tosnap, FTAG);
5177                 dsl_pool_rele(dp, FTAG);
5178         } else {
5179                 offset_t off_starting;
5180                 file_t *fp = getf(zc->zc_cookie);
5181                 if (fp == NULL)
5182                         return (SET_ERROR(EBADF));
5183 
5184                 off_starting = off = fp->f_offset;
5185                 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5186                     zc->zc_fromobj, embedok, large_block_ok, compressok,
5187                     zc->zc_cookie, fp->f_vnode, &off, zc->zc_sendsize);
5188 
5189                 zc->zc_sendcounter = off - off_starting;
5190                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5191                         fp->f_offset = off;
5192                 releasef(zc->zc_cookie);
5193         }
5194         return (error);
5195 }
5196 
5197 /*
5198  * inputs:
5199  * zc_name      name of snapshot on which to report progress
5200  * zc_cookie    file descriptor of send stream
5201  *
5202  * outputs:
5203  * zc_cookie    number of bytes written in send stream thus far
5204  */
5205 static int
5206 zfs_ioc_send_progress(zfs_cmd_t *zc)
5207 {
5208         dsl_pool_t *dp;
5209         dsl_dataset_t *ds;
5210         dmu_sendarg_t *dsp = NULL;
5211         int error;
5212 
5213         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5214         if (error != 0)
5215                 return (error);
5216 
5217         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5218         if (error != 0) {
5219                 dsl_pool_rele(dp, FTAG);
5220                 return (error);
5221         }
5222 
5223         mutex_enter(&ds->ds_sendstream_lock);
5224 
5225         /*
5226          * Iterate over all the send streams currently active on this dataset.
5227          * If there's one which matches the specified file descriptor _and_ the
5228          * stream was started by the current process, return the progress of
5229          * that stream.
5230          */
5231         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5232             dsp = list_next(&ds->ds_sendstreams, dsp)) {
5233                 if (dsp->dsa_outfd == zc->zc_cookie &&
5234                     dsp->dsa_proc == curproc)
5235                         break;
5236         }
5237 
5238         if (dsp != NULL)
5239                 zc->zc_cookie = *(dsp->dsa_off);
5240         else
5241                 error = SET_ERROR(ENOENT);
5242 
5243         mutex_exit(&ds->ds_sendstream_lock);
5244         dsl_dataset_rele(ds, FTAG);
5245         dsl_pool_rele(dp, FTAG);
5246         return (error);
5247 }
5248 
5249 static int
5250 zfs_ioc_inject_fault(zfs_cmd_t *zc)
5251 {
5252         int id, error;
5253 
5254         error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5255             &zc->zc_inject_record);
5256 
5257         if (error == 0)
5258                 zc->zc_guid = (uint64_t)id;
5259 
5260         return (error);
5261 }
5262 
5263 static int
5264 zfs_ioc_clear_fault(zfs_cmd_t *zc)
5265 {
5266         return (zio_clear_fault((int)zc->zc_guid));
5267 }
5268 
5269 static int
5270 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5271 {
5272         int id = (int)zc->zc_guid;
5273         int error;
5274 
5275         error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5276             &zc->zc_inject_record);
5277 
5278         zc->zc_guid = id;
5279 
5280         return (error);
5281 }
5282 
5283 static int
5284 zfs_ioc_error_log(zfs_cmd_t *zc)
5285 {
5286         spa_t *spa;
5287         int error;
5288         size_t count = (size_t)zc->zc_nvlist_dst_size;
5289 
5290         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5291                 return (error);
5292 
5293         error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5294             &count);
5295         if (error == 0)
5296                 zc->zc_nvlist_dst_size = count;
5297         else
5298                 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5299 
5300         spa_close(spa, FTAG);
5301 
5302         return (error);
5303 }
5304 
5305 static int
5306 zfs_ioc_clear(zfs_cmd_t *zc)
5307 {
5308         spa_t *spa;
5309         vdev_t *vd;
5310         int error;
5311 
5312         /*
5313          * On zpool clear we also fix up missing slogs
5314          */
5315         mutex_enter(&spa_namespace_lock);
5316         spa = spa_lookup(zc->zc_name);
5317         if (spa == NULL) {
5318                 mutex_exit(&spa_namespace_lock);
5319                 return (SET_ERROR(EIO));
5320         }
5321         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5322                 /* we need to let spa_open/spa_load clear the chains */
5323                 spa_set_log_state(spa, SPA_LOG_CLEAR);
5324         }
5325         spa->spa_last_open_failed = 0;
5326         mutex_exit(&spa_namespace_lock);
5327 
5328         if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5329                 error = spa_open(zc->zc_name, &spa, FTAG);
5330         } else {
5331                 nvlist_t *policy;
5332                 nvlist_t *config = NULL;
5333 
5334                 if (zc->zc_nvlist_src == NULL)
5335                         return (SET_ERROR(EINVAL));
5336 
5337                 if ((error = get_nvlist(zc->zc_nvlist_src,
5338                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5339                         error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5340                             policy, &config);
5341                         if (config != NULL) {
5342                                 int err;
5343 
5344                                 if ((err = put_nvlist(zc, config)) != 0)
5345                                         error = err;
5346                                 nvlist_free(config);
5347                         }
5348                         nvlist_free(policy);
5349                 }
5350         }
5351 
5352         if (error != 0)
5353                 return (error);
5354 
5355         spa_vdev_state_enter(spa, SCL_NONE);
5356 
5357         if (zc->zc_guid == 0) {
5358                 vd = NULL;
5359         } else {
5360                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5361                 if (vd == NULL) {
5362                         (void) spa_vdev_state_exit(spa, NULL, ENODEV);
5363                         spa_close(spa, FTAG);
5364                         return (SET_ERROR(ENODEV));
5365                 }
5366         }
5367 
5368         vdev_clear(spa, vd);
5369 
5370         (void) spa_vdev_state_exit(spa, NULL, 0);
5371 
5372         /*
5373          * Resume any suspended I/Os.
5374          */
5375         if (zio_resume(spa) != 0)
5376                 error = SET_ERROR(EIO);
5377 
5378         spa_close(spa, FTAG);
5379 
5380         return (error);
5381 }
5382 
5383 static int
5384 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
5385 {
5386         spa_t *spa;
5387         int error;
5388 
5389         error = spa_open(zc->zc_name, &spa, FTAG);
5390         if (error != 0)
5391                 return (error);
5392 
5393         spa_vdev_state_enter(spa, SCL_NONE);
5394 
5395         /*
5396          * If a resilver is already in progress then set the
5397          * spa_scrub_reopen flag to B_TRUE so that we don't restart
5398          * the scan as a side effect of the reopen. Otherwise, let
5399          * vdev_open() decided if a resilver is required.
5400          */
5401         spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
5402         vdev_reopen(spa->spa_root_vdev);
5403         spa->spa_scrub_reopen = B_FALSE;
5404 
5405         (void) spa_vdev_state_exit(spa, NULL, 0);
5406         spa_close(spa, FTAG);
5407         return (0);
5408 }
5409 /*
5410  * inputs:
5411  * zc_name      name of filesystem
5412  *
5413  * outputs:
5414  * zc_string    name of conflicting snapshot, if there is one
5415  */
5416 static int
5417 zfs_ioc_promote(zfs_cmd_t *zc)
5418 {
5419         dsl_pool_t *dp;
5420         dsl_dataset_t *ds, *ods;
5421         char origin[ZFS_MAX_DATASET_NAME_LEN];
5422         char *cp;
5423         int error;
5424         nvlist_t *event;
5425 
5426         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5427         if (error != 0)
5428                 return (error);
5429 
5430         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5431         if (error != 0) {
5432                 dsl_pool_rele(dp, FTAG);
5433                 return (error);
5434         }
5435 
5436         if (!dsl_dir_is_clone(ds->ds_dir)) {
5437                 dsl_dataset_rele(ds, FTAG);
5438                 dsl_pool_rele(dp, FTAG);
5439                 return (SET_ERROR(EINVAL));
5440         }
5441 
5442         error = dsl_dataset_hold_obj(dp,
5443             dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5444         if (error != 0) {
5445                 dsl_dataset_rele(ds, FTAG);
5446                 dsl_pool_rele(dp, FTAG);
5447                 return (error);
5448         }
5449 
5450         dsl_dataset_name(ods, origin);
5451         dsl_dataset_rele(ods, FTAG);
5452         dsl_dataset_rele(ds, FTAG);
5453         dsl_pool_rele(dp, FTAG);
5454 
5455         /*
5456          * We don't need to unmount *all* the origin fs's snapshots, but
5457          * it's easier.
5458          */
5459         cp = strchr(origin, '@');
5460         if (cp)
5461                 *cp = '\0';
5462         (void) dmu_objset_find(origin,
5463             zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5464         error = dsl_dataset_promote(zc->zc_name, zc->zc_string);
5465 
5466         if (error == 0) {
5467                 event = fnvlist_alloc();
5468                 fnvlist_add_string(event, "fsname", zc->zc_name);
5469                 fnvlist_add_string(event, "origin", zc->zc_value);
5470                 zfs_event_post(ZFS_EC_STATUS, "promote", event);
5471         }
5472 
5473         return (error);
5474 }
5475 
5476 /*
5477  * Retrieve a single {user|group}{used|quota}@... property.
5478  *
5479  * inputs:
5480  * zc_name      name of filesystem
5481  * zc_objset_type zfs_userquota_prop_t
5482  * zc_value     domain name (eg. "S-1-234-567-89")
5483  * zc_guid      RID/UID/GID
5484  *
5485  * outputs:
5486  * zc_cookie    property value
5487  */
5488 static int
5489 zfs_ioc_userspace_one(zfs_cmd_t *zc)
5490 {
5491         zfsvfs_t *zfsvfs;
5492         int error;
5493 
5494         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5495                 return (SET_ERROR(EINVAL));
5496 
5497         error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5498         if (error != 0)
5499                 return (error);
5500 
5501         error = zfs_userspace_one(zfsvfs,
5502             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5503         zfsvfs_rele(zfsvfs, FTAG);
5504 
5505         return (error);
5506 }
5507 
5508 /*
5509  * inputs:
5510  * zc_name              name of filesystem
5511  * zc_cookie            zap cursor
5512  * zc_objset_type       zfs_userquota_prop_t
5513  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5514  *
5515  * outputs:
5516  * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
5517  * zc_cookie    zap cursor
5518  */
5519 static int
5520 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5521 {
5522         zfsvfs_t *zfsvfs;
5523         int bufsize = zc->zc_nvlist_dst_size;
5524 
5525         if (bufsize <= 0)
5526                 return (SET_ERROR(ENOMEM));
5527 
5528         int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5529         if (error != 0)
5530                 return (error);
5531 
5532         void *buf = kmem_alloc(bufsize, KM_SLEEP);
5533 
5534         error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5535             buf, &zc->zc_nvlist_dst_size);
5536 
5537         if (error == 0) {
5538                 error = xcopyout(buf,
5539                     (void *)(uintptr_t)zc->zc_nvlist_dst,
5540                     zc->zc_nvlist_dst_size);
5541         }
5542         kmem_free(buf, bufsize);
5543         zfsvfs_rele(zfsvfs, FTAG);
5544 
5545         return (error);
5546 }
5547 
5548 /*
5549  * inputs:
5550  * zc_name              name of filesystem
5551  *
5552  * outputs:
5553  * none
5554  */
5555 static int
5556 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5557 {
5558         objset_t *os;
5559         int error = 0;
5560         zfsvfs_t *zfsvfs;
5561 
5562         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5563                 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5564                         /*
5565                          * If userused is not enabled, it may be because the
5566                          * objset needs to be closed & reopened (to grow the
5567                          * objset_phys_t).  Suspend/resume the fs will do that.
5568                          */
5569                         dsl_dataset_t *ds;
5570 
5571                         ds = dmu_objset_ds(zfsvfs->z_os);
5572                         error = zfs_suspend_fs(zfsvfs);
5573                         if (error == 0) {
5574                                 dmu_objset_refresh_ownership(zfsvfs->z_os,
5575                                     zfsvfs);
5576                                 error = zfs_resume_fs(zfsvfs, ds);
5577                         }
5578                 }
5579                 if (error == 0)
5580                         error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5581                 VFS_RELE(zfsvfs->z_vfs);
5582         } else {
5583                 /* XXX kind of reading contents without owning */
5584                 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5585                 if (error != 0)
5586                         return (error);
5587 
5588                 error = dmu_objset_userspace_upgrade(os);
5589                 dmu_objset_rele(os, FTAG);
5590         }
5591 
5592         return (error);
5593 }
5594 
/*
 * We don't want a hard dependency on a few special symbols in the
 * sharefs, nfs and smbsrv modules.  They are looked up when needed,
 * the first time a file system is shared.
 * None of sharefs, nfs or smbsrv is an unloadable module.
 */

/* Entry points resolved with ddi_modsym(); NULL until first lookup. */
int (*znfsexport_fs)(void *arg);        /* "nfs_export" from nfs_mod */
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
                                        /* "sharefs_impl" from sharefs_mod */
int (*zsmbexport_fs)(void *arg, boolean_t add_share);
                                        /* "smb_server_share" from smbsrv_mod */

/* Set to 1 once the matching protocol's setup in zfs_ioc_share() is done. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles returned by ddi_modopen(). */
ddi_modhandle_t nfs_mod;                /* "fs/nfs" */
ddi_modhandle_t sharefs_mod;            /* "fs/sharefs" */
ddi_modhandle_t smbsrv_mod;             /* "drv/smbsrv" */
/* Held while the handles and entry points above are being set up. */
kmutex_t zfs_share_lock;
5613 
5614 static int
5615 zfs_init_sharefs()
5616 {
5617         int error;
5618 
5619         ASSERT(MUTEX_HELD(&zfs_share_lock));
5620         /* Both NFS and SMB shares also require sharetab support. */
5621         if (sharefs_mod == NULL && ((sharefs_mod =
5622             ddi_modopen("fs/sharefs",
5623             KRTLD_MODE_FIRST, &error)) == NULL)) {
5624                 return (SET_ERROR(ENOSYS));
5625         }
5626         if (zshare_fs == NULL && ((zshare_fs =
5627             (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
5628             ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
5629                 return (SET_ERROR(ENOSYS));
5630         }
5631         return (0);
5632 }
5633 
5634 static int
5635 zfs_ioc_share(zfs_cmd_t *zc)
5636 {
5637         int error;
5638         int opcode;
5639 
5640         switch (zc->zc_share.z_sharetype) {
5641         case ZFS_SHARE_NFS:
5642         case ZFS_UNSHARE_NFS:
5643                 if (zfs_nfsshare_inited == 0) {
5644                         mutex_enter(&zfs_share_lock);
5645                         if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5646                             KRTLD_MODE_FIRST, &error)) == NULL)) {
5647                                 mutex_exit(&zfs_share_lock);
5648                                 return (SET_ERROR(ENOSYS));
5649                         }
5650                         if (znfsexport_fs == NULL &&
5651                             ((znfsexport_fs = (int (*)(void *))
5652                             ddi_modsym(nfs_mod,
5653                             "nfs_export", &error)) == NULL)) {
5654                                 mutex_exit(&zfs_share_lock);
5655                                 return (SET_ERROR(ENOSYS));
5656                         }
5657                         error = zfs_init_sharefs();
5658                         if (error != 0) {
5659                                 mutex_exit(&zfs_share_lock);
5660                                 return (SET_ERROR(ENOSYS));
5661                         }
5662                         zfs_nfsshare_inited = 1;
5663                         mutex_exit(&zfs_share_lock);
5664                 }
5665                 break;
5666         case ZFS_SHARE_SMB:
5667         case ZFS_UNSHARE_SMB:
5668                 if (zfs_smbshare_inited == 0) {
5669                         mutex_enter(&zfs_share_lock);
5670                         if (smbsrv_mod == NULL && ((smbsrv_mod =
5671                             ddi_modopen("drv/smbsrv",
5672                             KRTLD_MODE_FIRST, &error)) == NULL)) {
5673                                 mutex_exit(&zfs_share_lock);
5674                                 return (SET_ERROR(ENOSYS));
5675                         }
5676                         if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5677                             (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5678                             "smb_server_share", &error)) == NULL)) {
5679                                 mutex_exit(&zfs_share_lock);
5680                                 return (SET_ERROR(ENOSYS));
5681                         }
5682                         error = zfs_init_sharefs();
5683                         if (error != 0) {
5684                                 mutex_exit(&zfs_share_lock);
5685                                 return (SET_ERROR(ENOSYS));
5686                         }
5687                         zfs_smbshare_inited = 1;
5688                         mutex_exit(&zfs_share_lock);
5689                 }
5690                 break;
5691         default:
5692                 return (SET_ERROR(EINVAL));
5693         }
5694 
5695         switch (zc->zc_share.z_sharetype) {
5696         case ZFS_SHARE_NFS:
5697         case ZFS_UNSHARE_NFS:
5698                 if (error =
5699                     znfsexport_fs((void *)
5700                     (uintptr_t)zc->zc_share.z_exportdata))
5701                         return (error);
5702                 break;
5703         case ZFS_SHARE_SMB:
5704         case ZFS_UNSHARE_SMB:
5705                 if (error = zsmbexport_fs((void *)
5706                     (uintptr_t)zc->zc_share.z_exportdata,
5707                     zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5708                     B_TRUE: B_FALSE)) {
5709                         return (error);
5710                 }
5711                 break;
5712         }
5713 
5714         opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5715             zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5716             SHAREFS_ADD : SHAREFS_REMOVE;
5717 
5718         /*
5719          * Add or remove share from sharetab
5720          */
5721         error = zshare_fs(opcode,
5722             (void *)(uintptr_t)zc->zc_share.z_sharedata,
5723             zc->zc_share.z_sharemax);
5724 
5725         return (error);
5726 
5727 }
5728 
/*
 * Single-entry ACE list that zfs_ioc_smb_acl() passes (through a
 * vsecattr_t) to VOP_CREATE() for ZFS_SMB_ACL_ADD.
 * NOTE(review): the initializer presumably reads as who = (uid_t)-1,
 * access mask = ACE_ALL_PERMS, flags = ACE_EVERYONE, type = 0 -- confirm
 * against the ace_t definition.
 */
ace_t full_access[] = {
        {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
5732 
5733 /*
5734  * inputs:
5735  * zc_name              name of containing filesystem
5736  * zc_obj               object # beyond which we want next in-use object #
5737  *
5738  * outputs:
5739  * zc_obj               next in-use object #
5740  */
5741 static int
5742 zfs_ioc_next_obj(zfs_cmd_t *zc)
5743 {
5744         objset_t *os = NULL;
5745         int error;
5746 
5747         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5748         if (error != 0)
5749                 return (error);
5750 
5751         error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5752             dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5753 
5754         dmu_objset_rele(os, FTAG);
5755         return (error);
5756 }
5757 
5758 /*
5759  * inputs:
5760  * zc_name              name of filesystem
5761  * zc_value             prefix name for snapshot
5762  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
5763  *
5764  * outputs:
5765  * zc_value             short name of new snapshot
5766  */
5767 static int
5768 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5769 {
5770         char *snap_name;
5771         char *hold_name;
5772         int error;
5773         minor_t minor;
5774 
5775         error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5776         if (error != 0)
5777                 return (error);
5778 
5779         snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5780             (u_longlong_t)ddi_get_lbolt64());
5781         hold_name = kmem_asprintf("%%%s", zc->zc_value);
5782 
5783         error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5784             hold_name);
5785         if (error == 0)
5786                 (void) strcpy(zc->zc_value, snap_name);
5787         strfree(snap_name);
5788         strfree(hold_name);
5789         zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5790         return (error);
5791 }
5792 
5793 /*
5794  * inputs:
5795  * zc_name              name of "to" snapshot
5796  * zc_value             name of "from" snapshot
5797  * zc_cookie            file descriptor to write diff data on
5798  *
5799  * outputs:
5800  * dmu_diff_record_t's to the file descriptor
5801  */
5802 static int
5803 zfs_ioc_diff(zfs_cmd_t *zc)
5804 {
5805         file_t *fp;
5806         offset_t off;
5807         int error;
5808 
5809         fp = getf(zc->zc_cookie);
5810         if (fp == NULL)
5811                 return (SET_ERROR(EBADF));
5812 
5813         off = fp->f_offset;
5814 
5815         error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5816 
5817         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5818                 fp->f_offset = off;
5819         releasef(zc->zc_cookie);
5820 
5821         return (error);
5822 }
5823 
5824 /*
5825  * Remove all ACL files in shares dir
5826  */
5827 static int
5828 zfs_smb_acl_purge(znode_t *dzp)
5829 {
5830         zap_cursor_t    zc;
5831         zap_attribute_t zap;
5832         zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5833         int error;
5834 
5835         for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5836             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5837             zap_cursor_advance(&zc)) {
5838                 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5839                     NULL, 0)) != 0)
5840                         break;
5841         }
5842         zap_cursor_fini(&zc);
5843         return (error);
5844 }
5845 
5846 static int
5847 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5848 {
5849         vnode_t *vp;
5850         znode_t *dzp;
5851         vnode_t *resourcevp = NULL;
5852         znode_t *sharedir;
5853         zfsvfs_t *zfsvfs;
5854         nvlist_t *nvlist;
5855         char *src, *target;
5856         vattr_t vattr;
5857         vsecattr_t vsec;
5858         int error = 0;
5859 
5860         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5861             NO_FOLLOW, NULL, &vp)) != 0)
5862                 return (error);
5863 
5864         /* Now make sure mntpnt and dataset are ZFS */
5865 
5866         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5867             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5868             zc->zc_name) != 0)) {
5869                 VN_RELE(vp);
5870                 return (SET_ERROR(EINVAL));
5871         }
5872 
5873         dzp = VTOZ(vp);
5874         zfsvfs = dzp->z_zfsvfs;
5875         ZFS_ENTER(zfsvfs);
5876 
5877         /*
5878          * Create share dir if its missing.
5879          */
5880         mutex_enter(&zfsvfs->z_lock);
5881         if (zfsvfs->z_shares_dir == 0) {
5882                 dmu_tx_t *tx;
5883 
5884                 tx = dmu_tx_create(zfsvfs->z_os);
5885                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5886                     ZFS_SHARES_DIR);
5887                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5888                 error = dmu_tx_assign(tx, TXG_WAIT);
5889                 if (error != 0) {
5890                         dmu_tx_abort(tx);
5891                 } else {
5892                         error = zfs_create_share_dir(zfsvfs, tx);
5893                         dmu_tx_commit(tx);
5894                 }
5895                 if (error != 0) {
5896                         mutex_exit(&zfsvfs->z_lock);
5897                         VN_RELE(vp);
5898                         ZFS_EXIT(zfsvfs);
5899                         return (error);
5900                 }
5901         }
5902         mutex_exit(&zfsvfs->z_lock);
5903 
5904         ASSERT(zfsvfs->z_shares_dir);
5905         if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5906                 VN_RELE(vp);
5907                 ZFS_EXIT(zfsvfs);
5908                 return (error);
5909         }
5910 
5911         switch (zc->zc_cookie) {
5912         case ZFS_SMB_ACL_ADD:
5913                 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5914                 vattr.va_type = VREG;
5915                 vattr.va_mode = S_IFREG|0777;
5916                 vattr.va_uid = 0;
5917                 vattr.va_gid = 0;
5918 
5919                 vsec.vsa_mask = VSA_ACE;
5920                 vsec.vsa_aclentp = &full_access;
5921                 vsec.vsa_aclentsz = sizeof (full_access);
5922                 vsec.vsa_aclcnt = 1;
5923 
5924                 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5925                     &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5926                 if (resourcevp)
5927                         VN_RELE(resourcevp);
5928                 break;
5929 
5930         case ZFS_SMB_ACL_REMOVE:
5931                 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5932                     NULL, 0);
5933                 break;
5934 
5935         case ZFS_SMB_ACL_RENAME:
5936                 if ((error = get_nvlist(zc->zc_nvlist_src,
5937                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5938                         VN_RELE(vp);
5939                         VN_RELE(ZTOV(sharedir));
5940                         ZFS_EXIT(zfsvfs);
5941                         return (error);
5942                 }
5943                 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5944                     nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5945                     &target)) {
5946                         VN_RELE(vp);
5947                         VN_RELE(ZTOV(sharedir));
5948                         ZFS_EXIT(zfsvfs);
5949                         nvlist_free(nvlist);
5950                         return (error);
5951                 }
5952                 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5953                     kcred, NULL, 0);
5954                 nvlist_free(nvlist);
5955                 break;
5956 
5957         case ZFS_SMB_ACL_PURGE:
5958                 error = zfs_smb_acl_purge(sharedir);
5959                 break;
5960 
5961         default:
5962                 error = SET_ERROR(EINVAL);
5963                 break;
5964         }
5965 
5966         VN_RELE(vp);
5967         VN_RELE(ZTOV(sharedir));
5968 
5969         ZFS_EXIT(zfsvfs);
5970 
5971         return (error);
5972 }
5973 
5974 /*
5975  * innvl: {
5976  *     "holds" -> { snapname -> holdname (string), ... }
5977  *     (optional) "cleanup_fd" -> fd (int32)
5978  * }
5979  *
5980  * outnvl: {
5981  *     snapname -> error value (int32)
5982  *     ...
5983  * }
5984  */
5985 /* ARGSUSED */
5986 static int
5987 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5988 {
5989         nvpair_t *pair;
5990         nvlist_t *holds;
5991         int cleanup_fd = -1;
5992         int error;
5993         minor_t minor = 0;
5994 
5995         error = nvlist_lookup_nvlist(args, "holds", &holds);
5996         if (error != 0)
5997                 return (SET_ERROR(EINVAL));
5998 
5999         /* make sure the user didn't pass us any invalid (empty) tags */
6000         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6001             pair = nvlist_next_nvpair(holds, pair)) {
6002                 char *htag;
6003 
6004                 error = nvpair_value_string(pair, &htag);
6005                 if (error != 0)
6006                         return (SET_ERROR(error));
6007 
6008                 if (strlen(htag) == 0)
6009                         return (SET_ERROR(EINVAL));
6010         }
6011 
6012         if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6013                 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
6014                 if (error != 0)
6015                         return (error);
6016         }
6017 
6018         error = dsl_dataset_user_hold(holds, minor, errlist);
6019         if (minor != 0)
6020                 zfs_onexit_fd_rele(cleanup_fd);
6021         return (error);
6022 }
6023 
6024 /*
6025  * innvl is not used.
6026  *
6027  * outnvl: {
6028  *    holdname -> time added (uint64 seconds since epoch)
6029  *    ...
6030  * }
6031  */
6032 /* ARGSUSED */
6033 static int
6034 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
6035 {
6036         return (dsl_dataset_get_holds(snapname, outnvl));
6037 }
6038 
6039 /*
6040  * innvl: {
6041  *     snapname -> { holdname, ... }
6042  *     ...
6043  * }
6044  *
6045  * outnvl: {
6046  *     snapname -> error value (int32)
6047  *     ...
6048  * }
6049  */
6050 /* ARGSUSED */
6051 static int
6052 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
6053 {
6054         return (dsl_dataset_user_release(holds, errlist));
6055 }
6056 
6057 /*
6058  * inputs:
6059  * zc_name              name of new filesystem or snapshot
6060  * zc_value             full name of old snapshot
6061  *
6062  * outputs:
6063  * zc_cookie            space in bytes
6064  * zc_objset_type       compressed space in bytes
6065  * zc_perm_action       uncompressed space in bytes
6066  */
6067 static int
6068 zfs_ioc_space_written(zfs_cmd_t *zc)
6069 {
6070         int error;
6071         dsl_pool_t *dp;
6072         dsl_dataset_t *new, *old;
6073 
6074         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6075         if (error != 0)
6076                 return (error);
6077         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6078         if (error != 0) {
6079                 dsl_pool_rele(dp, FTAG);
6080                 return (error);
6081         }
6082         error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6083         if (error != 0) {
6084                 dsl_dataset_rele(new, FTAG);
6085                 dsl_pool_rele(dp, FTAG);
6086                 return (error);
6087         }
6088 
6089         error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
6090             &zc->zc_objset_type, &zc->zc_perm_action);
6091         dsl_dataset_rele(old, FTAG);
6092         dsl_dataset_rele(new, FTAG);
6093         dsl_pool_rele(dp, FTAG);
6094         return (error);
6095 }
6096 
6097 /*
6098  * innvl: {
6099  *     "firstsnap" -> snapshot name
6100  * }
6101  *
6102  * outnvl: {
6103  *     "used" -> space in bytes
6104  *     "compressed" -> compressed space in bytes
6105  *     "uncompressed" -> uncompressed space in bytes
6106  * }
6107  */
6108 static int
6109 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6110 {
6111         int error;
6112         dsl_pool_t *dp;
6113         dsl_dataset_t *new, *old;
6114         char *firstsnap;
6115         uint64_t used, comp, uncomp;
6116 
6117         if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
6118                 return (SET_ERROR(EINVAL));
6119 
6120         error = dsl_pool_hold(lastsnap, FTAG, &dp);
6121         if (error != 0)
6122                 return (error);
6123 
6124         error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6125         if (error == 0 && !new->ds_is_snapshot) {
6126                 dsl_dataset_rele(new, FTAG);
6127                 error = SET_ERROR(EINVAL);
6128         }
6129         if (error != 0) {
6130                 dsl_pool_rele(dp, FTAG);
6131                 return (error);
6132         }
6133         error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6134         if (error == 0 && !old->ds_is_snapshot) {
6135                 dsl_dataset_rele(old, FTAG);
6136                 error = SET_ERROR(EINVAL);
6137         }
6138         if (error != 0) {
6139                 dsl_dataset_rele(new, FTAG);
6140                 dsl_pool_rele(dp, FTAG);
6141                 return (error);
6142         }
6143 
6144         error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6145         dsl_dataset_rele(old, FTAG);
6146         dsl_dataset_rele(new, FTAG);
6147         dsl_pool_rele(dp, FTAG);
6148         fnvlist_add_uint64(outnvl, "used", used);
6149         fnvlist_add_uint64(outnvl, "compressed", comp);
6150         fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6151         return (error);
6152 }
6153 
6154 static int
6155 zfs_ioc_vdev_set_props(zfs_cmd_t *zc)
6156 {
6157         nvlist_t *props;
6158         spa_t *spa;
6159         int error;
6160         uint64_t vdev_guid = zc->zc_guid;
6161 
6162         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6163             zc->zc_iflags, &props))
6164                 return (error);
6165 
6166         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6167                 nvlist_free(props);
6168                 return (error);
6169         }
6170 
6171         error = spa_vdev_prop_set(spa, vdev_guid, props);
6172 
6173         nvlist_free(props);
6174         spa_close(spa, FTAG);
6175 
6176         return (error);
6177 }
6178 
6179 static int
6180 zfs_ioc_vdev_get_props(zfs_cmd_t *zc)
6181 {
6182         spa_t *spa;
6183         uint64_t vdev_guid = zc->zc_guid;
6184         nvlist_t *nvp = NULL;
6185         int error;
6186 
6187         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6188                 /*
6189                  * If the pool is faulted, there may be properties we can still
6190                  * get (such as altroot and cachefile), so attempt to get them
6191                  * anyway.
6192                  */
6193                 mutex_enter(&spa_namespace_lock);
6194                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
6195                         error = spa_vdev_prop_get(spa, vdev_guid, &nvp);
6196                 mutex_exit(&spa_namespace_lock);
6197         } else {
6198                 error = spa_vdev_prop_get(spa, vdev_guid, &nvp);
6199                 spa_close(spa, FTAG);
6200         }
6201 
6202         if (error == 0 && zc->zc_nvlist_dst != NULL)
6203                 error = put_nvlist(zc, nvp);
6204         else
6205                 error = EFAULT;
6206 
6207         nvlist_free(nvp);
6208         return (error);
6209 }
6210 
6211 static int
6212 zfs_ioc_cos_alloc(zfs_cmd_t *zc)
6213 {
6214         nvlist_t *props;
6215         spa_t *spa;
6216         int error;
6217 
6218         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6219             zc->zc_iflags, &props))
6220                 return (error);
6221 
6222         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6223                 nvlist_free(props);
6224                 return (error);
6225         }
6226 
6227         error = spa_alloc_cos(spa, zc->zc_string, 0);
6228         if (!error)
6229                 error = spa_cos_prop_set(spa, zc->zc_string, props);
6230 
6231         spa_close(spa, FTAG);
6232         nvlist_free(props);
6233 
6234         return (error);
6235 }
6236 
6237 static int
6238 zfs_ioc_cos_free(zfs_cmd_t *zc)
6239 {
6240         spa_t *spa;
6241         int error = 0;
6242 
6243         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
6244                 return (error);
6245 
6246         error = spa_free_cos(spa, zc->zc_string, zc->zc_cookie);
6247 
6248         spa_close(spa, FTAG);
6249 
6250         return (error);
6251 }
6252 
6253 static int
6254 zfs_ioc_cos_list(zfs_cmd_t *zc)
6255 {
6256         spa_t *spa;
6257         nvlist_t *nvl;
6258         int error = 0;
6259 
6260         VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
6261 
6262         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6263                 nvlist_free(nvl);
6264                 return (error);
6265         }
6266 
6267         error = spa_list_cos(spa, nvl);
6268 
6269         if (error == 0 && zc->zc_nvlist_dst != NULL)
6270                 error = put_nvlist(zc, nvl);
6271 
6272         spa_close(spa, FTAG);
6273         nvlist_free(nvl);
6274 
6275         return (error);
6276 }
6277 
6278 static int
6279 zfs_ioc_cos_set_props(zfs_cmd_t *zc)
6280 {
6281         nvlist_t *props;
6282         spa_t *spa;
6283         cos_t *cos;
6284         const char *cosname = NULL;
6285         int error = 0;
6286 
6287         if ((zc->zc_string == NULL || zc->zc_string[0] == '\0') &&
6288             zc->zc_guid == 0)
6289                 return (SET_ERROR(EINVAL));
6290 
6291         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6292             zc->zc_iflags, &props))
6293                 return (error);
6294 
6295         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6296                 nvlist_free(props);
6297                 return (error);
6298         }
6299 
6300         if (zc->zc_guid == 0) {
6301                 cosname = zc->zc_string;
6302         } else {
6303                 spa_cos_enter(spa);
6304                 cos = spa_lookup_cos_by_guid(spa, zc->zc_guid);
6305                 if (cos != NULL)
6306                         cosname = cos->cos_name;
6307                 else
6308                         error = SET_ERROR(ENOENT);
6309                 spa_cos_exit(spa);
6310         }
6311 
6312         if (error == 0)
6313                 error = spa_cos_prop_set(spa, cosname, props);
6314 
6315         spa_close(spa, FTAG);
6316         nvlist_free(props);
6317 
6318         return (error);
6319 }
6320 
6321 static int
6322 zfs_ioc_cos_get_props(zfs_cmd_t *zc)
6323 {
6324         spa_t *spa;
6325         cos_t *cos;
6326         nvlist_t *nvp = NULL;
6327         const char *cosname = NULL;
6328         int error = 0;
6329 
6330         if ((zc->zc_string == NULL || zc->zc_string[0] == '\0') &&
6331             zc->zc_guid == 0)
6332                 return (SET_ERROR(EINVAL));
6333 
6334         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
6335                 return (error);
6336 
6337         if (zc->zc_guid == 0) {
6338                 cosname = zc->zc_string;
6339         } else {
6340                 spa_cos_enter(spa);
6341                 cos = spa_lookup_cos_by_guid(spa, zc->zc_guid);
6342                 if (cos != NULL)
6343                         cosname = cos->cos_name;
6344                 spa_cos_exit(spa);
6345         }
6346 
6347         if (error == 0)
6348                 error = spa_cos_prop_get(spa, cosname, &nvp);
6349 
6350         spa_close(spa, FTAG);
6351 
6352         if (error == 0 && zc->zc_nvlist_dst != NULL)
6353                 error = put_nvlist(zc, nvp);
6354         else
6355                 error = EFAULT;
6356 
6357         nvlist_free(nvp);
6358         return (error);
6359 }
6360 
6361 /*
6362  * innvl: {
6363  *     "fd" -> file descriptor to write stream to (int32)
6364  *     (optional) "fromsnap" -> full snap name to send an incremental from
6365  *     (optional) "largeblockok" -> (value ignored)
6366  *         indicates that blocks > 128KB are permitted
6367  *     (optional) "embedok" -> (value ignored)
6368  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6369  *     (optional) "compressok" -> (value ignored)
6370  *         presence indicates compressed DRR_WRITE records are permitted
6371  *     (optional) "resume_object" and "resume_offset" -> (uint64)
6372  *         if present, resume send stream from specified object and offset.
6373  * }
6374  *
6375  * outnvl is unused
6376  */
6377 /* ARGSUSED */
6378 static int
6379 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6380 {
6381         int error;
6382         offset_t off;
6383         char *fromname = NULL;
6384         int fd;
6385         boolean_t largeblockok;
6386         boolean_t embedok;
6387         boolean_t compressok;
6388         uint64_t resumeobj = 0;
6389         uint64_t resumeoff = 0;
6390 
6391         error = nvlist_lookup_int32(innvl, "fd", &fd);
6392         if (error != 0)
6393                 return (SET_ERROR(EINVAL));
6394 
6395         (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6396 
6397         largeblockok = nvlist_exists(innvl, "largeblockok");
6398         embedok = nvlist_exists(innvl, "embedok");
6399         compressok = nvlist_exists(innvl, "compressok");
6400 
6401         (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6402         (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6403 
6404         file_t *fp = getf(fd);
6405         if (fp == NULL)
6406                 return (SET_ERROR(EBADF));
6407 
6408         off = fp->f_offset;
6409         error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
6410             fd, resumeobj, resumeoff, fp->f_vnode, &off);
6411 
6412         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
6413                 fp->f_offset = off;
6414         releasef(fd);
6415         return (error);
6416 }
6417 
6418 /*
6419  * Determine approximately how large a zfs send stream will be -- the number
6420  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
6421  *
6422  * innvl: {
6423  *     (optional) "from" -> full snap or bookmark name to send an incremental
6424  *                          from
6425  *     (optional) "largeblockok" -> (value ignored)
6426  *         indicates that blocks > 128KB are permitted
6427  *     (optional) "embedok" -> (value ignored)
6428  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6429  *     (optional) "compressok" -> (value ignored)
6430  *         presence indicates compressed DRR_WRITE records are permitted
6431  * }
6432  *
6433  * outnvl: {
6434  *     "space" -> bytes of space (uint64)
6435  * }
6436  */
6437 static int
6438 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6439 {
6440         dsl_pool_t *dp;
6441         dsl_dataset_t *tosnap;
6442         int error;
6443         char *fromname;
6444         boolean_t compressok;
6445         uint64_t space;
6446 
6447         error = dsl_pool_hold(snapname, FTAG, &dp);
6448         if (error != 0)
6449                 return (error);
6450 
6451         error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6452         if (error != 0) {
6453                 dsl_pool_rele(dp, FTAG);
6454                 return (error);
6455         }
6456 
6457         compressok = nvlist_exists(innvl, "compressok");
6458 
6459         error = nvlist_lookup_string(innvl, "from", &fromname);
6460         if (error == 0) {
6461                 if (strchr(fromname, '@') != NULL) {
6462                         /*
6463                          * If from is a snapshot, hold it and use the more
6464                          * efficient dmu_send_estimate to estimate send space
6465                          * size using deadlists.
6466                          */
6467                         dsl_dataset_t *fromsnap;
6468                         error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6469                         if (error != 0)
6470                                 goto out;
6471                         error = dmu_send_estimate(tosnap, fromsnap, compressok,
6472                             &space);
6473                         dsl_dataset_rele(fromsnap, FTAG);
6474                 } else if (strchr(fromname, '#') != NULL) {
6475                         /*
6476                          * If from is a bookmark, fetch the creation TXG of the
6477                          * snapshot it was created from and use that to find
6478                          * blocks that were born after it.
6479                          */
6480                         zfs_bookmark_phys_t frombm;
6481 
6482                         error = dsl_bookmark_lookup(dp, fromname, tosnap,
6483                             &frombm);
6484                         if (error != 0)
6485                                 goto out;
6486                         error = dmu_send_estimate_from_txg(tosnap,
6487                             frombm.zbm_creation_txg, compressok, &space);
6488                 } else {
6489                         /*
6490                          * from is not properly formatted as a snapshot or
6491                          * bookmark
6492                          */
6493                         error = SET_ERROR(EINVAL);
6494                         goto out;
6495                 }
6496         } else {
6497                 /*
6498                  * If estimating the size of a full send, use dmu_send_estimate.
6499                  */
6500                 error = dmu_send_estimate(tosnap, NULL, compressok, &space);
6501         }
6502 
6503         fnvlist_add_uint64(outnvl, "space", space);
6504 
6505 out:
6506         dsl_dataset_rele(tosnap, FTAG);
6507         dsl_pool_rele(dp, FTAG);
6508         return (error);
6509 }
6510 
/*
 * State shared between zfs_ioc_list_from_cursor(),
 * dmu_objset_find_dp_cursor() and the ds_cursor_cb() callback.
 */
typedef struct dp_cursor_cb_arg {
        nvlist_t *outnvl;       /* ds_cursor_cb() adds one nvlist per dataset */
        uint64_t offset;        /* serialized ZAP cursor: start in, end out */
        uint32_t count;         /* number of entries to retrieve */
        uint32_t skip;          /* entries to step over before retrieving */
        boolean_t verbose;      /* also report available/referenced/used */
        boolean_t snaps;        /* walk snapshots rather than child dirs */
} dp_cursor_cb_arg_t;
6519 
6520 /* ARGSUSED */
6521 int
6522 ds_cursor_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
6523 {
6524         int error;
6525         char dsname[MAXNAMELEN];
6526         objset_t *osp;
6527 
6528         dp_cursor_cb_arg_t *cb = (dp_cursor_cb_arg_t *)arg;
6529 
6530         dsl_dataset_name(ds, dsname);
6531         nvlist_t *nv = fnvlist_alloc();
6532 
6533         fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_GUID),
6534             dsl_dataset_phys(ds)->ds_guid);
6535 
6536         if (cb->verbose) {
6537                 uint64_t refd, avail, uobjs, aobjs;
6538                 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
6539 
6540                 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_AVAILABLE),
6541                     avail);
6542                 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_REFERENCED),
6543                     refd);
6544                 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_USED),
6545                     dsl_dir_phys(ds->ds_dir)->dd_used_bytes);
6546         }
6547 
6548         error = dmu_objset_from_ds(ds, &osp);
6549 
6550         if (error == 0)
6551                 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_TYPE),
6552                     dmu_objset_type(osp));
6553 
6554         fnvlist_add_nvlist(cb->outnvl, dsname, nv);
6555         nvlist_free(nv);
6556         return (error);
6557 }
6558 
6559 int
6560 dmu_objset_find_dp_cursor(dsl_pool_t *dp, uint64_t ddobj,
6561     int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg)
6562 {
6563         dsl_dir_t *dd;
6564         dsl_dataset_t *ds;
6565         zap_cursor_t zc;
6566         zap_attribute_t *attr;
6567         uint64_t thisobj;
6568         int error, i;
6569 
6570         dp_cursor_cb_arg_t *cb = (dp_cursor_cb_arg_t *)arg;
6571 
6572         ASSERT(dsl_pool_config_held(dp));
6573         error = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
6574         thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
6575 
6576         if (error != 0)
6577                 return (error);
6578 
6579         attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
6580 
6581         /* we are interrestd in filesytems and volumes */
6582         if (!cb->snaps) {
6583 
6584                 /* init the cursor at given offset */
6585                 zap_cursor_init_serialized(&zc, dp->dp_meta_objset,
6586                     dsl_dir_phys(dd)->dd_child_dir_zapobj, cb->offset);
6587 
6588 
6589                 for (i = 0; i < cb->skip; i++) {
6590                         zap_cursor_advance(&zc);
6591                         if ((zap_cursor_retrieve(&zc, attr) != 0)) {
6592                                 error = ENOENT;
6593                                 goto out;
6594                         }
6595                 }
6596 
6597                 for (i = 0; i < cb->count; i++) {
6598                         zap_cursor_advance(&zc);
6599                         if ((zap_cursor_retrieve(&zc, attr) != 0)) {
6600                                 error = ENOENT;
6601                                 goto out;
6602                         }
6603 
6604                         ASSERT3U(attr->za_integer_length, ==,
6605                             sizeof (uint64_t));
6606                         ASSERT3U(attr->za_num_integers, ==, 1);
6607                         /* recursivly walk objects skipping $MOS and $ORIGIN */
6608                         error = dmu_objset_find_dp(dp, attr->za_first_integer,
6609                             func, arg, 0);
6610                         if (error != 0)
6611                                 break;
6612                 }
6613         } else {
6614 
6615                 dsl_dataset_t *ds;
6616 
6617                 error = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
6618 
6619                 if (error == 0) {
6620 
6621                         dsl_dataset_rele(ds, FTAG);
6622                         zap_cursor_init_serialized(&zc, dp->dp_meta_objset,
6623                             dsl_dataset_phys(ds)->ds_snapnames_zapobj,
6624                             cb->offset);
6625 
6626                         for (i = 0; i < cb->skip; i++) {
6627                                 zap_cursor_advance(&zc);
6628                                         if ((zap_cursor_retrieve(&zc,
6629                                             attr) != 0)) {
6630                                         error = ENOENT;
6631                                         goto out;
6632                                 }
6633                         }
6634 
6635                         for (i = 0; i < cb->count; i++) {
6636                                 zap_cursor_advance(&zc);
6637                                 if ((zap_cursor_retrieve(&zc, attr) != 0)) {
6638                                         error = ENOENT;
6639                                         goto out;
6640 
6641                                 }
6642 
6643                                 ASSERT3U(attr->za_integer_length, ==,
6644                                     sizeof (uint64_t));
6645                                 ASSERT3U(attr->za_num_integers, ==, 1);
6646 
6647                                 error = dsl_dataset_hold_obj(dp,
6648                                     attr->za_first_integer, FTAG, &ds);
6649                                 if (error != 0)
6650                                         break;
6651                                 error = func(dp, ds, arg);
6652                                 dsl_dataset_rele(ds, FTAG);
6653                                 if (error != 0)
6654                                         break;
6655                         }
6656                 }
6657         }
6658 out:
6659         cb->offset = zap_cursor_serialize(&zc);
6660         zap_cursor_fini(&zc);
6661         dsl_dir_rele(dd, FTAG);
6662         kmem_free(attr, sizeof (zap_attribute_t));
6663 
6664         /* return self as the last dataset */
6665         if (error == ENOENT) {
6666                 if ((error = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds)) != 0)
6667                         return (error);
6668                 error = func(dp, ds, arg);
6669                 dsl_dataset_rele(ds, FTAG);
6670                 if (error)
6671                         return (error);
6672                 error = ENOENT;
6673         }
6674 
6675         return (error);
6676 }
6677 
6678 
6679 /*
6680  * We want to list all dataset under the given name. Optionally, we advance the
6681  * ZAP cursor "skip" times and retrieve "count" datasets. We return the offset
6682  * so the user can start the next invocation where he left off.
6683  */
6684 
6685 static int
6686 zfs_ioc_list_from_cursor(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
6687 {
6688 
6689         dsl_pool_t *dp;
6690         dsl_dataset_t *ds;
6691 
6692         int error;
6693 
6694         dp_cursor_cb_arg_t cb_args;
6695 
6696         if ((strchr(name, '@') != NULL))
6697                 return (EINVAL);
6698 
6699         if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
6700                 return (error);
6701 
6702         if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
6703                 dsl_pool_rele(dp, FTAG);
6704                 return (error);
6705         }
6706 
6707         (void) nvlist_lookup_uint32(innvl, "count", &cb_args.count);
6708         (void) nvlist_lookup_uint32(innvl, "skip", &cb_args.skip);
6709         (void) nvlist_lookup_uint64(innvl, "offset", &cb_args.offset);
6710         (void) nvlist_lookup_boolean_value(innvl, "verbose", &cb_args.verbose);
6711         (void) nvlist_lookup_boolean_value(innvl, "snaps", &cb_args.snaps);
6712 
6713         cb_args.outnvl = outnvl;
6714         error = dmu_objset_find_dp_cursor(dp, ds->ds_dir->dd_object,
6715             &ds_cursor_cb, &cb_args);
6716 
6717         fnvlist_add_uint64(outnvl, "offset", cb_args.offset);
6718         dsl_dataset_rele(ds, FTAG);
6719         dsl_pool_rele(dp, FTAG);
6720 
6721         return (error);
6722 }
6723 
6724 
/*
 * Table of registered ioctl handlers.  The registration functions below
 * index it by (ioc - ZFS_IOC_FIRST), so it has one slot for every ioctl
 * number in [ZFS_IOC_FIRST, ZFS_IOC_LAST).
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6726 
6727 static void
6728 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6729     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6730     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6731 {
6732         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6733 
6734         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6735         ASSERT3U(ioc, <, ZFS_IOC_LAST);
6736         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6737         ASSERT3P(vec->zvec_func, ==, NULL);
6738 
6739         vec->zvec_legacy_func = func;
6740         vec->zvec_secpolicy = secpolicy;
6741         vec->zvec_namecheck = namecheck;
6742         vec->zvec_allow_log = log_history;
6743         vec->zvec_pool_check = pool_check;
6744 }
6745 
6746 /*
6747  * See the block comment at the beginning of this file for details on
6748  * each argument to this function.
6749  */
6750 static void
6751 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
6752     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6753     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
6754     boolean_t allow_log)
6755 {
6756         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6757 
6758         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6759         ASSERT3U(ioc, <, ZFS_IOC_LAST);
6760         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6761         ASSERT3P(vec->zvec_func, ==, NULL);
6762 
6763         /* if we are logging, the name must be valid */
6764         ASSERT(!allow_log || namecheck != NO_NAME);
6765 
6766         vec->zvec_name = name;
6767         vec->zvec_func = func;
6768         vec->zvec_secpolicy = secpolicy;
6769         vec->zvec_namecheck = namecheck;
6770         vec->zvec_pool_check = pool_check;
6771         vec->zvec_smush_outnvlist = smush_outnvlist;
6772         vec->zvec_allow_log = allow_log;
6773 }
6774 
6775 static void
6776 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6777     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
6778     zfs_ioc_poolcheck_t pool_check)
6779 {
6780         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6781             POOL_NAME, log_history, pool_check);
6782 }
6783 
6784 static void
6785 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6786     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
6787 {
6788         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6789             DATASET_NAME, B_FALSE, pool_check);
6790 }
6791 
6792 static void
6793 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6794 {
6795         zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6796             POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6797 }
6798 
6799 static void
6800 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6801     zfs_secpolicy_func_t *secpolicy)
6802 {
6803         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6804             NO_NAME, B_FALSE, POOL_CHECK_NONE);
6805 }
6806 
6807 static void
6808 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6809     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6810 {
6811         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6812             DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6813 }
6814 
6815 static void
6816 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6817 {
6818         zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6819             zfs_secpolicy_read);
6820 }
6821 
6822 static void
6823 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6824     zfs_secpolicy_func_t *secpolicy)
6825 {
6826         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6827             DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6828 }
6829 
6830 /*
6831  * Appearing to take poolname as a parameter is a concession to the ioctl
6832  * handler. Leading underbar for generation idea nvpair exists only on output to
6833  * avoid pool name conflict.
6834  */
6835 /* ARGSUSED */
6836 static int
6837 zfs_ioc_pool_configs_nvl(const char *poolname, nvlist_t *innvl,
6838     nvlist_t *outnvl)
6839 {
6840         nvlist_t *configs;
6841         uint64_t generation;
6842 
6843         if (nvlist_lookup_uint64(innvl, "generation", &generation) != 0)
6844                 return (SET_ERROR(EINVAL));
6845 
6846         if ((configs = spa_all_configs(&generation)) == NULL)
6847                 return (SET_ERROR(EEXIST));
6848 
6849         fnvlist_merge(outnvl, configs);
6850         nvlist_free(configs);
6851         fnvlist_add_uint64(outnvl, "_generation", generation);
6852 
6853         return (0);
6854 }
6855 
6856 /*
6857  * Ask spa for pool statistics. If we get a non-NULL config but a non-zero
6858  * return from spa, we return EAGAIN to hint to callers that we've retrieved
6859  * a config for a faulted pool. We take no arguments but declare otherwise to
6860  * suit the ioctl handler's pattern. Similar considerations apply to outnvl as a
6861  * single pointer that has to be merged with config allocated or nulled by spa.
6862  */
6863 static int
6864 zfs_ioc_pool_stats_nvl(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
6865 {
6866         nvlist_t *config;
6867         int error;
6868         int ret = 0;
6869 
6870         ASSERT3P(innvl, ==, NULL);
6871         error = spa_get_stats(poolname, &config, NULL, 0);
6872         ASSERT3U(error, !=, EAGAIN);
6873 
6874         if (config != NULL) {
6875                 fnvlist_merge(outnvl, config);
6876                 nvlist_free(config);
6877                 if (error)
6878                         ret = SET_ERROR(EAGAIN);
6879         } else {
6880                 ret = error;
6881         }
6882 
6883         return (ret);
6884 }
6885 
6886 static nvlist_t *
6887 objset_stats2nv(dmu_objset_stats_t *stat)
6888 {
6889         nvlist_t *statlist  = fnvlist_alloc();
6890 
6891         fnvlist_add_uint64(statlist, "dds_num_clones", stat->dds_num_clones);
6892         fnvlist_add_uint64(statlist, "dds_creation_txg",
6893             stat->dds_creation_txg);
6894         fnvlist_add_uint64(statlist, "dds_guid", stat->dds_guid);
6895         fnvlist_add_uint8(statlist, "dds_type", (uint8_t)stat->dds_type);
6896         fnvlist_add_uint8(statlist, "dds_is_snapshot", stat->dds_is_snapshot);
6897         fnvlist_add_uint8(statlist, "dds_inconsistent",
6898             stat->dds_inconsistent);
6899         fnvlist_add_string(statlist, "dds_origin", stat->dds_origin);
6900 
6901         return (statlist);
6902 }
6903 
6904 /* Given an objset, retrieve stats and props by adding them to the output nvl */
6905 static int
6906 objset_render(objset_t *os, nvlist_t *outnvl)
6907 {
6908         int error = 0;
6909         nvlist_t *props = NULL, *statlist = NULL;
6910         dmu_objset_stats_t stats;
6911 
6912         dmu_objset_fast_stat(os, &stats);
6913 
6914         if ((error = dsl_prop_get_all(os, &props)) == 0) {
6915                 dmu_objset_stats(os, props);
6916                 /*
6917                  * NB: zvol_get_stats() will read the objset contents,
6918                  * which we aren't supposed to do with a
6919                  * DS_MODE_USER hold, because it could be
6920                  * inconsistent.  So this is a bit of a workaround...
6921                  * XXX reading with out owning
6922                  */
6923                 if (!stats.dds_inconsistent &&
6924                     dmu_objset_type(os) == DMU_OST_ZVOL) {
6925                         error = zvol_get_stats(os, props);
6926                         if (error == EIO)
6927                                 goto out;
6928                         VERIFY0(error);
6929                 }
6930                 fnvlist_add_nvlist(outnvl, "props", props);
6931                 statlist = objset_stats2nv(&stats);
6932                 fnvlist_add_nvlist(outnvl, "stats", statlist);
6933                 nvlist_free(statlist);
6934         }
6935 
6936 out:
6937         nvlist_free(props);
6938         return (error);
6939 }
6940 
6941 /*
6942  * Note: this IOC can be called internally by other IOCs as an existence
6943  * check against race conditions. Given a dataset name, return its stats
6944  * and props. Optionally we can verify type, which simplifies things for
6945  * callers that may not want to parse stats for themselves (and may discard
6946  * the outnvl in handlers).
6947  */
6948 static int
6949 zfs_ioc_objset_stats_nvl(const char *data, nvlist_t *innvl, nvlist_t *outnvl)
6950 {
6951         objset_t *os;
6952         int error;
6953         dmu_objset_type_t checktype = DMU_OST_ANY;
6954         boolean_t gettype = B_FALSE;
6955 
6956         if (innvl != NULL) {
6957                 if (nvlist_lookup_uint8(innvl, "type", (uint8_t *)&checktype)
6958                     == 0)
6959                         gettype = B_TRUE;
6960         }
6961         if ((error = dmu_objset_hold(data, FTAG, &os)) == 0) {
6962                 error = objset_render(os, outnvl);
6963                 dmu_objset_rele(os, FTAG);
6964 
6965                 if (error == 0) {
6966                         nvlist_t *statlist;
6967                         dmu_objset_type_t type;
6968                         statlist = fnvlist_lookup_nvlist(outnvl, "stats");
6969                         type = fnvlist_lookup_uint8_t(statlist, "dds_type");
6970                         if (checktype != DMU_OST_ANY && type != checktype) {
6971                                 error = EEXIST;
6972                                 fnvlist_remove(outnvl, "stats");
6973                                 fnvlist_remove(outnvl, "props");
6974                         }
6975                         if (gettype)
6976                                 fnvlist_add_uint8(outnvl, "type", type);
6977                 }
6978         }
6979 
6980         return (error);
6981 }
6982 
6983 /*
6984  * Given a dataset name and an innvl containing a DMU cursor offset, find the
6985  * next child dataset, and return its name, stats, and props and an updated
6986  * cursor.
6987  */
6988 static int
6989 zfs_ioc_dataset_list_next_nvl(const char *data, nvlist_t *innvl,
6990     nvlist_t *outnvl)
6991 {
6992         objset_t *os;
6993         int error;
6994         uint64_t off;
6995         char *p, *nextds;
6996         char name[MAXNAMELEN];
6997         size_t len;
6998         size_t orig_len = strlen(data);
6999 
7000         if (innvl == NULL ||
7001             nvlist_lookup_uint64(innvl, "offset", &off) != 0)
7002                 return (SET_ERROR(EINVAL));
7003 
7004         (void) strlcpy(name, data, sizeof (name));
7005 top:
7006         if (error = dmu_objset_hold(name, FTAG, &os)) {
7007                 if (error == ENOENT)
7008                         error = SET_ERROR(ESRCH);
7009                 return (error);
7010         }
7011 
7012         p = strrchr(name, '/');
7013         if (p == NULL || p[1] != '\0') {
7014                 if ((len = strlcat(name, "/", sizeof (name))) >= MAXNAMELEN) {
7015                         dmu_objset_rele(os, FTAG);
7016                         return (SET_ERROR(ESRCH));
7017                 }
7018         } else {
7019                 len = orig_len;
7020         }
7021         p = name + len;
7022 
7023         do {
7024                 error = dmu_dir_list_next(os, sizeof (name) - len, p, NULL,
7025                     &off);
7026                 if (error == ENOENT)
7027                         error = ESRCH;
7028         } while (error == 0 && dataset_name_hidden(name));
7029         dmu_objset_rele(os, FTAG);
7030 
7031         /*
7032          * If it's an internal dataset (ie. with a '$' in its name),
7033          * don't try to get stats for it, otherwise we'll return ENOENT.
7034          */
7035         if (error == 0 && strchr(name, '$') == NULL) {
7036                 error = zfs_ioc_objset_stats_nvl(name, NULL, outnvl);
7037                 if (error == ENOENT) {
7038                         /* We lost a race with destroy, get the next one. */
7039                         name[orig_len] = '\0';
7040                         goto top;
7041                 }
7042                 len = strlen(name) + 1;
7043                 nextds = kmem_alloc(len, KM_SLEEP);
7044                 (void) strlcpy(nextds, name, len);
7045                 fnvlist_add_string(outnvl, "nextds", (const char *)nextds);
7046                 fnvlist_add_uint64(outnvl, "offset", off);
7047         }
7048 
7049         return (error);
7050 }
7051 
7052 /*
7053  * Given a dataset name and a DMU cursor offset, find its next snapshot, and
7054  * return its name, props, and stats and an updated cursor offset.
7055  */
7056 static int
7057 zfs_ioc_snapshot_list_next_nvl(const char *data, nvlist_t *innvl,
7058     nvlist_t *outnvl)
7059 {
7060         objset_t *os;
7061         int error;
7062         uint64_t off, obj;
7063         char name[MAXNAMELEN], *nextsnap;
7064         size_t len;
7065 
7066         if (innvl == NULL ||
7067             nvlist_lookup_uint64(innvl, "offset", &off) != 0)
7068                 return (SET_ERROR(EINVAL));
7069 
7070         error = dmu_objset_hold(data, FTAG, &os);
7071         if (error != 0) {
7072                 return (error == ENOENT ? ESRCH : error);
7073         }
7074 
7075         /*
7076          * A dataset name of maximum length cannot have any snapshots,
7077          * so exit immediately.
7078          */
7079         (void) strlcpy(name, data, sizeof (name));
7080         if ((len = strlcat(name, "@", sizeof (name))) >= MAXNAMELEN) {
7081                 dmu_objset_rele(os, FTAG);
7082                 return (SET_ERROR(ESRCH));
7083         }
7084 
7085         /* Rest of name buffer is passed so snap ID can be appended. */
7086         error = dmu_snapshot_list_next(os, sizeof (name) - len, name + len,
7087             &obj, &off, NULL);
7088 
7089         if (error == 0) {
7090                 dsl_dataset_t *ds;
7091                 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
7092 
7093                 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
7094                 if (error == 0) {
7095                         objset_t *ossnap;
7096 
7097                         error = dmu_objset_from_ds(ds, &ossnap);
7098                         if (error == 0)
7099                                 error = objset_render(ossnap, outnvl);
7100                         dsl_dataset_rele(ds, FTAG);
7101                 }
7102         } else if (error == ENOENT) {
7103                 error = ESRCH;
7104         }
7105 
7106         dmu_objset_rele(os, FTAG);
7107 
7108         if (error == 0) {
7109                 len = strlen(name) + 1;
7110                 nextsnap = kmem_alloc(len, KM_SLEEP);
7111                 (void) strlcpy(nextsnap, name, len);
7112                 fnvlist_add_string(outnvl, "nextsnap", (const char *)nextsnap);
7113                 fnvlist_add_uint64(outnvl, "offset", off);
7114         }
7115         return (error);
7116 }
7117 
7118 static int
7119 zfs_ioc_pool_get_props_nvl(const char *poolname, nvlist_t *innvl,
7120     nvlist_t *outnvl)
7121 {
7122         spa_t *spa;
7123         int error;
7124         nvlist_t *props = NULL;
7125 
7126         ASSERT3P(innvl, ==, NULL);
7127         if ((error = spa_open(poolname, &spa, FTAG)) != 0) {
7128                 /*
7129                  * If the pool is faulted, there may be properties we can still
7130                  * get (such as altroot and cachefile), so attempt to get them
7131                  * anyway.
7132                  */
7133                 mutex_enter(&spa_namespace_lock);
7134                 if ((spa = spa_lookup(poolname)) != NULL)
7135                         error = spa_prop_get(spa, &props);
7136                 mutex_exit(&spa_namespace_lock);
7137         } else {
7138                 error = spa_prop_get(spa, &props);
7139                 spa_close(spa, FTAG);
7140         }
7141 
7142         if (props != NULL) {
7143                 fnvlist_merge(outnvl, props);
7144                 nvlist_free(props);
7145         } else {
7146                 ASSERT3S(error, !=, 0);
7147         }
7148 
7149         return (error);
7150 }
7151 
7152 /* ARGSUSED */
7153 static int
7154 zfs_ioc_check_krrp(const char *dataset, nvlist_t *innvl, nvlist_t *outnvl)
7155 {
7156         spa_t *spa;
7157         int err;
7158 
7159         /*
7160          * Here we use different way to open spa for the given pool,
7161          * because the pool maybe faulted
7162          */
7163 
7164         mutex_enter(&spa_namespace_lock);
7165         if ((spa = spa_lookup(dataset)) == NULL) {
7166                 mutex_exit(&spa_namespace_lock);
7167                 /* From KRRP side everything nice */
7168                 return (0);
7169         }
7170 
7171         spa_open_ref(spa, FTAG);
7172         mutex_exit(&spa_namespace_lock);
7173 
7174         err = autosnap_check_for_destroy(spa_get_autosnap(spa), dataset);
7175         if (err == 0)
7176                 err = ENOTSUP;
7177 
7178         mutex_enter(&spa_namespace_lock);
7179         spa_close(spa, FTAG);
7180         mutex_exit(&spa_namespace_lock);
7181 
7182         return (err != 0 ? SET_ERROR(err) : 0);
7183 }
7184 
/*
 * Register every ioctl served through the zfs control device.
 *
 * The first group uses zfs_ioctl_register(), which takes a printable name,
 * the ioctl number, the nvlist-style handler, the security policy function,
 * the kind of name carried by the request (POOL_NAME, DATASET_NAME or
 * NO_NAME), the pool state checks to perform, and two boolean flags.
 * NOTE(review): the two trailing booleans presumably populate
 * zvec_smush_outnvlist and zvec_allow_log (both consulted by zfsdev_ioctl());
 * confirm the order against zfs_ioctl_register().
 *
 * The second group registers handlers that use the legacy function
 * signature, through the zfs_ioctl_register_legacy() family of helpers.
 */
static void
zfs_ioctl_init(void)
{
	/* ioctls that use the nvlist-based (innvl/outnvl) signature */

	zfs_ioctl_register("bulk_list", ZFS_IOC_BULK_LIST,
	    zfs_ioc_list_from_cursor, zfs_secpolicy_read,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("check_krrp", ZFS_IOC_CHECK_KRRP,
	    zfs_ioc_check_krrp, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE);

	/* nvlist-returning variants of the pool/dataset query ioctls */

	zfs_ioctl_register("pool_stats_nvl", ZFS_IOC_POOL_STATS_NVL,
	    zfs_ioc_pool_stats_nvl, zfs_secpolicy_read, POOL_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE);

	zfs_ioctl_register("pool_configs_nvl", ZFS_IOC_POOL_CONFIGS_NVL,
	    zfs_ioc_pool_configs_nvl, zfs_secpolicy_none, NO_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE);

	zfs_ioctl_register("pool_get_props_nvl", ZFS_IOC_POOL_GET_PROPS_NVL,
	    zfs_ioc_pool_get_props_nvl, zfs_secpolicy_read, POOL_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE);

	zfs_ioctl_register("objset_stats_nvl", ZFS_IOC_OBJSET_STATS_NVL,
	    zfs_ioc_objset_stats_nvl, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("dataset_list_next_nvl",
	    ZFS_IOC_DATASET_LIST_NEXT_NVL, zfs_ioc_dataset_list_next_nvl,
	    zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE,
	    B_FALSE);

	zfs_ioctl_register("snapshot_list_next_nvl",
	    ZFS_IOC_SNAPSHOT_LIST_NEXT_NVL, zfs_ioc_snapshot_list_next_nvl,
	    zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE,
	    B_FALSE);

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
	    zfs_ioc_channel_program, zfs_secpolicy_config,
	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
	    B_TRUE);

	zfs_ioctl_register("set_props_mds", ZFS_IOC_SET_PROPS_MDS,
	    zfs_ioc_set_prop_mds, zfs_secpolicy_config,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	/* ioctls that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_TRIM,
	    zfs_ioc_pool_trim);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETL2ADDDT,
	    zfs_ioc_vdev_setl2adddt);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * Pool destroy and pool export do not have their history logged
	 * by zfsdev_ioctl; instead zfs_ioc_pool_export does the logging
	 * of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_VDEV_SET_PROPS, zfs_ioc_vdev_set_props,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_VDEV_GET_PROPS, zfs_ioc_vdev_get_props,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_COS_ALLOC, zfs_ioc_cos_alloc,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_COS_FREE, zfs_ioc_cos_free,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_COS_LIST, zfs_ioc_cos_list,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_COS_SET_PROPS, zfs_ioc_cos_set_props,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_COS_GET_PROPS, zfs_ioc_cos_get_props,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* dataset read ioctls that supply their own security policy */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
7442 
7443 int
7444 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
7445     zfs_ioc_poolcheck_t check)
7446 {
7447         spa_t *spa;
7448         int error;
7449 
7450         ASSERT(type == POOL_NAME || type == DATASET_NAME);
7451 
7452         if (check & POOL_CHECK_NONE)
7453                 return (0);
7454 
7455         error = spa_open(name, &spa, FTAG);
7456         if (error == 0) {
7457                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
7458                         error = SET_ERROR(EAGAIN);
7459                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
7460                         error = SET_ERROR(EROFS);
7461                 spa_close(spa, FTAG);
7462         }
7463         return (error);
7464 }
7465 
7466 /*
7467  * Find a free minor number.
7468  */
7469 minor_t
7470 zfsdev_minor_alloc(void)
7471 {
7472         static minor_t last_minor;
7473         minor_t m;
7474 
7475         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7476 
7477         for (m = last_minor + 1; m != last_minor; m++) {
7478                 if (m > ZFSDEV_MAX_MINOR)
7479                         m = 1;
7480                 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
7481                         last_minor = m;
7482                         return (m);
7483                 }
7484         }
7485 
7486         return (0);
7487 }
7488 
7489 static int
7490 zfs_ctldev_init(dev_t *devp)
7491 {
7492         minor_t minor;
7493         zfs_soft_state_t *zs;
7494 
7495         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7496         ASSERT(getminor(*devp) == 0);
7497 
7498         minor = zfsdev_minor_alloc();
7499         if (minor == 0)
7500                 return (SET_ERROR(ENXIO));
7501 
7502         if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
7503                 return (SET_ERROR(EAGAIN));
7504 
7505         *devp = makedevice(getemajor(*devp), minor);
7506 
7507         zs = ddi_get_soft_state(zfsdev_state, minor);
7508         zs->zss_type = ZSST_CTLDEV;
7509         zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
7510 
7511         return (0);
7512 }
7513 
7514 static void
7515 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
7516 {
7517         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7518 
7519         zfs_onexit_destroy(zo);
7520         ddi_soft_state_free(zfsdev_state, minor);
7521 }
7522 
7523 void *
7524 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
7525 {
7526         zfs_soft_state_t *zp;
7527 
7528         zp = ddi_get_soft_state(zfsdev_state, minor);
7529         if (zp == NULL || zp->zss_type != which)
7530                 return (NULL);
7531 
7532         return (zp->zss_data);
7533 }
7534 
7535 static int
7536 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
7537 {
7538         int error = 0;
7539 
7540         if (getminor(*devp) != 0)
7541                 return (zvol_open(devp, flag, otyp, cr));
7542 
7543         /* This is the control device. Allocate a new minor if requested. */
7544         if (flag & FEXCL) {
7545                 mutex_enter(&zfsdev_state_lock);
7546                 error = zfs_ctldev_init(devp);
7547                 mutex_exit(&zfsdev_state_lock);
7548         }
7549 
7550         return (error);
7551 }
7552 
7553 static int
7554 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
7555 {
7556         zfs_onexit_t *zo;
7557         minor_t minor = getminor(dev);
7558 
7559         if (minor == 0)
7560                 return (0);
7561 
7562         mutex_enter(&zfsdev_state_lock);
7563         zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
7564         if (zo == NULL) {
7565                 mutex_exit(&zfsdev_state_lock);
7566                 return (zvol_close(dev, flag, otyp, cr));
7567         }
7568         zfs_ctldev_destroy(zo, minor);
7569         mutex_exit(&zfsdev_state_lock);
7570 
7571         return (0);
7572 }
7573 
/*
 * ioctl(9E) entry point for the zfs driver.
 *
 * Requests on a non-zero minor that is not a control-device instance are
 * forwarded to zvol_ioctl().  Otherwise `cmd' selects an entry in
 * zfs_ioc_vec (EINVAL if out of range), the zfs_cmd_t is copied in from
 * `arg', the optional input nvlist is unpacked, the name in zc_name is
 * validated according to the entry's zvec_namecheck, the entry's security
 * policy is run, and finally either the nvlist-style handler (zvec_func)
 * or the legacy handler (zvec_legacy_func) is invoked.
 *
 * On every path that reaches `out' the zfs_cmd_t is copied back out to
 * `arg' and freed.  Returns 0 on success or an errno value.
 */
static int
zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
{
	zfs_cmd_t *zc;
	uint_t vecnum;
	int error, rc, len;
	minor_t minor = getminor(dev);
	const zfs_ioc_vec_t *vec;
	char *saved_poolname = NULL;
	nvlist_t *innvl = NULL;

	/* Non-control minors belong to zvols. */
	if (minor != 0 &&
	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));

	vecnum = cmd - ZFS_IOC_FIRST;
	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));

	/* vecnum is unsigned, so this also rejects cmd < ZFS_IOC_FIRST. */
	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
		return (SET_ERROR(EINVAL));
	vec = &zfs_ioc_vec[vecnum];

	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);

	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
	if (error != 0) {
		error = SET_ERROR(EFAULT);
		goto out;
	}

	/* Keep only the FKIOCTL bit of the caller's flags in zc_iflags. */
	zc->zc_iflags = flag & FKIOCTL;
	if (zc->zc_nvlist_src_size != 0) {
		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
		    zc->zc_iflags, &innvl);
		if (error != 0)
			goto out;
	}

	/*
	 * Ensure that all pool/dataset names are valid before we pass down to
	 * the lower layers.
	 */
	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
	switch (vec->zvec_namecheck) {
	case POOL_NAME:
		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case DATASET_NAME:
		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case NO_NAME:
		break;
	}


	/* The security policy is consulted only if the name checks passed. */
	if (error == 0)
		error = vec->zvec_secpolicy(zc, innvl, cr);

	if (error != 0)
		goto out;

	/*
	 * legacy ioctls can modify zc_name, so save the pool component
	 * (everything before the first '/', '@' or '#') now.
	 */
	len = strcspn(zc->zc_name, "/@#") + 1;
	saved_poolname = kmem_alloc(len, KM_SLEEP);
	(void) strlcpy(saved_poolname, zc->zc_name, len);

	if (vec->zvec_func != NULL) {
		nvlist_t *outnvl;
		int puterror = 0;
		spa_t *spa;
		nvlist_t *lognv = NULL;

		ASSERT(vec->zvec_legacy_func == NULL);

		/*
		 * Add the innvl to the lognv before calling the func,
		 * in case the func changes the innvl.
		 */
		if (vec->zvec_allow_log) {
			lognv = fnvlist_alloc();
			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
			    vec->zvec_name);
			if (!nvlist_empty(innvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
				    innvl);
			}
		}

		outnvl = fnvlist_alloc();
		error = vec->zvec_func(zc->zc_name, innvl, outnvl);

		/*
		 * Some commands can partially execute, modify state, and still
		 * return an error.  In these cases, attempt to record what
		 * was modified.
		 */
		if ((error == 0 ||
		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
		    vec->zvec_allow_log &&
		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
			if (!nvlist_empty(outnvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
				    outnvl);
			}
			if (error != 0) {
				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
				    error);
			}
			(void) spa_history_log_nvl(spa, lognv);
			spa_close(spa, FTAG);
		}
		/* lognv is still NULL here when logging is not allowed. */
		fnvlist_free(lognv);

		/*
		 * Return the output nvlist to the caller.  If smushing is
		 * requested and fails, put_nvlist() is skipped and the
		 * handler's own error is left untouched.
		 */
		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
			int smusherror = 0;
			if (vec->zvec_smush_outnvlist) {
				smusherror = nvlist_smush(outnvl,
				    zc->zc_nvlist_dst_size);
			}
			if (smusherror == 0)
				puterror = put_nvlist(zc, outnvl);
		}

		/* A failure to deliver the output overrides the result. */
		if (puterror != 0)
			error = puterror;

		nvlist_free(outnvl);
	} else {
		error = vec->zvec_legacy_func(zc);
	}

out:
	nvlist_free(innvl);
	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
	if (error == 0 && rc != 0)
		error = SET_ERROR(EFAULT);
	/*
	 * On success of a loggable ioctl, stash the pool name in
	 * thread-specific data (replacing and freeing any previous value);
	 * otherwise the saved name is no longer needed.
	 */
	if (error == 0 && vec->zvec_allow_log) {
		char *s = tsd_get(zfs_allow_log_key);
		if (s != NULL)
			strfree(s);
		(void) tsd_set(zfs_allow_log_key, saved_poolname);
	} else {
		if (saved_poolname != NULL)
			strfree(saved_poolname);
	}

	kmem_free(zc, sizeof (zfs_cmd_t));
	return (error);
}
7733 
7734 static int
7735 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
7736 {
7737         if (cmd != DDI_ATTACH)
7738                 return (DDI_FAILURE);
7739 
7740         if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
7741             DDI_PSEUDO, 0) == DDI_FAILURE)
7742                 return (DDI_FAILURE);
7743 
7744         zfs_dip = dip;
7745 
7746         ddi_report_dev(dip);
7747 
7748         return (DDI_SUCCESS);
7749 }
7750 
7751 static int
7752 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7753 {
7754         if (spa_busy() || zfs_busy() || zvol_busy())
7755                 return (DDI_FAILURE);
7756 
7757         if (cmd != DDI_DETACH)
7758                 return (DDI_FAILURE);
7759 
7760         zfs_dip = NULL;
7761 
7762         ddi_prop_remove_all(dip);
7763         ddi_remove_minor_node(dip, NULL);
7764 
7765         return (DDI_SUCCESS);
7766 }
7767 
7768 /*ARGSUSED*/
7769 static int
7770 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
7771 {
7772         switch (infocmd) {
7773         case DDI_INFO_DEVT2DEVINFO:
7774                 *result = zfs_dip;
7775                 return (DDI_SUCCESS);
7776 
7777         case DDI_INFO_DEVT2INSTANCE:
7778                 *result = (void *)0;
7779                 return (DDI_SUCCESS);
7780         }
7781 
7782         return (DDI_FAILURE);
7783 }
7784 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * In the table below, open, close and ioctl are the zfsdev_* routines,
 * while strategy, dump, read and write are the zvol_* routines.  The
 * remaining entry points are filled with the nodev, nochpoll and
 * ddi_prop_op defaults.  The table is positional, so the order of the
 * entries must not be changed.
 */
static struct cb_ops zfs_cb_ops = {
        zfsdev_open,    /* open */
        zfsdev_close,   /* close */
        zvol_strategy,  /* strategy */
        nodev,          /* print */
        zvol_dump,      /* dump */
        zvol_read,      /* read */
        zvol_write,     /* write */
        zfsdev_ioctl,   /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        nodev,          /* segmap */
        nochpoll,       /* poll */
        ddi_prop_op,    /* prop_op */
        NULL,           /* streamtab */
        D_NEW | D_MP | D_64BIT,         /* Driver compatibility flag */
        CB_REV,         /* version */
        nodev,          /* async read */
        nodev,          /* async write */
};
7814 
/*
 * Device operations for the zfs driver.  The info, attach and detach slots
 * point at zfs_info(), zfs_attach() and zfs_detach() from this file, and
 * the driver operations slot points at zfs_cb_ops.  This structure is
 * referenced from zfs_modldrv.  The table is positional, so the order of
 * the entries must not be changed.
 */
static struct dev_ops zfs_dev_ops = {
        DEVO_REV,       /* version */
        0,              /* refcnt */
        zfs_info,       /* info */
        nulldev,        /* identify */
        nulldev,        /* probe */
        zfs_attach,     /* attach */
        zfs_detach,     /* detach */
        nodev,          /* reset */
        &zfs_cb_ops,        /* driver operations */
        NULL,           /* no bus operations */
        NULL,           /* power */
        ddi_quiesce_not_needed, /* quiesce */
};
7829 
/*
 * Driver linkage for the zfs driver: ties mod_driverops and the
 * description string "ZFS storage pool" to zfs_dev_ops.  Listed in
 * modlinkage below.
 */
static struct modldrv zfs_modldrv = {
        &mod_driverops,
        "ZFS storage pool",
        &zfs_dev_ops
};
7835 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info().  It carries two linkage entries,
 * zfs_modlfs (defined outside this part of the file; presumably the
 * filesystem linkage) and zfs_modldrv (the driver linkage), followed by a
 * NULL terminator.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&zfs_modlfs,
        (void *)&zfs_modldrv,
        NULL
};
7842 
/*
 * Destructor registered for zfs_allow_log_key in _init().  The value stored
 * under that key is a string, which is released here with strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
        strfree((char *)arg);
}
7849 
7850 int
7851 _init(void)
7852 {
7853         int error;
7854 
7855         spa_init(FREAD | FWRITE);
7856         zfs_init();
7857         zvol_init();
7858         zfs_ioctl_init();
7859 
7860         if ((error = mod_install(&modlinkage)) != 0) {
7861                 zvol_fini();
7862                 zfs_fini();
7863                 spa_fini();
7864                 return (error);
7865         }
7866 
7867         tsd_create(&zfs_fsyncer_key, NULL);
7868         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7869         tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7870 
7871         error = ldi_ident_from_mod(&modlinkage, &zfs_li);
7872         ASSERT(error == 0);
7873         mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
7874 
7875         if (sysevent_evc_bind(ZFS_EVENT_CHANNEL, &zfs_channel,
7876             EVCH_HOLD_PEND | EVCH_CREAT) != 0)
7877                 cmn_err(CE_NOTE, "Failed to bind to zfs event channel");
7878 
7879         return (0);
7880 }
7881 
7882 int
7883 _fini(void)
7884 {
7885         int error;
7886 
7887         if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
7888                 return (SET_ERROR(EBUSY));
7889 
7890         if ((error = mod_remove(&modlinkage)) != 0)
7891                 return (error);
7892 
7893         zvol_fini();
7894         zfs_fini();
7895         spa_fini();
7896         if (zfs_nfsshare_inited)
7897                 (void) ddi_modclose(nfs_mod);
7898         if (zfs_smbshare_inited)
7899                 (void) ddi_modclose(smbsrv_mod);
7900         if (zfs_nfsshare_inited || zfs_smbshare_inited)
7901                 (void) ddi_modclose(sharefs_mod);
7902 
7903         tsd_destroy(&zfs_fsyncer_key);
7904         ldi_ident_release(zfs_li);
7905         zfs_li = NULL;
7906         mutex_destroy(&zfs_share_lock);
7907 
7908         if (zfs_channel)
7909                 (void) sysevent_evc_unbind(zfs_channel);
7910 
7911         return (error);
7912 }
7913 
7914 int
7915 _info(struct modinfo *modinfop)
7916 {
7917         return (mod_info(&modlinkage, modinfop));
7918 }