1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
  25  * Portions Copyright 2011 Martin Matuska
  26  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
  27  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  28  * Copyright 2019 Joyent, Inc.
  29  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  30  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  31  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  32  * Copyright (c) 2014 Integros [integros.com]
  33  * Copyright 2016 Toomas Soome <tsoome@me.com>
  34  * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
  35  * Copyright 2017 RackTop Systems.
  36  * Copyright (c) 2017, Datto, Inc. All rights reserved.
  37  */
  38 
  39 /*
  40  * ZFS ioctls.
  41  *
  42  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
  43  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
  44  *
  45  * There are two ways that we handle ioctls: the legacy way where almost
  46  * all of the logic is in the ioctl callback, and the new way where most
  47  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
  48  *
  49  * Non-legacy ioctls should be registered by calling
  50  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
  51  * from userland by lzc_ioctl().
  52  *
  53  * The registration arguments are as follows:
  54  *
  55  * const char *name
  56  *   The name of the ioctl.  This is used for history logging.  If the
  57  *   ioctl returns successfully (the callback returns 0), and allow_log
  58  *   is true, then a history log entry will be recorded with the input &
  59  *   output nvlists.  The log entry can be printed with "zpool history -i".
  60  *
  61  * zfs_ioc_t ioc
  62  *   The ioctl request number, which userland will pass to ioctl(2).
  63  *   The ioctl numbers can change from release to release, because
  64  *   the caller (libzfs) must be matched to the kernel.
  65  *
  66  * zfs_secpolicy_func_t *secpolicy
  67  *   This function will be called before the zfs_ioc_func_t, to
  68  *   determine if this operation is permitted.  It should return EPERM
  69  *   on failure, and 0 on success.  Checks include determining if the
  70  *   dataset is visible in this zone, and if the user has either all
  71  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
  72  *   to do this operation on this dataset with "zfs allow".
  73  *
  74  * zfs_ioc_namecheck_t namecheck
  75  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
  76  *   name, a dataset name, or nothing.  If the name is not well-formed,
  77  *   the ioctl will fail and the callback will not be called.
  78  *   Therefore, the callback can assume that the name is well-formed
  79  *   (e.g. is null-terminated, doesn't have more than one '@' character,
  80  *   doesn't have invalid characters).
  81  *
  82  * zfs_ioc_poolcheck_t pool_check
  83  *   This specifies requirements on the pool state.  If the pool does
  84  *   not meet them (is suspended or is readonly), the ioctl will fail
  85  *   and the callback will not be called.  If any checks are specified
  86  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
  87  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
  88  *   POOL_CHECK_READONLY).
  89  *
  90  * boolean_t smush_outnvlist
  91  *   If smush_outnvlist is true, then the output is presumed to be a
  92  *   list of errors, and it will be "smushed" down to fit into the
  93  *   caller's buffer, by removing some entries and replacing them with a
  94  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
  95  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
  96  *   outnvlist does not fit into the userland-provided buffer, then the
  97  *   ioctl will fail with ENOMEM.
  98  *
  99  * zfs_ioc_func_t *func
 100  *   The callback function that will perform the operation.
 101  *
 102  *   The callback should return 0 on success, or an error number on
 103  *   failure.  If the function fails, the userland ioctl will return -1,
 104  *   and errno will be set to the callback's return value.  The callback
 105  *   will be called with the following arguments:
 106  *
 107  *   const char *name
 108  *     The name of the pool or dataset to operate on, from
 109  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
 110  *     expected type (pool, dataset, or none).
 111  *
 112  *   nvlist_t *innvl
 113  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
 114  *     NULL if no input nvlist was provided.  Changes to this nvlist are
 115  *     ignored.  If the input nvlist could not be deserialized, the
 116  *     ioctl will fail and the callback will not be called.
 117  *
 118  *   nvlist_t *outnvl
 119  *     The output nvlist, initially empty.  The callback can fill it in,
 120  *     and it will be returned to userland by serializing it into
 121  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
 122  *     fails (e.g. because the caller didn't supply a large enough
 123  *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
 125  *
 126  *     There are two typical uses of the output nvlist:
 127  *       - To return state, e.g. property values.  In this case,
 128  *         smush_outnvlist should be false.  If the buffer was not large
 129  *         enough, the caller will reallocate a larger buffer and try
 130  *         the ioctl again.
 131  *
 132  *       - To return multiple errors from an ioctl which makes on-disk
 133  *         changes.  In this case, smush_outnvlist should be true.
 134  *         Ioctls which make on-disk modifications should generally not
 135  *         use the outnvl if they succeed, because the caller can not
 136  *         distinguish between the operation failing, and
 137  *         deserialization failing.
 138  */
 139 
 140 #include <sys/types.h>
 141 #include <sys/param.h>
 142 #include <sys/errno.h>
 143 #include <sys/uio.h>
 144 #include <sys/buf.h>
 145 #include <sys/modctl.h>
 146 #include <sys/open.h>
 147 #include <sys/file.h>
 148 #include <sys/kmem.h>
 149 #include <sys/conf.h>
 150 #include <sys/cmn_err.h>
 151 #include <sys/stat.h>
 152 #include <sys/zfs_ioctl.h>
 153 #include <sys/zfs_vfsops.h>
 154 #include <sys/zfs_znode.h>
 155 #include <sys/zap.h>
 156 #include <sys/spa.h>
 157 #include <sys/spa_impl.h>
 158 #include <sys/vdev.h>
 159 #include <sys/priv_impl.h>
 160 #include <sys/dmu.h>
 161 #include <sys/dsl_dir.h>
 162 #include <sys/dsl_dataset.h>
 163 #include <sys/dsl_prop.h>
 164 #include <sys/dsl_deleg.h>
 165 #include <sys/dmu_objset.h>
 166 #include <sys/dmu_impl.h>
 167 #include <sys/dmu_tx.h>
 168 #include <sys/ddi.h>
 169 #include <sys/sunddi.h>
 170 #include <sys/sunldi.h>
 171 #include <sys/policy.h>
 172 #include <sys/zone.h>
 173 #include <sys/nvpair.h>
 174 #include <sys/pathname.h>
 175 #include <sys/mount.h>
 176 #include <sys/sdt.h>
 177 #include <sys/fs/zfs.h>
 178 #include <sys/zfs_ctldir.h>
 179 #include <sys/zfs_dir.h>
 180 #include <sys/zfs_onexit.h>
 181 #include <sys/zvol.h>
 182 #include <sys/dsl_scan.h>
 183 #include <sharefs/share.h>
 184 #include <sys/dmu_objset.h>
 185 #include <sys/dmu_recv.h>
 186 #include <sys/dmu_send.h>
 187 #include <sys/dsl_destroy.h>
 188 #include <sys/dsl_bookmark.h>
 189 #include <sys/dsl_userhold.h>
 190 #include <sys/zfeature.h>
 191 #include <sys/zcp.h>
 192 #include <sys/zio_checksum.h>
 193 #include <sys/vdev_removal.h>
 194 #include <sys/vdev_impl.h>
 195 #include <sys/vdev_initialize.h>
 196 #include <sys/vdev_trim.h>
 197 #include <sys/dsl_crypt.h>
 198 
 199 #include "zfs_namecheck.h"
 200 #include "zfs_prop.h"
 201 #include "zfs_deleg.h"
 202 #include "zfs_comutil.h"
 203 
 204 #include "lua.h"
 205 #include "lauxlib.h"
 206 
/* Declared here, defined outside this section of the file. */
extern struct modlfs zfs_modlfs;

extern void zfs_init(void);
extern void zfs_fini(void);

/* Driver-wide handles; zfs_li starts out NULL. */
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;

/*
 * NOTE(review): these look like thread-specific-data keys -- confirm
 * against the code that creates them.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
static uint_t zfs_allow_log_key;

/*
 * Callback signatures.
 *
 * zfs_ioc_legacy_func_t: legacy ioctl handler; it is handed the raw
 *   zfs_cmd_t.
 * zfs_ioc_func_t: new-style ioctl handler; it is called with
 *   (name, innvl, outnvl) as described in the block comment at the top
 *   of this file.
 * zfs_secpolicy_func_t: permission check run before the ioctl handler.
 */
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
 222 
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name: nothing, a pool
 * name, or a dataset name.
 */
typedef enum {
        NO_NAME,
        POOL_NAME,
        DATASET_NAME
} zfs_ioc_namecheck_t;
 228 
/*
 * Requirements on the pool state for an ioctl.  Each value is a distinct
 * bit, so checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
 * POOL_CHECK_READONLY).
 */
typedef enum {
        POOL_CHECK_NONE         = 1 << 0,
        POOL_CHECK_SUSPENDED    = 1 << 1,
        POOL_CHECK_READONLY     = 1 << 2,
} zfs_ioc_poolcheck_t;
 234 
/*
 * Per-ioctl registration record.  The fields mirror the registration
 * arguments described in the block comment at the top of this file.
 */
typedef struct zfs_ioc_vec {
        /* legacy-style handler (takes the raw zfs_cmd_t) */
        zfs_ioc_legacy_func_t   *zvec_legacy_func;
        /* new-style handler (takes name, innvl, outnvl) */
        zfs_ioc_func_t          *zvec_func;
        /* permission check run before the handler */
        zfs_secpolicy_func_t    *zvec_secpolicy;
        /* what zc_name is expected to contain */
        zfs_ioc_namecheck_t     zvec_namecheck;
        /* whether a successful call may be recorded in the history log */
        boolean_t               zvec_allow_log;
        /* required pool state */
        zfs_ioc_poolcheck_t     zvec_pool_check;
        /* whether an oversized output nvlist is "smushed" to fit */
        boolean_t               zvec_smush_outnvlist;
        /* ioctl name, used for history logging */
        const char              *zvec_name;
} zfs_ioc_vec_t;
 245 
/*
 * This array is indexed by zfs_userquota_prop_t, so the order of the
 * entries must not change independently of that enum.  Each entry is the
 * ZFS_DELEG_PERM_* name for the corresponding property.
 */
static const char *userquota_perms[] = {
        ZFS_DELEG_PERM_USERUSED,
        ZFS_DELEG_PERM_USERQUOTA,
        ZFS_DELEG_PERM_GROUPUSED,
        ZFS_DELEG_PERM_GROUPQUOTA,
        ZFS_DELEG_PERM_USEROBJUSED,
        ZFS_DELEG_PERM_USEROBJQUOTA,
        ZFS_DELEG_PERM_GROUPOBJUSED,
        ZFS_DELEG_PERM_GROUPOBJQUOTA,
        ZFS_DELEG_PERM_PROJECTUSED,
        ZFS_DELEG_PERM_PROJECTQUOTA,
        ZFS_DELEG_PERM_PROJECTOBJUSED,
        ZFS_DELEG_PERM_PROJECTOBJQUOTA,
};
 261 
/*
 * Prototypes for routines that are referenced before their definitions.
 * zfs_set_prop_nvlist() is the only one here with external linkage.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
 274 
 275 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 276 void
 277 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 278 {
 279         const char *newfile;
 280         char buf[512];
 281         va_list adx;
 282 
 283         /*
 284          * Get rid of annoying "../common/" prefix to filename.
 285          */
 286         newfile = strrchr(file, '/');
 287         if (newfile != NULL) {
 288                 newfile = newfile + 1; /* Get rid of leading / */
 289         } else {
 290                 newfile = file;
 291         }
 292 
 293         va_start(adx, fmt);
 294         (void) vsnprintf(buf, sizeof (buf), fmt, adx);
 295         va_end(adx);
 296 
 297         /*
 298          * To get this data, use the zfs-dprintf probe as so:
 299          * dtrace -q -n 'zfs-dprintf \
 300          *      /stringof(arg0) == "dbuf.c"/ \
 301          *      {printf("%s: %s", stringof(arg1), stringof(arg3))}'
 302          * arg0 = file name
 303          * arg1 = function name
 304          * arg2 = line number
 305          * arg3 = message
 306          */
 307         DTRACE_PROBE4(zfs__dprintf,
 308             char *, newfile, char *, func, int, line, char *, buf);
 309 }
 310 
/*
 * Release a history record buffer.  The size passed to kmem_free() is
 * always HIS_MAX_RECORD_LEN, which is the size history_str_get()
 * allocates.
 */
static void
history_str_free(char *buf)
{
        kmem_free(buf, HIS_MAX_RECORD_LEN);
}
 316 
 317 static char *
 318 history_str_get(zfs_cmd_t *zc)
 319 {
 320         char *buf;
 321 
 322         if (zc->zc_history == 0)
 323                 return (NULL);
 324 
 325         buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 326         if (copyinstr((void *)(uintptr_t)zc->zc_history,
 327             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
 328                 history_str_free(buf);
 329                 return (NULL);
 330         }
 331 
 332         buf[HIS_MAX_RECORD_LEN -1] = '\0';
 333 
 334         return (buf);
 335 }
 336 
 337 /*
 338  * Check to see if the named dataset is currently defined as bootable
 339  */
 340 static boolean_t
 341 zfs_is_bootfs(const char *name)
 342 {
 343         objset_t *os;
 344 
 345         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 346                 boolean_t ret;
 347                 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
 348                 dmu_objset_rele(os, FTAG);
 349                 return (ret);
 350         }
 351         return (B_FALSE);
 352 }
 353 
 354 /*
 355  * Return non-zero if the spa version is less than requested version.
 356  */
 357 static int
 358 zfs_earlier_version(const char *name, int version)
 359 {
 360         spa_t *spa;
 361 
 362         if (spa_open(name, &spa, FTAG) == 0) {
 363                 if (spa_version(spa) < version) {
 364                         spa_close(spa, FTAG);
 365                         return (1);
 366                 }
 367                 spa_close(spa, FTAG);
 368         }
 369         return (0);
 370 }
 371 
 372 /*
 373  * Return TRUE if the ZPL version is less than requested version.
 374  */
 375 static boolean_t
 376 zpl_earlier_version(const char *name, int version)
 377 {
 378         objset_t *os;
 379         boolean_t rc = B_TRUE;
 380 
 381         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 382                 uint64_t zplversion;
 383 
 384                 if (dmu_objset_type(os) != DMU_OST_ZFS) {
 385                         dmu_objset_rele(os, FTAG);
 386                         return (B_TRUE);
 387                 }
 388                 /* XXX reading from non-owned objset */
 389                 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 390                         rc = zplversion < version;
 391                 dmu_objset_rele(os, FTAG);
 392         }
 393         return (rc);
 394 }
 395 
 396 static void
 397 zfs_log_history(zfs_cmd_t *zc)
 398 {
 399         spa_t *spa;
 400         char *buf;
 401 
 402         if ((buf = history_str_get(zc)) == NULL)
 403                 return;
 404 
 405         if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 406                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 407                         (void) spa_history_log(spa, buf);
 408                 spa_close(spa, FTAG);
 409         }
 410         history_str_free(buf);
 411 }
 412 
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * All arguments are ignored; the function always returns 0 (permitted).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (0);
}
 423 
 424 /*
 425  * Policy for dataset read operations (list children, get statistics).  Requires
 426  * no privileges, but must be visible in the local zone.
 427  */
 428 /* ARGSUSED */
 429 static int
 430 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 431 {
 432         if (INGLOBALZONE(curproc) ||
 433             zone_dataset_visible(zc->zc_name, NULL))
 434                 return (0);
 435 
 436         return (SET_ERROR(ENOENT));
 437 }
 438 
 439 static int
 440 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 441 {
 442         int writable = 1;
 443 
 444         /*
 445          * The dataset must be visible by this zone -- check this first
 446          * so they don't see EPERM on something they shouldn't know about.
 447          */
 448         if (!INGLOBALZONE(curproc) &&
 449             !zone_dataset_visible(dataset, &writable))
 450                 return (SET_ERROR(ENOENT));
 451 
 452         if (INGLOBALZONE(curproc)) {
 453                 /*
 454                  * If the fs is zoned, only root can access it from the
 455                  * global zone.
 456                  */
 457                 if (secpolicy_zfs(cr) && zoned)
 458                         return (SET_ERROR(EPERM));
 459         } else {
 460                 /*
 461                  * If we are in a local zone, the 'zoned' property must be set.
 462                  */
 463                 if (!zoned)
 464                         return (SET_ERROR(EPERM));
 465 
 466                 /* must be writable by this zone */
 467                 if (!writable)
 468                         return (SET_ERROR(EPERM));
 469         }
 470         return (0);
 471 }
 472 
 473 static int
 474 zfs_dozonecheck(const char *dataset, cred_t *cr)
 475 {
 476         uint64_t zoned;
 477 
 478         if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
 479                 return (SET_ERROR(ENOENT));
 480 
 481         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 482 }
 483 
 484 static int
 485 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 486 {
 487         uint64_t zoned;
 488 
 489         if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
 490                 return (SET_ERROR(ENOENT));
 491 
 492         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 493 }
 494 
 495 static int
 496 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
 497     const char *perm, cred_t *cr)
 498 {
 499         int error;
 500 
 501         error = zfs_dozonecheck_ds(name, ds, cr);
 502         if (error == 0) {
 503                 error = secpolicy_zfs(cr);
 504                 if (error != 0)
 505                         error = dsl_deleg_access_impl(ds, perm, cr);
 506         }
 507         return (error);
 508 }
 509 
 510 static int
 511 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 512 {
 513         int error;
 514         dsl_dataset_t *ds;
 515         dsl_pool_t *dp;
 516 
 517         /*
 518          * First do a quick check for root in the global zone, which
 519          * is allowed to do all write_perms.  This ensures that zfs_ioc_*
 520          * will get to handle nonexistent datasets.
 521          */
 522         if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
 523                 return (0);
 524 
 525         error = dsl_pool_hold(name, FTAG, &dp);
 526         if (error != 0)
 527                 return (error);
 528 
 529         error = dsl_dataset_hold(dp, name, FTAG, &ds);
 530         if (error != 0) {
 531                 dsl_pool_rele(dp, FTAG);
 532                 return (error);
 533         }
 534 
 535         error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
 536 
 537         dsl_dataset_rele(ds, FTAG);
 538         dsl_pool_rele(dp, FTAG);
 539         return (error);
 540 }
 541 
 542 /*
 543  * Policy for setting the security label property.
 544  *
 545  * Returns 0 for success, non-zero for access and other errors.
 546  */
 547 static int
 548 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 549 {
 550         char            ds_hexsl[MAXNAMELEN];
 551         bslabel_t       ds_sl, new_sl;
 552         boolean_t       new_default = FALSE;
 553         uint64_t        zoned;
 554         int             needed_priv = -1;
 555         int             error;
 556 
 557         /* First get the existing dataset label. */
 558         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 559             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 560         if (error != 0)
 561                 return (SET_ERROR(EPERM));
 562 
 563         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 564                 new_default = TRUE;
 565 
 566         /* The label must be translatable */
 567         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 568                 return (SET_ERROR(EINVAL));
 569 
 570         /*
 571          * In a non-global zone, disallow attempts to set a label that
 572          * doesn't match that of the zone; otherwise no other checks
 573          * are needed.
 574          */
 575         if (!INGLOBALZONE(curproc)) {
 576                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 577                         return (SET_ERROR(EPERM));
 578                 return (0);
 579         }
 580 
 581         /*
 582          * For global-zone datasets (i.e., those whose zoned property is
 583          * "off", verify that the specified new label is valid for the
 584          * global zone.
 585          */
 586         if (dsl_prop_get_integer(name,
 587             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 588                 return (SET_ERROR(EPERM));
 589         if (!zoned) {
 590                 if (zfs_check_global_label(name, strval) != 0)
 591                         return (SET_ERROR(EPERM));
 592         }
 593 
 594         /*
 595          * If the existing dataset label is nondefault, check if the
 596          * dataset is mounted (label cannot be changed while mounted).
 597          * Get the zfsvfs; if there isn't one, then the dataset isn't
 598          * mounted (or isn't a dataset, doesn't exist, ...).
 599          */
 600         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 601                 objset_t *os;
 602                 static char *setsl_tag = "setsl_tag";
 603 
 604                 /*
 605                  * Try to own the dataset; abort if there is any error,
 606                  * (e.g., already mounted, in use, or other error).
 607                  */
 608                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
 609                     setsl_tag, &os);
 610                 if (error != 0)
 611                         return (SET_ERROR(EPERM));
 612 
 613                 dmu_objset_disown(os, B_TRUE, setsl_tag);
 614 
 615                 if (new_default) {
 616                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 617                         goto out_check;
 618                 }
 619 
 620                 if (hexstr_to_label(strval, &new_sl) != 0)
 621                         return (SET_ERROR(EPERM));
 622 
 623                 if (blstrictdom(&ds_sl, &new_sl))
 624                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 625                 else if (blstrictdom(&new_sl, &ds_sl))
 626                         needed_priv = PRIV_FILE_UPGRADE_SL;
 627         } else {
 628                 /* dataset currently has a default label */
 629                 if (!new_default)
 630                         needed_priv = PRIV_FILE_UPGRADE_SL;
 631         }
 632 
 633 out_check:
 634         if (needed_priv != -1)
 635                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 636         return (0);
 637 }
 638 
 639 static int
 640 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
 641     cred_t *cr)
 642 {
 643         char *strval;
 644 
 645         /*
 646          * Check permissions for special properties.
 647          */
 648         switch (prop) {
 649         case ZFS_PROP_ZONED:
 650                 /*
 651                  * Disallow setting of 'zoned' from within a local zone.
 652                  */
 653                 if (!INGLOBALZONE(curproc))
 654                         return (SET_ERROR(EPERM));
 655                 break;
 656 
 657         case ZFS_PROP_QUOTA:
 658         case ZFS_PROP_FILESYSTEM_LIMIT:
 659         case ZFS_PROP_SNAPSHOT_LIMIT:
 660                 if (!INGLOBALZONE(curproc)) {
 661                         uint64_t zoned;
 662                         char setpoint[ZFS_MAX_DATASET_NAME_LEN];
 663                         /*
 664                          * Unprivileged users are allowed to modify the
 665                          * limit on things *under* (ie. contained by)
 666                          * the thing they own.
 667                          */
 668                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
 669                             setpoint))
 670                                 return (SET_ERROR(EPERM));
 671                         if (!zoned || strlen(dsname) <= strlen(setpoint))
 672                                 return (SET_ERROR(EPERM));
 673                 }
 674                 break;
 675 
 676         case ZFS_PROP_MLSLABEL:
 677                 if (!is_system_labeled())
 678                         return (SET_ERROR(EPERM));
 679 
 680                 if (nvpair_value_string(propval, &strval) == 0) {
 681                         int err;
 682 
 683                         err = zfs_set_slabel_policy(dsname, strval, CRED());
 684                         if (err != 0)
 685                                 return (err);
 686                 }
 687                 break;
 688         }
 689 
 690         return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 691 }
 692 
 693 /* ARGSUSED */
 694 static int
 695 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 696 {
 697         int error;
 698 
 699         error = zfs_dozonecheck(zc->zc_name, cr);
 700         if (error != 0)
 701                 return (error);
 702 
 703         /*
 704          * permission to set permissions will be evaluated later in
 705          * dsl_deleg_can_allow()
 706          */
 707         return (0);
 708 }
 709 
/*
 * Policy for rollback: requires write permission ZFS_DELEG_PERM_ROLLBACK
 * on the dataset named by zc->zc_name.  innvl is unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (zfs_secpolicy_write_perms(zc->zc_name,
            ZFS_DELEG_PERM_ROLLBACK, cr));
}
 717 
 718 /* ARGSUSED */
 719 static int
 720 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 721 {
 722         dsl_pool_t *dp;
 723         dsl_dataset_t *ds;
 724         char *cp;
 725         int error;
 726 
 727         /*
 728          * Generate the current snapshot name from the given objsetid, then
 729          * use that name for the secpolicy/zone checks.
 730          */
 731         cp = strchr(zc->zc_name, '@');
 732         if (cp == NULL)
 733                 return (SET_ERROR(EINVAL));
 734         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 735         if (error != 0)
 736                 return (error);
 737 
 738         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 739         if (error != 0) {
 740                 dsl_pool_rele(dp, FTAG);
 741                 return (error);
 742         }
 743 
 744         dsl_dataset_name(ds, zc->zc_name);
 745 
 746         error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 747             ZFS_DELEG_PERM_SEND, cr);
 748         dsl_dataset_rele(ds, FTAG);
 749         dsl_pool_rele(dp, FTAG);
 750 
 751         return (error);
 752 }
 753 
/*
 * Policy for send by name: requires write permission ZFS_DELEG_PERM_SEND
 * on the dataset named by zc->zc_name.  innvl is unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (zfs_secpolicy_write_perms(zc->zc_name,
            ZFS_DELEG_PERM_SEND, cr));
}
 761 
 762 /* ARGSUSED */
 763 static int
 764 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 765 {
 766         vnode_t *vp;
 767         int error;
 768 
 769         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
 770             NO_FOLLOW, NULL, &vp)) != 0)
 771                 return (error);
 772 
 773         /* Now make sure mntpnt and dataset are ZFS */
 774 
 775         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
 776             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 777             zc->zc_name) != 0)) {
 778                 VN_RELE(vp);
 779                 return (SET_ERROR(EPERM));
 780         }
 781 
 782         VN_RELE(vp);
 783         return (dsl_deleg_access(zc->zc_name,
 784             ZFS_DELEG_PERM_SHARE, cr));
 785 }
 786 
 787 int
 788 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 789 {
 790         if (!INGLOBALZONE(curproc))
 791                 return (SET_ERROR(EPERM));
 792 
 793         if (secpolicy_nfs(cr) == 0) {
 794                 return (0);
 795         } else {
 796                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 797         }
 798 }
 799 
 800 int
 801 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 802 {
 803         if (!INGLOBALZONE(curproc))
 804                 return (SET_ERROR(EPERM));
 805 
 806         if (secpolicy_smb(cr) == 0) {
 807                 return (0);
 808         } else {
 809                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 810         }
 811 }
 812 
 813 static int
 814 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 815 {
 816         char *cp;
 817 
 818         /*
 819          * Remove the @bla or /bla from the end of the name to get the parent.
 820          */
 821         (void) strncpy(parent, datasetname, parentsize);
 822         cp = strrchr(parent, '@');
 823         if (cp != NULL) {
 824                 cp[0] = '\0';
 825         } else {
 826                 cp = strrchr(parent, '/');
 827                 if (cp == NULL)
 828                         return (SET_ERROR(ENOENT));
 829                 cp[0] = '\0';
 830         }
 831 
 832         return (0);
 833 }
 834 
 835 int
 836 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 837 {
 838         int error;
 839 
 840         if ((error = zfs_secpolicy_write_perms(name,
 841             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 842                 return (error);
 843 
 844         return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
 845 }
 846 
/*
 * Policy for destroy: applies zfs_secpolicy_destroy_perms() to the
 * dataset named by zc->zc_name.  innvl is unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
}
 853 
 854 /*
 855  * Destroying snapshots with delegated permissions requires
 856  * descendant mount and destroy permissions.
 857  */
 858 /* ARGSUSED */
 859 static int
 860 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 861 {
 862         nvlist_t *snaps;
 863         nvpair_t *pair, *nextpair;
 864         int error = 0;
 865 
 866         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 867                 return (SET_ERROR(EINVAL));
 868         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 869             pair = nextpair) {
 870                 nextpair = nvlist_next_nvpair(snaps, pair);
 871                 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
 872                 if (error == ENOENT) {
 873                         /*
 874                          * Ignore any snapshots that don't exist (we consider
 875                          * them "already destroyed").  Remove the name from the
 876                          * nvl here in case the snapshot is created between
 877                          * now and when we try to destroy it (in which case
 878                          * we don't want to destroy it since we haven't
 879                          * checked for permission).
 880                          */
 881                         fnvlist_remove_nvpair(snaps, pair);
 882                         error = 0;
 883                 }
 884                 if (error != 0)
 885                         break;
 886         }
 887 
 888         return (error);
 889 }
 890 
 891 int
 892 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 893 {
 894         char    parentname[ZFS_MAX_DATASET_NAME_LEN];
 895         int     error;
 896 
 897         if ((error = zfs_secpolicy_write_perms(from,
 898             ZFS_DELEG_PERM_RENAME, cr)) != 0)
 899                 return (error);
 900 
 901         if ((error = zfs_secpolicy_write_perms(from,
 902             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 903                 return (error);
 904 
 905         if ((error = zfs_get_parent(to, parentname,
 906             sizeof (parentname))) != 0)
 907                 return (error);
 908 
 909         if ((error = zfs_secpolicy_write_perms(parentname,
 910             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 911                 return (error);
 912 
 913         if ((error = zfs_secpolicy_write_perms(parentname,
 914             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 915                 return (error);
 916 
 917         return (error);
 918 }
 919 
 920 /* ARGSUSED */
 921 static int
 922 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 923 {
 924         return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
 925 }
 926 
/*
 * Security policy for promoting the clone named by zc->zc_name.
 *
 * Requires the "promote" permission on the clone (checked by name before
 * any holds are taken), then the "mount" permission on the clone and the
 * "promote" permission on its origin dataset.  Returns 0 on success or
 * the first error encountered while holding or checking.
 */
/* ARGSUSED */
static int
zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        dsl_pool_t *dp;
        dsl_dataset_t *clone;
        int error;

        error = zfs_secpolicy_write_perms(zc->zc_name,
            ZFS_DELEG_PERM_PROMOTE, cr);
        if (error != 0)
                return (error);

        error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
        if (error != 0)
                return (error);

        error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);

        if (error == 0) {
                char parentname[ZFS_MAX_DATASET_NAME_LEN];
                dsl_dataset_t *origin = NULL;
                dsl_dir_t *dd;
                dd = clone->ds_dir;

                /* Hold the origin recorded in the clone's dsl_dir. */
                error = dsl_dataset_hold_obj(dd->dd_pool,
                    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
                if (error != 0) {
                        /* Drop everything acquired so far and bail out. */
                        dsl_dataset_rele(clone, FTAG);
                        dsl_pool_rele(dp, FTAG);
                        return (error);
                }

                error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
                    ZFS_DELEG_PERM_MOUNT, cr);

                /*
                 * The origin's name is fetched unconditionally; it is only
                 * used if the mount check above succeeded.
                 */
                dsl_dataset_name(origin, parentname);
                if (error == 0) {
                        error = zfs_secpolicy_write_perms_ds(parentname, origin,
                            ZFS_DELEG_PERM_PROMOTE, cr);
                }
                dsl_dataset_rele(clone, FTAG);
                dsl_dataset_rele(origin, FTAG);
        }
        /* Reached on both success and dsl_dataset_hold() failure. */
        dsl_pool_rele(dp, FTAG);
        return (error);
}
 974 
 975 /* ARGSUSED */
 976 static int
 977 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 978 {
 979         int error;
 980 
 981         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 982             ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
 983                 return (error);
 984 
 985         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 986             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 987                 return (error);
 988 
 989         return (zfs_secpolicy_write_perms(zc->zc_name,
 990             ZFS_DELEG_PERM_CREATE, cr));
 991 }
 992 
 993 int
 994 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 995 {
 996         return (zfs_secpolicy_write_perms(name,
 997             ZFS_DELEG_PERM_SNAPSHOT, cr));
 998 }
 999 
1000 /*
1001  * Check for permission to create each snapshot in the nvlist.
1002  */
1003 /* ARGSUSED */
1004 static int
1005 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1006 {
1007         nvlist_t *snaps;
1008         int error = 0;
1009         nvpair_t *pair;
1010 
1011         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1012                 return (SET_ERROR(EINVAL));
1013         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1014             pair = nvlist_next_nvpair(snaps, pair)) {
1015                 char *name = nvpair_name(pair);
1016                 char *atp = strchr(name, '@');
1017 
1018                 if (atp == NULL) {
1019                         error = SET_ERROR(EINVAL);
1020                         break;
1021                 }
1022                 *atp = '\0';
1023                 error = zfs_secpolicy_snapshot_perms(name, cr);
1024                 *atp = '@';
1025                 if (error != 0)
1026                         break;
1027         }
1028         return (error);
1029 }
1030 
1031 /*
1032  * Check for permission to create each snapshot in the nvlist.
1033  */
1034 /* ARGSUSED */
1035 static int
1036 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1037 {
1038         int error = 0;
1039 
1040         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1041             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1042                 char *name = nvpair_name(pair);
1043                 char *hashp = strchr(name, '#');
1044 
1045                 if (hashp == NULL) {
1046                         error = SET_ERROR(EINVAL);
1047                         break;
1048                 }
1049                 *hashp = '\0';
1050                 error = zfs_secpolicy_write_perms(name,
1051                     ZFS_DELEG_PERM_BOOKMARK, cr);
1052                 *hashp = '#';
1053                 if (error != 0)
1054                         break;
1055         }
1056         return (error);
1057 }
1058 
1059 /* ARGSUSED */
1060 static int
1061 zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1062 {
1063         return (zfs_secpolicy_write_perms(zc->zc_name,
1064             ZFS_DELEG_PERM_REMAP, cr));
1065 }
1066 
1067 /* ARGSUSED */
1068 static int
1069 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1070 {
1071         nvpair_t *pair, *nextpair;
1072         int error = 0;
1073 
1074         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1075             pair = nextpair) {
1076                 char *name = nvpair_name(pair);
1077                 char *hashp = strchr(name, '#');
1078                 nextpair = nvlist_next_nvpair(innvl, pair);
1079 
1080                 if (hashp == NULL) {
1081                         error = SET_ERROR(EINVAL);
1082                         break;
1083                 }
1084 
1085                 *hashp = '\0';
1086                 error = zfs_secpolicy_write_perms(name,
1087                     ZFS_DELEG_PERM_DESTROY, cr);
1088                 *hashp = '#';
1089                 if (error == ENOENT) {
1090                         /*
1091                          * Ignore any filesystems that don't exist (we consider
1092                          * their bookmarks "already destroyed").  Remove
1093                          * the name from the nvl here in case the filesystem
1094                          * is created between now and when we try to destroy
1095                          * the bookmark (in which case we don't want to
1096                          * destroy it since we haven't checked for permission).
1097                          */
1098                         fnvlist_remove_nvpair(innvl, pair);
1099                         error = 0;
1100                 }
1101                 if (error != 0)
1102                         break;
1103         }
1104 
1105         return (error);
1106 }
1107 
1108 /* ARGSUSED */
1109 static int
1110 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1111 {
1112         /*
1113          * Even root must have a proper TSD so that we know what pool
1114          * to log to.
1115          */
1116         if (tsd_get(zfs_allow_log_key) == NULL)
1117                 return (SET_ERROR(EPERM));
1118         return (0);
1119 }
1120 
1121 static int
1122 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1123 {
1124         char    parentname[ZFS_MAX_DATASET_NAME_LEN];
1125         int     error;
1126         char    *origin;
1127 
1128         if ((error = zfs_get_parent(zc->zc_name, parentname,
1129             sizeof (parentname))) != 0)
1130                 return (error);
1131 
1132         if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1133             (error = zfs_secpolicy_write_perms(origin,
1134             ZFS_DELEG_PERM_CLONE, cr)) != 0)
1135                 return (error);
1136 
1137         if ((error = zfs_secpolicy_write_perms(parentname,
1138             ZFS_DELEG_PERM_CREATE, cr)) != 0)
1139                 return (error);
1140 
1141         return (zfs_secpolicy_write_perms(parentname,
1142             ZFS_DELEG_PERM_MOUNT, cr));
1143 }
1144 
1145 /*
1146  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1147  * SYS_CONFIG privilege, which is not available in a local zone.
1148  */
1149 /* ARGSUSED */
1150 static int
1151 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1152 {
1153         if (secpolicy_sys_config(cr, B_FALSE) != 0)
1154                 return (SET_ERROR(EPERM));
1155 
1156         return (0);
1157 }
1158 
1159 /*
1160  * Policy for object to name lookups.
1161  */
1162 /* ARGSUSED */
1163 static int
1164 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1165 {
1166         int error;
1167 
1168         if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1169                 return (0);
1170 
1171         error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1172         return (error);
1173 }
1174 
1175 /*
1176  * Policy for fault injection.  Requires all privileges.
1177  */
1178 /* ARGSUSED */
1179 static int
1180 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1181 {
1182         return (secpolicy_zinject(cr));
1183 }
1184 
1185 /* ARGSUSED */
1186 static int
1187 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1188 {
1189         zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1190 
1191         if (prop == ZPROP_INVAL) {
1192                 if (!zfs_prop_user(zc->zc_value))
1193                         return (SET_ERROR(EINVAL));
1194                 return (zfs_secpolicy_write_perms(zc->zc_name,
1195                     ZFS_DELEG_PERM_USERPROP, cr));
1196         } else {
1197                 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1198                     NULL, cr));
1199         }
1200 }
1201 
1202 static int
1203 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1204 {
1205         int err = zfs_secpolicy_read(zc, innvl, cr);
1206         if (err)
1207                 return (err);
1208 
1209         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1210                 return (SET_ERROR(EINVAL));
1211 
1212         if (zc->zc_value[0] == 0) {
1213                 /*
1214                  * They are asking about a posix uid/gid.  If it's
1215                  * themself, allow it.
1216                  */
1217                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1218                     zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1219                     zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1220                     zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1221                         if (zc->zc_guid == crgetuid(cr))
1222                                 return (0);
1223                 } else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1224                     zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1225                     zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1226                     zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1227                         if (groupmember(zc->zc_guid, cr))
1228                                 return (0);
1229                 }
1230                 /* else is for project quota/used */
1231         }
1232 
1233         return (zfs_secpolicy_write_perms(zc->zc_name,
1234             userquota_perms[zc->zc_objset_type], cr));
1235 }
1236 
1237 static int
1238 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1239 {
1240         int err = zfs_secpolicy_read(zc, innvl, cr);
1241         if (err)
1242                 return (err);
1243 
1244         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1245                 return (SET_ERROR(EINVAL));
1246 
1247         return (zfs_secpolicy_write_perms(zc->zc_name,
1248             userquota_perms[zc->zc_objset_type], cr));
1249 }
1250 
1251 /* ARGSUSED */
1252 static int
1253 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1254 {
1255         return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1256             NULL, cr));
1257 }
1258 
1259 /* ARGSUSED */
1260 static int
1261 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1262 {
1263         nvpair_t *pair;
1264         nvlist_t *holds;
1265         int error;
1266 
1267         error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1268         if (error != 0)
1269                 return (SET_ERROR(EINVAL));
1270 
1271         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1272             pair = nvlist_next_nvpair(holds, pair)) {
1273                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1274                 error = dmu_fsname(nvpair_name(pair), fsname);
1275                 if (error != 0)
1276                         return (error);
1277                 error = zfs_secpolicy_write_perms(fsname,
1278                     ZFS_DELEG_PERM_HOLD, cr);
1279                 if (error != 0)
1280                         return (error);
1281         }
1282         return (0);
1283 }
1284 
1285 /* ARGSUSED */
1286 static int
1287 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1288 {
1289         nvpair_t *pair;
1290         int error;
1291 
1292         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1293             pair = nvlist_next_nvpair(innvl, pair)) {
1294                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1295                 error = dmu_fsname(nvpair_name(pair), fsname);
1296                 if (error != 0)
1297                         return (error);
1298                 error = zfs_secpolicy_write_perms(fsname,
1299                     ZFS_DELEG_PERM_RELEASE, cr);
1300                 if (error != 0)
1301                         return (error);
1302         }
1303         return (0);
1304 }
1305 
1306 /* ARGSUSED */
1307 static int
1308 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1309 {
1310         return (zfs_secpolicy_write_perms(zc->zc_name,
1311             ZFS_DELEG_PERM_LOAD_KEY, cr));
1312 }
1313 
1314 /* ARGSUSED */
1315 static int
1316 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1317 {
1318         return (zfs_secpolicy_write_perms(zc->zc_name,
1319             ZFS_DELEG_PERM_CHANGE_KEY, cr));
1320 }
1321 
1322 /*
1323  * Policy for allowing temporary snapshots to be taken or released
1324  */
1325 static int
1326 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1327 {
1328         /*
1329          * A temporary snapshot is the same as a snapshot,
1330          * hold, destroy and release all rolled into one.
1331          * Delegated diff alone is sufficient that we allow this.
1332          */
1333         int error;
1334 
1335         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1336             ZFS_DELEG_PERM_DIFF, cr)) == 0)
1337                 return (0);
1338 
1339         error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1340         if (error == 0)
1341                 error = zfs_secpolicy_hold(zc, innvl, cr);
1342         if (error == 0)
1343                 error = zfs_secpolicy_release(zc, innvl, cr);
1344         if (error == 0)
1345                 error = zfs_secpolicy_destroy(zc, innvl, cr);
1346         return (error);
1347 }
1348 
1349 /*
1350  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1351  */
1352 static int
1353 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1354 {
1355         char *packed;
1356         int error;
1357         nvlist_t *list = NULL;
1358 
1359         /*
1360          * Read in and unpack the user-supplied nvlist.
1361          */
1362         if (size == 0)
1363                 return (SET_ERROR(EINVAL));
1364 
1365         packed = kmem_alloc(size, KM_SLEEP);
1366 
1367         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1368             iflag)) != 0) {
1369                 kmem_free(packed, size);
1370                 return (SET_ERROR(EFAULT));
1371         }
1372 
1373         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1374                 kmem_free(packed, size);
1375                 return (error);
1376         }
1377 
1378         kmem_free(packed, size);
1379 
1380         *nvp = list;
1381         return (0);
1382 }
1383 
1384 /*
1385  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1386  * Entries will be removed from the end of the nvlist, and one int32 entry
1387  * named "N_MORE_ERRORS" will be added indicating how many entries were
1388  * removed.
1389  */
1390 static int
1391 nvlist_smush(nvlist_t *errors, size_t max)
1392 {
1393         size_t size;
1394 
1395         size = fnvlist_size(errors);
1396 
1397         if (size > max) {
1398                 nvpair_t *more_errors;
1399                 int n = 0;
1400 
1401                 if (max < 1024)
1402                         return (SET_ERROR(ENOMEM));
1403 
1404                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1405                 more_errors = nvlist_prev_nvpair(errors, NULL);
1406 
1407                 do {
1408                         nvpair_t *pair = nvlist_prev_nvpair(errors,
1409                             more_errors);
1410                         fnvlist_remove_nvpair(errors, pair);
1411                         n++;
1412                         size = fnvlist_size(errors);
1413                 } while (size > max);
1414 
1415                 fnvlist_remove_nvpair(errors, more_errors);
1416                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1417                 ASSERT3U(fnvlist_size(errors), <=, max);
1418         }
1419 
1420         return (0);
1421 }
1422 
1423 static int
1424 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1425 {
1426         char *packed = NULL;
1427         int error = 0;
1428         size_t size;
1429 
1430         size = fnvlist_size(nvl);
1431 
1432         if (size > zc->zc_nvlist_dst_size) {
1433                 error = SET_ERROR(ENOMEM);
1434         } else {
1435                 packed = fnvlist_pack(nvl, &size);
1436                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1437                     size, zc->zc_iflags) != 0)
1438                         error = SET_ERROR(EFAULT);
1439                 fnvlist_pack_free(packed, size);
1440         }
1441 
1442         zc->zc_nvlist_dst_size = size;
1443         zc->zc_nvlist_dst_filled = B_TRUE;
1444         return (error);
1445 }
1446 
1447 int
1448 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1449 {
1450         int error = 0;
1451         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1452                 return (SET_ERROR(EINVAL));
1453         }
1454 
1455         mutex_enter(&os->os_user_ptr_lock);
1456         *zfvp = dmu_objset_get_user(os);
1457         if (*zfvp) {
1458                 VFS_HOLD((*zfvp)->z_vfs);
1459         } else {
1460                 error = SET_ERROR(ESRCH);
1461         }
1462         mutex_exit(&os->os_user_ptr_lock);
1463         return (error);
1464 }
1465 
1466 int
1467 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1468 {
1469         objset_t *os;
1470         int error;
1471 
1472         error = dmu_objset_hold(dsname, FTAG, &os);
1473         if (error != 0)
1474                 return (error);
1475 
1476         error = getzfsvfs_impl(os, zfvp);
1477         dmu_objset_rele(os, FTAG);
1478         return (error);
1479 }
1480 
/*
 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
 * case its z_vfs will be NULL, and it will be opened as the owner.
 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
 * which prevents all vnode ops from running.
 *
 * Returns 0 with the teardown lock held and '*zfvp' set, the error from
 * zfsvfs_create(), or EBUSY if the filesystem is found to be unmounted
 * (in which case the teardown lock has been dropped again).
 */
static int
zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
{
        int error = 0;

        /* Any getzfsvfs() failure falls back to creating a private one. */
        if (getzfsvfs(name, zfvp) != 0)
                error = zfsvfs_create(name, B_FALSE, zfvp);
        if (error == 0) {
                rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
                    RW_READER, tag);
                if ((*zfvp)->z_unmounted) {
                        /*
                         * XXX we could probably try again, since the unmounting
                         * thread should be just about to disassociate the
                         * objset from the zfsvfs.
                         */
                        /*
                         * NOTE(review): only the teardown lock is dropped
                         * here; whatever getzfsvfs()/zfsvfs_create()
                         * acquired is not released on this path - confirm
                         * that this is intended.
                         */
                        rrm_exit(&(*zfvp)->z_teardown_lock, tag);
                        return (SET_ERROR(EBUSY));
                }
        }
        return (error);
}
1509 
1510 static void
1511 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1512 {
1513         rrm_exit(&zfsvfs->z_teardown_lock, tag);
1514 
1515         if (zfsvfs->z_vfs) {
1516                 VFS_RELE(zfsvfs->z_vfs);
1517         } else {
1518                 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1519                 zfsvfs_free(zfsvfs);
1520         }
1521 }
1522 
1523 static int
1524 zfs_ioc_pool_create(zfs_cmd_t *zc)
1525 {
1526         int error;
1527         nvlist_t *config, *props = NULL;
1528         nvlist_t *rootprops = NULL;
1529         nvlist_t *zplprops = NULL;
1530         char *spa_name = zc->zc_name;
1531         dsl_crypto_params_t *dcp = NULL;
1532 
1533         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1534             zc->zc_iflags, &config))
1535                 return (error);
1536 
1537         if (zc->zc_nvlist_src_size != 0 && (error =
1538             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1539             zc->zc_iflags, &props))) {
1540                 nvlist_free(config);
1541                 return (error);
1542         }
1543 
1544         if (props) {
1545                 nvlist_t *nvl = NULL;
1546                 nvlist_t *hidden_args = NULL;
1547                 uint64_t version = SPA_VERSION;
1548                 char *tname;
1549 
1550                 (void) nvlist_lookup_uint64(props,
1551                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1552                 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1553                         error = SET_ERROR(EINVAL);
1554                         goto pool_props_bad;
1555                 }
1556                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1557                 if (nvl) {
1558                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1559                         if (error != 0) {
1560                                 nvlist_free(config);
1561                                 nvlist_free(props);
1562                                 return (error);
1563                         }
1564                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1565                 }
1566 
1567                 (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
1568                     &hidden_args);
1569                 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
1570                     rootprops, hidden_args, &dcp);
1571                 if (error != 0) {
1572                         nvlist_free(config);
1573                         nvlist_free(props);
1574                         return (error);
1575                 }
1576                 (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
1577 
1578                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1579                 error = zfs_fill_zplprops_root(version, rootprops,
1580                     zplprops, NULL);
1581                 if (error != 0)
1582                         goto pool_props_bad;
1583 
1584                 if (nvlist_lookup_string(props,
1585                     zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1586                         spa_name = tname;
1587         }
1588 
1589         error = spa_create(zc->zc_name, config, props, zplprops, dcp);
1590 
1591         /*
1592          * Set the remaining root properties
1593          */
1594         if (!error && (error = zfs_set_prop_nvlist(spa_name,
1595             ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1596                 (void) spa_destroy(spa_name);
1597 
1598 pool_props_bad:
1599         nvlist_free(rootprops);
1600         nvlist_free(zplprops);
1601         nvlist_free(config);
1602         nvlist_free(props);
1603         dsl_crypto_params_free(dcp, !!error);
1604 
1605         return (error);
1606 }
1607 
1608 static int
1609 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1610 {
1611         int error;
1612         zfs_log_history(zc);
1613         error = spa_destroy(zc->zc_name);
1614         if (error == 0)
1615                 zvol_remove_minors(zc->zc_name);
1616         return (error);
1617 }
1618 
1619 static int
1620 zfs_ioc_pool_import(zfs_cmd_t *zc)
1621 {
1622         nvlist_t *config, *props = NULL;
1623         uint64_t guid;
1624         int error;
1625 
1626         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1627             zc->zc_iflags, &config)) != 0)
1628                 return (error);
1629 
1630         if (zc->zc_nvlist_src_size != 0 && (error =
1631             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1632             zc->zc_iflags, &props))) {
1633                 nvlist_free(config);
1634                 return (error);
1635         }
1636 
1637         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1638             guid != zc->zc_guid)
1639                 error = SET_ERROR(EINVAL);
1640         else
1641                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1642 
1643         if (zc->zc_nvlist_dst != 0) {
1644                 int err;
1645 
1646                 if ((err = put_nvlist(zc, config)) != 0)
1647                         error = err;
1648         }
1649 
1650         nvlist_free(config);
1651 
1652         nvlist_free(props);
1653 
1654         return (error);
1655 }
1656 
1657 static int
1658 zfs_ioc_pool_export(zfs_cmd_t *zc)
1659 {
1660         int error;
1661         boolean_t force = (boolean_t)zc->zc_cookie;
1662         boolean_t hardforce = (boolean_t)zc->zc_guid;
1663 
1664         zfs_log_history(zc);
1665         error = spa_export(zc->zc_name, NULL, force, hardforce);
1666         if (error == 0)
1667                 zvol_remove_minors(zc->zc_name);
1668         return (error);
1669 }
1670 
1671 static int
1672 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1673 {
1674         nvlist_t *configs;
1675         int error;
1676 
1677         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1678                 return (SET_ERROR(EEXIST));
1679 
1680         error = put_nvlist(zc, configs);
1681 
1682         nvlist_free(configs);
1683 
1684         return (error);
1685 }
1686 
1687 /*
1688  * inputs:
1689  * zc_name              name of the pool
1690  *
1691  * outputs:
1692  * zc_cookie            real errno
1693  * zc_nvlist_dst        config nvlist
1694  * zc_nvlist_dst_size   size of config nvlist
1695  */
1696 static int
1697 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1698 {
1699         nvlist_t *config;
1700         int error;
1701         int ret = 0;
1702 
1703         error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1704             sizeof (zc->zc_value));
1705 
1706         if (config != NULL) {
1707                 ret = put_nvlist(zc, config);
1708                 nvlist_free(config);
1709 
1710                 /*
1711                  * The config may be present even if 'error' is non-zero.
1712                  * In this case we return success, and preserve the real errno
1713                  * in 'zc_cookie'.
1714                  */
1715                 zc->zc_cookie = error;
1716         } else {
1717                 ret = error;
1718         }
1719 
1720         return (ret);
1721 }
1722 
1723 /*
1724  * Try to import the given pool, returning pool stats as appropriate so that
1725  * user land knows which devices are available and overall pool health.
1726  */
1727 static int
1728 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1729 {
1730         nvlist_t *tryconfig, *config;
1731         int error;
1732 
1733         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1734             zc->zc_iflags, &tryconfig)) != 0)
1735                 return (error);
1736 
1737         config = spa_tryimport(tryconfig);
1738 
1739         nvlist_free(tryconfig);
1740 
1741         if (config == NULL)
1742                 return (SET_ERROR(EINVAL));
1743 
1744         error = put_nvlist(zc, config);
1745         nvlist_free(config);
1746 
1747         return (error);
1748 }
1749 
1750 /*
1751  * inputs:
1752  * zc_name              name of the pool
1753  * zc_cookie            scan func (pool_scan_func_t)
1754  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1755  */
1756 static int
1757 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1758 {
1759         spa_t *spa;
1760         int error;
1761 
1762         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1763                 return (error);
1764 
1765         if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1766                 return (SET_ERROR(EINVAL));
1767 
1768         if (zc->zc_flags == POOL_SCRUB_PAUSE)
1769                 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1770         else if (zc->zc_cookie == POOL_SCAN_NONE)
1771                 error = spa_scan_stop(spa);
1772         else
1773                 error = spa_scan(spa, zc->zc_cookie);
1774 
1775         spa_close(spa, FTAG);
1776 
1777         return (error);
1778 }
1779 
1780 static int
1781 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1782 {
1783         spa_t *spa;
1784         int error;
1785 
1786         error = spa_open(zc->zc_name, &spa, FTAG);
1787         if (error == 0) {
1788                 spa_freeze(spa);
1789                 spa_close(spa, FTAG);
1790         }
1791         return (error);
1792 }
1793 
1794 static int
1795 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1796 {
1797         spa_t *spa;
1798         int error;
1799 
1800         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1801                 return (error);
1802 
1803         if (zc->zc_cookie < spa_version(spa) ||
1804             !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1805                 spa_close(spa, FTAG);
1806                 return (SET_ERROR(EINVAL));
1807         }
1808 
1809         spa_upgrade(spa, zc->zc_cookie);
1810         spa_close(spa, FTAG);
1811 
1812         return (error);
1813 }
1814 
1815 static int
1816 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1817 {
1818         spa_t *spa;
1819         char *hist_buf;
1820         uint64_t size;
1821         int error;
1822 
1823         if ((size = zc->zc_history_len) == 0)
1824                 return (SET_ERROR(EINVAL));
1825 
1826         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1827                 return (error);
1828 
1829         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1830                 spa_close(spa, FTAG);
1831                 return (SET_ERROR(ENOTSUP));
1832         }
1833 
1834         hist_buf = kmem_alloc(size, KM_SLEEP);
1835         if ((error = spa_history_get(spa, &zc->zc_history_offset,
1836             &zc->zc_history_len, hist_buf)) == 0) {
1837                 error = ddi_copyout(hist_buf,
1838                     (void *)(uintptr_t)zc->zc_history,
1839                     zc->zc_history_len, zc->zc_iflags);
1840         }
1841 
1842         spa_close(spa, FTAG);
1843         kmem_free(hist_buf, size);
1844         return (error);
1845 }
1846 
1847 static int
1848 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1849 {
1850         spa_t *spa;
1851         int error;
1852 
1853         error = spa_open(zc->zc_name, &spa, FTAG);
1854         if (error == 0) {
1855                 error = spa_change_guid(spa);
1856                 spa_close(spa, FTAG);
1857         }
1858         return (error);
1859 }
1860 
1861 static int
1862 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1863 {
1864         return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1865 }
1866 
1867 /*
1868  * inputs:
1869  * zc_name              name of filesystem
1870  * zc_obj               object to find
1871  *
1872  * outputs:
1873  * zc_value             name of object
1874  */
1875 static int
1876 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1877 {
1878         objset_t *os;
1879         int error;
1880 
1881         /* XXX reading from objset not owned */
1882         if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1883             FTAG, &os)) != 0)
1884                 return (error);
1885         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1886                 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1887                 return (SET_ERROR(EINVAL));
1888         }
1889         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1890             sizeof (zc->zc_value));
1891         dmu_objset_rele_flags(os, B_TRUE, FTAG);
1892 
1893         return (error);
1894 }
1895 
1896 /*
1897  * inputs:
1898  * zc_name              name of filesystem
1899  * zc_obj               object to find
1900  *
1901  * outputs:
1902  * zc_stat              stats on object
1903  * zc_value             path to object
1904  */
1905 static int
1906 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1907 {
1908         objset_t *os;
1909         int error;
1910 
1911         /* XXX reading from objset not owned */
1912         if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1913             FTAG, &os)) != 0)
1914                 return (error);
1915         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1916                 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1917                 return (SET_ERROR(EINVAL));
1918         }
1919         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1920             sizeof (zc->zc_value));
1921         dmu_objset_rele_flags(os, B_TRUE, FTAG);
1922 
1923         return (error);
1924 }
1925 
1926 static int
1927 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1928 {
1929         spa_t *spa;
1930         int error;
1931         nvlist_t *config, **l2cache, **spares;
1932         uint_t nl2cache = 0, nspares = 0;
1933 
1934         error = spa_open(zc->zc_name, &spa, FTAG);
1935         if (error != 0)
1936                 return (error);
1937 
1938         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1939             zc->zc_iflags, &config);
1940         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1941             &l2cache, &nl2cache);
1942 
1943         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1944             &spares, &nspares);
1945 
1946         /*
1947          * A root pool with concatenated devices is not supported.
1948          * Thus, can not add a device to a root pool.
1949          *
1950          * Intent log device can not be added to a rootpool because
1951          * during mountroot, zil is replayed, a seperated log device
1952          * can not be accessed during the mountroot time.
1953          *
1954          * l2cache and spare devices are ok to be added to a rootpool.
1955          */
1956         if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1957                 nvlist_free(config);
1958                 spa_close(spa, FTAG);
1959                 return (SET_ERROR(EDOM));
1960         }
1961 
1962         if (error == 0) {
1963                 error = spa_vdev_add(spa, config);
1964                 nvlist_free(config);
1965         }
1966         spa_close(spa, FTAG);
1967         return (error);
1968 }
1969 
1970 /*
1971  * inputs:
1972  * zc_name              name of the pool
1973  * zc_guid              guid of vdev to remove
1974  * zc_cookie            cancel removal
1975  */
1976 static int
1977 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1978 {
1979         spa_t *spa;
1980         int error;
1981 
1982         error = spa_open(zc->zc_name, &spa, FTAG);
1983         if (error != 0)
1984                 return (error);
1985         if (zc->zc_cookie != 0) {
1986                 error = spa_vdev_remove_cancel(spa);
1987         } else {
1988                 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1989         }
1990         spa_close(spa, FTAG);
1991         return (error);
1992 }
1993 
1994 static int
1995 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1996 {
1997         spa_t *spa;
1998         int error;
1999         vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2000 
2001         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2002                 return (error);
2003         switch (zc->zc_cookie) {
2004         case VDEV_STATE_ONLINE:
2005                 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2006                 break;
2007 
2008         case VDEV_STATE_OFFLINE:
2009                 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2010                 break;
2011 
2012         case VDEV_STATE_FAULTED:
2013                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2014                     zc->zc_obj != VDEV_AUX_EXTERNAL)
2015                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2016 
2017                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2018                 break;
2019 
2020         case VDEV_STATE_DEGRADED:
2021                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2022                     zc->zc_obj != VDEV_AUX_EXTERNAL)
2023                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2024 
2025                 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2026                 break;
2027 
2028         default:
2029                 error = SET_ERROR(EINVAL);
2030         }
2031         zc->zc_cookie = newstate;
2032         spa_close(spa, FTAG);
2033         return (error);
2034 }
2035 
2036 static int
2037 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2038 {
2039         spa_t *spa;
2040         int replacing = zc->zc_cookie;
2041         nvlist_t *config;
2042         int error;
2043 
2044         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2045                 return (error);
2046 
2047         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2048             zc->zc_iflags, &config)) == 0) {
2049                 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2050                 nvlist_free(config);
2051         }
2052 
2053         spa_close(spa, FTAG);
2054         return (error);
2055 }
2056 
2057 static int
2058 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2059 {
2060         spa_t *spa;
2061         int error;
2062 
2063         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2064                 return (error);
2065 
2066         error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2067 
2068         spa_close(spa, FTAG);
2069         return (error);
2070 }
2071 
2072 static int
2073 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2074 {
2075         spa_t *spa;
2076         nvlist_t *config, *props = NULL;
2077         int error;
2078         boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2079 
2080         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2081                 return (error);
2082 
2083         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2084             zc->zc_iflags, &config)) {
2085                 spa_close(spa, FTAG);
2086                 return (error);
2087         }
2088 
2089         if (zc->zc_nvlist_src_size != 0 && (error =
2090             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2091             zc->zc_iflags, &props))) {
2092                 spa_close(spa, FTAG);
2093                 nvlist_free(config);
2094                 return (error);
2095         }
2096 
2097         error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2098 
2099         spa_close(spa, FTAG);
2100 
2101         nvlist_free(config);
2102         nvlist_free(props);
2103 
2104         return (error);
2105 }
2106 
2107 static int
2108 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2109 {
2110         spa_t *spa;
2111         char *path = zc->zc_value;
2112         uint64_t guid = zc->zc_guid;
2113         int error;
2114 
2115         error = spa_open(zc->zc_name, &spa, FTAG);
2116         if (error != 0)
2117                 return (error);
2118 
2119         error = spa_vdev_setpath(spa, guid, path);
2120         spa_close(spa, FTAG);
2121         return (error);
2122 }
2123 
2124 static int
2125 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2126 {
2127         spa_t *spa;
2128         char *fru = zc->zc_value;
2129         uint64_t guid = zc->zc_guid;
2130         int error;
2131 
2132         error = spa_open(zc->zc_name, &spa, FTAG);
2133         if (error != 0)
2134                 return (error);
2135 
2136         error = spa_vdev_setfru(spa, guid, fru);
2137         spa_close(spa, FTAG);
2138         return (error);
2139 }
2140 
2141 static int
2142 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2143 {
2144         int error = 0;
2145         nvlist_t *nv;
2146 
2147         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2148 
2149         if (zc->zc_nvlist_dst != 0 &&
2150             (error = dsl_prop_get_all(os, &nv)) == 0) {
2151                 dmu_objset_stats(os, nv);
2152                 /*
2153                  * NB: zvol_get_stats() will read the objset contents,
2154                  * which we aren't supposed to do with a
2155                  * DS_MODE_USER hold, because it could be
2156                  * inconsistent.  So this is a bit of a workaround...
2157                  * XXX reading with out owning
2158                  */
2159                 if (!zc->zc_objset_stats.dds_inconsistent &&
2160                     dmu_objset_type(os) == DMU_OST_ZVOL) {
2161                         error = zvol_get_stats(os, nv);
2162                         if (error == EIO)
2163                                 return (error);
2164                         VERIFY0(error);
2165                 }
2166                 error = put_nvlist(zc, nv);
2167                 nvlist_free(nv);
2168         }
2169 
2170         return (error);
2171 }
2172 
2173 /*
2174  * inputs:
2175  * zc_name              name of filesystem
2176  * zc_nvlist_dst_size   size of buffer for property nvlist
2177  *
2178  * outputs:
2179  * zc_objset_stats      stats
2180  * zc_nvlist_dst        property nvlist
2181  * zc_nvlist_dst_size   size of property nvlist
2182  */
2183 static int
2184 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2185 {
2186         objset_t *os;
2187         int error;
2188 
2189         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2190         if (error == 0) {
2191                 error = zfs_ioc_objset_stats_impl(zc, os);
2192                 dmu_objset_rele(os, FTAG);
2193         }
2194 
2195         return (error);
2196 }
2197 
2198 /*
2199  * inputs:
2200  * zc_name              name of filesystem
2201  * zc_nvlist_dst_size   size of buffer for property nvlist
2202  *
2203  * outputs:
2204  * zc_nvlist_dst        received property nvlist
2205  * zc_nvlist_dst_size   size of received property nvlist
2206  *
2207  * Gets received properties (distinct from local properties on or after
2208  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2209  * local property values.
2210  */
2211 static int
2212 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2213 {
2214         int error = 0;
2215         nvlist_t *nv;
2216 
2217         /*
2218          * Without this check, we would return local property values if the
2219          * caller has not already received properties on or after
2220          * SPA_VERSION_RECVD_PROPS.
2221          */
2222         if (!dsl_prop_get_hasrecvd(zc->zc_name))
2223                 return (SET_ERROR(ENOTSUP));
2224 
2225         if (zc->zc_nvlist_dst != 0 &&
2226             (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2227                 error = put_nvlist(zc, nv);
2228                 nvlist_free(nv);
2229         }
2230 
2231         return (error);
2232 }
2233 
2234 static int
2235 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2236 {
2237         uint64_t value;
2238         int error;
2239 
2240         /*
2241          * zfs_get_zplprop() will either find a value or give us
2242          * the default value (if there is one).
2243          */
2244         if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2245                 return (error);
2246         VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2247         return (0);
2248 }
2249 
2250 /*
2251  * inputs:
2252  * zc_name              name of filesystem
2253  * zc_nvlist_dst_size   size of buffer for zpl property nvlist
2254  *
2255  * outputs:
2256  * zc_nvlist_dst        zpl property nvlist
2257  * zc_nvlist_dst_size   size of zpl property nvlist
2258  */
2259 static int
2260 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2261 {
2262         objset_t *os;
2263         int err;
2264 
2265         /* XXX reading without owning */
2266         if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2267                 return (err);
2268 
2269         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2270 
2271         /*
2272          * NB: nvl_add_zplprop() will read the objset contents,
2273          * which we aren't supposed to do with a DS_MODE_USER
2274          * hold, because it could be inconsistent.
2275          */
2276         if (zc->zc_nvlist_dst != 0 &&
2277             !zc->zc_objset_stats.dds_inconsistent &&
2278             dmu_objset_type(os) == DMU_OST_ZFS) {
2279                 nvlist_t *nv;
2280 
2281                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2282                 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2283                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2284                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2285                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2286                         err = put_nvlist(zc, nv);
2287                 nvlist_free(nv);
2288         } else {
2289                 err = SET_ERROR(ENOENT);
2290         }
2291         dmu_objset_rele(os, FTAG);
2292         return (err);
2293 }
2294 
2295 static boolean_t
2296 dataset_name_hidden(const char *name)
2297 {
2298         /*
2299          * Skip over datasets that are not visible in this zone,
2300          * internal datasets (which have a $ in their name), and
2301          * temporary datasets (which have a % in their name).
2302          */
2303         if (strchr(name, '$') != NULL)
2304                 return (B_TRUE);
2305         if (strchr(name, '%') != NULL)
2306                 return (B_TRUE);
2307         if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2308                 return (B_TRUE);
2309         return (B_FALSE);
2310 }
2311 
2312 /*
2313  * inputs:
2314  * zc_name              name of filesystem
2315  * zc_cookie            zap cursor
2316  * zc_nvlist_dst_size   size of buffer for property nvlist
2317  *
2318  * outputs:
2319  * zc_name              name of next filesystem
2320  * zc_cookie            zap cursor
2321  * zc_objset_stats      stats
2322  * zc_nvlist_dst        property nvlist
2323  * zc_nvlist_dst_size   size of property nvlist
2324  */
2325 static int
2326 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2327 {
2328         objset_t *os;
2329         int error;
2330         char *p;
2331         size_t orig_len = strlen(zc->zc_name);
2332 
2333 top:
2334         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2335                 if (error == ENOENT)
2336                         error = SET_ERROR(ESRCH);
2337                 return (error);
2338         }
2339 
2340         p = strrchr(zc->zc_name, '/');
2341         if (p == NULL || p[1] != '\0')
2342                 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2343         p = zc->zc_name + strlen(zc->zc_name);
2344 
2345         do {
2346                 error = dmu_dir_list_next(os,
2347                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
2348                     NULL, &zc->zc_cookie);
2349                 if (error == ENOENT)
2350                         error = SET_ERROR(ESRCH);
2351         } while (error == 0 && dataset_name_hidden(zc->zc_name));
2352         dmu_objset_rele(os, FTAG);
2353 
2354         /*
2355          * If it's an internal dataset (ie. with a '$' in its name),
2356          * don't try to get stats for it, otherwise we'll return ENOENT.
2357          */
2358         if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2359                 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2360                 if (error == ENOENT) {
2361                         /* We lost a race with destroy, get the next one. */
2362                         zc->zc_name[orig_len] = '\0';
2363                         goto top;
2364                 }
2365         }
2366         return (error);
2367 }
2368 
2369 /*
2370  * inputs:
2371  * zc_name              name of filesystem
2372  * zc_cookie            zap cursor
2373  * zc_nvlist_dst_size   size of buffer for property nvlist
2374  * zc_simple            when set, only name is requested
2375  *
2376  * outputs:
2377  * zc_name              name of next snapshot
2378  * zc_objset_stats      stats
2379  * zc_nvlist_dst        property nvlist
2380  * zc_nvlist_dst_size   size of property nvlist
2381  */
2382 static int
2383 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2384 {
2385         objset_t *os;
2386         int error;
2387 
2388         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2389         if (error != 0) {
2390                 return (error == ENOENT ? ESRCH : error);
2391         }
2392 
2393         /*
2394          * A dataset name of maximum length cannot have any snapshots,
2395          * so exit immediately.
2396          */
2397         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2398             ZFS_MAX_DATASET_NAME_LEN) {
2399                 dmu_objset_rele(os, FTAG);
2400                 return (SET_ERROR(ESRCH));
2401         }
2402 
2403         error = dmu_snapshot_list_next(os,
2404             sizeof (zc->zc_name) - strlen(zc->zc_name),
2405             zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2406             NULL);
2407 
2408         if (error == 0 && !zc->zc_simple) {
2409                 dsl_dataset_t *ds;
2410                 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2411 
2412                 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2413                 if (error == 0) {
2414                         objset_t *ossnap;
2415 
2416                         error = dmu_objset_from_ds(ds, &ossnap);
2417                         if (error == 0)
2418                                 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2419                         dsl_dataset_rele(ds, FTAG);
2420                 }
2421         } else if (error == ENOENT) {
2422                 error = SET_ERROR(ESRCH);
2423         }
2424 
2425         dmu_objset_rele(os, FTAG);
2426         /* if we failed, undo the @ that we tacked on to zc_name */
2427         if (error != 0)
2428                 *strchr(zc->zc_name, '@') = '\0';
2429         return (error);
2430 }
2431 
2432 static int
2433 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2434 {
2435         const char *propname = nvpair_name(pair);
2436         uint64_t *valary;
2437         unsigned int vallen;
2438         const char *domain;
2439         char *dash;
2440         zfs_userquota_prop_t type;
2441         uint64_t rid;
2442         uint64_t quota;
2443         zfsvfs_t *zfsvfs;
2444         int err;
2445 
2446         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2447                 nvlist_t *attrs;
2448                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2449                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2450                     &pair) != 0)
2451                         return (SET_ERROR(EINVAL));
2452         }
2453 
2454         /*
2455          * A correctly constructed propname is encoded as
2456          * userquota@<rid>-<domain>.
2457          */
2458         if ((dash = strchr(propname, '-')) == NULL ||
2459             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2460             vallen != 3)
2461                 return (SET_ERROR(EINVAL));
2462 
2463         domain = dash + 1;
2464         type = valary[0];
2465         rid = valary[1];
2466         quota = valary[2];
2467 
2468         err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2469         if (err == 0) {
2470                 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2471                 zfsvfs_rele(zfsvfs, FTAG);
2472         }
2473 
2474         return (err);
2475 }
2476 
2477 /*
2478  * If the named property is one that has a special function to set its value,
2479  * return 0 on success and a positive error code on failure; otherwise if it is
2480  * not one of the special properties handled by this function, return -1.
2481  *
2482  * XXX: It would be better for callers of the property interface if we handled
2483  * these special cases in dsl_prop.c (in the dsl layer).
2484  */
2485 static int
2486 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2487     nvpair_t *pair)
2488 {
2489         const char *propname = nvpair_name(pair);
2490         zfs_prop_t prop = zfs_name_to_prop(propname);
2491         uint64_t intval = 0;
2492         char *strval = NULL;
2493         int err = -1;
2494 
2495         if (prop == ZPROP_INVAL) {
2496                 if (zfs_prop_userquota(propname))
2497                         return (zfs_prop_set_userquota(dsname, pair));
2498                 return (-1);
2499         }
2500 
2501         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2502                 nvlist_t *attrs;
2503                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2504                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2505                     &pair) == 0);
2506         }
2507 
2508         /* all special properties are numeric except for keylocation */
2509         if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2510                 strval = fnvpair_value_string(pair);
2511         } else {
2512                 intval = fnvpair_value_uint64(pair);
2513         }
2514 
2515         switch (prop) {
2516         case ZFS_PROP_QUOTA:
2517                 err = dsl_dir_set_quota(dsname, source, intval);
2518                 break;
2519         case ZFS_PROP_REFQUOTA:
2520                 err = dsl_dataset_set_refquota(dsname, source, intval);
2521                 break;
2522         case ZFS_PROP_FILESYSTEM_LIMIT:
2523         case ZFS_PROP_SNAPSHOT_LIMIT:
2524                 if (intval == UINT64_MAX) {
2525                         /* clearing the limit, just do it */
2526                         err = 0;
2527                 } else {
2528                         err = dsl_dir_activate_fs_ss_limit(dsname);
2529                 }
2530                 /*
2531                  * Set err to -1 to force the zfs_set_prop_nvlist code down the
2532                  * default path to set the value in the nvlist.
2533                  */
2534                 if (err == 0)
2535                         err = -1;
2536                 break;
2537         case ZFS_PROP_KEYLOCATION:
2538                 err = dsl_crypto_can_set_keylocation(dsname, strval);
2539 
2540                 /*
2541                  * Set err to -1 to force the zfs_set_prop_nvlist code down the
2542                  * default path to set the value in the nvlist.
2543                  */
2544                 if (err == 0)
2545                         err = -1;
2546                 break;
2547         case ZFS_PROP_RESERVATION:
2548                 err = dsl_dir_set_reservation(dsname, source, intval);
2549                 break;
2550         case ZFS_PROP_REFRESERVATION:
2551                 err = dsl_dataset_set_refreservation(dsname, source, intval);
2552                 break;
2553         case ZFS_PROP_VOLSIZE:
2554                 err = zvol_set_volsize(dsname, intval);
2555                 break;
2556         case ZFS_PROP_VERSION:
2557         {
2558                 zfsvfs_t *zfsvfs;
2559 
2560                 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2561                         break;
2562 
2563                 err = zfs_set_version(zfsvfs, intval);
2564                 zfsvfs_rele(zfsvfs, FTAG);
2565 
2566                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2567                         zfs_cmd_t *zc;
2568 
2569                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2570                         (void) strcpy(zc->zc_name, dsname);
2571                         (void) zfs_ioc_userspace_upgrade(zc);
2572                         (void) zfs_ioc_id_quota_upgrade(zc);
2573                         kmem_free(zc, sizeof (zfs_cmd_t));
2574                 }
2575                 break;
2576         }
2577         default:
2578                 err = -1;
2579         }
2580 
2581         return (err);
2582 }
2583 
2584 /*
2585  * This function is best effort. If it fails to set any of the given properties,
2586  * it continues to set as many as it can and returns the last error
2587  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2588  * with the list of names of all the properties that failed along with the
2589  * corresponding error numbers.
2590  *
2591  * If every property is set successfully, zero is returned and errlist is not
2592  * modified.
2593  */
2594 int
2595 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2596     nvlist_t *errlist)
2597 {
2598         nvpair_t *pair;
2599         nvpair_t *propval;
2600         int rv = 0;
2601         uint64_t intval;
2602         char *strval;
2603         nvlist_t *genericnvl = fnvlist_alloc();
2604         nvlist_t *retrynvl = fnvlist_alloc();
2605 
2606 retry:
2607         pair = NULL;
2608         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2609                 const char *propname = nvpair_name(pair);
2610                 zfs_prop_t prop = zfs_name_to_prop(propname);
2611                 int err = 0;
2612 
2613                 /* decode the property value */
2614                 propval = pair;
2615                 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2616                         nvlist_t *attrs;
2617                         attrs = fnvpair_value_nvlist(pair);
2618                         if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2619                             &propval) != 0)
2620                                 err = SET_ERROR(EINVAL);
2621                 }
2622 
2623                 /* Validate value type */
2624                 if (err == 0 && source == ZPROP_SRC_INHERITED) {
2625                         /* inherited properties are expected to be booleans */
2626                         if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
2627                                 err = SET_ERROR(EINVAL);
2628                 } else if (err == 0 && prop == ZPROP_INVAL) {
2629                         if (zfs_prop_user(propname)) {
2630                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
2631                                         err = SET_ERROR(EINVAL);
2632                         } else if (zfs_prop_userquota(propname)) {
2633                                 if (nvpair_type(propval) !=
2634                                     DATA_TYPE_UINT64_ARRAY)
2635                                         err = SET_ERROR(EINVAL);
2636                         } else {
2637                                 err = SET_ERROR(EINVAL);
2638                         }
2639                 } else if (err == 0) {
2640                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2641                                 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2642                                         err = SET_ERROR(EINVAL);
2643                         } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2644                                 const char *unused;
2645 
2646                                 intval = fnvpair_value_uint64(propval);
2647 
2648                                 switch (zfs_prop_get_type(prop)) {
2649                                 case PROP_TYPE_NUMBER:
2650                                         break;
2651                                 case PROP_TYPE_STRING:
2652                                         err = SET_ERROR(EINVAL);
2653                                         break;
2654                                 case PROP_TYPE_INDEX:
2655                                         if (zfs_prop_index_to_string(prop,
2656                                             intval, &unused) != 0)
2657                                                 err = SET_ERROR(EINVAL);
2658                                         break;
2659                                 default:
2660                                         cmn_err(CE_PANIC,
2661                                             "unknown property type");
2662                                 }
2663                         } else {
2664                                 err = SET_ERROR(EINVAL);
2665                         }
2666                 }
2667 
2668                 /* Validate permissions */
2669                 if (err == 0)
2670                         err = zfs_check_settable(dsname, pair, CRED());
2671 
2672                 if (err == 0) {
2673                         if (source == ZPROP_SRC_INHERITED)
2674                                 err = -1; /* does not need special handling */
2675                         else
2676                                 err = zfs_prop_set_special(dsname, source,
2677                                     pair);
2678                         if (err == -1) {
2679                                 /*
2680                                  * For better performance we build up a list of
2681                                  * properties to set in a single transaction.
2682                                  */
2683                                 err = nvlist_add_nvpair(genericnvl, pair);
2684                         } else if (err != 0 && nvl != retrynvl) {
2685                                 /*
2686                                  * This may be a spurious error caused by
2687                                  * receiving quota and reservation out of order.
2688                                  * Try again in a second pass.
2689                                  */
2690                                 err = nvlist_add_nvpair(retrynvl, pair);
2691                         }
2692                 }
2693 
2694                 if (err != 0) {
2695                         if (errlist != NULL)
2696                                 fnvlist_add_int32(errlist, propname, err);
2697                         rv = err;
2698                 }
2699         }
2700 
2701         if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2702                 nvl = retrynvl;
2703                 goto retry;
2704         }
2705 
2706         if (!nvlist_empty(genericnvl) &&
2707             dsl_props_set(dsname, source, genericnvl) != 0) {
2708                 /*
2709                  * If this fails, we still want to set as many properties as we
2710                  * can, so try setting them individually.
2711                  */
2712                 pair = NULL;
2713                 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2714                         const char *propname = nvpair_name(pair);
2715                         int err = 0;
2716 
2717                         propval = pair;
2718                         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2719                                 nvlist_t *attrs;
2720                                 attrs = fnvpair_value_nvlist(pair);
2721                                 propval = fnvlist_lookup_nvpair(attrs,
2722                                     ZPROP_VALUE);
2723                         }
2724 
2725                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2726                                 strval = fnvpair_value_string(propval);
2727                                 err = dsl_prop_set_string(dsname, propname,
2728                                     source, strval);
2729                         } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
2730                                 err = dsl_prop_inherit(dsname, propname,
2731                                     source);
2732                         } else {
2733                                 intval = fnvpair_value_uint64(propval);
2734                                 err = dsl_prop_set_int(dsname, propname, source,
2735                                     intval);
2736                         }
2737 
2738                         if (err != 0) {
2739                                 if (errlist != NULL) {
2740                                         fnvlist_add_int32(errlist, propname,
2741                                             err);
2742                                 }
2743                                 rv = err;
2744                         }
2745                 }
2746         }
2747         nvlist_free(genericnvl);
2748         nvlist_free(retrynvl);
2749 
2750         return (rv);
2751 }
2752 
2753 /*
2754  * Check that all the properties are valid user properties.
2755  */
2756 static int
2757 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2758 {
2759         nvpair_t *pair = NULL;
2760         int error = 0;
2761 
2762         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2763                 const char *propname = nvpair_name(pair);
2764 
2765                 if (!zfs_prop_user(propname) ||
2766                     nvpair_type(pair) != DATA_TYPE_STRING)
2767                         return (SET_ERROR(EINVAL));
2768 
2769                 if (error = zfs_secpolicy_write_perms(fsname,
2770                     ZFS_DELEG_PERM_USERPROP, CRED()))
2771                         return (error);
2772 
2773                 if (strlen(propname) >= ZAP_MAXNAMELEN)
2774                         return (SET_ERROR(ENAMETOOLONG));
2775 
2776                 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2777                         return (E2BIG);
2778         }
2779         return (0);
2780 }
2781 
2782 static void
2783 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2784 {
2785         nvpair_t *pair;
2786 
2787         VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2788 
2789         pair = NULL;
2790         while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2791                 if (nvlist_exists(skipped, nvpair_name(pair)))
2792                         continue;
2793 
2794                 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2795         }
2796 }
2797 
2798 static int
2799 clear_received_props(const char *dsname, nvlist_t *props,
2800     nvlist_t *skipped)
2801 {
2802         int err = 0;
2803         nvlist_t *cleared_props = NULL;
2804         props_skip(props, skipped, &cleared_props);
2805         if (!nvlist_empty(cleared_props)) {
2806                 /*
2807                  * Acts on local properties until the dataset has received
2808                  * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2809                  */
2810                 zprop_source_t flags = (ZPROP_SRC_NONE |
2811                     (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2812                 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2813         }
2814         nvlist_free(cleared_props);
2815         return (err);
2816 }
2817 
2818 /*
2819  * inputs:
2820  * zc_name              name of filesystem
2821  * zc_value             name of property to set
2822  * zc_nvlist_src{_size} nvlist of properties to apply
2823  * zc_cookie            received properties flag
2824  *
2825  * outputs:
2826  * zc_nvlist_dst{_size} error for each unapplied received property
2827  */
2828 static int
2829 zfs_ioc_set_prop(zfs_cmd_t *zc)
2830 {
2831         nvlist_t *nvl;
2832         boolean_t received = zc->zc_cookie;
2833         zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2834             ZPROP_SRC_LOCAL);
2835         nvlist_t *errors;
2836         int error;
2837 
2838         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2839             zc->zc_iflags, &nvl)) != 0)
2840                 return (error);
2841 
2842         if (received) {
2843                 nvlist_t *origprops;
2844 
2845                 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2846                         (void) clear_received_props(zc->zc_name,
2847                             origprops, nvl);
2848                         nvlist_free(origprops);
2849                 }
2850 
2851                 error = dsl_prop_set_hasrecvd(zc->zc_name);
2852         }
2853 
2854         errors = fnvlist_alloc();
2855         if (error == 0)
2856                 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2857 
2858         if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2859                 (void) put_nvlist(zc, errors);
2860         }
2861 
2862         nvlist_free(errors);
2863         nvlist_free(nvl);
2864         return (error);
2865 }
2866 
2867 /*
2868  * inputs:
2869  * zc_name              name of filesystem
2870  * zc_value             name of property to inherit
2871  * zc_cookie            revert to received value if TRUE
2872  *
2873  * outputs:             none
2874  */
2875 static int
2876 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2877 {
2878         const char *propname = zc->zc_value;
2879         zfs_prop_t prop = zfs_name_to_prop(propname);
2880         boolean_t received = zc->zc_cookie;
2881         zprop_source_t source = (received
2882             ? ZPROP_SRC_NONE            /* revert to received value, if any */
2883             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
2884 
2885         if (received) {
2886                 nvlist_t *dummy;
2887                 nvpair_t *pair;
2888                 zprop_type_t type;
2889                 int err;
2890 
2891                 /*
2892                  * zfs_prop_set_special() expects properties in the form of an
2893                  * nvpair with type info.
2894                  */
2895                 if (prop == ZPROP_INVAL) {
2896                         if (!zfs_prop_user(propname))
2897                                 return (SET_ERROR(EINVAL));
2898 
2899                         type = PROP_TYPE_STRING;
2900                 } else if (prop == ZFS_PROP_VOLSIZE ||
2901                     prop == ZFS_PROP_VERSION) {
2902                         return (SET_ERROR(EINVAL));
2903                 } else {
2904                         type = zfs_prop_get_type(prop);
2905                 }
2906 
2907                 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2908 
2909                 switch (type) {
2910                 case PROP_TYPE_STRING:
2911                         VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2912                         break;
2913                 case PROP_TYPE_NUMBER:
2914                 case PROP_TYPE_INDEX:
2915                         VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2916                         break;
2917                 default:
2918                         nvlist_free(dummy);
2919                         return (SET_ERROR(EINVAL));
2920                 }
2921 
2922                 pair = nvlist_next_nvpair(dummy, NULL);
2923                 err = zfs_prop_set_special(zc->zc_name, source, pair);
2924                 nvlist_free(dummy);
2925                 if (err != -1)
2926                         return (err); /* special property already handled */
2927         } else {
2928                 /*
2929                  * Only check this in the non-received case. We want to allow
2930                  * 'inherit -S' to revert non-inheritable properties like quota
2931                  * and reservation to the received or default values even though
2932                  * they are not considered inheritable.
2933                  */
2934                 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2935                         return (SET_ERROR(EINVAL));
2936         }
2937 
2938         /* property name has been validated by zfs_secpolicy_inherit_prop() */
2939         return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2940 }
2941 
2942 static int
2943 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2944 {
2945         nvlist_t *props;
2946         spa_t *spa;
2947         int error;
2948         nvpair_t *pair;
2949 
2950         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2951             zc->zc_iflags, &props))
2952                 return (error);
2953 
2954         /*
2955          * If the only property is the configfile, then just do a spa_lookup()
2956          * to handle the faulted case.
2957          */
2958         pair = nvlist_next_nvpair(props, NULL);
2959         if (pair != NULL && strcmp(nvpair_name(pair),
2960             zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2961             nvlist_next_nvpair(props, pair) == NULL) {
2962                 mutex_enter(&spa_namespace_lock);
2963                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2964                         spa_configfile_set(spa, props, B_FALSE);
2965                         spa_write_cachefile(spa, B_FALSE, B_TRUE);
2966                 }
2967                 mutex_exit(&spa_namespace_lock);
2968                 if (spa != NULL) {
2969                         nvlist_free(props);
2970                         return (0);
2971                 }
2972         }
2973 
2974         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2975                 nvlist_free(props);
2976                 return (error);
2977         }
2978 
2979         error = spa_prop_set(spa, props);
2980 
2981         nvlist_free(props);
2982         spa_close(spa, FTAG);
2983 
2984         return (error);
2985 }
2986 
2987 static int
2988 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2989 {
2990         spa_t *spa;
2991         int error;
2992         nvlist_t *nvp = NULL;
2993 
2994         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2995                 /*
2996                  * If the pool is faulted, there may be properties we can still
2997                  * get (such as altroot and cachefile), so attempt to get them
2998                  * anyway.
2999                  */
3000                 mutex_enter(&spa_namespace_lock);
3001                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
3002                         error = spa_prop_get(spa, &nvp);
3003                 mutex_exit(&spa_namespace_lock);
3004         } else {
3005                 error = spa_prop_get(spa, &nvp);
3006                 spa_close(spa, FTAG);
3007         }
3008 
3009         if (error == 0 && zc->zc_nvlist_dst != 0)
3010                 error = put_nvlist(zc, nvp);
3011         else
3012                 error = SET_ERROR(EFAULT);
3013 
3014         nvlist_free(nvp);
3015         return (error);
3016 }
3017 
3018 /*
3019  * inputs:
3020  * zc_name              name of filesystem
3021  * zc_nvlist_src{_size} nvlist of delegated permissions
3022  * zc_perm_action       allow/unallow flag
3023  *
3024  * outputs:             none
3025  */
3026 static int
3027 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3028 {
3029         int error;
3030         nvlist_t *fsaclnv = NULL;
3031 
3032         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3033             zc->zc_iflags, &fsaclnv)) != 0)
3034                 return (error);
3035 
3036         /*
3037          * Verify nvlist is constructed correctly
3038          */
3039         if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3040                 nvlist_free(fsaclnv);
3041                 return (SET_ERROR(EINVAL));
3042         }
3043 
3044         /*
3045          * If we don't have PRIV_SYS_MOUNT, then validate
3046          * that user is allowed to hand out each permission in
3047          * the nvlist(s)
3048          */
3049 
3050         error = secpolicy_zfs(CRED());
3051         if (error != 0) {
3052                 if (zc->zc_perm_action == B_FALSE) {
3053                         error = dsl_deleg_can_allow(zc->zc_name,
3054                             fsaclnv, CRED());
3055                 } else {
3056                         error = dsl_deleg_can_unallow(zc->zc_name,
3057                             fsaclnv, CRED());
3058                 }
3059         }
3060 
3061         if (error == 0)
3062                 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3063 
3064         nvlist_free(fsaclnv);
3065         return (error);
3066 }
3067 
3068 /*
3069  * inputs:
3070  * zc_name              name of filesystem
3071  *
3072  * outputs:
3073  * zc_nvlist_src{_size} nvlist of delegated permissions
3074  */
3075 static int
3076 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3077 {
3078         nvlist_t *nvp;
3079         int error;
3080 
3081         if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3082                 error = put_nvlist(zc, nvp);
3083                 nvlist_free(nvp);
3084         }
3085 
3086         return (error);
3087 }
3088 
3089 /* ARGSUSED */
3090 static void
3091 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3092 {
3093         zfs_creat_t *zct = arg;
3094 
3095         zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3096 }
3097 
3098 #define ZFS_PROP_UNDEFINED      ((uint64_t)-1)
3099 
3100 /*
3101  * inputs:
3102  * os                   parent objset pointer (NULL if root fs)
3103  * fuids_ok             fuids allowed in this version of the spa?
3104  * sa_ok                SAs allowed in this version of the spa?
3105  * createprops          list of properties requested by creator
3106  *
3107  * outputs:
3108  * zplprops     values for the zplprops we attach to the master node object
3109  * is_ci        true if requested file system will be purely case-insensitive
3110  *
3111  * Determine the settings for utf8only, normalization and
3112  * casesensitivity.  Specific values may have been requested by the
3113  * creator and/or we can inherit values from the parent dataset.  If
3114  * the file system is of too early a vintage, a creator can not
3115  * request settings for these properties, even if the requested
3116  * setting is the default value.  We don't actually want to create dsl
3117  * properties for these, so remove them from the source nvlist after
3118  * processing.
3119  */
3120 static int
3121 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3122     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3123     nvlist_t *zplprops, boolean_t *is_ci)
3124 {
3125         uint64_t sense = ZFS_PROP_UNDEFINED;
3126         uint64_t norm = ZFS_PROP_UNDEFINED;
3127         uint64_t u8 = ZFS_PROP_UNDEFINED;
3128 
3129         ASSERT(zplprops != NULL);
3130 
3131         if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3132                 return (SET_ERROR(EINVAL));
3133 
3134         /*
3135          * Pull out creator prop choices, if any.
3136          */
3137         if (createprops) {
3138                 (void) nvlist_lookup_uint64(createprops,
3139                     zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3140                 (void) nvlist_lookup_uint64(createprops,
3141                     zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3142                 (void) nvlist_remove_all(createprops,
3143                     zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3144                 (void) nvlist_lookup_uint64(createprops,
3145                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3146                 (void) nvlist_remove_all(createprops,
3147                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3148                 (void) nvlist_lookup_uint64(createprops,
3149                     zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3150                 (void) nvlist_remove_all(createprops,
3151                     zfs_prop_to_name(ZFS_PROP_CASE));
3152         }
3153 
3154         /*
3155          * If the zpl version requested is whacky or the file system
3156          * or pool is version is too "young" to support normalization
3157          * and the creator tried to set a value for one of the props,
3158          * error out.
3159          */
3160         if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3161             (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3162             (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3163             (zplver < ZPL_VERSION_NORMALIZATION &&
3164             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3165             sense != ZFS_PROP_UNDEFINED)))
3166                 return (SET_ERROR(ENOTSUP));
3167 
3168         /*
3169          * Put the version in the zplprops
3170          */
3171         VERIFY(nvlist_add_uint64(zplprops,
3172             zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3173 
3174         if (norm == ZFS_PROP_UNDEFINED)
3175                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3176         VERIFY(nvlist_add_uint64(zplprops,
3177             zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3178 
3179         /*
3180          * If we're normalizing, names must always be valid UTF-8 strings.
3181          */
3182         if (norm)
3183                 u8 = 1;
3184         if (u8 == ZFS_PROP_UNDEFINED)
3185                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3186         VERIFY(nvlist_add_uint64(zplprops,
3187             zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3188 
3189         if (sense == ZFS_PROP_UNDEFINED)
3190                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3191         VERIFY(nvlist_add_uint64(zplprops,
3192             zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3193 
3194         if (is_ci)
3195                 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3196 
3197         return (0);
3198 }
3199 
3200 static int
3201 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3202     nvlist_t *zplprops, boolean_t *is_ci)
3203 {
3204         boolean_t fuids_ok, sa_ok;
3205         uint64_t zplver = ZPL_VERSION;
3206         objset_t *os = NULL;
3207         char parentname[ZFS_MAX_DATASET_NAME_LEN];
3208         char *cp;
3209         spa_t *spa;
3210         uint64_t spa_vers;
3211         int error;
3212 
3213         (void) strlcpy(parentname, dataset, sizeof (parentname));
3214         cp = strrchr(parentname, '/');
3215         ASSERT(cp != NULL);
3216         cp[0] = '\0';
3217 
3218         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3219                 return (error);
3220 
3221         spa_vers = spa_version(spa);
3222         spa_close(spa, FTAG);
3223 
3224         zplver = zfs_zpl_version_map(spa_vers);
3225         fuids_ok = (zplver >= ZPL_VERSION_FUID);
3226         sa_ok = (zplver >= ZPL_VERSION_SA);
3227 
3228         /*
3229          * Open parent object set so we can inherit zplprop values.
3230          */
3231         if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3232                 return (error);
3233 
3234         error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3235             zplprops, is_ci);
3236         dmu_objset_rele(os, FTAG);
3237         return (error);
3238 }
3239 
3240 static int
3241 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3242     nvlist_t *zplprops, boolean_t *is_ci)
3243 {
3244         boolean_t fuids_ok;
3245         boolean_t sa_ok;
3246         uint64_t zplver = ZPL_VERSION;
3247         int error;
3248 
3249         zplver = zfs_zpl_version_map(spa_vers);
3250         fuids_ok = (zplver >= ZPL_VERSION_FUID);
3251         sa_ok = (zplver >= ZPL_VERSION_SA);
3252 
3253         error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3254             createprops, zplprops, is_ci);
3255         return (error);
3256 }
3257 
3258 /*
3259  * innvl: {
3260  *     "type" -> dmu_objset_type_t (int32)
3261  *     (optional) "props" -> { prop -> value }
3262  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3263  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3264  * }
3265  *
3266  * outnvl: propname -> error code (int32)
3267  */
3268 static int
3269 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3270 {
3271         int error = 0;
3272         zfs_creat_t zct = { 0 };
3273         nvlist_t *nvprops = NULL;
3274         nvlist_t *hidden_args = NULL;
3275         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3276         int32_t type32;
3277         dmu_objset_type_t type;
3278         boolean_t is_insensitive = B_FALSE;
3279         dsl_crypto_params_t *dcp = NULL;
3280 
3281         if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3282                 return (SET_ERROR(EINVAL));
3283         type = type32;
3284         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3285         (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3286 
3287         switch (type) {
3288         case DMU_OST_ZFS:
3289                 cbfunc = zfs_create_cb;
3290                 break;
3291 
3292         case DMU_OST_ZVOL:
3293                 cbfunc = zvol_create_cb;
3294                 break;
3295 
3296         default:
3297                 cbfunc = NULL;
3298                 break;
3299         }
3300         if (strchr(fsname, '@') ||
3301             strchr(fsname, '%'))
3302                 return (SET_ERROR(EINVAL));
3303 
3304         zct.zct_props = nvprops;
3305 
3306         if (cbfunc == NULL)
3307                 return (SET_ERROR(EINVAL));
3308 
3309         if (type == DMU_OST_ZVOL) {
3310                 uint64_t volsize, volblocksize;
3311 
3312                 if (nvprops == NULL)
3313                         return (SET_ERROR(EINVAL));
3314                 if (nvlist_lookup_uint64(nvprops,
3315                     zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3316                         return (SET_ERROR(EINVAL));
3317 
3318                 if ((error = nvlist_lookup_uint64(nvprops,
3319                     zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3320                     &volblocksize)) != 0 && error != ENOENT)
3321                         return (SET_ERROR(EINVAL));
3322 
3323                 if (error != 0)
3324                         volblocksize = zfs_prop_default_numeric(
3325                             ZFS_PROP_VOLBLOCKSIZE);
3326 
3327                 if ((error = zvol_check_volblocksize(
3328                     volblocksize)) != 0 ||
3329                     (error = zvol_check_volsize(volsize,
3330                     volblocksize)) != 0)
3331                         return (error);
3332         } else if (type == DMU_OST_ZFS) {
3333                 int error;
3334 
3335                 /*
3336                  * We have to have normalization and
3337                  * case-folding flags correct when we do the
3338                  * file system creation, so go figure them out
3339                  * now.
3340                  */
3341                 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3342                     NV_UNIQUE_NAME, KM_SLEEP) == 0);
3343                 error = zfs_fill_zplprops(fsname, nvprops,
3344                     zct.zct_zplprops, &is_insensitive);
3345                 if (error != 0) {
3346                         nvlist_free(zct.zct_zplprops);
3347                         return (error);
3348                 }
3349         }
3350 
3351         error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3352             hidden_args, &dcp);
3353         if (error != 0) {
3354                 nvlist_free(zct.zct_zplprops);
3355                 return (error);
3356         }
3357 
3358         error = dmu_objset_create(fsname, type,
3359             is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3360 
3361         nvlist_free(zct.zct_zplprops);
3362         dsl_crypto_params_free(dcp, !!error);
3363 
3364         /*
3365          * It would be nice to do this atomically.
3366          */
3367         if (error == 0) {
3368                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3369                     nvprops, outnvl);
3370                 if (error != 0)
3371                         (void) dsl_destroy_head(fsname);
3372         }
3373         return (error);
3374 }
3375 
3376 /*
3377  * innvl: {
3378  *     "origin" -> name of origin snapshot
3379  *     (optional) "props" -> { prop -> value }
3380  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3381  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3382  * }
3383  *
3384  * outnvl: propname -> error code (int32)
3385  */
3386 static int
3387 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3388 {
3389         int error = 0;
3390         nvlist_t *nvprops = NULL;
3391         char *origin_name;
3392 
3393         if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3394                 return (SET_ERROR(EINVAL));
3395         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3396 
3397         if (strchr(fsname, '@') ||
3398             strchr(fsname, '%'))
3399                 return (SET_ERROR(EINVAL));
3400 
3401         if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3402                 return (SET_ERROR(EINVAL));
3403 
3404         error = dmu_objset_clone(fsname, origin_name);
3405 
3406         /*
3407          * It would be nice to do this atomically.
3408          */
3409         if (error == 0) {
3410                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3411                     nvprops, outnvl);
3412                 if (error != 0)
3413                         (void) dsl_destroy_head(fsname);
3414         }
3415         return (error);
3416 }
3417 
3418 /* ARGSUSED */
3419 static int
3420 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3421 {
3422         if (strchr(fsname, '@') ||
3423             strchr(fsname, '%'))
3424                 return (SET_ERROR(EINVAL));
3425 
3426         return (dmu_objset_remap_indirects(fsname));
3427 }
3428 
3429 /*
3430  * innvl: {
3431  *     "snaps" -> { snapshot1, snapshot2 }
3432  *     (optional) "props" -> { prop -> value (string) }
3433  * }
3434  *
3435  * outnvl: snapshot -> error code (int32)
3436  */
3437 static int
3438 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3439 {
3440         nvlist_t *snaps;
3441         nvlist_t *props = NULL;
3442         int error, poollen;
3443         nvpair_t *pair;
3444 
3445         (void) nvlist_lookup_nvlist(innvl, "props", &props);
3446         if ((error = zfs_check_userprops(poolname, props)) != 0)
3447                 return (error);
3448 
3449         if (!nvlist_empty(props) &&
3450             zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3451                 return (SET_ERROR(ENOTSUP));
3452 
3453         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3454                 return (SET_ERROR(EINVAL));
3455         poollen = strlen(poolname);
3456         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3457             pair = nvlist_next_nvpair(snaps, pair)) {
3458                 const char *name = nvpair_name(pair);
3459                 const char *cp = strchr(name, '@');
3460 
3461                 /*
3462                  * The snap name must contain an @, and the part after it must
3463                  * contain only valid characters.
3464                  */
3465                 if (cp == NULL ||
3466                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3467                         return (SET_ERROR(EINVAL));
3468 
3469                 /*
3470                  * The snap must be in the specified pool.
3471                  */
3472                 if (strncmp(name, poolname, poollen) != 0 ||
3473                     (name[poollen] != '/' && name[poollen] != '@'))
3474                         return (SET_ERROR(EXDEV));
3475 
3476                 /* This must be the only snap of this fs. */
3477                 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3478                     pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3479                         if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3480                             == 0) {
3481                                 return (SET_ERROR(EXDEV));
3482                         }
3483                 }
3484         }
3485 
3486         error = dsl_dataset_snapshot(snaps, props, outnvl);
3487         return (error);
3488 }
3489 
3490 /*
3491  * innvl: "message" -> string
3492  */
3493 /* ARGSUSED */
3494 static int
3495 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3496 {
3497         char *message;
3498         spa_t *spa;
3499         int error;
3500         char *poolname;
3501 
3502         /*
3503          * The poolname in the ioctl is not set, we get it from the TSD,
3504          * which was set at the end of the last successful ioctl that allows
3505          * logging.  The secpolicy func already checked that it is set.
3506          * Only one log ioctl is allowed after each successful ioctl, so
3507          * we clear the TSD here.
3508          */
3509         poolname = tsd_get(zfs_allow_log_key);
3510         (void) tsd_set(zfs_allow_log_key, NULL);
3511         error = spa_open(poolname, &spa, FTAG);
3512         strfree(poolname);
3513         if (error != 0)
3514                 return (error);
3515 
3516         if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3517                 spa_close(spa, FTAG);
3518                 return (SET_ERROR(EINVAL));
3519         }
3520 
3521         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3522                 spa_close(spa, FTAG);
3523                 return (SET_ERROR(ENOTSUP));
3524         }
3525 
3526         error = spa_history_log(spa, message);
3527         spa_close(spa, FTAG);
3528         return (error);
3529 }
3530 
3531 /*
3532  * The dp_config_rwlock must not be held when calling this, because the
3533  * unmount may need to write out data.
3534  *
3535  * This function is best-effort.  Callers must deal gracefully if it
3536  * remains mounted (or is remounted after this call).
3537  *
3538  * Returns 0 if the argument is not a snapshot, or it is not currently a
3539  * filesystem, or we were able to unmount it.  Returns error code otherwise.
3540  */
3541 void
3542 zfs_unmount_snap(const char *snapname)
3543 {
3544         vfs_t *vfsp = NULL;
3545         zfsvfs_t *zfsvfs = NULL;
3546 
3547         if (strchr(snapname, '@') == NULL)
3548                 return;
3549 
3550         int err = getzfsvfs(snapname, &zfsvfs);
3551         if (err != 0) {
3552                 ASSERT3P(zfsvfs, ==, NULL);
3553                 return;
3554         }
3555         vfsp = zfsvfs->z_vfs;
3556 
3557         ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3558 
3559         err = vn_vfswlock(vfsp->vfs_vnodecovered);
3560         VFS_RELE(vfsp);
3561         if (err != 0)
3562                 return;
3563 
3564         /*
3565          * Always force the unmount for snapshots.
3566          */
3567         (void) dounmount(vfsp, MS_FORCE, kcred);
3568 }
3569 
/*
 * Callback-shaped adapter around zfs_unmount_snap().  The second argument
 * is ignored and success is always reported, since zfs_unmount_snap()
 * itself returns nothing.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
        zfs_unmount_snap(snapname);

        return (0);
}
3577 
3578 /*
3579  * When a clone is destroyed, its origin may also need to be destroyed,
3580  * in which case it must be unmounted.  This routine will do that unmount
3581  * if necessary.
3582  */
3583 void
3584 zfs_destroy_unmount_origin(const char *fsname)
3585 {
3586         int error;
3587         objset_t *os;
3588         dsl_dataset_t *ds;
3589 
3590         error = dmu_objset_hold(fsname, FTAG, &os);
3591         if (error != 0)
3592                 return;
3593         ds = dmu_objset_ds(os);
3594         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3595                 char originname[ZFS_MAX_DATASET_NAME_LEN];
3596                 dsl_dataset_name(ds->ds_prev, originname);
3597                 dmu_objset_rele(os, FTAG);
3598                 zfs_unmount_snap(originname);
3599         } else {
3600                 dmu_objset_rele(os, FTAG);
3601         }
3602 }
3603 
3604 /*
3605  * innvl: {
3606  *     "snaps" -> { snapshot1, snapshot2 }
3607  *     (optional boolean) "defer"
3608  * }
3609  *
3610  * outnvl: snapshot -> error code (int32)
3611  *
3612  */
3613 /* ARGSUSED */
3614 static int
3615 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3616 {
3617         nvlist_t *snaps;
3618         nvpair_t *pair;
3619         boolean_t defer;
3620 
3621         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3622                 return (SET_ERROR(EINVAL));
3623         defer = nvlist_exists(innvl, "defer");
3624 
3625         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3626             pair = nvlist_next_nvpair(snaps, pair)) {
3627                 zfs_unmount_snap(nvpair_name(pair));
3628         }
3629 
3630         return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3631 }
3632 
3633 /*
3634  * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3635  * All bookmarks must be in the same pool.
3636  *
3637  * innvl: {
3638  *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3639  * }
3640  *
3641  * outnvl: bookmark -> error code (int32)
3642  *
3643  */
3644 /* ARGSUSED */
3645 static int
3646 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3647 {
3648         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3649             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3650                 char *snap_name;
3651 
3652                 /*
3653                  * Verify the snapshot argument.
3654                  */
3655                 if (nvpair_value_string(pair, &snap_name) != 0)
3656                         return (SET_ERROR(EINVAL));
3657 
3658 
3659                 /* Verify that the keys (bookmarks) are unique */
3660                 for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3661                     pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3662                         if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3663                                 return (SET_ERROR(EINVAL));
3664                 }
3665         }
3666 
3667         return (dsl_bookmark_create(innvl, outnvl));
3668 }
3669 
3670 /*
3671  * innvl: {
3672  *     property 1, property 2, ...
3673  * }
3674  *
3675  * outnvl: {
3676  *     bookmark name 1 -> { property 1, property 2, ... },
3677  *     bookmark name 2 -> { property 1, property 2, ... }
3678  * }
3679  *
3680  */
3681 static int
3682 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3683 {
3684         return (dsl_get_bookmarks(fsname, innvl, outnvl));
3685 }
3686 
3687 /*
3688  * innvl: {
3689  *     bookmark name 1, bookmark name 2
3690  * }
3691  *
3692  * outnvl: bookmark -> error code (int32)
3693  *
3694  */
3695 static int
3696 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3697     nvlist_t *outnvl)
3698 {
3699         int error, poollen;
3700 
3701         poollen = strlen(poolname);
3702         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3703             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3704                 const char *name = nvpair_name(pair);
3705                 const char *cp = strchr(name, '#');
3706 
3707                 /*
3708                  * The bookmark name must contain an #, and the part after it
3709                  * must contain only valid characters.
3710                  */
3711                 if (cp == NULL ||
3712                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3713                         return (SET_ERROR(EINVAL));
3714 
3715                 /*
3716                  * The bookmark must be in the specified pool.
3717                  */
3718                 if (strncmp(name, poolname, poollen) != 0 ||
3719                     (name[poollen] != '/' && name[poollen] != '#'))
3720                         return (SET_ERROR(EXDEV));
3721         }
3722 
3723         error = dsl_bookmark_destroy(innvl, outnvl);
3724         return (error);
3725 }
3726 
3727 static int
3728 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3729     nvlist_t *outnvl)
3730 {
3731         char *program;
3732         uint64_t instrlimit, memlimit;
3733         boolean_t sync_flag;
3734         nvpair_t *nvarg = NULL;
3735 
3736         if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
3737                 return (EINVAL);
3738         }
3739         if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3740                 sync_flag = B_TRUE;
3741         }
3742         if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3743                 instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3744         }
3745         if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3746                 memlimit = ZCP_DEFAULT_MEMLIMIT;
3747         }
3748         if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
3749                 return (EINVAL);
3750         }
3751 
3752         if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3753                 return (EINVAL);
3754         if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3755                 return (EINVAL);
3756 
3757         return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3758             nvarg, outnvl));
3759 }
3760 
3761 /*
3762  * innvl: unused
3763  * outnvl: empty
3764  */
3765 /* ARGSUSED */
3766 static int
3767 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3768 {
3769         return (spa_checkpoint(poolname));
3770 }
3771 
3772 /*
3773  * innvl: unused
3774  * outnvl: empty
3775  */
3776 /* ARGSUSED */
3777 static int
3778 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
3779     nvlist_t *outnvl)
3780 {
3781         return (spa_checkpoint_discard(poolname));
3782 }
3783 
3784 /*
3785  * inputs:
3786  * zc_name              name of dataset to destroy
3787  * zc_defer_destroy     mark for deferred destroy
3788  *
3789  * outputs:             none
3790  */
3791 static int
3792 zfs_ioc_destroy(zfs_cmd_t *zc)
3793 {
3794         objset_t *os;
3795         dmu_objset_type_t ost;
3796         int err;
3797 
3798         err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3799         if (err != 0)
3800                 return (err);
3801         ost = dmu_objset_type(os);
3802         dmu_objset_rele(os, FTAG);
3803 
3804         if (ost == DMU_OST_ZFS)
3805                 zfs_unmount_snap(zc->zc_name);
3806 
3807         if (strchr(zc->zc_name, '@')) {
3808                 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3809         } else {
3810                 err = dsl_destroy_head(zc->zc_name);
3811                 if (err == EEXIST) {
3812                         /*
3813                          * It is possible that the given DS may have
3814                          * hidden child (%recv) datasets - "leftovers"
3815                          * resulting from the previously interrupted
3816                          * 'zfs receive'.
3817                          *
3818                          * 6 extra bytes for /%recv
3819                          */
3820                         char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
3821 
3822                         if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
3823                             zc->zc_name, recv_clone_name) >=
3824                             sizeof (namebuf))
3825                                 return (SET_ERROR(EINVAL));
3826 
3827                         /*
3828                          * Try to remove the hidden child (%recv) and after
3829                          * that try to remove the target dataset.
3830                          * If the hidden child (%recv) does not exist
3831                          * the original error (EEXIST) will be returned
3832                          */
3833                         err = dsl_destroy_head(namebuf);
3834                         if (err == 0)
3835                                 err = dsl_destroy_head(zc->zc_name);
3836                         else if (err == ENOENT)
3837                                 err = SET_ERROR(EEXIST);
3838                 }
3839         }
3840         if (ost == DMU_OST_ZVOL && err == 0)
3841                 (void) zvol_remove_minor(zc->zc_name);
3842         return (err);
3843 }
3844 
3845 /*
3846  * innvl: {
3847  *     "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
3848  *     "initialize_vdevs": { -> guids to initialize (nvlist)
3849  *         "vdev_path_1": vdev_guid_1, (uint64),
3850  *         "vdev_path_2": vdev_guid_2, (uint64),
3851  *         ...
3852  *     },
3853  * }
3854  *
3855  * outnvl: {
3856  *     "initialize_vdevs": { -> initialization errors (nvlist)
3857  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
3858  *         "vdev_path_2": errno, ... (uint64)
3859  *         ...
3860  *     }
3861  * }
3862  *
3863  * EINVAL is returned for an unknown command or if any of the provided vdev
3864  * guids have be specified with a type other than uint64.
3865  */
3866 static int
3867 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3868 {
3869         uint64_t cmd_type;
3870         if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3871             &cmd_type) != 0) {
3872                 return (SET_ERROR(EINVAL));
3873         }
3874 
3875         if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3876             cmd_type == POOL_INITIALIZE_START ||
3877             cmd_type == POOL_INITIALIZE_SUSPEND)) {
3878                 return (SET_ERROR(EINVAL));
3879         }
3880 
3881         nvlist_t *vdev_guids;
3882         if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3883             &vdev_guids) != 0) {
3884                 return (SET_ERROR(EINVAL));
3885         }
3886 
3887         for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3888             pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3889                 uint64_t vdev_guid;
3890                 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
3891                         return (SET_ERROR(EINVAL));
3892                 }
3893         }
3894 
3895         spa_t *spa;
3896         int error = spa_open(poolname, &spa, FTAG);
3897         if (error != 0)
3898                 return (error);
3899 
3900         nvlist_t *vdev_errlist = fnvlist_alloc();
3901         int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
3902             vdev_errlist);
3903 
3904         if (fnvlist_size(vdev_errlist) > 0) {
3905                 fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
3906                     vdev_errlist);
3907         }
3908         fnvlist_free(vdev_errlist);
3909 
3910         spa_close(spa, FTAG);
3911         return (total_errors > 0 ? EINVAL : 0);
3912 }
3913 
3914 /*
3915  * innvl: {
3916  *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
3917  *     "trim_vdevs": { -> guids to TRIM (nvlist)
3918  *         "vdev_path_1": vdev_guid_1, (uint64),
3919  *         "vdev_path_2": vdev_guid_2, (uint64),
3920  *         ...
3921  *     },
3922  *     "trim_rate" -> Target TRIM rate in bytes/sec.
3923  *     "trim_secure" -> Set to request a secure TRIM.
3924  * }
3925  *
3926  * outnvl: {
3927  *     "trim_vdevs": { -> TRIM errors (nvlist)
3928  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
3929  *         "vdev_path_2": errno, ... (uint64)
3930  *         ...
3931  *     }
3932  * }
3933  *
3934  * EINVAL is returned for an unknown command or if any of the provided vdev
3935  * guids have be specified with a type other than uint64.
3936  */
3937 
3938 static int
3939 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3940 {
3941         uint64_t cmd_type;
3942         if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
3943                 return (SET_ERROR(EINVAL));
3944 
3945         if (!(cmd_type == POOL_TRIM_CANCEL ||
3946             cmd_type == POOL_TRIM_START ||
3947             cmd_type == POOL_TRIM_SUSPEND)) {
3948                 return (SET_ERROR(EINVAL));
3949         }
3950 
3951         nvlist_t *vdev_guids;
3952         if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
3953                 return (SET_ERROR(EINVAL));
3954 
3955         for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3956             pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3957                 uint64_t vdev_guid;
3958                 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
3959                         return (SET_ERROR(EINVAL));
3960                 }
3961         }
3962 
3963         /* Optional, defaults to maximum rate when not provided */
3964         uint64_t rate;
3965         if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
3966                 rate = 0;
3967 
3968         /* Optional, defaults to standard TRIM when not provided */
3969         boolean_t secure;
3970         if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
3971             &secure) != 0) {
3972                 secure = B_FALSE;
3973         }
3974 
3975         spa_t *spa;
3976         int error = spa_open(poolname, &spa, FTAG);
3977         if (error != 0)
3978                 return (error);
3979 
3980         nvlist_t *vdev_errlist = fnvlist_alloc();
3981         int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
3982             rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
3983 
3984         if (fnvlist_size(vdev_errlist) > 0)
3985                 fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
3986 
3987         fnvlist_free(vdev_errlist);
3988 
3989         spa_close(spa, FTAG);
3990         return (total_errors > 0 ? EINVAL : 0);
3991 }
3992 
3993 /*
3994  * fsname is name of dataset to rollback (to most recent snapshot)
3995  *
3996  * innvl may contain name of expected target snapshot
3997  *
3998  * outnvl: "target" -> name of most recent snapshot
3999  * }
4000  */
4001 /* ARGSUSED */
4002 static int
4003 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4004 {
4005         zfsvfs_t *zfsvfs;
4006         char *target = NULL;
4007         int error;
4008 
4009         (void) nvlist_lookup_string(innvl, "target", &target);
4010         if (target != NULL) {
4011                 const char *cp = strchr(target, '@');
4012 
4013                 /*
4014                  * The snap name must contain an @, and the part after it must
4015                  * contain only valid characters.
4016                  */
4017                 if (cp == NULL ||
4018                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4019                         return (SET_ERROR(EINVAL));
4020         }
4021 
4022         if (getzfsvfs(fsname, &zfsvfs) == 0) {
4023                 dsl_dataset_t *ds;
4024 
4025                 ds = dmu_objset_ds(zfsvfs->z_os);
4026                 error = zfs_suspend_fs(zfsvfs);
4027                 if (error == 0) {
4028                         int resume_err;
4029 
4030                         error = dsl_dataset_rollback(fsname, target, zfsvfs,
4031                             outnvl);
4032                         resume_err = zfs_resume_fs(zfsvfs, ds);
4033                         error = error ? error : resume_err;
4034                 }
4035                 VFS_RELE(zfsvfs->z_vfs);
4036         } else {
4037                 error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4038         }
4039         return (error);
4040 }
4041 
4042 static int
4043 recursive_unmount(const char *fsname, void *arg)
4044 {
4045         const char *snapname = arg;
4046         char fullname[ZFS_MAX_DATASET_NAME_LEN];
4047 
4048         (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
4049         zfs_unmount_snap(fullname);
4050 
4051         return (0);
4052 }
4053 
4054 /*
4055  * inputs:
4056  * zc_name      old name of dataset
4057  * zc_value     new name of dataset
4058  * zc_cookie    recursive flag (only valid for snapshots)
4059  *
4060  * outputs:     none
4061  */
4062 static int
4063 zfs_ioc_rename(zfs_cmd_t *zc)
4064 {
4065         objset_t *os;
4066         dmu_objset_type_t ost;
4067         boolean_t recursive = zc->zc_cookie & 1;
4068         char *at;
4069         int err;
4070 
4071         /* "zfs rename" from and to ...%recv datasets should both fail */
4072         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4073         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4074         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4075             dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4076             strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
4077                 return (SET_ERROR(EINVAL));
4078 
4079         err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4080         if (err != 0)
4081                 return (err);
4082         ost = dmu_objset_type(os);
4083         dmu_objset_rele(os, FTAG);
4084 
4085         at = strchr(zc->zc_name, '@');
4086         if (at != NULL) {
4087                 /* snaps must be in same fs */
4088                 int error;
4089 
4090                 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4091                         return (SET_ERROR(EXDEV));
4092                 *at = '\0';
4093                 if (ost == DMU_OST_ZFS) {
4094                         error = dmu_objset_find(zc->zc_name,
4095                             recursive_unmount, at + 1,
4096                             recursive ? DS_FIND_CHILDREN : 0);
4097                         if (error != 0) {
4098                                 *at = '@';
4099                                 return (error);
4100                         }
4101                 }
4102                 error = dsl_dataset_rename_snapshot(zc->zc_name,
4103                     at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4104                 *at = '@';
4105 
4106                 return (error);
4107         } else {
4108                 if (ost == DMU_OST_ZVOL)
4109                         (void) zvol_remove_minor(zc->zc_name);
4110                 return (dsl_dir_rename(zc->zc_name, zc->zc_value));
4111         }
4112 }
4113 
4114 static int
4115 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4116 {
4117         const char *propname = nvpair_name(pair);
4118         boolean_t issnap = (strchr(dsname, '@') != NULL);
4119         zfs_prop_t prop = zfs_name_to_prop(propname);
4120         uint64_t intval;
4121         int err;
4122 
4123         if (prop == ZPROP_INVAL) {
4124                 if (zfs_prop_user(propname)) {
4125                         if (err = zfs_secpolicy_write_perms(dsname,
4126                             ZFS_DELEG_PERM_USERPROP, cr))
4127                                 return (err);
4128                         return (0);
4129                 }
4130 
4131                 if (!issnap && zfs_prop_userquota(propname)) {
4132                         const char *perm = NULL;
4133                         const char *uq_prefix =
4134                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4135                         const char *gq_prefix =
4136                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4137                         const char *uiq_prefix =
4138                             zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
4139                         const char *giq_prefix =
4140                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
4141                         const char *pq_prefix =
4142                             zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
4143                         const char *piq_prefix = zfs_userquota_prop_prefixes[\
4144                             ZFS_PROP_PROJECTOBJQUOTA];
4145 
4146                         if (strncmp(propname, uq_prefix,
4147                             strlen(uq_prefix)) == 0) {
4148                                 perm = ZFS_DELEG_PERM_USERQUOTA;
4149                         } else if (strncmp(propname, uiq_prefix,
4150                             strlen(uiq_prefix)) == 0) {
4151                                 perm = ZFS_DELEG_PERM_USEROBJQUOTA;
4152                         } else if (strncmp(propname, gq_prefix,
4153                             strlen(gq_prefix)) == 0) {
4154                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
4155                         } else if (strncmp(propname, giq_prefix,
4156                             strlen(giq_prefix)) == 0) {
4157                                 perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
4158                         } else if (strncmp(propname, pq_prefix,
4159                             strlen(pq_prefix)) == 0) {
4160                                 perm = ZFS_DELEG_PERM_PROJECTQUOTA;
4161                         } else if (strncmp(propname, piq_prefix,
4162                             strlen(piq_prefix)) == 0) {
4163                                 perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
4164                         } else {
4165                                 /* {USER|GROUP|PROJECT}USED are read-only */
4166                                 return (SET_ERROR(EINVAL));
4167                         }
4168 
4169                         if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
4170                                 return (err);
4171                         return (0);
4172                 }
4173 
4174                 return (SET_ERROR(EINVAL));
4175         }
4176 
4177         if (issnap)
4178                 return (SET_ERROR(EINVAL));
4179 
4180         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4181                 /*
4182                  * dsl_prop_get_all_impl() returns properties in this
4183                  * format.
4184                  */
4185                 nvlist_t *attrs;
4186                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4187                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4188                     &pair) == 0);
4189         }
4190 
4191         /*
4192          * Check that this value is valid for this pool version
4193          */
4194         switch (prop) {
4195         case ZFS_PROP_COMPRESSION:
4196                 /*
4197                  * If the user specified gzip compression, make sure
4198                  * the SPA supports it. We ignore any errors here since
4199                  * we'll catch them later.
4200                  */
4201                 if (nvpair_value_uint64(pair, &intval) == 0) {
4202                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
4203                             intval <= ZIO_COMPRESS_GZIP_9 &&
4204                             zfs_earlier_version(dsname,
4205                             SPA_VERSION_GZIP_COMPRESSION)) {
4206                                 return (SET_ERROR(ENOTSUP));
4207                         }
4208 
4209                         if (intval == ZIO_COMPRESS_ZLE &&
4210                             zfs_earlier_version(dsname,
4211                             SPA_VERSION_ZLE_COMPRESSION))
4212                                 return (SET_ERROR(ENOTSUP));
4213 
4214                         if (intval == ZIO_COMPRESS_LZ4) {
4215                                 spa_t *spa;
4216 
4217                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4218                                         return (err);
4219 
4220                                 if (!spa_feature_is_enabled(spa,
4221                                     SPA_FEATURE_LZ4_COMPRESS)) {
4222                                         spa_close(spa, FTAG);
4223                                         return (SET_ERROR(ENOTSUP));
4224                                 }
4225                                 spa_close(spa, FTAG);
4226                         }
4227 
4228                         /*
4229                          * If this is a bootable dataset then
4230                          * verify that the compression algorithm
4231                          * is supported for booting. We must return
4232                          * something other than ENOTSUP since it
4233                          * implies a downrev pool version.
4234                          */
4235                         if (zfs_is_bootfs(dsname) &&
4236                             !BOOTFS_COMPRESS_VALID(intval)) {
4237                                 return (SET_ERROR(ERANGE));
4238                         }
4239                 }
4240                 break;
4241 
4242         case ZFS_PROP_COPIES:
4243                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4244                         return (SET_ERROR(ENOTSUP));
4245                 break;
4246 
4247         case ZFS_PROP_RECORDSIZE:
4248                 /* Record sizes above 128k need the feature to be enabled */
4249                 if (nvpair_value_uint64(pair, &intval) == 0 &&
4250                     intval > SPA_OLD_MAXBLOCKSIZE) {
4251                         spa_t *spa;
4252 
4253                         /*
4254                          * We don't allow setting the property above 1MB,
4255                          * unless the tunable has been changed.
4256                          */
4257                         if (intval > zfs_max_recordsize ||
4258                             intval > SPA_MAXBLOCKSIZE)
4259                                 return (SET_ERROR(ERANGE));
4260 
4261                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4262                                 return (err);
4263 
4264                         if (!spa_feature_is_enabled(spa,
4265                             SPA_FEATURE_LARGE_BLOCKS)) {
4266                                 spa_close(spa, FTAG);
4267                                 return (SET_ERROR(ENOTSUP));
4268                         }
4269                         spa_close(spa, FTAG);
4270                 }
4271                 break;
4272 
4273         case ZFS_PROP_DNODESIZE:
4274                 /* Dnode sizes above 512 need the feature to be enabled */
4275                 if (nvpair_value_uint64(pair, &intval) == 0 &&
4276                     intval != ZFS_DNSIZE_LEGACY) {
4277                         spa_t *spa;
4278 
4279                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4280                                 return (err);
4281 
4282                         if (!spa_feature_is_enabled(spa,
4283                             SPA_FEATURE_LARGE_DNODE)) {
4284                                 spa_close(spa, FTAG);
4285                                 return (SET_ERROR(ENOTSUP));
4286                         }
4287                         spa_close(spa, FTAG);
4288                 }
4289                 break;
4290 
4291         case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4292                 /*
4293                  * This property could require the allocation classes
4294                  * feature to be active for setting, however we allow
4295                  * it so that tests of settable properties succeed.
4296                  * The CLI will issue a warning in this case.
4297                  */
4298                 break;
4299 
4300         case ZFS_PROP_SHARESMB:
4301                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4302                         return (SET_ERROR(ENOTSUP));
4303                 break;
4304 
4305         case ZFS_PROP_ACLINHERIT:
4306                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4307                     nvpair_value_uint64(pair, &intval) == 0) {
4308                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
4309                             zfs_earlier_version(dsname,
4310                             SPA_VERSION_PASSTHROUGH_X))
4311                                 return (SET_ERROR(ENOTSUP));
4312                 }
4313                 break;
4314 
4315         case ZFS_PROP_CHECKSUM:
4316         case ZFS_PROP_DEDUP:
4317         {
4318                 spa_feature_t feature;
4319                 spa_t *spa;
4320 
4321                 /* dedup feature version checks */
4322                 if (prop == ZFS_PROP_DEDUP &&
4323                     zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4324                         return (SET_ERROR(ENOTSUP));
4325 
4326                 if (nvpair_value_uint64(pair, &intval) != 0)
4327                         return (SET_ERROR(EINVAL));
4328 
4329                 /* check prop value is enabled in features */
4330                 feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4331                 if (feature == SPA_FEATURE_NONE)
4332                         break;
4333 
4334                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4335                         return (err);
4336 
4337                 if (!spa_feature_is_enabled(spa, feature)) {
4338                         spa_close(spa, FTAG);
4339                         return (SET_ERROR(ENOTSUP));
4340                 }
4341                 spa_close(spa, FTAG);
4342                 break;
4343         }
4344         }
4345 
4346         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4347 }
4348 
4349 /*
4350  * Checks for a race condition to make sure we don't increment a feature flag
4351  * multiple times.
4352  */
4353 static int
4354 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4355 {
4356         spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4357         spa_feature_t *featurep = arg;
4358 
4359         if (!spa_feature_is_active(spa, *featurep))
4360                 return (0);
4361         else
4362                 return (SET_ERROR(EBUSY));
4363 }
4364 
4365 /*
4366  * The callback invoked on feature activation in the sync task caused by
4367  * zfs_prop_activate_feature.
4368  */
4369 static void
4370 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4371 {
4372         spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4373         spa_feature_t *featurep = arg;
4374 
4375         spa_feature_incr(spa, *featurep, tx);
4376 }
4377 
4378 /*
4379  * Activates a feature on a pool in response to a property setting. This
4380  * creates a new sync task which modifies the pool to reflect the feature
4381  * as being active.
4382  */
4383 static int
4384 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4385 {
4386         int err;
4387 
4388         /* EBUSY here indicates that the feature is already active */
4389         err = dsl_sync_task(spa_name(spa),
4390             zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4391             &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4392 
4393         if (err != 0 && err != EBUSY)
4394                 return (err);
4395         else
4396                 return (0);
4397 }
4398 
4399 /*
4400  * Removes properties from the given props list that fail permission checks
4401  * needed to clear them and to restore them in case of a receive error. For each
4402  * property, make sure we have both set and inherit permissions.
4403  *
4404  * Returns the first error encountered if any permission checks fail. If the
4405  * caller provides a non-NULL errlist, it also gives the complete list of names
4406  * of all the properties that failed a permission check along with the
4407  * corresponding error numbers. The caller is responsible for freeing the
4408  * returned errlist.
4409  *
4410  * If every property checks out successfully, zero is returned and the list
4411  * pointed at by errlist is NULL.
4412  */
4413 static int
4414 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4415 {
4416         zfs_cmd_t *zc;
4417         nvpair_t *pair, *next_pair;
4418         nvlist_t *errors;
4419         int err, rv = 0;
4420 
4421         if (props == NULL)
4422                 return (0);
4423 
4424         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4425 
4426         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4427         (void) strcpy(zc->zc_name, dataset);
4428         pair = nvlist_next_nvpair(props, NULL);
4429         while (pair != NULL) {
4430                 next_pair = nvlist_next_nvpair(props, pair);
4431 
4432                 (void) strcpy(zc->zc_value, nvpair_name(pair));
4433                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4434                     (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4435                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4436                         VERIFY(nvlist_add_int32(errors,
4437                             zc->zc_value, err) == 0);
4438                 }
4439                 pair = next_pair;
4440         }
4441         kmem_free(zc, sizeof (zfs_cmd_t));
4442 
4443         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4444                 nvlist_free(errors);
4445                 errors = NULL;
4446         } else {
4447                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
4448         }
4449 
4450         if (errlist == NULL)
4451                 nvlist_free(errors);
4452         else
4453                 *errlist = errors;
4454 
4455         return (rv);
4456 }
4457 
4458 static boolean_t
4459 propval_equals(nvpair_t *p1, nvpair_t *p2)
4460 {
4461         if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4462                 /* dsl_prop_get_all_impl() format */
4463                 nvlist_t *attrs;
4464                 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4465                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4466                     &p1) == 0);
4467         }
4468 
4469         if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4470                 nvlist_t *attrs;
4471                 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4472                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4473                     &p2) == 0);
4474         }
4475 
4476         if (nvpair_type(p1) != nvpair_type(p2))
4477                 return (B_FALSE);
4478 
4479         if (nvpair_type(p1) == DATA_TYPE_STRING) {
4480                 char *valstr1, *valstr2;
4481 
4482                 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4483                 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4484                 return (strcmp(valstr1, valstr2) == 0);
4485         } else {
4486                 uint64_t intval1, intval2;
4487 
4488                 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4489                 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4490                 return (intval1 == intval2);
4491         }
4492 }
4493 
4494 /*
4495  * Remove properties from props if they are not going to change (as determined
4496  * by comparison with origprops). Remove them from origprops as well, since we
4497  * do not need to clear or restore properties that won't change.
4498  */
4499 static void
4500 props_reduce(nvlist_t *props, nvlist_t *origprops)
4501 {
4502         nvpair_t *pair, *next_pair;
4503 
4504         if (origprops == NULL)
4505                 return; /* all props need to be received */
4506 
4507         pair = nvlist_next_nvpair(props, NULL);
4508         while (pair != NULL) {
4509                 const char *propname = nvpair_name(pair);
4510                 nvpair_t *match;
4511 
4512                 next_pair = nvlist_next_nvpair(props, pair);
4513 
4514                 if ((nvlist_lookup_nvpair(origprops, propname,
4515                     &match) != 0) || !propval_equals(pair, match))
4516                         goto next; /* need to set received value */
4517 
4518                 /* don't clear the existing received value */
4519                 (void) nvlist_remove_nvpair(origprops, match);
4520                 /* don't bother receiving the property */
4521                 (void) nvlist_remove_nvpair(props, pair);
4522 next:
4523                 pair = next_pair;
4524         }
4525 }
4526 
4527 /*
4528  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4529  * For example, refquota cannot be set until after the receipt of a dataset,
4530  * because in replication streams, an older/earlier snapshot may exceed the
4531  * refquota.  We want to receive the older/earlier snapshot, but setting
4532  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4533  * the older/earlier snapshot from being received (with EDQUOT).
4534  *
4535  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4536  *
4537  * libzfs will need to be judicious handling errors encountered by props
4538  * extracted by this function.
4539  */
4540 static nvlist_t *
4541 extract_delay_props(nvlist_t *props)
4542 {
4543         nvlist_t *delayprops;
4544         nvpair_t *nvp, *tmp;
4545         static const zfs_prop_t delayable[] = {
4546                 ZFS_PROP_REFQUOTA,
4547                 ZFS_PROP_KEYLOCATION,
4548                 0
4549         };
4550         int i;
4551 
4552         VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4553 
4554         for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4555             nvp = nvlist_next_nvpair(props, nvp)) {
4556                 /*
4557                  * strcmp() is safe because zfs_prop_to_name() always returns
4558                  * a bounded string.
4559                  */
4560                 for (i = 0; delayable[i] != 0; i++) {
4561                         if (strcmp(zfs_prop_to_name(delayable[i]),
4562                             nvpair_name(nvp)) == 0) {
4563                                 break;
4564                         }
4565                 }
4566                 if (delayable[i] != 0) {
4567                         tmp = nvlist_prev_nvpair(props, nvp);
4568                         VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4569                         VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4570                         nvp = tmp;
4571                 }
4572         }
4573 
4574         if (nvlist_empty(delayprops)) {
4575                 nvlist_free(delayprops);
4576                 delayprops = NULL;
4577         }
4578         return (delayprops);
4579 }
4580 
#ifdef  DEBUG
/*
 * Error-injection hook, present in DEBUG kernels only.  When set,
 * zfs_ioc_recv_impl() clears it again and forces its own result to an error
 * after the stream has been processed, so that the code restoring the
 * original properties gets exercised.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4584 
/*
 * Common implementation of the receive ioctl: receives the send stream read
 * from 'input_fd' into snapshot 'tosnap' of dataset 'tofs' and applies the
 * accompanying properties, restoring the previous ones if the receive fails.
 *
 * recvprops    received properties to apply with ZPROP_SRC_RECEIVED (may be
 *              NULL).  The list may be modified in place: unchanged properties
 *              are removed by props_reduce() and delayed ones by
 *              extract_delay_props().
 * localprops   local overrides (may be NULL).  Boolean pairs are treated as
 *              "-x" (inherit) requests, all other pairs as "-o property=value".
 * origin, hidden_args, force, resumable, begin_record
 *              handed to dmu_recv_begin()
 * cleanup_fd, action_handle
 *              handed to dmu_recv_stream()
 * read_bytes   out: how far the stream offset advanced
 * errflags     out: ZPROP_ERR_NOCLEAR / ZPROP_ERR_NORESTORE bits
 * errors       out: nvlist 'errors' is always allocated. It will contain
 *              descriptions of encountered errors, if any. It's the caller's
 *              responsibility to free.
 *
 * Returns EBADF if 'input_fd' is not a valid descriptor, otherwise the receive
 * error if there was one, otherwise the result of dsl_prop_set_hasrecvd().
 */
static int
zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
    nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
    boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record,
    int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
    uint64_t *action_handle, nvlist_t **errors)
{
        dmu_recv_cookie_t drc;
        int error = 0;
        int props_error = 0;
        offset_t off;
        nvlist_t *local_delayprops = NULL;
        nvlist_t *recv_delayprops = NULL;
        nvlist_t *origprops = NULL; /* existing properties */
        nvlist_t *origrecvd = NULL; /* existing received properties */
        boolean_t first_recvd_props = B_FALSE;
        file_t *input_fp;

        /* Initialize all outputs up front so every exit path leaves them set. */
        *read_bytes = 0;
        *errflags = 0;
        *errors = fnvlist_alloc();

        input_fp = getf(input_fd);
        if (input_fp == NULL)
                return (SET_ERROR(EBADF));

        error = dmu_recv_begin(tofs, tosnap, begin_record, force,
            resumable, localprops, hidden_args, origin, &drc);
        if (error != 0)
                goto out;

        /*
         * Set properties before we receive the stream so that they are applied
         * to the new data. Note that we must call dmu_recv_stream() if
         * dmu_recv_begin() succeeds.
         */
        if (recvprops != NULL && !drc.drc_newfs) {
                if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
                    SPA_VERSION_RECVD_PROPS &&
                    !dsl_prop_get_hasrecvd(tofs))
                        first_recvd_props = B_TRUE;

                /*
                 * If new received properties are supplied, they are to
                 * completely replace the existing received properties,
                 * so stash away the existing ones.
                 */
                if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
                        nvlist_t *errlist = NULL;
                        /*
                         * Don't bother writing a property if its value won't
                         * change (and avoid the unnecessary security checks).
                         *
                         * The first receive after SPA_VERSION_RECVD_PROPS is a
                         * special case where we blow away all local properties
                         * regardless.
                         */
                        if (!first_recvd_props)
                                props_reduce(recvprops, origrecvd);
                        /*
                         * Properties we may not clear are dropped from
                         * origrecvd and reported to the caller via 'errors'.
                         */
                        if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
                                (void) nvlist_merge(*errors, errlist, 0);
                        nvlist_free(errlist);

                        if (clear_received_props(tofs, origrecvd,
                            first_recvd_props ? NULL : recvprops) != 0)
                                *errflags |= ZPROP_ERR_NOCLEAR;
                } else {
                        *errflags |= ZPROP_ERR_NOCLEAR;
                }
        }

        /*
         * Stash away existing properties so we can restore them on error unless
         * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
         * case "origrecvd" will take care of that.
         */
        if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
                objset_t *os;
                if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
                        if (dsl_prop_get_all(os, &origprops) != 0) {
                                *errflags |= ZPROP_ERR_NOCLEAR;
                        }
                        dmu_objset_rele(os, FTAG);
                } else {
                        *errflags |= ZPROP_ERR_NOCLEAR;
                }
        }

        if (recvprops != NULL) {
                props_error = dsl_prop_set_hasrecvd(tofs);

                if (props_error == 0) {
                        /* Hold back the props that must wait for the data. */
                        recv_delayprops = extract_delay_props(recvprops);
                        (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
                            recvprops, *errors);
                }
        }

        if (localprops != NULL) {
                /*
                 * Split localprops into values to set locally (oprops) and
                 * names to revert to inheritance (xprops).
                 */
                nvlist_t *oprops = fnvlist_alloc();
                nvlist_t *xprops = fnvlist_alloc();
                nvpair_t *nvp = NULL;

                while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
                        if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
                                /* -x property */
                                const char *name = nvpair_name(nvp);
                                zfs_prop_t prop = zfs_name_to_prop(name);
                                /*
                                 * Only inheritable native properties and user
                                 * properties can be excluded; skip the rest.
                                 */
                                if (prop != ZPROP_INVAL) {
                                        if (!zfs_prop_inheritable(prop))
                                                continue;
                                } else if (!zfs_prop_user(name))
                                        continue;
                                fnvlist_add_boolean(xprops, name);
                        } else {
                                /* -o property=value */
                                fnvlist_add_nvpair(oprops, nvp);
                        }
                }

                local_delayprops = extract_delay_props(oprops);
                (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
                    oprops, *errors);
                (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
                    xprops, *errors);

                nvlist_free(oprops);
                nvlist_free(xprops);
        }

        /* Receive the stream, starting at the file's current offset. */
        off = input_fp->f_offset;
        error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
            action_handle);

        if (error == 0) {
                zfsvfs_t *zfsvfs = NULL;

                if (getzfsvfs(tofs, &zfsvfs) == 0) {
                        /* online recv */
                        dsl_dataset_t *ds;
                        int end_err;

                        ds = dmu_objset_ds(zfsvfs->z_os);
                        error = zfs_suspend_fs(zfsvfs);
                        /*
                         * If the suspend fails, then the recv_end will
                         * likely also fail, and clean up after itself.
                         */
                        end_err = dmu_recv_end(&drc, zfsvfs);
                        if (error == 0)
                                error = zfs_resume_fs(zfsvfs, ds);
                        /* A suspend/resume failure wins over end_err. */
                        error = error ? error : end_err;
                        VFS_RELE(zfsvfs->z_vfs);
                } else {
                        error = dmu_recv_end(&drc, NULL);
                }

                /* Set delayed properties now, after we're done receiving. */
                if (recv_delayprops != NULL && error == 0) {
                        (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
                            recv_delayprops, *errors);
                }
                if (local_delayprops != NULL && error == 0) {
                        (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
                            local_delayprops, *errors);
                }
        }

        /*
         * Merge delayed props back in with initial props, in case
         * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
         * we have to make sure clear_received_props() includes
         * the delayed properties).
         *
         * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
         * using ASSERT() will be just like a VERIFY.
         */
        if (recv_delayprops != NULL) {
                ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
                nvlist_free(recv_delayprops);
        }
        if (local_delayprops != NULL) {
                ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
                nvlist_free(local_delayprops);
        }

        /* Report how much was consumed and move the file offset past it. */
        *read_bytes = off - input_fp->f_offset;
        if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
                input_fp->f_offset = off;

#ifdef  DEBUG
        /* One-shot injected failure to exercise the restore code below. */
        if (zfs_ioc_recv_inject_err) {
                zfs_ioc_recv_inject_err = B_FALSE;
                error = 1;
        }
#endif

        /*
         * On error, restore the original props.
         */
        if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
                if (clear_received_props(tofs, recvprops, NULL) != 0) {
                        /*
                         * We failed to clear the received properties.
                         * Since we may have left a $recvd value on the
                         * system, we can't clear the $hasrecvd flag.
                         */
                        *errflags |= ZPROP_ERR_NORESTORE;
                } else if (first_recvd_props) {
                        dsl_prop_unset_hasrecvd(tofs);
                }

                if (origrecvd == NULL && !drc.drc_newfs) {
                        /* We failed to stash the original properties. */
                        *errflags |= ZPROP_ERR_NORESTORE;
                }

                /*
                 * dsl_props_set() will not convert RECEIVED to LOCAL on or
                 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
                 * explicitly if we're restoring local properties cleared in the
                 * first new-style receive.
                 */
                if (origrecvd != NULL &&
                    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
                    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
                    origrecvd, NULL) != 0) {
                        /*
                         * We stashed the original properties but failed to
                         * restore them.
                         */
                        *errflags |= ZPROP_ERR_NORESTORE;
                }
        }
        if (error != 0 && localprops != NULL && !drc.drc_newfs &&
            !first_recvd_props) {
                nvlist_t *setprops;
                nvlist_t *inheritprops;
                nvpair_t *nvp;

                if (origprops == NULL) {
                        /* We failed to stash the original properties. */
                        *errflags |= ZPROP_ERR_NORESTORE;
                        goto out;
                }

                /* Restore original props */
                setprops = fnvlist_alloc();
                inheritprops = fnvlist_alloc();
                nvp = NULL;
                while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
                        const char *name = nvpair_name(nvp);
                        const char *source;
                        nvlist_t *attrs;

                        if (!nvlist_exists(origprops, name)) {
                                /*
                                 * Property was not present or was explicitly
                                 * inherited before the receive, restore this.
                                 */
                                fnvlist_add_boolean(inheritprops, name);
                                continue;
                        }
                        attrs = fnvlist_lookup_nvlist(origprops, name);
                        source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);

                        /* Skip received properties */
                        if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
                                continue;

                        if (strcmp(source, tofs) == 0) {
                                /* Property was locally set */
                                fnvlist_add_nvlist(setprops, name, attrs);
                        } else {
                                /* Property was implicitly inherited */
                                fnvlist_add_boolean(inheritprops, name);
                        }
                }

                if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
                    NULL) != 0)
                        *errflags |= ZPROP_ERR_NORESTORE;
                if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
                    NULL) != 0)
                        *errflags |= ZPROP_ERR_NORESTORE;

                nvlist_free(setprops);
                nvlist_free(inheritprops);
        }
out:
        releasef(input_fd);
        nvlist_free(origrecvd);
        nvlist_free(origprops);

        /* A receive error takes precedence over the $hasrecvd error. */
        if (error == 0)
                error = props_error;

        return (error);
}
4888 
4889 /*
4890  * inputs:
4891  * zc_name              name of containing filesystem
4892  * zc_nvlist_src{_size} nvlist of received properties to apply
4893  * zc_nvlist_conf{_size} nvlist of local properties to apply
4894  * zc_history_offset{_len} nvlist of hidden args { "wkeydata" -> value }
4895  * zc_value             name of snapshot to create
4896  * zc_string            name of clone origin (if DRR_FLAG_CLONE)
4897  * zc_cookie            file descriptor to recv from
4898  * zc_begin_record      the BEGIN record of the stream (not byteswapped)
4899  * zc_guid              force flag
4900  * zc_cleanup_fd        cleanup-on-exit file descriptor
4901  * zc_action_handle     handle for this guid/ds mapping (or zero on first call)
4902  * zc_resumable         if data is incomplete assume sender will resume
4903  *
4904  * outputs:
4905  * zc_cookie            number of bytes read
4906  * zc_nvlist_dst{_size} error for each unapplied received property
4907  * zc_obj               zprop_errflags_t
4908  * zc_action_handle     handle for this guid/ds mapping
4909  */
4910 static int
4911 zfs_ioc_recv(zfs_cmd_t *zc)
4912 {
4913         dmu_replay_record_t begin_record;
4914         nvlist_t *errors = NULL;
4915         nvlist_t *recvdprops = NULL;
4916         nvlist_t *localprops = NULL;
4917         nvlist_t *hidden_args = NULL;
4918         char *origin = NULL;
4919         char *tosnap;
4920         char tofs[ZFS_MAX_DATASET_NAME_LEN];
4921         int error = 0;
4922 
4923         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4924             strchr(zc->zc_value, '@') == NULL ||
4925             strchr(zc->zc_value, '%'))
4926                 return (SET_ERROR(EINVAL));
4927 
4928         (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
4929         tosnap = strchr(tofs, '@');
4930         *tosnap++ = '\0';
4931 
4932         if (zc->zc_nvlist_src != 0 &&
4933             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4934             zc->zc_iflags, &recvdprops)) != 0)
4935                 return (error);
4936 
4937         if (zc->zc_nvlist_conf != 0 &&
4938             (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
4939             zc->zc_iflags, &localprops)) != 0)
4940                 return (error);
4941 
4942         if (zc->zc_history_offset != 0 &&
4943             (error = get_nvlist(zc->zc_history_offset, zc->zc_history_len,
4944             zc->zc_iflags, &hidden_args)) != 0)
4945                 return (error);
4946 
4947         if (zc->zc_string[0])
4948                 origin = zc->zc_string;
4949 
4950         begin_record.drr_type = DRR_BEGIN;
4951         begin_record.drr_payloadlen = zc->zc_begin_record.drr_payloadlen;
4952         begin_record.drr_u.drr_begin = zc->zc_begin_record.drr_u.drr_begin;
4953 
4954         error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
4955             hidden_args, zc->zc_guid, zc->zc_resumable, zc->zc_cookie,
4956             &begin_record, zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
4957             &zc->zc_action_handle, &errors);
4958         nvlist_free(recvdprops);
4959         nvlist_free(localprops);
4960 
4961         /*
4962          * Now that all props, initial and delayed, are set, report the prop
4963          * errors to the caller.
4964          */
4965         if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
4966             (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4967             put_nvlist(zc, errors) != 0)) {
4968                 /*
4969                  * Caller made zc->zc_nvlist_dst less than the minimum expected
4970                  * size or supplied an invalid address.
4971                  */
4972                 error = SET_ERROR(EINVAL);
4973         }
4974 
4975         nvlist_free(errors);
4976 
4977         return (error);
4978 }
4979 
4980 /*
4981  * inputs:
4982  * zc_name      name of snapshot to send
4983  * zc_cookie    file descriptor to send stream to
4984  * zc_obj       fromorigin flag (mutually exclusive with zc_fromobj)
4985  * zc_sendobj   objsetid of snapshot to send
4986  * zc_fromobj   objsetid of incremental fromsnap (may be zero)
4987  * zc_guid      if set, estimate size of stream only.  zc_cookie is ignored.
4988  *              output size in zc_objset_type.
4989  * zc_flags     lzc_send_flags
4990  *
4991  * outputs:
4992  * zc_objset_type       estimated size, if zc_guid is set
4993  */
4994 static int
4995 zfs_ioc_send(zfs_cmd_t *zc)
4996 {
4997         int error;
4998         offset_t off;
4999         boolean_t estimate = (zc->zc_guid != 0);
5000         boolean_t embedok = (zc->zc_flags & 0x1);
5001         boolean_t large_block_ok = (zc->zc_flags & 0x2);
5002         boolean_t compressok = (zc->zc_flags & 0x4);
5003         boolean_t rawok = (zc->zc_flags & 0x8);
5004 
5005         if (zc->zc_obj != 0) {
5006                 dsl_pool_t *dp;
5007                 dsl_dataset_t *tosnap;
5008 
5009                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5010                 if (error != 0)
5011                         return (error);
5012 
5013                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5014                 if (error != 0) {
5015                         dsl_pool_rele(dp, FTAG);
5016                         return (error);
5017                 }
5018 
5019                 if (dsl_dir_is_clone(tosnap->ds_dir))
5020                         zc->zc_fromobj =
5021                             dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5022                 dsl_dataset_rele(tosnap, FTAG);
5023                 dsl_pool_rele(dp, FTAG);
5024         }
5025 
5026         if (estimate) {
5027                 dsl_pool_t *dp;
5028                 dsl_dataset_t *tosnap;
5029                 dsl_dataset_t *fromsnap = NULL;
5030 
5031                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5032                 if (error != 0)
5033                         return (error);
5034 
5035                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
5036                     FTAG, &tosnap);
5037                 if (error != 0) {
5038                         dsl_pool_rele(dp, FTAG);
5039                         return (error);
5040                 }
5041 
5042                 if (zc->zc_fromobj != 0) {
5043                         error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5044                             FTAG, &fromsnap);
5045                         if (error != 0) {
5046                                 dsl_dataset_rele(tosnap, FTAG);
5047                                 dsl_pool_rele(dp, FTAG);
5048                                 return (error);
5049                         }
5050                 }
5051 
5052                 error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
5053                     &zc->zc_objset_type);
5054 
5055                 if (fromsnap != NULL)
5056                         dsl_dataset_rele(fromsnap, FTAG);
5057                 dsl_dataset_rele(tosnap, FTAG);
5058                 dsl_pool_rele(dp, FTAG);
5059         } else {
5060                 file_t *fp = getf(zc->zc_cookie);
5061                 if (fp == NULL)
5062                         return (SET_ERROR(EBADF));
5063 
5064                 off = fp->f_offset;
5065                 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5066                     zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
5067                     zc->zc_cookie, fp->f_vnode, &off);
5068 
5069                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5070                         fp->f_offset = off;
5071                 releasef(zc->zc_cookie);
5072         }
5073         return (error);
5074 }
5075 
5076 /*
5077  * inputs:
5078  * zc_name      name of snapshot on which to report progress
5079  * zc_cookie    file descriptor of send stream
5080  *
5081  * outputs:
5082  * zc_cookie    number of bytes written in send stream thus far
5083  */
5084 static int
5085 zfs_ioc_send_progress(zfs_cmd_t *zc)
5086 {
5087         dsl_pool_t *dp;
5088         dsl_dataset_t *ds;
5089         dmu_sendarg_t *dsp = NULL;
5090         int error;
5091 
5092         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5093         if (error != 0)
5094                 return (error);
5095 
5096         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5097         if (error != 0) {
5098                 dsl_pool_rele(dp, FTAG);
5099                 return (error);
5100         }
5101 
5102         mutex_enter(&ds->ds_sendstream_lock);
5103 
5104         /*
5105          * Iterate over all the send streams currently active on this dataset.
5106          * If there's one which matches the specified file descriptor _and_ the
5107          * stream was started by the current process, return the progress of
5108          * that stream.
5109          */
5110         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5111             dsp = list_next(&ds->ds_sendstreams, dsp)) {
5112                 if (dsp->dsa_outfd == zc->zc_cookie &&
5113                     dsp->dsa_proc == curproc)
5114                         break;
5115         }
5116 
5117         if (dsp != NULL)
5118                 zc->zc_cookie = *(dsp->dsa_off);
5119         else
5120                 error = SET_ERROR(ENOENT);
5121 
5122         mutex_exit(&ds->ds_sendstream_lock);
5123         dsl_dataset_rele(ds, FTAG);
5124         dsl_pool_rele(dp, FTAG);
5125         return (error);
5126 }
5127 
5128 static int
5129 zfs_ioc_inject_fault(zfs_cmd_t *zc)
5130 {
5131         int id, error;
5132 
5133         error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5134             &zc->zc_inject_record);
5135 
5136         if (error == 0)
5137                 zc->zc_guid = (uint64_t)id;
5138 
5139         return (error);
5140 }
5141 
5142 static int
5143 zfs_ioc_clear_fault(zfs_cmd_t *zc)
5144 {
5145         return (zio_clear_fault((int)zc->zc_guid));
5146 }
5147 
5148 static int
5149 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5150 {
5151         int id = (int)zc->zc_guid;
5152         int error;
5153 
5154         error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5155             &zc->zc_inject_record);
5156 
5157         zc->zc_guid = id;
5158 
5159         return (error);
5160 }
5161 
5162 static int
5163 zfs_ioc_error_log(zfs_cmd_t *zc)
5164 {
5165         spa_t *spa;
5166         int error;
5167         size_t count = (size_t)zc->zc_nvlist_dst_size;
5168 
5169         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5170                 return (error);
5171 
5172         error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5173             &count);
5174         if (error == 0)
5175                 zc->zc_nvlist_dst_size = count;
5176         else
5177                 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5178 
5179         spa_close(spa, FTAG);
5180 
5181         return (error);
5182 }
5183 
5184 static int
5185 zfs_ioc_clear(zfs_cmd_t *zc)
5186 {
5187         spa_t *spa;
5188         vdev_t *vd;
5189         int error;
5190 
5191         /*
5192          * On zpool clear we also fix up missing slogs
5193          */
5194         mutex_enter(&spa_namespace_lock);
5195         spa = spa_lookup(zc->zc_name);
5196         if (spa == NULL) {
5197                 mutex_exit(&spa_namespace_lock);
5198                 return (SET_ERROR(EIO));
5199         }
5200         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5201                 /* we need to let spa_open/spa_load clear the chains */
5202                 spa_set_log_state(spa, SPA_LOG_CLEAR);
5203         }
5204         spa->spa_last_open_failed = 0;
5205         mutex_exit(&spa_namespace_lock);
5206 
5207         if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5208                 error = spa_open(zc->zc_name, &spa, FTAG);
5209         } else {
5210                 nvlist_t *policy;
5211                 nvlist_t *config = NULL;
5212 
5213                 if (zc->zc_nvlist_src == 0)
5214                         return (SET_ERROR(EINVAL));
5215 
5216                 if ((error = get_nvlist(zc->zc_nvlist_src,
5217                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5218                         error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5219                             policy, &config);
5220                         if (config != NULL) {
5221                                 int err;
5222 
5223                                 if ((err = put_nvlist(zc, config)) != 0)
5224                                         error = err;
5225                                 nvlist_free(config);
5226                         }
5227                         nvlist_free(policy);
5228                 }
5229         }
5230 
5231         if (error != 0)
5232                 return (error);
5233 
5234         /*
5235          * If multihost is enabled, resuming I/O is unsafe as another
5236          * host may have imported the pool.
5237          */
5238         if (spa_multihost(spa) && spa_suspended(spa))
5239                 return (SET_ERROR(EINVAL));
5240 
5241         spa_vdev_state_enter(spa, SCL_NONE);
5242 
5243         if (zc->zc_guid == 0) {
5244                 vd = NULL;
5245         } else {
5246                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5247                 if (vd == NULL) {
5248                         (void) spa_vdev_state_exit(spa, NULL, ENODEV);
5249                         spa_close(spa, FTAG);
5250                         return (SET_ERROR(ENODEV));
5251                 }
5252         }
5253 
5254         vdev_clear(spa, vd);
5255 
5256         (void) spa_vdev_state_exit(spa, NULL, 0);
5257 
5258         /*
5259          * Resume any suspended I/Os.
5260          */
5261         if (zio_resume(spa) != 0)
5262                 error = SET_ERROR(EIO);
5263 
5264         spa_close(spa, FTAG);
5265 
5266         return (error);
5267 }
5268 
5269 static int
5270 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
5271 {
5272         spa_t *spa;
5273         int error;
5274 
5275         error = spa_open(zc->zc_name, &spa, FTAG);
5276         if (error != 0)
5277                 return (error);
5278 
5279         spa_vdev_state_enter(spa, SCL_NONE);
5280 
5281         /*
5282          * If a resilver is already in progress then set the
5283          * spa_scrub_reopen flag to B_TRUE so that we don't restart
5284          * the scan as a side effect of the reopen. Otherwise, let
5285          * vdev_open() decided if a resilver is required.
5286          */
5287         spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
5288         vdev_reopen(spa->spa_root_vdev);
5289         spa->spa_scrub_reopen = B_FALSE;
5290 
5291         (void) spa_vdev_state_exit(spa, NULL, 0);
5292         spa_close(spa, FTAG);
5293         return (0);
5294 }
5295 /*
5296  * inputs:
5297  * zc_name      name of filesystem
5298  *
5299  * outputs:
5300  * zc_string    name of conflicting snapshot, if there is one
5301  */
5302 static int
5303 zfs_ioc_promote(zfs_cmd_t *zc)
5304 {
5305         dsl_pool_t *dp;
5306         dsl_dataset_t *ds, *ods;
5307         char origin[ZFS_MAX_DATASET_NAME_LEN];
5308         char *cp;
5309         int error;
5310 
5311         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5312         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5313             strchr(zc->zc_name, '%'))
5314                 return (SET_ERROR(EINVAL));
5315 
5316         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5317         if (error != 0)
5318                 return (error);
5319 
5320         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5321         if (error != 0) {
5322                 dsl_pool_rele(dp, FTAG);
5323                 return (error);
5324         }
5325 
5326         if (!dsl_dir_is_clone(ds->ds_dir)) {
5327                 dsl_dataset_rele(ds, FTAG);
5328                 dsl_pool_rele(dp, FTAG);
5329                 return (SET_ERROR(EINVAL));
5330         }
5331 
5332         error = dsl_dataset_hold_obj(dp,
5333             dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5334         if (error != 0) {
5335                 dsl_dataset_rele(ds, FTAG);
5336                 dsl_pool_rele(dp, FTAG);
5337                 return (error);
5338         }
5339 
5340         dsl_dataset_name(ods, origin);
5341         dsl_dataset_rele(ods, FTAG);
5342         dsl_dataset_rele(ds, FTAG);
5343         dsl_pool_rele(dp, FTAG);
5344 
5345         /*
5346          * We don't need to unmount *all* the origin fs's snapshots, but
5347          * it's easier.
5348          */
5349         cp = strchr(origin, '@');
5350         if (cp)
5351                 *cp = '\0';
5352         (void) dmu_objset_find(origin,
5353             zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5354         return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5355 }
5356 
5357 /*
5358  * Retrieve a single {user|group|project}{used|quota}@... property.
5359  *
5360  * inputs:
5361  * zc_name      name of filesystem
5362  * zc_objset_type zfs_userquota_prop_t
5363  * zc_value     domain name (eg. "S-1-234-567-89")
5364  * zc_guid      RID/UID/GID
5365  *
5366  * outputs:
5367  * zc_cookie    property value
5368  */
5369 static int
5370 zfs_ioc_userspace_one(zfs_cmd_t *zc)
5371 {
5372         zfsvfs_t *zfsvfs;
5373         int error;
5374 
5375         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5376                 return (SET_ERROR(EINVAL));
5377 
5378         error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5379         if (error != 0)
5380                 return (error);
5381 
5382         error = zfs_userspace_one(zfsvfs,
5383             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5384         zfsvfs_rele(zfsvfs, FTAG);
5385 
5386         return (error);
5387 }
5388 
5389 /*
5390  * inputs:
5391  * zc_name              name of filesystem
5392  * zc_cookie            zap cursor
5393  * zc_objset_type       zfs_userquota_prop_t
5394  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5395  *
5396  * outputs:
5397  * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
5398  * zc_cookie    zap cursor
5399  */
5400 static int
5401 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5402 {
5403         zfsvfs_t *zfsvfs;
5404         int bufsize = zc->zc_nvlist_dst_size;
5405 
5406         if (bufsize <= 0)
5407                 return (SET_ERROR(ENOMEM));
5408 
5409         int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5410         if (error != 0)
5411                 return (error);
5412 
5413         void *buf = kmem_alloc(bufsize, KM_SLEEP);
5414 
5415         error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5416             buf, &zc->zc_nvlist_dst_size);
5417 
5418         if (error == 0) {
5419                 error = xcopyout(buf,
5420                     (void *)(uintptr_t)zc->zc_nvlist_dst,
5421                     zc->zc_nvlist_dst_size);
5422         }
5423         kmem_free(buf, bufsize);
5424         zfsvfs_rele(zfsvfs, FTAG);
5425 
5426         return (error);
5427 }
5428 
5429 /*
5430  * inputs:
5431  * zc_name              name of filesystem
5432  *
5433  * outputs:
5434  * none
5435  */
5436 static int
5437 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5438 {
5439         objset_t *os;
5440         int error = 0;
5441         zfsvfs_t *zfsvfs;
5442 
5443         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5444                 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5445                         /*
5446                          * If userused is not enabled, it may be because the
5447                          * objset needs to be closed & reopened (to grow the
5448                          * objset_phys_t).  Suspend/resume the fs will do that.
5449                          */
5450                         dsl_dataset_t *ds, *newds;
5451 
5452                         ds = dmu_objset_ds(zfsvfs->z_os);
5453                         error = zfs_suspend_fs(zfsvfs);
5454                         if (error == 0) {
5455                                 dmu_objset_refresh_ownership(ds, &newds,
5456                                     B_TRUE, zfsvfs);
5457                                 error = zfs_resume_fs(zfsvfs, newds);
5458                         }
5459                 }
5460                 if (error == 0)
5461                         error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5462                 VFS_RELE(zfsvfs->z_vfs);
5463         } else {
5464                 /* XXX kind of reading contents without owning */
5465                 error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
5466                 if (error != 0)
5467                         return (error);
5468 
5469                 error = dmu_objset_userspace_upgrade(os);
5470                 dmu_objset_rele_flags(os, B_TRUE, FTAG);
5471         }
5472 
5473         return (error);
5474 }
5475 
5476 /*
5477  * inputs:
5478  * zc_name              name of filesystem
5479  *
5480  * outputs:
5481  * none
5482  */
5483 static int
5484 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
5485 {
5486         objset_t *os;
5487         int error;
5488 
5489         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5490         if (error != 0)
5491                 return (error);
5492 
5493         dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
5494         dsl_pool_rele(dmu_objset_pool(os), FTAG);
5495 
5496         if (dmu_objset_userobjspace_upgradable(os) ||
5497             dmu_objset_projectquota_upgradable(os)) {
5498                 mutex_enter(&os->os_upgrade_lock);
5499                 if (os->os_upgrade_id == 0) {
5500                         /* clear potential error code and retry */
5501                         os->os_upgrade_status = 0;
5502                         mutex_exit(&os->os_upgrade_lock);
5503 
5504                         dmu_objset_id_quota_upgrade(os);
5505                 } else {
5506                         mutex_exit(&os->os_upgrade_lock);
5507                 }
5508 
5509                 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
5510                 error = os->os_upgrade_status;
5511         }
5512 
5513         dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
5514         dsl_dataset_rele(dmu_objset_ds(os), FTAG);
5515 
5516         return (error);
5517 }
5518 
5519 /*
5520  * We don't want to have a hard dependency
5521  * against some special symbols in sharefs
5522  * nfs, and smbsrv.  Determine them if needed when
5523  * the first file system is shared.
5524  * Neither sharefs, nfs or smbsrv are unloadable modules.
5525  */
5526 int (*znfsexport_fs)(void *arg);
5527 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
5528 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
5529 
5530 int zfs_nfsshare_inited;
5531 int zfs_smbshare_inited;
5532 
5533 ddi_modhandle_t nfs_mod;
5534 ddi_modhandle_t sharefs_mod;
5535 ddi_modhandle_t smbsrv_mod;
5536 kmutex_t zfs_share_lock;
5537 
5538 static int
5539 zfs_init_sharefs()
5540 {
5541         int error;
5542 
5543         ASSERT(MUTEX_HELD(&zfs_share_lock));
5544         /* Both NFS and SMB shares also require sharetab support. */
5545         if (sharefs_mod == NULL && ((sharefs_mod =
5546             ddi_modopen("fs/sharefs",
5547             KRTLD_MODE_FIRST, &error)) == NULL)) {
5548                 return (SET_ERROR(ENOSYS));
5549         }
5550         if (zshare_fs == NULL && ((zshare_fs =
5551             (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
5552             ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
5553                 return (SET_ERROR(ENOSYS));
5554         }
5555         return (0);
5556 }
5557 
5558 static int
5559 zfs_ioc_share(zfs_cmd_t *zc)
5560 {
5561         int error;
5562         int opcode;
5563 
5564         switch (zc->zc_share.z_sharetype) {
5565         case ZFS_SHARE_NFS:
5566         case ZFS_UNSHARE_NFS:
5567                 if (zfs_nfsshare_inited == 0) {
5568                         mutex_enter(&zfs_share_lock);
5569                         if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5570                             KRTLD_MODE_FIRST, &error)) == NULL)) {
5571                                 mutex_exit(&zfs_share_lock);
5572                                 return (SET_ERROR(ENOSYS));
5573                         }
5574                         if (znfsexport_fs == NULL &&
5575                             ((znfsexport_fs = (int (*)(void *))
5576                             ddi_modsym(nfs_mod,
5577                             "nfs_export", &error)) == NULL)) {
5578                                 mutex_exit(&zfs_share_lock);
5579                                 return (SET_ERROR(ENOSYS));
5580                         }
5581                         error = zfs_init_sharefs();
5582                         if (error != 0) {
5583                                 mutex_exit(&zfs_share_lock);
5584                                 return (SET_ERROR(ENOSYS));
5585                         }
5586                         zfs_nfsshare_inited = 1;
5587                         mutex_exit(&zfs_share_lock);
5588                 }
5589                 break;
5590         case ZFS_SHARE_SMB:
5591         case ZFS_UNSHARE_SMB:
5592                 if (zfs_smbshare_inited == 0) {
5593                         mutex_enter(&zfs_share_lock);
5594                         if (smbsrv_mod == NULL && ((smbsrv_mod =
5595                             ddi_modopen("drv/smbsrv",
5596                             KRTLD_MODE_FIRST, &error)) == NULL)) {
5597                                 mutex_exit(&zfs_share_lock);
5598                                 return (SET_ERROR(ENOSYS));
5599                         }
5600                         if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5601                             (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5602                             "smb_server_share", &error)) == NULL)) {
5603                                 mutex_exit(&zfs_share_lock);
5604                                 return (SET_ERROR(ENOSYS));
5605                         }
5606                         error = zfs_init_sharefs();
5607                         if (error != 0) {
5608                                 mutex_exit(&zfs_share_lock);
5609                                 return (SET_ERROR(ENOSYS));
5610                         }
5611                         zfs_smbshare_inited = 1;
5612                         mutex_exit(&zfs_share_lock);
5613                 }
5614                 break;
5615         default:
5616                 return (SET_ERROR(EINVAL));
5617         }
5618 
5619         switch (zc->zc_share.z_sharetype) {
5620         case ZFS_SHARE_NFS:
5621         case ZFS_UNSHARE_NFS:
5622                 if (error =
5623                     znfsexport_fs((void *)
5624                     (uintptr_t)zc->zc_share.z_exportdata))
5625                         return (error);
5626                 break;
5627         case ZFS_SHARE_SMB:
5628         case ZFS_UNSHARE_SMB:
5629                 if (error = zsmbexport_fs((void *)
5630                     (uintptr_t)zc->zc_share.z_exportdata,
5631                     zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5632                     B_TRUE: B_FALSE)) {
5633                         return (error);
5634                 }
5635                 break;
5636         }
5637 
5638         opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5639             zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5640             SHAREFS_ADD : SHAREFS_REMOVE;
5641 
5642         /*
5643          * Add or remove share from sharetab
5644          */
5645         error = zshare_fs(opcode,
5646             (void *)(uintptr_t)zc->zc_share.z_sharedata,
5647             zc->zc_share.z_sharemax);
5648 
5649         return (error);
5650 
5651 }
5652 
5653 ace_t full_access[] = {
5654         {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
5655 };
5656 
5657 /*
5658  * inputs:
5659  * zc_name              name of containing filesystem
5660  * zc_obj               object # beyond which we want next in-use object #
5661  *
5662  * outputs:
5663  * zc_obj               next in-use object #
5664  */
5665 static int
5666 zfs_ioc_next_obj(zfs_cmd_t *zc)
5667 {
5668         objset_t *os = NULL;
5669         int error;
5670 
5671         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5672         if (error != 0)
5673                 return (error);
5674 
5675         error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5676             dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5677 
5678         dmu_objset_rele(os, FTAG);
5679         return (error);
5680 }
5681 
5682 /*
5683  * inputs:
5684  * zc_name              name of filesystem
5685  * zc_value             prefix name for snapshot
5686  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
5687  *
5688  * outputs:
5689  * zc_value             short name of new snapshot
5690  */
5691 static int
5692 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5693 {
5694         char *snap_name;
5695         char *hold_name;
5696         int error;
5697         minor_t minor;
5698 
5699         error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5700         if (error != 0)
5701                 return (error);
5702 
5703         snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5704             (u_longlong_t)ddi_get_lbolt64());
5705         hold_name = kmem_asprintf("%%%s", zc->zc_value);
5706 
5707         error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5708             hold_name);
5709         if (error == 0)
5710                 (void) strcpy(zc->zc_value, snap_name);
5711         strfree(snap_name);
5712         strfree(hold_name);
5713         zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5714         return (error);
5715 }
5716 
5717 /*
5718  * inputs:
5719  * zc_name              name of "to" snapshot
5720  * zc_value             name of "from" snapshot
5721  * zc_cookie            file descriptor to write diff data on
5722  *
5723  * outputs:
5724  * dmu_diff_record_t's to the file descriptor
5725  */
5726 static int
5727 zfs_ioc_diff(zfs_cmd_t *zc)
5728 {
5729         file_t *fp;
5730         offset_t off;
5731         int error;
5732 
5733         fp = getf(zc->zc_cookie);
5734         if (fp == NULL)
5735                 return (SET_ERROR(EBADF));
5736 
5737         off = fp->f_offset;
5738 
5739         error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5740 
5741         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5742                 fp->f_offset = off;
5743         releasef(zc->zc_cookie);
5744 
5745         return (error);
5746 }
5747 
5748 /*
5749  * Remove all ACL files in shares dir
5750  */
5751 static int
5752 zfs_smb_acl_purge(znode_t *dzp)
5753 {
5754         zap_cursor_t    zc;
5755         zap_attribute_t zap;
5756         zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5757         int error;
5758 
5759         for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5760             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5761             zap_cursor_advance(&zc)) {
5762                 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5763                     NULL, 0)) != 0)
5764                         break;
5765         }
5766         zap_cursor_fini(&zc);
5767         return (error);
5768 }
5769 
5770 static int
5771 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5772 {
5773         vnode_t *vp;
5774         znode_t *dzp;
5775         vnode_t *resourcevp = NULL;
5776         znode_t *sharedir;
5777         zfsvfs_t *zfsvfs;
5778         nvlist_t *nvlist;
5779         char *src, *target;
5780         vattr_t vattr;
5781         vsecattr_t vsec;
5782         int error = 0;
5783 
5784         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5785             NO_FOLLOW, NULL, &vp)) != 0)
5786                 return (error);
5787 
5788         /* Now make sure mntpnt and dataset are ZFS */
5789 
5790         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5791             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5792             zc->zc_name) != 0)) {
5793                 VN_RELE(vp);
5794                 return (SET_ERROR(EINVAL));
5795         }
5796 
5797         dzp = VTOZ(vp);
5798         zfsvfs = dzp->z_zfsvfs;
5799         ZFS_ENTER(zfsvfs);
5800 
5801         /*
5802          * Create share dir if its missing.
5803          */
5804         mutex_enter(&zfsvfs->z_lock);
5805         if (zfsvfs->z_shares_dir == 0) {
5806                 dmu_tx_t *tx;
5807 
5808                 tx = dmu_tx_create(zfsvfs->z_os);
5809                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5810                     ZFS_SHARES_DIR);
5811                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5812                 error = dmu_tx_assign(tx, TXG_WAIT);
5813                 if (error != 0) {
5814                         dmu_tx_abort(tx);
5815                 } else {
5816                         error = zfs_create_share_dir(zfsvfs, tx);
5817                         dmu_tx_commit(tx);
5818                 }
5819                 if (error != 0) {
5820                         mutex_exit(&zfsvfs->z_lock);
5821                         VN_RELE(vp);
5822                         ZFS_EXIT(zfsvfs);
5823                         return (error);
5824                 }
5825         }
5826         mutex_exit(&zfsvfs->z_lock);
5827 
5828         ASSERT(zfsvfs->z_shares_dir);
5829         if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5830                 VN_RELE(vp);
5831                 ZFS_EXIT(zfsvfs);
5832                 return (error);
5833         }
5834 
5835         switch (zc->zc_cookie) {
5836         case ZFS_SMB_ACL_ADD:
5837                 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5838                 vattr.va_type = VREG;
5839                 vattr.va_mode = S_IFREG|0777;
5840                 vattr.va_uid = 0;
5841                 vattr.va_gid = 0;
5842 
5843                 vsec.vsa_mask = VSA_ACE;
5844                 vsec.vsa_aclentp = &full_access;
5845                 vsec.vsa_aclentsz = sizeof (full_access);
5846                 vsec.vsa_aclcnt = 1;
5847 
5848                 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5849                     &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5850                 if (resourcevp)
5851                         VN_RELE(resourcevp);
5852                 break;
5853 
5854         case ZFS_SMB_ACL_REMOVE:
5855                 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5856                     NULL, 0);
5857                 break;
5858 
5859         case ZFS_SMB_ACL_RENAME:
5860                 if ((error = get_nvlist(zc->zc_nvlist_src,
5861                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5862                         VN_RELE(vp);
5863                         VN_RELE(ZTOV(sharedir));
5864                         ZFS_EXIT(zfsvfs);
5865                         return (error);
5866                 }
5867                 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5868                     nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5869                     &target)) {
5870                         VN_RELE(vp);
5871                         VN_RELE(ZTOV(sharedir));
5872                         ZFS_EXIT(zfsvfs);
5873                         nvlist_free(nvlist);
5874                         return (error);
5875                 }
5876                 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5877                     kcred, NULL, 0);
5878                 nvlist_free(nvlist);
5879                 break;
5880 
5881         case ZFS_SMB_ACL_PURGE:
5882                 error = zfs_smb_acl_purge(sharedir);
5883                 break;
5884 
5885         default:
5886                 error = SET_ERROR(EINVAL);
5887                 break;
5888         }
5889 
5890         VN_RELE(vp);
5891         VN_RELE(ZTOV(sharedir));
5892 
5893         ZFS_EXIT(zfsvfs);
5894 
5895         return (error);
5896 }
5897 
5898 /*
5899  * innvl: {
5900  *     "holds" -> { snapname -> holdname (string), ... }
5901  *     (optional) "cleanup_fd" -> fd (int32)
5902  * }
5903  *
5904  * outnvl: {
5905  *     snapname -> error value (int32)
5906  *     ...
5907  * }
5908  */
5909 /* ARGSUSED */
5910 static int
5911 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5912 {
5913         nvpair_t *pair;
5914         nvlist_t *holds;
5915         int cleanup_fd = -1;
5916         int error;
5917         minor_t minor = 0;
5918 
5919         error = nvlist_lookup_nvlist(args, "holds", &holds);
5920         if (error != 0)
5921                 return (SET_ERROR(EINVAL));
5922 
5923         /* make sure the user didn't pass us any invalid (empty) tags */
5924         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5925             pair = nvlist_next_nvpair(holds, pair)) {
5926                 char *htag;
5927 
5928                 error = nvpair_value_string(pair, &htag);
5929                 if (error != 0)
5930                         return (SET_ERROR(error));
5931 
5932                 if (strlen(htag) == 0)
5933                         return (SET_ERROR(EINVAL));
5934         }
5935 
5936         if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5937                 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5938                 if (error != 0)
5939                         return (error);
5940         }
5941 
5942         error = dsl_dataset_user_hold(holds, minor, errlist);
5943         if (minor != 0)
5944                 zfs_onexit_fd_rele(cleanup_fd);
5945         return (error);
5946 }
5947 
5948 /*
5949  * innvl is not used.
5950  *
5951  * outnvl: {
5952  *    holdname -> time added (uint64 seconds since epoch)
5953  *    ...
5954  * }
5955  */
5956 /* ARGSUSED */
5957 static int
5958 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5959 {
5960         ASSERT3P(args, ==, NULL);
5961         return (dsl_dataset_get_holds(snapname, outnvl));
5962 }
5963 
5964 /*
5965  * innvl: {
5966  *     snapname -> { holdname, ... }
5967  *     ...
5968  * }
5969  *
5970  * outnvl: {
5971  *     snapname -> error value (int32)
5972  *     ...
5973  * }
5974  */
5975 /* ARGSUSED */
5976 static int
5977 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5978 {
5979         return (dsl_dataset_user_release(holds, errlist));
5980 }
5981 
5982 /*
5983  * inputs:
5984  * zc_name              name of new filesystem or snapshot
5985  * zc_value             full name of old snapshot
5986  *
5987  * outputs:
5988  * zc_cookie            space in bytes
5989  * zc_objset_type       compressed space in bytes
5990  * zc_perm_action       uncompressed space in bytes
5991  */
5992 static int
5993 zfs_ioc_space_written(zfs_cmd_t *zc)
5994 {
5995         int error;
5996         dsl_pool_t *dp;
5997         dsl_dataset_t *new, *old;
5998 
5999         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6000         if (error != 0)
6001                 return (error);
6002         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6003         if (error != 0) {
6004                 dsl_pool_rele(dp, FTAG);
6005                 return (error);
6006         }
6007         error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6008         if (error != 0) {
6009                 dsl_dataset_rele(new, FTAG);
6010                 dsl_pool_rele(dp, FTAG);
6011                 return (error);
6012         }
6013 
6014         error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
6015             &zc->zc_objset_type, &zc->zc_perm_action);
6016         dsl_dataset_rele(old, FTAG);
6017         dsl_dataset_rele(new, FTAG);
6018         dsl_pool_rele(dp, FTAG);
6019         return (error);
6020 }
6021 
6022 /*
6023  * innvl: {
6024  *     "firstsnap" -> snapshot name
6025  * }
6026  *
6027  * outnvl: {
6028  *     "used" -> space in bytes
6029  *     "compressed" -> compressed space in bytes
6030  *     "uncompressed" -> uncompressed space in bytes
6031  * }
6032  */
6033 static int
6034 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6035 {
6036         int error;
6037         dsl_pool_t *dp;
6038         dsl_dataset_t *new, *old;
6039         char *firstsnap;
6040         uint64_t used, comp, uncomp;
6041 
6042         if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
6043                 return (SET_ERROR(EINVAL));
6044 
6045         error = dsl_pool_hold(lastsnap, FTAG, &dp);
6046         if (error != 0)
6047                 return (error);
6048 
6049         error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6050         if (error == 0 && !new->ds_is_snapshot) {
6051                 dsl_dataset_rele(new, FTAG);
6052                 error = SET_ERROR(EINVAL);
6053         }
6054         if (error != 0) {
6055                 dsl_pool_rele(dp, FTAG);
6056                 return (error);
6057         }
6058         error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6059         if (error == 0 && !old->ds_is_snapshot) {
6060                 dsl_dataset_rele(old, FTAG);
6061                 error = SET_ERROR(EINVAL);
6062         }
6063         if (error != 0) {
6064                 dsl_dataset_rele(new, FTAG);
6065                 dsl_pool_rele(dp, FTAG);
6066                 return (error);
6067         }
6068 
6069         error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6070         dsl_dataset_rele(old, FTAG);
6071         dsl_dataset_rele(new, FTAG);
6072         dsl_pool_rele(dp, FTAG);
6073         fnvlist_add_uint64(outnvl, "used", used);
6074         fnvlist_add_uint64(outnvl, "compressed", comp);
6075         fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6076         return (error);
6077 }
6078 
6079 /*
6080  * innvl: {
6081  *     "fd" -> file descriptor to write stream to (int32)
6082  *     (optional) "fromsnap" -> full snap name to send an incremental from
6083  *     (optional) "largeblockok" -> (value ignored)
6084  *         indicates that blocks > 128KB are permitted
6085  *     (optional) "embedok" -> (value ignored)
6086  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6087  *     (optional) "compressok" -> (value ignored)
6088  *         presence indicates compressed DRR_WRITE records are permitted
6089  *     (optional) "rawok" -> (value ignored)
6090  *         presence indicates raw encrypted records should be used.
6091  *     (optional) "resume_object" and "resume_offset" -> (uint64)
6092  *         if present, resume send stream from specified object and offset.
6093  * }
6094  *
6095  * outnvl is unused
6096  */
6097 /* ARGSUSED */
6098 static int
6099 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6100 {
6101         int error;
6102         offset_t off;
6103         char *fromname = NULL;
6104         int fd;
6105         boolean_t largeblockok;
6106         boolean_t embedok;
6107         boolean_t compressok;
6108         boolean_t rawok;
6109         uint64_t resumeobj = 0;
6110         uint64_t resumeoff = 0;
6111 
6112         error = nvlist_lookup_int32(innvl, "fd", &fd);
6113         if (error != 0)
6114                 return (SET_ERROR(EINVAL));
6115 
6116         (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6117 
6118         largeblockok = nvlist_exists(innvl, "largeblockok");
6119         embedok = nvlist_exists(innvl, "embedok");
6120         compressok = nvlist_exists(innvl, "compressok");
6121         rawok = nvlist_exists(innvl, "rawok");
6122 
6123         (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6124         (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6125 
6126         file_t *fp = getf(fd);
6127         if (fp == NULL)
6128                 return (SET_ERROR(EBADF));
6129 
6130         off = fp->f_offset;
6131         error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
6132             rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
6133 
6134         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
6135                 fp->f_offset = off;
6136         releasef(fd);
6137         return (error);
6138 }
6139 
6140 /*
6141  * Determine approximately how large a zfs send stream will be -- the number
6142  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
6143  *
6144  * innvl: {
6145  *     (optional) "from" -> full snap or bookmark name to send an incremental
6146  *                          from
6147  *     (optional) "largeblockok" -> (value ignored)
6148  *         indicates that blocks > 128KB are permitted
6149  *     (optional) "embedok" -> (value ignored)
6150  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6151  *     (optional) "compressok" -> (value ignored)
6152  *         presence indicates compressed DRR_WRITE records are permitted
6153  * }
6154  *
6155  * outnvl: {
6156  *     "space" -> bytes of space (uint64)
6157  * }
6158  */
6159 static int
6160 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6161 {
6162         dsl_pool_t *dp;
6163         dsl_dataset_t *tosnap;
6164         int error;
6165         char *fromname;
6166         boolean_t compressok;
6167         boolean_t rawok;
6168         uint64_t space;
6169 
6170         error = dsl_pool_hold(snapname, FTAG, &dp);
6171         if (error != 0)
6172                 return (error);
6173 
6174         error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6175         if (error != 0) {
6176                 dsl_pool_rele(dp, FTAG);
6177                 return (error);
6178         }
6179 
6180         compressok = nvlist_exists(innvl, "compressok");
6181         rawok = nvlist_exists(innvl, "rawok");
6182 
6183         error = nvlist_lookup_string(innvl, "from", &fromname);
6184         if (error == 0) {
6185                 if (strchr(fromname, '@') != NULL) {
6186                         /*
6187                          * If from is a snapshot, hold it and use the more
6188                          * efficient dmu_send_estimate to estimate send space
6189                          * size using deadlists.
6190                          */
6191                         dsl_dataset_t *fromsnap;
6192                         error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6193                         if (error != 0)
6194                                 goto out;
6195                         error = dmu_send_estimate(tosnap, fromsnap,
6196                             compressok || rawok, &space);
6197                         dsl_dataset_rele(fromsnap, FTAG);
6198                 } else if (strchr(fromname, '#') != NULL) {
6199                         /*
6200                          * If from is a bookmark, fetch the creation TXG of the
6201                          * snapshot it was created from and use that to find
6202                          * blocks that were born after it.
6203                          */
6204                         zfs_bookmark_phys_t frombm;
6205 
6206                         error = dsl_bookmark_lookup(dp, fromname, tosnap,
6207                             &frombm);
6208                         if (error != 0)
6209                                 goto out;
6210                         error = dmu_send_estimate_from_txg(tosnap,
6211                             frombm.zbm_creation_txg, compressok || rawok,
6212                             &space);
6213                 } else {
6214                         /*
6215                          * from is not properly formatted as a snapshot or
6216                          * bookmark
6217                          */
6218                         error = SET_ERROR(EINVAL);
6219                         goto out;
6220                 }
6221         } else {
6222                 /*
6223                  * If estimating the size of a full send, use dmu_send_estimate.
6224                  */
6225                 error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
6226                     &space);
6227         }
6228 
6229         fnvlist_add_uint64(outnvl, "space", space);
6230 
6231 out:
6232         dsl_dataset_rele(tosnap, FTAG);
6233         dsl_pool_rele(dp, FTAG);
6234         return (error);
6235 }
6236 
6237 /*
6238  * Sync the currently open TXG to disk for the specified pool.
6239  * This is somewhat similar to 'zfs_sync()'.
6240  * For cases that do not result in error this ioctl will wait for
6241  * the currently open TXG to commit before returning back to the caller.
6242  *
6243  * innvl: {
6244  *  "force" -> when true, force uberblock update even if there is no dirty data.
6245  *             In addition this will cause the vdev configuration to be written
6246  *             out including updating the zpool cache file. (boolean_t)
6247  * }
6248  *
6249  * onvl is unused
6250  */
6251 /* ARGSUSED */
6252 static int
6253 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
6254 {
6255         int err;
6256         boolean_t force;
6257         spa_t *spa;
6258 
6259         if ((err = spa_open(pool, &spa, FTAG)) != 0)
6260                 return (err);
6261 
6262         force = fnvlist_lookup_boolean_value(innvl, "force");
6263         if (force) {
6264                 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
6265                 vdev_config_dirty(spa->spa_root_vdev);
6266                 spa_config_exit(spa, SCL_CONFIG, FTAG);
6267         }
6268         txg_wait_synced(spa_get_dsl(spa), 0);
6269 
6270         spa_close(spa, FTAG);
6271 
6272         return (err);
6273 }
6274 
6275 /*
6276  * Load a user's wrapping key into the kernel.
6277  * innvl: {
6278  *     "hidden_args" -> { "wkeydata" -> value }
6279  *         raw uint8_t array of encryption wrapping key data (32 bytes)
6280  *     (optional) "noop" -> (value ignored)
6281  *         presence indicated key should only be verified, not loaded
6282  * }
6283  */
6284 /* ARGSUSED */
6285 static int
6286 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6287 {
6288         int ret = 0;
6289         dsl_crypto_params_t *dcp = NULL;
6290         nvlist_t *hidden_args;
6291         boolean_t noop = nvlist_exists(innvl, "noop");
6292 
6293         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6294                 ret = SET_ERROR(EINVAL);
6295                 goto error;
6296         }
6297 
6298         ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6299         if (ret != 0) {
6300                 ret = SET_ERROR(EINVAL);
6301                 goto error;
6302         }
6303 
6304         ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
6305             hidden_args, &dcp);
6306         if (ret != 0)
6307                 goto error;
6308 
6309         ret = spa_keystore_load_wkey(dsname, dcp, noop);
6310         if (ret != 0)
6311                 goto error;
6312 
6313         dsl_crypto_params_free(dcp, noop);
6314 
6315         return (0);
6316 
6317 error:
6318         dsl_crypto_params_free(dcp, B_TRUE);
6319         return (ret);
6320 }
6321 
6322 /*
6323  * Unload a user's wrapping key from the kernel.
6324  * Both innvl and outnvl are unused.
6325  */
6326 /* ARGSUSED */
6327 static int
6328 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6329 {
6330         int ret = 0;
6331 
6332         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6333                 ret = (SET_ERROR(EINVAL));
6334                 goto out;
6335         }
6336 
6337         ret = spa_keystore_unload_wkey(dsname);
6338         if (ret != 0)
6339                 goto out;
6340 
6341 out:
6342         return (ret);
6343 }
6344 
6345 /*
6346  * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
6347  * keylocation, pbkdf2salt, and  pbkdf2iters properties can also be specified
6348  * here to change how the key is derived in userspace.
6349  *
6350  * innvl: {
6351  *    "hidden_args" (optional) -> { "wkeydata" -> value }
6352  *         raw uint8_t array of new encryption wrapping key data (32 bytes)
6353  *    "props" (optional) -> { prop -> value }
6354  * }
6355  *
6356  * outnvl is unused
6357  */
6358 /* ARGSUSED */
6359 static int
6360 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6361 {
6362         int ret;
6363         uint64_t cmd = DCP_CMD_NONE;
6364         dsl_crypto_params_t *dcp = NULL;
6365         nvlist_t *args = NULL, *hidden_args = NULL;
6366 
6367         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6368                 ret = (SET_ERROR(EINVAL));
6369                 goto error;
6370         }
6371 
6372         (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
6373         (void) nvlist_lookup_nvlist(innvl, "props", &args);
6374         (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6375 
6376         ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
6377         if (ret != 0)
6378                 goto error;
6379 
6380         ret = spa_keystore_change_key(dsname, dcp);
6381         if (ret != 0)
6382                 goto error;
6383 
6384         dsl_crypto_params_free(dcp, B_FALSE);
6385 
6386         return (0);
6387 
6388 error:
6389         dsl_crypto_params_free(dcp, B_TRUE);
6390         return (ret);
6391 }
6392 
/*
 * Per-ioctl registration records, indexed by (ioc - ZFS_IOC_FIRST).
 * Entries are filled in by zfs_ioctl_register() and
 * zfs_ioctl_register_legacy().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6394 
6395 static void
6396 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6397     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6398     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6399 {
6400         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6401 
6402         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6403         ASSERT3U(ioc, <, ZFS_IOC_LAST);
6404         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6405         ASSERT3P(vec->zvec_func, ==, NULL);
6406 
6407         vec->zvec_legacy_func = func;
6408         vec->zvec_secpolicy = secpolicy;
6409         vec->zvec_namecheck = namecheck;
6410         vec->zvec_allow_log = log_history;
6411         vec->zvec_pool_check = pool_check;
6412 }
6413 
6414 /*
6415  * See the block comment at the beginning of this file for details on
6416  * each argument to this function.
6417  */
6418 static void
6419 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
6420     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6421     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
6422     boolean_t allow_log)
6423 {
6424         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6425 
6426         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6427         ASSERT3U(ioc, <, ZFS_IOC_LAST);
6428         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6429         ASSERT3P(vec->zvec_func, ==, NULL);
6430 
6431         /* if we are logging, the name must be valid */
6432         ASSERT(!allow_log || namecheck != NO_NAME);
6433 
6434         vec->zvec_name = name;
6435         vec->zvec_func = func;
6436         vec->zvec_secpolicy = secpolicy;
6437         vec->zvec_namecheck = namecheck;
6438         vec->zvec_pool_check = pool_check;
6439         vec->zvec_smush_outnvlist = smush_outnvlist;
6440         vec->zvec_allow_log = allow_log;
6441 }
6442 
6443 static void
6444 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6445     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
6446     zfs_ioc_poolcheck_t pool_check)
6447 {
6448         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6449             POOL_NAME, log_history, pool_check);
6450 }
6451 
6452 static void
6453 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6454     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
6455 {
6456         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6457             DATASET_NAME, B_FALSE, pool_check);
6458 }
6459 
6460 static void
6461 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6462 {
6463         zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6464             POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6465 }
6466 
6467 static void
6468 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6469     zfs_secpolicy_func_t *secpolicy)
6470 {
6471         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6472             NO_NAME, B_FALSE, POOL_CHECK_NONE);
6473 }
6474 
6475 static void
6476 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6477     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6478 {
6479         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6480             DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6481 }
6482 
6483 static void
6484 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6485 {
6486         zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6487             zfs_secpolicy_read);
6488 }
6489 
6490 static void
6491 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6492     zfs_secpolicy_func_t *secpolicy)
6493 {
6494         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6495             DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6496 }
6497 
6498 static void
6499 zfs_ioctl_init(void)
6500 {
6501         zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
6502             zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
6503             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6504 
6505         zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
6506             zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
6507             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
6508 
6509         zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
6510             zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
6511             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6512 
6513         zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
6514             zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
6515             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6516 
6517         zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
6518             zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
6519             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6520 
6521         zfs_ioctl_register("create", ZFS_IOC_CREATE,
6522             zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
6523             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6524 
6525         zfs_ioctl_register("clone", ZFS_IOC_CLONE,
6526             zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
6527             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6528 
6529         zfs_ioctl_register("remap", ZFS_IOC_REMAP,
6530             zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
6531             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
6532 
6533         zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
6534             zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
6535             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6536 
6537         zfs_ioctl_register("hold", ZFS_IOC_HOLD,
6538             zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
6539             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6540         zfs_ioctl_register("release", ZFS_IOC_RELEASE,
6541             zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
6542             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6543 
6544         zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
6545             zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
6546             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6547 
6548         zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
6549             zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
6550             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
6551 
6552         zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
6553             zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
6554             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6555 
6556         zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
6557             zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
6558             POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6559 
6560         zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
6561             zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
6562             POOL_NAME,
6563             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6564 
6565         zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
6566             zfs_ioc_channel_program, zfs_secpolicy_config,
6567             POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
6568             B_TRUE);
6569 
6570         zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
6571             zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
6572             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6573 
6574         zfs_ioctl_register("zpool_discard_checkpoint",
6575             ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
6576             zfs_secpolicy_config, POOL_NAME,
6577             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6578 
6579         zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
6580             zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
6581             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6582 
6583         zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
6584             zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
6585             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6586 
6587         zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
6588             zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
6589             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
6590 
6591         zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
6592             zfs_ioc_load_key, zfs_secpolicy_load_key,
6593             DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
6594         zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
6595             zfs_ioc_unload_key, zfs_secpolicy_load_key,
6596             DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
6597         zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
6598             zfs_ioc_change_key, zfs_secpolicy_change_key,
6599             DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
6600             B_TRUE, B_TRUE);
6601 
6602         /* IOCTLS that use the legacy function signature */
6603 
6604         zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
6605             zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
6606 
6607         zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
6608             zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6609         zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
6610             zfs_ioc_pool_scan);
6611         zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
6612             zfs_ioc_pool_upgrade);
6613         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
6614             zfs_ioc_vdev_add);
6615         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
6616             zfs_ioc_vdev_remove);
6617         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
6618             zfs_ioc_vdev_set_state);
6619         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
6620             zfs_ioc_vdev_attach);
6621         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
6622             zfs_ioc_vdev_detach);
6623         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
6624             zfs_ioc_vdev_setpath);
6625         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
6626             zfs_ioc_vdev_setfru);
6627         zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
6628             zfs_ioc_pool_set_props);
6629         zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
6630             zfs_ioc_vdev_split);
6631         zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
6632             zfs_ioc_pool_reguid);
6633 
6634         zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
6635             zfs_ioc_pool_configs, zfs_secpolicy_none);
6636         zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
6637             zfs_ioc_pool_tryimport, zfs_secpolicy_config);
6638         zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
6639             zfs_ioc_inject_fault, zfs_secpolicy_inject);
6640         zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
6641             zfs_ioc_clear_fault, zfs_secpolicy_inject);
6642         zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
6643             zfs_ioc_inject_list_next, zfs_secpolicy_inject);
6644 
6645         /*
6646          * pool destroy, and export don't log the history as part of
6647          * zfsdev_ioctl, but rather zfs_ioc_pool_export
6648          * does the logging of those commands.
6649          */
6650         zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
6651             zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6652         zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
6653             zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6654 
6655         zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
6656             zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6657         zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
6658             zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6659 
6660         zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
6661             zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
6662         zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
6663             zfs_ioc_dsobj_to_dsname,
6664             zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
6665         zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
6666             zfs_ioc_pool_get_history,
6667             zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
6668 
6669         zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
6670             zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6671 
6672         zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
6673             zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
6674         zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
6675             zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
6676 
6677         zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
6678             zfs_ioc_space_written);
6679         zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
6680             zfs_ioc_objset_recvd_props);
6681         zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
6682             zfs_ioc_next_obj);
6683         zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
6684             zfs_ioc_get_fsacl);
6685         zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
6686             zfs_ioc_objset_stats);
6687         zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
6688             zfs_ioc_objset_zplprops);
6689         zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
6690             zfs_ioc_dataset_list_next);
6691         zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
6692             zfs_ioc_snapshot_list_next);
6693         zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
6694             zfs_ioc_send_progress);
6695 
6696         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
6697             zfs_ioc_diff, zfs_secpolicy_diff);
6698         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
6699             zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
6700         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
6701             zfs_ioc_obj_to_path, zfs_secpolicy_diff);
6702         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
6703             zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
6704         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
6705             zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
6706         zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
6707             zfs_ioc_send, zfs_secpolicy_send);
6708 
6709         zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
6710             zfs_secpolicy_none);
6711         zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
6712             zfs_secpolicy_destroy);
6713         zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
6714             zfs_secpolicy_rename);
6715         zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
6716             zfs_secpolicy_recv);
6717         zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
6718             zfs_secpolicy_promote);
6719         zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
6720             zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
6721         zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
6722             zfs_secpolicy_set_fsacl);
6723 
6724         zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
6725             zfs_secpolicy_share, POOL_CHECK_NONE);
6726         zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
6727             zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
6728         zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
6729             zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
6730             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6731         zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
6732             zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
6733             POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6734 }
6735 
6736 int
6737 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6738     zfs_ioc_poolcheck_t check)
6739 {
6740         spa_t *spa;
6741         int error;
6742 
6743         ASSERT(type == POOL_NAME || type == DATASET_NAME);
6744 
6745         if (check & POOL_CHECK_NONE)
6746                 return (0);
6747 
6748         error = spa_open(name, &spa, FTAG);
6749         if (error == 0) {
6750                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6751                         error = SET_ERROR(EAGAIN);
6752                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6753                         error = SET_ERROR(EROFS);
6754                 spa_close(spa, FTAG);
6755         }
6756         return (error);
6757 }
6758 
6759 /*
6760  * Find a free minor number.
6761  */
6762 minor_t
6763 zfsdev_minor_alloc(void)
6764 {
6765         static minor_t last_minor;
6766         minor_t m;
6767 
6768         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6769 
6770         for (m = last_minor + 1; m != last_minor; m++) {
6771                 if (m > ZFSDEV_MAX_MINOR)
6772                         m = 1;
6773                 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6774                         last_minor = m;
6775                         return (m);
6776                 }
6777         }
6778 
6779         return (0);
6780 }
6781 
6782 static int
6783 zfs_ctldev_init(dev_t *devp)
6784 {
6785         minor_t minor;
6786         zfs_soft_state_t *zs;
6787 
6788         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6789         ASSERT(getminor(*devp) == 0);
6790 
6791         minor = zfsdev_minor_alloc();
6792         if (minor == 0)
6793                 return (SET_ERROR(ENXIO));
6794 
6795         if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6796                 return (SET_ERROR(EAGAIN));
6797 
6798         *devp = makedevice(getemajor(*devp), minor);
6799 
6800         zs = ddi_get_soft_state(zfsdev_state, minor);
6801         zs->zss_type = ZSST_CTLDEV;
6802         zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6803 
6804         return (0);
6805 }
6806 
6807 static void
6808 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
6809 {
6810         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6811 
6812         zfs_onexit_destroy(zo);
6813         ddi_soft_state_free(zfsdev_state, minor);
6814 }
6815 
6816 void *
6817 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6818 {
6819         zfs_soft_state_t *zp;
6820 
6821         zp = ddi_get_soft_state(zfsdev_state, minor);
6822         if (zp == NULL || zp->zss_type != which)
6823                 return (NULL);
6824 
6825         return (zp->zss_data);
6826 }
6827 
6828 static int
6829 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
6830 {
6831         int error = 0;
6832 
6833         if (getminor(*devp) != 0)
6834                 return (zvol_open(devp, flag, otyp, cr));
6835 
6836         /* This is the control device. Allocate a new minor if requested. */
6837         if (flag & FEXCL) {
6838                 mutex_enter(&zfsdev_state_lock);
6839                 error = zfs_ctldev_init(devp);
6840                 mutex_exit(&zfsdev_state_lock);
6841         }
6842 
6843         return (error);
6844 }
6845 
6846 static int
6847 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
6848 {
6849         zfs_onexit_t *zo;
6850         minor_t minor = getminor(dev);
6851 
6852         if (minor == 0)
6853                 return (0);
6854 
6855         mutex_enter(&zfsdev_state_lock);
6856         zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6857         if (zo == NULL) {
6858                 mutex_exit(&zfsdev_state_lock);
6859                 return (zvol_close(dev, flag, otyp, cr));
6860         }
6861         zfs_ctldev_destroy(zo, minor);
6862         mutex_exit(&zfsdev_state_lock);
6863 
6864         return (0);
6865 }
6866 
6867 static int
6868 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
6869 {
6870         zfs_cmd_t *zc;
6871         uint_t vecnum;
6872         int error, rc, len;
6873         minor_t minor = getminor(dev);
6874         const zfs_ioc_vec_t *vec;
6875         char *saved_poolname = NULL;
6876         nvlist_t *innvl = NULL;
6877 
6878         if (minor != 0 &&
6879             zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
6880                 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
6881 
6882         vecnum = cmd - ZFS_IOC_FIRST;
6883         ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
6884 
6885         if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6886                 return (SET_ERROR(EINVAL));
6887         vec = &zfs_ioc_vec[vecnum];
6888 
6889         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
6890 
6891         error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6892         if (error != 0) {
6893                 error = SET_ERROR(EFAULT);
6894                 goto out;
6895         }
6896 
6897         zc->zc_iflags = flag & FKIOCTL;
6898         if (zc->zc_nvlist_src_size != 0) {
6899                 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6900                     zc->zc_iflags, &innvl);
6901                 if (error != 0)
6902                         goto out;
6903         }
6904 
6905         /*
6906          * Ensure that all pool/dataset names are valid before we pass down to
6907          * the lower layers.
6908          */
6909         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6910         switch (vec->zvec_namecheck) {
6911         case POOL_NAME:
6912                 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6913                         error = SET_ERROR(EINVAL);
6914                 else
6915                         error = pool_status_check(zc->zc_name,
6916                             vec->zvec_namecheck, vec->zvec_pool_check);
6917                 break;
6918 
6919         case DATASET_NAME:
6920                 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6921                         error = SET_ERROR(EINVAL);
6922                 else
6923                         error = pool_status_check(zc->zc_name,
6924                             vec->zvec_namecheck, vec->zvec_pool_check);
6925                 break;
6926 
6927         case NO_NAME:
6928                 break;
6929         }
6930 
6931 
6932         if (error == 0)
6933                 error = vec->zvec_secpolicy(zc, innvl, cr);
6934 
6935         if (error != 0)
6936                 goto out;
6937 
6938         /* legacy ioctls can modify zc_name */
6939         len = strcspn(zc->zc_name, "/@#") + 1;
6940         saved_poolname = kmem_alloc(len, KM_SLEEP);
6941         (void) strlcpy(saved_poolname, zc->zc_name, len);
6942 
6943         if (vec->zvec_func != NULL) {
6944                 nvlist_t *outnvl;
6945                 int puterror = 0;
6946                 spa_t *spa;
6947                 nvlist_t *lognv = NULL;
6948 
6949                 ASSERT(vec->zvec_legacy_func == NULL);
6950 
6951                 /*
6952                  * Add the innvl to the lognv before calling the func,
6953                  * in case the func changes the innvl.
6954                  */
6955                 if (vec->zvec_allow_log) {
6956                         lognv = fnvlist_alloc();
6957                         fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6958                             vec->zvec_name);
6959                         if (!nvlist_empty(innvl)) {
6960                                 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6961                                     innvl);
6962                         }
6963                 }
6964 
6965                 outnvl = fnvlist_alloc();
6966                 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6967 
6968                 /*
6969                  * Some commands can partially execute, modify state, and still
6970                  * return an error.  In these cases, attempt to record what
6971                  * was modified.
6972                  */
6973                 if ((error == 0 ||
6974                     (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
6975                     vec->zvec_allow_log &&
6976                     spa_open(zc->zc_name, &spa, FTAG) == 0) {
6977                         if (!nvlist_empty(outnvl)) {
6978                                 fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6979                                     outnvl);
6980                         }
6981                         if (error != 0) {
6982                                 fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
6983                                     error);
6984                         }
6985                         (void) spa_history_log_nvl(spa, lognv);
6986                         spa_close(spa, FTAG);
6987                 }
6988                 fnvlist_free(lognv);
6989 
6990                 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6991                         int smusherror = 0;
6992                         if (vec->zvec_smush_outnvlist) {
6993                                 smusherror = nvlist_smush(outnvl,
6994                                     zc->zc_nvlist_dst_size);
6995                         }
6996                         if (smusherror == 0)
6997                                 puterror = put_nvlist(zc, outnvl);
6998                 }
6999 
7000                 if (puterror != 0)
7001                         error = puterror;
7002 
7003                 nvlist_free(outnvl);
7004         } else {
7005                 error = vec->zvec_legacy_func(zc);
7006         }
7007 
7008 out:
7009         nvlist_free(innvl);
7010         rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
7011         if (error == 0 && rc != 0)
7012                 error = SET_ERROR(EFAULT);
7013         if (error == 0 && vec->zvec_allow_log) {
7014                 char *s = tsd_get(zfs_allow_log_key);
7015                 if (s != NULL)
7016                         strfree(s);
7017                 (void) tsd_set(zfs_allow_log_key, saved_poolname);
7018         } else {
7019                 if (saved_poolname != NULL)
7020                         strfree(saved_poolname);
7021         }
7022 
7023         kmem_free(zc, sizeof (zfs_cmd_t));
7024         return (error);
7025 }
7026 
7027 static int
7028 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
7029 {
7030         if (cmd != DDI_ATTACH)
7031                 return (DDI_FAILURE);
7032 
7033         if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
7034             DDI_PSEUDO, 0) == DDI_FAILURE)
7035                 return (DDI_FAILURE);
7036 
7037         zfs_dip = dip;
7038 
7039         ddi_report_dev(dip);
7040 
7041         return (DDI_SUCCESS);
7042 }
7043 
7044 static int
7045 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7046 {
7047         if (spa_busy() || zfs_busy() || zvol_busy())
7048                 return (DDI_FAILURE);
7049 
7050         if (cmd != DDI_DETACH)
7051                 return (DDI_FAILURE);
7052 
7053         zfs_dip = NULL;
7054 
7055         ddi_prop_remove_all(dip);
7056         ddi_remove_minor_node(dip, NULL);
7057 
7058         return (DDI_SUCCESS);
7059 }
7060 
7061 /*ARGSUSED*/
7062 static int
7063 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
7064 {
7065         switch (infocmd) {
7066         case DDI_INFO_DEVT2DEVINFO:
7067                 *result = zfs_dip;
7068                 return (DDI_SUCCESS);
7069 
7070         case DDI_INFO_DEVT2INSTANCE:
7071                 *result = (void *)0;
7072                 return (DDI_SUCCESS);
7073         }
7074 
7075         return (DDI_FAILURE);
7076 }
7077 
7078 /*
7079  * OK, so this is a little weird.
7080  *
7081  * /dev/zfs is the control node, i.e. minor 0.
7082  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
7083  *
7084  * /dev/zfs has basically nothing to do except serve up ioctls,
7085  * so most of the standard driver entry points are in zvol.c.
7086  */
7087 static struct cb_ops zfs_cb_ops = {
7088         zfsdev_open,    /* open */
7089         zfsdev_close,   /* close */
7090         zvol_strategy,  /* strategy */
7091         nodev,          /* print */
7092         zvol_dump,      /* dump */
7093         zvol_read,      /* read */
7094         zvol_write,     /* write */
7095         zfsdev_ioctl,   /* ioctl */
7096         nodev,          /* devmap */
7097         nodev,          /* mmap */
7098         nodev,          /* segmap */
7099         nochpoll,       /* poll */
7100         ddi_prop_op,    /* prop_op */
7101         NULL,           /* streamtab */
7102         D_NEW | D_MP | D_64BIT,         /* Driver compatibility flag */
7103         CB_REV,         /* version */
7104         nodev,          /* async read */
7105         nodev,          /* async write */
7106 };
7107 
7108 static struct dev_ops zfs_dev_ops = {
7109         DEVO_REV,       /* version */
7110         0,              /* refcnt */
7111         zfs_info,       /* info */
7112         nulldev,        /* identify */
7113         nulldev,        /* probe */
7114         zfs_attach,     /* attach */
7115         zfs_detach,     /* detach */
7116         nodev,          /* reset */
7117         &zfs_cb_ops,        /* driver operations */
7118         NULL,           /* no bus operations */
7119         NULL,           /* power */
7120         ddi_quiesce_not_needed, /* quiesce */
7121 };
7122 
7123 static struct modldrv zfs_modldrv = {
7124         &mod_driverops,
7125         "ZFS storage pool",
7126         &zfs_dev_ops
7127 };
7128 
7129 static struct modlinkage modlinkage = {
7130         MODREV_1,
7131         (void *)&zfs_modlfs,
7132         (void *)&zfs_modldrv,
7133         NULL
7134 };
7135 
/*
 * Destructor registered for the zfs_allow_log_key TSD key: frees the
 * pool-name string that zfsdev_ioctl() stored in the slot.
 */
static void
zfs_allow_log_destroy(void *arg)
{
        strfree((char *)arg);
}
7142 
7143 int
7144 _init(void)
7145 {
7146         int error;
7147 
7148         spa_init(FREAD | FWRITE);
7149         zfs_init();
7150         zvol_init();
7151         zfs_ioctl_init();
7152 
7153         if ((error = mod_install(&modlinkage)) != 0) {
7154                 zvol_fini();
7155                 zfs_fini();
7156                 spa_fini();
7157                 return (error);
7158         }
7159 
7160         tsd_create(&zfs_fsyncer_key, NULL);
7161         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7162         tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7163 
7164         error = ldi_ident_from_mod(&modlinkage, &zfs_li);
7165         ASSERT(error == 0);
7166         mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
7167 
7168         return (0);
7169 }
7170 
7171 int
7172 _fini(void)
7173 {
7174         int error;
7175 
7176         if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
7177                 return (SET_ERROR(EBUSY));
7178 
7179         if ((error = mod_remove(&modlinkage)) != 0)
7180                 return (error);
7181 
7182         zvol_fini();
7183         zfs_fini();
7184         spa_fini();
7185         if (zfs_nfsshare_inited)
7186                 (void) ddi_modclose(nfs_mod);
7187         if (zfs_smbshare_inited)
7188                 (void) ddi_modclose(smbsrv_mod);
7189         if (zfs_nfsshare_inited || zfs_smbshare_inited)
7190                 (void) ddi_modclose(sharefs_mod);
7191 
7192         tsd_destroy(&zfs_fsyncer_key);
7193         ldi_ident_release(zfs_li);
7194         zfs_li = NULL;
7195         mutex_destroy(&zfs_share_lock);
7196 
7197         return (error);
7198 }
7199 
7200 int
7201 _info(struct modinfo *modinfop)
7202 {
7203         return (mod_info(&modlinkage, modinfop));
7204 }