1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
  28  * Portions Copyright 2011 Martin Matuska
  29  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
  30  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  31  * Copyright 2019 Joyent, Inc.
  32  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  33  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  34  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  35  * Copyright (c) 2014 Integros [integros.com]
  36  * Copyright 2016 Toomas Soome <tsoome@me.com>
  37  * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
  38  * Copyright 2017 RackTop Systems.
  39  * Copyright (c) 2017, Datto, Inc. All rights reserved.
  40  */
  41 
  42 /*
  43  * ZFS ioctls.
  44  *
  45  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
  46  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
  47  *
  48  * There are two ways that we handle ioctls: the legacy way where almost
  49  * all of the logic is in the ioctl callback, and the new way where most
  50  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
  51  *
  52  * Non-legacy ioctls should be registered by calling
  53  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
  54  * from userland by lzc_ioctl().
  55  *
  56  * The registration arguments are as follows:
  57  *
  58  * const char *name
  59  *   The name of the ioctl.  This is used for history logging.  If the
  60  *   ioctl returns successfully (the callback returns 0), and allow_log
  61  *   is true, then a history log entry will be recorded with the input &
  62  *   output nvlists.  The log entry can be printed with "zpool history -i".
  63  *
  64  * zfs_ioc_t ioc
  65  *   The ioctl request number, which userland will pass to ioctl(2).
  66  *   The ioctl numbers can change from release to release, because
  67  *   the caller (libzfs) must be matched to the kernel.
  68  *
  69  * zfs_secpolicy_func_t *secpolicy
  70  *   This function will be called before the zfs_ioc_func_t, to
  71  *   determine if this operation is permitted.  It should return EPERM
  72  *   on failure, and 0 on success.  Checks include determining if the
  73  *   dataset is visible in this zone, and if the user has either all
  74  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
  75  *   to do this operation on this dataset with "zfs allow".
  76  *
  77  * zfs_ioc_namecheck_t namecheck
  78  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
  79  *   name, a dataset name, or nothing.  If the name is not well-formed,
  80  *   the ioctl will fail and the callback will not be called.
  81  *   Therefore, the callback can assume that the name is well-formed
  82  *   (e.g. is null-terminated, doesn't have more than one '@' character,
  83  *   doesn't have invalid characters).
  84  *
  85  * zfs_ioc_poolcheck_t pool_check
  86  *   This specifies requirements on the pool state.  If the pool does
  87  *   not meet them (is suspended or is readonly), the ioctl will fail
  88  *   and the callback will not be called.  If any checks are specified
  89  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
  90  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
  91  *   POOL_CHECK_READONLY).
  92  *
  93  * boolean_t smush_outnvlist
  94  *   If smush_outnvlist is true, then the output is presumed to be a
  95  *   list of errors, and it will be "smushed" down to fit into the
  96  *   caller's buffer, by removing some entries and replacing them with a
  97  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
  98  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
  99  *   outnvlist does not fit into the userland-provided buffer, then the
 100  *   ioctl will fail with ENOMEM.
 101  *
 102  * zfs_ioc_func_t *func
 103  *   The callback function that will perform the operation.
 104  *
 105  *   The callback should return 0 on success, or an error number on
 106  *   failure.  If the function fails, the userland ioctl will return -1,
 107  *   and errno will be set to the callback's return value.  The callback
 108  *   will be called with the following arguments:
 109  *
 110  *   const char *name
 111  *     The name of the pool or dataset to operate on, from
 112  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
 113  *     expected type (pool, dataset, or none).
 114  *
 115  *   nvlist_t *innvl
 116  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
 117  *     NULL if no input nvlist was provided.  Changes to this nvlist are
 118  *     ignored.  If the input nvlist could not be deserialized, the
 119  *     ioctl will fail and the callback will not be called.
 120  *
 121  *   nvlist_t *outnvl
 122  *     The output nvlist, initially empty.  The callback can fill it in,
 123  *     and it will be returned to userland by serializing it into
 124  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
 125  *     fails (e.g. because the caller didn't supply a large enough
 126  *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
 128  *
 129  *     There are two typical uses of the output nvlist:
 130  *       - To return state, e.g. property values.  In this case,
 131  *         smush_outnvlist should be false.  If the buffer was not large
 132  *         enough, the caller will reallocate a larger buffer and try
 133  *         the ioctl again.
 134  *
 135  *       - To return multiple errors from an ioctl which makes on-disk
 136  *         changes.  In this case, smush_outnvlist should be true.
 137  *         Ioctls which make on-disk modifications should generally not
 138  *         use the outnvl if they succeed, because the caller can not
 139  *         distinguish between the operation failing, and
 140  *         deserialization failing.
 141  */
 142 
 143 #include <sys/types.h>
 144 #include <sys/param.h>
 145 #include <sys/errno.h>
 146 #include <sys/uio.h>
 147 #include <sys/buf.h>
 148 #include <sys/modctl.h>
 149 #include <sys/open.h>
 150 #include <sys/file.h>
 151 #include <sys/kmem.h>
 152 #include <sys/conf.h>
 153 #include <sys/cmn_err.h>
 154 #include <sys/stat.h>
 155 #include <sys/zfs_ioctl.h>
 156 #include <sys/zfs_vfsops.h>
 157 #include <sys/zfs_znode.h>
 158 #include <sys/zap.h>
 159 #include <sys/spa.h>
 160 #include <sys/spa_impl.h>
 161 #include <sys/vdev.h>
 162 #include <sys/priv_impl.h>
 163 #include <sys/dmu.h>
 164 #include <sys/dsl_dir.h>
 165 #include <sys/dsl_dataset.h>
 166 #include <sys/dsl_prop.h>
 167 #include <sys/dsl_deleg.h>
 168 #include <sys/dmu_objset.h>
 169 #include <sys/dmu_impl.h>
 170 #include <sys/dmu_tx.h>
 171 #include <sys/ddi.h>
 172 #include <sys/sunddi.h>
 173 #include <sys/sunldi.h>
 174 #include <sys/policy.h>
 175 #include <sys/zone.h>
 176 #include <sys/nvpair.h>
 177 #include <sys/pathname.h>
 178 #include <sys/mount.h>
 179 #include <sys/sdt.h>
 180 #include <sys/fs/zfs.h>
 181 #include <sys/zfs_ctldir.h>
 182 #include <sys/zfs_dir.h>
 183 #include <sys/zfs_onexit.h>
 184 #include <sys/zvol.h>
 185 #include <sys/dsl_scan.h>
 186 #include <sharefs/share.h>
 187 #include <sys/dmu_objset.h>
 188 #include <sys/dmu_recv.h>
 189 #include <sys/dmu_send.h>
 190 #include <sys/dsl_destroy.h>
 191 #include <sys/dsl_bookmark.h>
 192 #include <sys/dsl_userhold.h>
 193 #include <sys/zfeature.h>
 194 #include <sys/zcp.h>
 195 #include <sys/zio_checksum.h>
 196 #include <sys/vdev_removal.h>
 197 #include <sys/vdev_impl.h>
 198 #include <sys/vdev_initialize.h>
 199 #include <sys/vdev_trim.h>
 200 #include <sys/dsl_crypt.h>
 201 
 202 #include "zfs_namecheck.h"
 203 #include "zfs_prop.h"
 204 #include "zfs_deleg.h"
 205 #include "zfs_comutil.h"
 206 
 207 #include "lua.h"
 208 #include "lauxlib.h"
 209 
/* Defined outside this file. */
extern struct modlfs zfs_modlfs;

/* Defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/* File-scope state; NOTE(review): initialization is not visible in this part of the file. */
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;

/* NOTE(review): presumably thread-specific-data keys, judging by "rrw_tsd_key" -- confirm. */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
static uint_t zfs_allow_log_key;

/*
 * Callback signatures used by the ioctl vector (see the block comment at the
 * top of this file):
 *   zfs_ioc_legacy_func_t - legacy callback, handed the raw zfs_cmd_t.
 *   zfs_ioc_func_t        - new-style callback: (name, innvl, outnvl).
 *   zfs_secpolicy_func_t  - permission check run before the ioctl callback.
 */
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
 225 
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name: nothing, a pool
 * name, or a dataset name.
 */
typedef enum {
        NO_NAME,
        POOL_NAME,
        DATASET_NAME
} zfs_ioc_namecheck_t;
 231 
/*
 * Requirements on the pool state for an ioctl.  The values are distinct
 * bits so that several checks can be or-ed together, e.g.
 * POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY.
 */
typedef enum {
        POOL_CHECK_NONE         = 1 << 0,
        POOL_CHECK_SUSPENDED    = 1 << 1,
        POOL_CHECK_READONLY     = 1 << 2,
} zfs_ioc_poolcheck_t;
 237 
/*
 * Per-ioctl registration record.  The fields mirror the registration
 * arguments described in the block comment at the top of this file.
 */
typedef struct zfs_ioc_vec {
        zfs_ioc_legacy_func_t   *zvec_legacy_func;      /* legacy callback */
        zfs_ioc_func_t          *zvec_func;             /* new-style callback */
        zfs_secpolicy_func_t    *zvec_secpolicy;        /* permission check */
        zfs_ioc_namecheck_t     zvec_namecheck;         /* expected zc_name */
        boolean_t               zvec_allow_log;         /* history logging */
        zfs_ioc_poolcheck_t     zvec_pool_check;        /* pool state checks */
        boolean_t               zvec_smush_outnvlist;   /* shrink error list */
        const char              *zvec_name;             /* name for history */
} zfs_ioc_vec_t;
 248 
/*
 * Delegated-permission names for the user/group/project space properties.
 * This array is indexed by zfs_userquota_prop_t, so the entry order must
 * stay in step with that enum.
 */
static const char *userquota_perms[] = {
        ZFS_DELEG_PERM_USERUSED,
        ZFS_DELEG_PERM_USERQUOTA,
        ZFS_DELEG_PERM_GROUPUSED,
        ZFS_DELEG_PERM_GROUPQUOTA,
        ZFS_DELEG_PERM_USEROBJUSED,
        ZFS_DELEG_PERM_USEROBJQUOTA,
        ZFS_DELEG_PERM_GROUPOBJUSED,
        ZFS_DELEG_PERM_GROUPOBJQUOTA,
        ZFS_DELEG_PERM_PROJECTUSED,
        ZFS_DELEG_PERM_PROJECTQUOTA,
        ZFS_DELEG_PERM_PROJECTOBJUSED,
        ZFS_DELEG_PERM_PROJECTOBJQUOTA,
};
 264 
/*
 * Forward declarations.  The definitions are not visible in this part of
 * the file.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
 277 
 278 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 279 void
 280 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 281 {
 282         const char *newfile;
 283         char buf[512];
 284         va_list adx;
 285 
 286         /*
 287          * Get rid of annoying "../common/" prefix to filename.
 288          */
 289         newfile = strrchr(file, '/');
 290         if (newfile != NULL) {
 291                 newfile = newfile + 1; /* Get rid of leading / */
 292         } else {
 293                 newfile = file;
 294         }
 295 
 296         va_start(adx, fmt);
 297         (void) vsnprintf(buf, sizeof (buf), fmt, adx);
 298         va_end(adx);
 299 
 300         /*
 301          * To get this data, use the zfs-dprintf probe as so:
 302          * dtrace -q -n 'zfs-dprintf \
 303          *      /stringof(arg0) == "dbuf.c"/ \
 304          *      {printf("%s: %s", stringof(arg1), stringof(arg3))}'
 305          * arg0 = file name
 306          * arg1 = function name
 307          * arg2 = line number
 308          * arg3 = message
 309          */
 310         DTRACE_PROBE4(zfs__dprintf,
 311             char *, newfile, char *, func, int, line, char *, buf);
 312 }
 313 
 314 static void
 315 history_str_free(char *buf)
 316 {
 317         kmem_free(buf, HIS_MAX_RECORD_LEN);
 318 }
 319 
 320 static char *
 321 history_str_get(zfs_cmd_t *zc)
 322 {
 323         char *buf;
 324 
 325         if (zc->zc_history == 0)
 326                 return (NULL);
 327 
 328         buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 329         if (copyinstr((void *)(uintptr_t)zc->zc_history,
 330             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
 331                 history_str_free(buf);
 332                 return (NULL);
 333         }
 334 
 335         buf[HIS_MAX_RECORD_LEN -1] = '\0';
 336 
 337         return (buf);
 338 }
 339 
 340 /*
 341  * Check to see if the named dataset is currently defined as bootable
 342  */
 343 static boolean_t
 344 zfs_is_bootfs(const char *name)
 345 {
 346         objset_t *os;
 347 
 348         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 349                 boolean_t ret;
 350                 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
 351                 dmu_objset_rele(os, FTAG);
 352                 return (ret);
 353         }
 354         return (B_FALSE);
 355 }
 356 
 357 /*
 358  * Return non-zero if the spa version is less than requested version.
 359  */
 360 static int
 361 zfs_earlier_version(const char *name, int version)
 362 {
 363         spa_t *spa;
 364 
 365         if (spa_open(name, &spa, FTAG) == 0) {
 366                 if (spa_version(spa) < version) {
 367                         spa_close(spa, FTAG);
 368                         return (1);
 369                 }
 370                 spa_close(spa, FTAG);
 371         }
 372         return (0);
 373 }
 374 
 375 /*
 376  * Return TRUE if the ZPL version is less than requested version.
 377  */
 378 static boolean_t
 379 zpl_earlier_version(const char *name, int version)
 380 {
 381         objset_t *os;
 382         boolean_t rc = B_TRUE;
 383 
 384         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 385                 uint64_t zplversion;
 386 
 387                 if (dmu_objset_type(os) != DMU_OST_ZFS) {
 388                         dmu_objset_rele(os, FTAG);
 389                         return (B_TRUE);
 390                 }
 391                 /* XXX reading from non-owned objset */
 392                 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 393                         rc = zplversion < version;
 394                 dmu_objset_rele(os, FTAG);
 395         }
 396         return (rc);
 397 }
 398 
 399 static void
 400 zfs_log_history(zfs_cmd_t *zc)
 401 {
 402         spa_t *spa;
 403         char *buf;
 404 
 405         if ((buf = history_str_get(zc)) == NULL)
 406                 return;
 407 
 408         if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 409                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 410                         (void) spa_history_log(spa, buf);
 411                 spa_close(spa, FTAG);
 412         }
 413         history_str_free(buf);
 414 }
 415 
 416 /*
 417  * Policy for top-level read operations (list pools).  Requires no privileges,
 418  * and can be used in the local zone, as there is no associated dataset.
 419  */
 420 /* ARGSUSED */
 421 static int
 422 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 423 {
 424         return (0);
 425 }
 426 
 427 /*
 428  * Policy for dataset read operations (list children, get statistics).  Requires
 429  * no privileges, but must be visible in the local zone.
 430  */
 431 /* ARGSUSED */
 432 static int
 433 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 434 {
 435         if (INGLOBALZONE(curproc) ||
 436             zone_dataset_visible(zc->zc_name, NULL))
 437                 return (0);
 438 
 439         return (SET_ERROR(ENOENT));
 440 }
 441 
 442 static int
 443 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 444 {
 445         int writable = 1;
 446 
 447         /*
 448          * The dataset must be visible by this zone -- check this first
 449          * so they don't see EPERM on something they shouldn't know about.
 450          */
 451         if (!INGLOBALZONE(curproc) &&
 452             !zone_dataset_visible(dataset, &writable))
 453                 return (SET_ERROR(ENOENT));
 454 
 455         if (INGLOBALZONE(curproc)) {
 456                 /*
 457                  * If the fs is zoned, only root can access it from the
 458                  * global zone.
 459                  */
 460                 if (secpolicy_zfs(cr) && zoned)
 461                         return (SET_ERROR(EPERM));
 462         } else {
 463                 /*
 464                  * If we are in a local zone, the 'zoned' property must be set.
 465                  */
 466                 if (!zoned)
 467                         return (SET_ERROR(EPERM));
 468 
 469                 /* must be writable by this zone */
 470                 if (!writable)
 471                         return (SET_ERROR(EPERM));
 472         }
 473         return (0);
 474 }
 475 
 476 static int
 477 zfs_dozonecheck(const char *dataset, cred_t *cr)
 478 {
 479         uint64_t zoned;
 480 
 481         if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
 482                 return (SET_ERROR(ENOENT));
 483 
 484         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 485 }
 486 
 487 static int
 488 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 489 {
 490         uint64_t zoned;
 491 
 492         if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
 493                 return (SET_ERROR(ENOENT));
 494 
 495         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 496 }
 497 
 498 static int
 499 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
 500     const char *perm, cred_t *cr)
 501 {
 502         int error;
 503 
 504         error = zfs_dozonecheck_ds(name, ds, cr);
 505         if (error == 0) {
 506                 error = secpolicy_zfs(cr);
 507                 if (error != 0)
 508                         error = dsl_deleg_access_impl(ds, perm, cr);
 509         }
 510         return (error);
 511 }
 512 
 513 static int
 514 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 515 {
 516         int error;
 517         dsl_dataset_t *ds;
 518         dsl_pool_t *dp;
 519 
 520         /*
 521          * First do a quick check for root in the global zone, which
 522          * is allowed to do all write_perms.  This ensures that zfs_ioc_*
 523          * will get to handle nonexistent datasets.
 524          */
 525         if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
 526                 return (0);
 527 
 528         error = dsl_pool_hold(name, FTAG, &dp);
 529         if (error != 0)
 530                 return (error);
 531 
 532         error = dsl_dataset_hold(dp, name, FTAG, &ds);
 533         if (error != 0) {
 534                 dsl_pool_rele(dp, FTAG);
 535                 return (error);
 536         }
 537 
 538         error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
 539 
 540         dsl_dataset_rele(ds, FTAG);
 541         dsl_pool_rele(dp, FTAG);
 542         return (error);
 543 }
 544 
 545 /*
 546  * Policy for setting the security label property.
 547  *
 548  * Returns 0 for success, non-zero for access and other errors.
 549  */
 550 static int
 551 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 552 {
 553         char            ds_hexsl[MAXNAMELEN];
 554         bslabel_t       ds_sl, new_sl;
 555         boolean_t       new_default = FALSE;
 556         uint64_t        zoned;
 557         int             needed_priv = -1;
 558         int             error;
 559 
 560         /* First get the existing dataset label. */
 561         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 562             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 563         if (error != 0)
 564                 return (SET_ERROR(EPERM));
 565 
 566         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 567                 new_default = TRUE;
 568 
 569         /* The label must be translatable */
 570         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 571                 return (SET_ERROR(EINVAL));
 572 
 573         /*
 574          * In a non-global zone, disallow attempts to set a label that
 575          * doesn't match that of the zone; otherwise no other checks
 576          * are needed.
 577          */
 578         if (!INGLOBALZONE(curproc)) {
 579                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 580                         return (SET_ERROR(EPERM));
 581                 return (0);
 582         }
 583 
 584         /*
 585          * For global-zone datasets (i.e., those whose zoned property is
 586          * "off", verify that the specified new label is valid for the
 587          * global zone.
 588          */
 589         if (dsl_prop_get_integer(name,
 590             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 591                 return (SET_ERROR(EPERM));
 592         if (!zoned) {
 593                 if (zfs_check_global_label(name, strval) != 0)
 594                         return (SET_ERROR(EPERM));
 595         }
 596 
 597         /*
 598          * If the existing dataset label is nondefault, check if the
 599          * dataset is mounted (label cannot be changed while mounted).
 600          * Get the zfsvfs; if there isn't one, then the dataset isn't
 601          * mounted (or isn't a dataset, doesn't exist, ...).
 602          */
 603         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 604                 objset_t *os;
 605                 static char *setsl_tag = "setsl_tag";
 606 
 607                 /*
 608                  * Try to own the dataset; abort if there is any error,
 609                  * (e.g., already mounted, in use, or other error).
 610                  */
 611                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
 612                     setsl_tag, &os);
 613                 if (error != 0)
 614                         return (SET_ERROR(EPERM));
 615 
 616                 dmu_objset_disown(os, B_TRUE, setsl_tag);
 617 
 618                 if (new_default) {
 619                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 620                         goto out_check;
 621                 }
 622 
 623                 if (hexstr_to_label(strval, &new_sl) != 0)
 624                         return (SET_ERROR(EPERM));
 625 
 626                 if (blstrictdom(&ds_sl, &new_sl))
 627                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 628                 else if (blstrictdom(&new_sl, &ds_sl))
 629                         needed_priv = PRIV_FILE_UPGRADE_SL;
 630         } else {
 631                 /* dataset currently has a default label */
 632                 if (!new_default)
 633                         needed_priv = PRIV_FILE_UPGRADE_SL;
 634         }
 635 
 636 out_check:
 637         if (needed_priv != -1)
 638                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 639         return (0);
 640 }
 641 
 642 static int
 643 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
 644     cred_t *cr)
 645 {
 646         char *strval;
 647 
 648         /*
 649          * Check permissions for special properties.
 650          */
 651         switch (prop) {
 652         case ZFS_PROP_ZONED:
 653                 /*
 654                  * Disallow setting of 'zoned' from within a local zone.
 655                  */
 656                 if (!INGLOBALZONE(curproc))
 657                         return (SET_ERROR(EPERM));
 658                 break;
 659 
 660         case ZFS_PROP_QUOTA:
 661         case ZFS_PROP_FILESYSTEM_LIMIT:
 662         case ZFS_PROP_SNAPSHOT_LIMIT:
 663                 if (!INGLOBALZONE(curproc)) {
 664                         uint64_t zoned;
 665                         char setpoint[ZFS_MAX_DATASET_NAME_LEN];
 666                         /*
 667                          * Unprivileged users are allowed to modify the
 668                          * limit on things *under* (ie. contained by)
 669                          * the thing they own.
 670                          */
 671                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
 672                             setpoint))
 673                                 return (SET_ERROR(EPERM));
 674                         if (!zoned || strlen(dsname) <= strlen(setpoint))
 675                                 return (SET_ERROR(EPERM));
 676                 }
 677                 break;
 678 
 679         case ZFS_PROP_MLSLABEL:
 680                 if (!is_system_labeled())
 681                         return (SET_ERROR(EPERM));
 682 
 683                 if (nvpair_value_string(propval, &strval) == 0) {
 684                         int err;
 685 
 686                         err = zfs_set_slabel_policy(dsname, strval, CRED());
 687                         if (err != 0)
 688                                 return (err);
 689                 }
 690                 break;
 691         }
 692 
 693         return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 694 }
 695 
 696 /* ARGSUSED */
 697 static int
 698 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 699 {
 700         int error;
 701 
 702         error = zfs_dozonecheck(zc->zc_name, cr);
 703         if (error != 0)
 704                 return (error);
 705 
 706         /*
 707          * permission to set permissions will be evaluated later in
 708          * dsl_deleg_can_allow()
 709          */
 710         return (0);
 711 }
 712 
 713 /* ARGSUSED */
 714 static int
 715 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 716 {
 717         return (zfs_secpolicy_write_perms(zc->zc_name,
 718             ZFS_DELEG_PERM_ROLLBACK, cr));
 719 }
 720 
 721 /* ARGSUSED */
 722 static int
 723 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 724 {
 725         dsl_pool_t *dp;
 726         dsl_dataset_t *ds;
 727         char *cp;
 728         int error;
 729 
 730         /*
 731          * Generate the current snapshot name from the given objsetid, then
 732          * use that name for the secpolicy/zone checks.
 733          */
 734         cp = strchr(zc->zc_name, '@');
 735         if (cp == NULL)
 736                 return (SET_ERROR(EINVAL));
 737         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 738         if (error != 0)
 739                 return (error);
 740 
 741         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 742         if (error != 0) {
 743                 dsl_pool_rele(dp, FTAG);
 744                 return (error);
 745         }
 746 
 747         dsl_dataset_name(ds, zc->zc_name);
 748 
 749         error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 750             ZFS_DELEG_PERM_SEND, cr);
 751         dsl_dataset_rele(ds, FTAG);
 752         dsl_pool_rele(dp, FTAG);
 753 
 754         return (error);
 755 }
 756 
 757 /* ARGSUSED */
 758 static int
 759 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 760 {
 761         return (zfs_secpolicy_write_perms(zc->zc_name,
 762             ZFS_DELEG_PERM_SEND, cr));
 763 }
 764 
 765 /* ARGSUSED */
 766 static int
 767 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 768 {
 769         vnode_t *vp;
 770         int error;
 771 
 772         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
 773             NO_FOLLOW, NULL, &vp)) != 0)
 774                 return (error);
 775 
 776         /* Now make sure mntpnt and dataset are ZFS */
 777 
 778         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
 779             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 780             zc->zc_name) != 0)) {
 781                 VN_RELE(vp);
 782                 return (SET_ERROR(EPERM));
 783         }
 784 
 785         VN_RELE(vp);
 786         return (dsl_deleg_access(zc->zc_name,
 787             ZFS_DELEG_PERM_SHARE, cr));
 788 }
 789 
 790 int
 791 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 792 {
 793         if (secpolicy_nfs(cr) == 0) {
 794                 return (0);
 795         } else {
 796                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 797         }
 798 }
 799 
 800 int
 801 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 802 {
 803         if (secpolicy_smb(cr) == 0) {
 804                 return (0);
 805         } else {
 806                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 807         }
 808 }
 809 
 810 static int
 811 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 812 {
 813         char *cp;
 814 
 815         /*
 816          * Remove the @bla or /bla from the end of the name to get the parent.
 817          */
 818         (void) strncpy(parent, datasetname, parentsize);
 819         cp = strrchr(parent, '@');
 820         if (cp != NULL) {
 821                 cp[0] = '\0';
 822         } else {
 823                 cp = strrchr(parent, '/');
 824                 if (cp == NULL)
 825                         return (SET_ERROR(ENOENT));
 826                 cp[0] = '\0';
 827         }
 828 
 829         return (0);
 830 }
 831 
 832 int
 833 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 834 {
 835         int error;
 836 
 837         if ((error = zfs_secpolicy_write_perms(name,
 838             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 839                 return (error);
 840 
 841         return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
 842 }
 843 
 844 /* ARGSUSED */
 845 static int
 846 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 847 {
 848         return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
 849 }
 850 
 851 /*
 852  * Destroying snapshots with delegated permissions requires
 853  * descendant mount and destroy permissions.
 854  */
 855 /* ARGSUSED */
 856 static int
 857 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 858 {
 859         nvlist_t *snaps;
 860         nvpair_t *pair, *nextpair;
 861         int error = 0;
 862 
 863         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 864                 return (SET_ERROR(EINVAL));
 865         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 866             pair = nextpair) {
 867                 nextpair = nvlist_next_nvpair(snaps, pair);
 868                 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
 869                 if (error == ENOENT) {
 870                         /*
 871                          * Ignore any snapshots that don't exist (we consider
 872                          * them "already destroyed").  Remove the name from the
 873                          * nvl here in case the snapshot is created between
 874                          * now and when we try to destroy it (in which case
 875                          * we don't want to destroy it since we haven't
 876                          * checked for permission).
 877                          */
 878                         fnvlist_remove_nvpair(snaps, pair);
 879                         error = 0;
 880                 }
 881                 if (error != 0)
 882                         break;
 883         }
 884 
 885         return (error);
 886 }
 887 
 888 int
 889 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 890 {
 891         char    parentname[ZFS_MAX_DATASET_NAME_LEN];
 892         int     error;
 893 
 894         if ((error = zfs_secpolicy_write_perms(from,
 895             ZFS_DELEG_PERM_RENAME, cr)) != 0)
 896                 return (error);
 897 
 898         if ((error = zfs_secpolicy_write_perms(from,
 899             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 900                 return (error);
 901 
 902         if ((error = zfs_get_parent(to, parentname,
 903             sizeof (parentname))) != 0)
 904                 return (error);
 905 
 906         if ((error = zfs_secpolicy_write_perms(parentname,
 907             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 908                 return (error);
 909 
 910         if ((error = zfs_secpolicy_write_perms(parentname,
 911             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 912                 return (error);
 913 
 914         return (error);
 915 }
 916 
 917 /* ARGSUSED */
 918 static int
 919 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 920 {
 921         return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
 922 }
 923 
 924 /* ARGSUSED */
 925 static int
 926 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 927 {
 928         dsl_pool_t *dp;
 929         dsl_dataset_t *clone;
 930         int error;
 931 
 932         error = zfs_secpolicy_write_perms(zc->zc_name,
 933             ZFS_DELEG_PERM_PROMOTE, cr);
 934         if (error != 0)
 935                 return (error);
 936 
 937         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 938         if (error != 0)
 939                 return (error);
 940 
 941         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
 942 
 943         if (error == 0) {
 944                 char parentname[ZFS_MAX_DATASET_NAME_LEN];
 945                 dsl_dataset_t *origin = NULL;
 946                 dsl_dir_t *dd;
 947                 dd = clone->ds_dir;
 948 
 949                 error = dsl_dataset_hold_obj(dd->dd_pool,
 950                     dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
 951                 if (error != 0) {
 952                         dsl_dataset_rele(clone, FTAG);
 953                         dsl_pool_rele(dp, FTAG);
 954                         return (error);
 955                 }
 956 
 957                 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
 958                     ZFS_DELEG_PERM_MOUNT, cr);
 959 
 960                 dsl_dataset_name(origin, parentname);
 961                 if (error == 0) {
 962                         error = zfs_secpolicy_write_perms_ds(parentname, origin,
 963                             ZFS_DELEG_PERM_PROMOTE, cr);
 964                 }
 965                 dsl_dataset_rele(clone, FTAG);
 966                 dsl_dataset_rele(origin, FTAG);
 967         }
 968         dsl_pool_rele(dp, FTAG);
 969         return (error);
 970 }
 971 
 972 /* ARGSUSED */
 973 static int
 974 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 975 {
 976         int error;
 977 
 978         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 979             ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
 980                 return (error);
 981 
 982         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 983             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 984                 return (error);
 985 
 986         return (zfs_secpolicy_write_perms(zc->zc_name,
 987             ZFS_DELEG_PERM_CREATE, cr));
 988 }
 989 
 990 int
 991 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 992 {
 993         return (zfs_secpolicy_write_perms(name,
 994             ZFS_DELEG_PERM_SNAPSHOT, cr));
 995 }
 996 
 997 /*
 998  * Check for permission to create each snapshot in the nvlist.
 999  */
1000 /* ARGSUSED */
1001 static int
1002 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1003 {
1004         nvlist_t *snaps;
1005         int error = 0;
1006         nvpair_t *pair;
1007 
1008         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1009                 return (SET_ERROR(EINVAL));
1010         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1011             pair = nvlist_next_nvpair(snaps, pair)) {
1012                 char *name = nvpair_name(pair);
1013                 char *atp = strchr(name, '@');
1014 
1015                 if (atp == NULL) {
1016                         error = SET_ERROR(EINVAL);
1017                         break;
1018                 }
1019                 *atp = '\0';
1020                 error = zfs_secpolicy_snapshot_perms(name, cr);
1021                 *atp = '@';
1022                 if (error != 0)
1023                         break;
1024         }
1025         return (error);
1026 }
1027 
1028 /*
1029  * Check for permission to create each snapshot in the nvlist.
1030  */
1031 /* ARGSUSED */
1032 static int
1033 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1034 {
1035         int error = 0;
1036 
1037         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1038             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1039                 char *name = nvpair_name(pair);
1040                 char *hashp = strchr(name, '#');
1041 
1042                 if (hashp == NULL) {
1043                         error = SET_ERROR(EINVAL);
1044                         break;
1045                 }
1046                 *hashp = '\0';
1047                 error = zfs_secpolicy_write_perms(name,
1048                     ZFS_DELEG_PERM_BOOKMARK, cr);
1049                 *hashp = '#';
1050                 if (error != 0)
1051                         break;
1052         }
1053         return (error);
1054 }
1055 
1056 /* ARGSUSED */
1057 static int
1058 zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1059 {
1060         return (zfs_secpolicy_write_perms(zc->zc_name,
1061             ZFS_DELEG_PERM_REMAP, cr));
1062 }
1063 
1064 /* ARGSUSED */
1065 static int
1066 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1067 {
1068         nvpair_t *pair, *nextpair;
1069         int error = 0;
1070 
1071         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1072             pair = nextpair) {
1073                 char *name = nvpair_name(pair);
1074                 char *hashp = strchr(name, '#');
1075                 nextpair = nvlist_next_nvpair(innvl, pair);
1076 
1077                 if (hashp == NULL) {
1078                         error = SET_ERROR(EINVAL);
1079                         break;
1080                 }
1081 
1082                 *hashp = '\0';
1083                 error = zfs_secpolicy_write_perms(name,
1084                     ZFS_DELEG_PERM_DESTROY, cr);
1085                 *hashp = '#';
1086                 if (error == ENOENT) {
1087                         /*
1088                          * Ignore any filesystems that don't exist (we consider
1089                          * their bookmarks "already destroyed").  Remove
1090                          * the name from the nvl here in case the filesystem
1091                          * is created between now and when we try to destroy
1092                          * the bookmark (in which case we don't want to
1093                          * destroy it since we haven't checked for permission).
1094                          */
1095                         fnvlist_remove_nvpair(innvl, pair);
1096                         error = 0;
1097                 }
1098                 if (error != 0)
1099                         break;
1100         }
1101 
1102         return (error);
1103 }
1104 
1105 /* ARGSUSED */
1106 static int
1107 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1108 {
1109         /*
1110          * Even root must have a proper TSD so that we know what pool
1111          * to log to.
1112          */
1113         if (tsd_get(zfs_allow_log_key) == NULL)
1114                 return (SET_ERROR(EPERM));
1115         return (0);
1116 }
1117 
1118 static int
1119 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1120 {
1121         char    parentname[ZFS_MAX_DATASET_NAME_LEN];
1122         int     error;
1123         char    *origin;
1124 
1125         if ((error = zfs_get_parent(zc->zc_name, parentname,
1126             sizeof (parentname))) != 0)
1127                 return (error);
1128 
1129         if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1130             (error = zfs_secpolicy_write_perms(origin,
1131             ZFS_DELEG_PERM_CLONE, cr)) != 0)
1132                 return (error);
1133 
1134         if ((error = zfs_secpolicy_write_perms(parentname,
1135             ZFS_DELEG_PERM_CREATE, cr)) != 0)
1136                 return (error);
1137 
1138         return (zfs_secpolicy_write_perms(parentname,
1139             ZFS_DELEG_PERM_MOUNT, cr));
1140 }
1141 
1142 /*
1143  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1144  * SYS_CONFIG privilege, which is not available in a local zone.
1145  */
1146 /* ARGSUSED */
1147 static int
1148 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1149 {
1150         if (secpolicy_sys_config(cr, B_FALSE) != 0)
1151                 return (SET_ERROR(EPERM));
1152 
1153         return (0);
1154 }
1155 
1156 /*
1157  * Policy for object to name lookups.
1158  */
1159 /* ARGSUSED */
1160 static int
1161 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1162 {
1163         int error;
1164 
1165         if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1166                 return (0);
1167 
1168         error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1169         return (error);
1170 }
1171 
1172 /*
1173  * Policy for fault injection.  Requires all privileges.
1174  */
1175 /* ARGSUSED */
1176 static int
1177 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1178 {
1179         return (secpolicy_zinject(cr));
1180 }
1181 
1182 /* ARGSUSED */
1183 static int
1184 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1185 {
1186         zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1187 
1188         if (prop == ZPROP_INVAL) {
1189                 if (!zfs_prop_user(zc->zc_value))
1190                         return (SET_ERROR(EINVAL));
1191                 return (zfs_secpolicy_write_perms(zc->zc_name,
1192                     ZFS_DELEG_PERM_USERPROP, cr));
1193         } else {
1194                 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1195                     NULL, cr));
1196         }
1197 }
1198 
1199 static int
1200 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1201 {
1202         int err = zfs_secpolicy_read(zc, innvl, cr);
1203         if (err)
1204                 return (err);
1205 
1206         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1207                 return (SET_ERROR(EINVAL));
1208 
1209         if (zc->zc_value[0] == 0) {
1210                 /*
1211                  * They are asking about a posix uid/gid.  If it's
1212                  * themself, allow it.
1213                  */
1214                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1215                     zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1216                     zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1217                     zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1218                         if (zc->zc_guid == crgetuid(cr))
1219                                 return (0);
1220                 } else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1221                     zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1222                     zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1223                     zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1224                         if (groupmember(zc->zc_guid, cr))
1225                                 return (0);
1226                 }
1227                 /* else is for project quota/used */
1228         }
1229 
1230         return (zfs_secpolicy_write_perms(zc->zc_name,
1231             userquota_perms[zc->zc_objset_type], cr));
1232 }
1233 
1234 static int
1235 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1236 {
1237         int err = zfs_secpolicy_read(zc, innvl, cr);
1238         if (err)
1239                 return (err);
1240 
1241         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1242                 return (SET_ERROR(EINVAL));
1243 
1244         return (zfs_secpolicy_write_perms(zc->zc_name,
1245             userquota_perms[zc->zc_objset_type], cr));
1246 }
1247 
1248 /* ARGSUSED */
1249 static int
1250 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1251 {
1252         return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1253             NULL, cr));
1254 }
1255 
1256 /* ARGSUSED */
1257 static int
1258 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1259 {
1260         nvpair_t *pair;
1261         nvlist_t *holds;
1262         int error;
1263 
1264         error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1265         if (error != 0)
1266                 return (SET_ERROR(EINVAL));
1267 
1268         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1269             pair = nvlist_next_nvpair(holds, pair)) {
1270                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1271                 error = dmu_fsname(nvpair_name(pair), fsname);
1272                 if (error != 0)
1273                         return (error);
1274                 error = zfs_secpolicy_write_perms(fsname,
1275                     ZFS_DELEG_PERM_HOLD, cr);
1276                 if (error != 0)
1277                         return (error);
1278         }
1279         return (0);
1280 }
1281 
1282 /* ARGSUSED */
1283 static int
1284 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1285 {
1286         nvpair_t *pair;
1287         int error;
1288 
1289         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1290             pair = nvlist_next_nvpair(innvl, pair)) {
1291                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1292                 error = dmu_fsname(nvpair_name(pair), fsname);
1293                 if (error != 0)
1294                         return (error);
1295                 error = zfs_secpolicy_write_perms(fsname,
1296                     ZFS_DELEG_PERM_RELEASE, cr);
1297                 if (error != 0)
1298                         return (error);
1299         }
1300         return (0);
1301 }
1302 
1303 /* ARGSUSED */
1304 static int
1305 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1306 {
1307         return (zfs_secpolicy_write_perms(zc->zc_name,
1308             ZFS_DELEG_PERM_LOAD_KEY, cr));
1309 }
1310 
1311 /* ARGSUSED */
1312 static int
1313 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1314 {
1315         return (zfs_secpolicy_write_perms(zc->zc_name,
1316             ZFS_DELEG_PERM_CHANGE_KEY, cr));
1317 }
1318 
1319 /*
1320  * Policy for allowing temporary snapshots to be taken or released
1321  */
1322 static int
1323 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1324 {
1325         /*
1326          * A temporary snapshot is the same as a snapshot,
1327          * hold, destroy and release all rolled into one.
1328          * Delegated diff alone is sufficient that we allow this.
1329          */
1330         int error;
1331 
1332         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1333             ZFS_DELEG_PERM_DIFF, cr)) == 0)
1334                 return (0);
1335 
1336         error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1337         if (error == 0)
1338                 error = zfs_secpolicy_hold(zc, innvl, cr);
1339         if (error == 0)
1340                 error = zfs_secpolicy_release(zc, innvl, cr);
1341         if (error == 0)
1342                 error = zfs_secpolicy_destroy(zc, innvl, cr);
1343         return (error);
1344 }
1345 
1346 /*
1347  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1348  */
1349 static int
1350 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1351 {
1352         char *packed;
1353         int error;
1354         nvlist_t *list = NULL;
1355 
1356         /*
1357          * Read in and unpack the user-supplied nvlist.
1358          */
1359         if (size == 0)
1360                 return (SET_ERROR(EINVAL));
1361 
1362         packed = kmem_alloc(size, KM_SLEEP);
1363 
1364         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1365             iflag)) != 0) {
1366                 kmem_free(packed, size);
1367                 return (SET_ERROR(EFAULT));
1368         }
1369 
1370         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1371                 kmem_free(packed, size);
1372                 return (error);
1373         }
1374 
1375         kmem_free(packed, size);
1376 
1377         *nvp = list;
1378         return (0);
1379 }
1380 
1381 /*
1382  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1383  * Entries will be removed from the end of the nvlist, and one int32 entry
1384  * named "N_MORE_ERRORS" will be added indicating how many entries were
1385  * removed.
1386  */
1387 static int
1388 nvlist_smush(nvlist_t *errors, size_t max)
1389 {
1390         size_t size;
1391 
1392         size = fnvlist_size(errors);
1393 
1394         if (size > max) {
1395                 nvpair_t *more_errors;
1396                 int n = 0;
1397 
1398                 if (max < 1024)
1399                         return (SET_ERROR(ENOMEM));
1400 
1401                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1402                 more_errors = nvlist_prev_nvpair(errors, NULL);
1403 
1404                 do {
1405                         nvpair_t *pair = nvlist_prev_nvpair(errors,
1406                             more_errors);
1407                         fnvlist_remove_nvpair(errors, pair);
1408                         n++;
1409                         size = fnvlist_size(errors);
1410                 } while (size > max);
1411 
1412                 fnvlist_remove_nvpair(errors, more_errors);
1413                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1414                 ASSERT3U(fnvlist_size(errors), <=, max);
1415         }
1416 
1417         return (0);
1418 }
1419 
1420 static int
1421 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1422 {
1423         char *packed = NULL;
1424         int error = 0;
1425         size_t size;
1426 
1427         size = fnvlist_size(nvl);
1428 
1429         if (size > zc->zc_nvlist_dst_size) {
1430                 error = SET_ERROR(ENOMEM);
1431         } else {
1432                 packed = fnvlist_pack(nvl, &size);
1433                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1434                     size, zc->zc_iflags) != 0)
1435                         error = SET_ERROR(EFAULT);
1436                 fnvlist_pack_free(packed, size);
1437         }
1438 
1439         zc->zc_nvlist_dst_size = size;
1440         zc->zc_nvlist_dst_filled = B_TRUE;
1441         return (error);
1442 }
1443 
1444 int
1445 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1446 {
1447         int error = 0;
1448         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1449                 return (SET_ERROR(EINVAL));
1450         }
1451 
1452         mutex_enter(&os->os_user_ptr_lock);
1453         *zfvp = dmu_objset_get_user(os);
1454         if (*zfvp) {
1455                 VFS_HOLD((*zfvp)->z_vfs);
1456         } else {
1457                 error = SET_ERROR(ESRCH);
1458         }
1459         mutex_exit(&os->os_user_ptr_lock);
1460         return (error);
1461 }
1462 
1463 int
1464 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1465 {
1466         objset_t *os;
1467         int error;
1468 
1469         error = dmu_objset_hold(dsname, FTAG, &os);
1470         if (error != 0)
1471                 return (error);
1472 
1473         error = getzfsvfs_impl(os, zfvp);
1474         dmu_objset_rele(os, FTAG);
1475         return (error);
1476 }
1477 
1478 /*
1479  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1480  * case its z_vfs will be NULL, and it will be opened as the owner.
1481  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1482  * which prevents all vnode ops from running.
1483  */
1484 static int
1485 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1486 {
1487         int error = 0;
1488 
1489         if (getzfsvfs(name, zfvp) != 0)
1490                 error = zfsvfs_create(name, B_FALSE, zfvp);
1491         if (error == 0) {
1492                 rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1493                     RW_READER, tag);
1494                 if ((*zfvp)->z_unmounted) {
1495                         /*
1496                          * XXX we could probably try again, since the unmounting
1497                          * thread should be just about to disassociate the
1498                          * objset from the zfsvfs.
1499                          */
1500                         rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1501                         return (SET_ERROR(EBUSY));
1502                 }
1503         }
1504         return (error);
1505 }
1506 
1507 static void
1508 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1509 {
1510         rrm_exit(&zfsvfs->z_teardown_lock, tag);
1511 
1512         if (zfsvfs->z_vfs) {
1513                 VFS_RELE(zfsvfs->z_vfs);
1514         } else {
1515                 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1516                 zfsvfs_free(zfsvfs);
1517         }
1518 }
1519 
1520 static int
1521 zfs_ioc_pool_create(zfs_cmd_t *zc)
1522 {
1523         int error;
1524         nvlist_t *config, *props = NULL;
1525         nvlist_t *rootprops = NULL;
1526         nvlist_t *zplprops = NULL;
1527         char *spa_name = zc->zc_name;
1528         dsl_crypto_params_t *dcp = NULL;
1529 
1530         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1531             zc->zc_iflags, &config))
1532                 return (error);
1533 
1534         if (zc->zc_nvlist_src_size != 0 && (error =
1535             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1536             zc->zc_iflags, &props))) {
1537                 nvlist_free(config);
1538                 return (error);
1539         }
1540 
1541         if (props) {
1542                 nvlist_t *nvl = NULL;
1543                 nvlist_t *hidden_args = NULL;
1544                 uint64_t version = SPA_VERSION;
1545                 char *tname;
1546 
1547                 (void) nvlist_lookup_uint64(props,
1548                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1549                 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1550                         error = SET_ERROR(EINVAL);
1551                         goto pool_props_bad;
1552                 }
1553                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1554                 if (nvl) {
1555                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1556                         if (error != 0) {
1557                                 nvlist_free(config);
1558                                 nvlist_free(props);
1559                                 return (error);
1560                         }
1561                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1562                 }
1563 
1564                 (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
1565                     &hidden_args);
1566                 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
1567                     rootprops, hidden_args, &dcp);
1568                 if (error != 0) {
1569                         nvlist_free(config);
1570                         nvlist_free(props);
1571                         return (error);
1572                 }
1573                 (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
1574 
1575                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1576                 error = zfs_fill_zplprops_root(version, rootprops,
1577                     zplprops, NULL);
1578                 if (error != 0)
1579                         goto pool_props_bad;
1580 
1581                 if (nvlist_lookup_string(props,
1582                     zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1583                         spa_name = tname;
1584         }
1585 
1586         error = spa_create(zc->zc_name, config, props, zplprops, dcp);
1587 
1588         /*
1589          * Set the remaining root properties
1590          */
1591         if (!error && (error = zfs_set_prop_nvlist(spa_name,
1592             ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1593                 (void) spa_destroy(spa_name);
1594 
1595 pool_props_bad:
1596         nvlist_free(rootprops);
1597         nvlist_free(zplprops);
1598         nvlist_free(config);
1599         nvlist_free(props);
1600         dsl_crypto_params_free(dcp, !!error);
1601 
1602         return (error);
1603 }
1604 
1605 static int
1606 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1607 {
1608         int error;
1609         zfs_log_history(zc);
1610         error = spa_destroy(zc->zc_name);
1611         if (error == 0)
1612                 zvol_remove_minors(zc->zc_name);
1613         return (error);
1614 }
1615 
1616 static int
1617 zfs_ioc_pool_import(zfs_cmd_t *zc)
1618 {
1619         nvlist_t *config, *props = NULL;
1620         uint64_t guid;
1621         int error;
1622 
1623         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1624             zc->zc_iflags, &config)) != 0)
1625                 return (error);
1626 
1627         if (zc->zc_nvlist_src_size != 0 && (error =
1628             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1629             zc->zc_iflags, &props))) {
1630                 nvlist_free(config);
1631                 return (error);
1632         }
1633 
1634         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1635             guid != zc->zc_guid)
1636                 error = SET_ERROR(EINVAL);
1637         else
1638                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1639 
1640         if (zc->zc_nvlist_dst != 0) {
1641                 int err;
1642 
1643                 if ((err = put_nvlist(zc, config)) != 0)
1644                         error = err;
1645         }
1646 
1647         nvlist_free(config);
1648 
1649         nvlist_free(props);
1650 
1651         return (error);
1652 }
1653 
1654 static int
1655 zfs_ioc_pool_export(zfs_cmd_t *zc)
1656 {
1657         int error;
1658         boolean_t force = (boolean_t)zc->zc_cookie;
1659         boolean_t hardforce = (boolean_t)zc->zc_guid;
1660 
1661         zfs_log_history(zc);
1662         error = spa_export(zc->zc_name, NULL, force, hardforce);
1663         if (error == 0)
1664                 zvol_remove_minors(zc->zc_name);
1665         return (error);
1666 }
1667 
1668 static int
1669 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1670 {
1671         nvlist_t *configs;
1672         int error;
1673 
1674         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1675                 return (SET_ERROR(EEXIST));
1676 
1677         error = put_nvlist(zc, configs);
1678 
1679         nvlist_free(configs);
1680 
1681         return (error);
1682 }
1683 
1684 /*
1685  * inputs:
1686  * zc_name              name of the pool
1687  *
1688  * outputs:
1689  * zc_cookie            real errno
1690  * zc_nvlist_dst        config nvlist
1691  * zc_nvlist_dst_size   size of config nvlist
1692  */
1693 static int
1694 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1695 {
1696         nvlist_t *config;
1697         int error;
1698         int ret = 0;
1699 
1700         error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1701             sizeof (zc->zc_value));
1702 
1703         if (config != NULL) {
1704                 ret = put_nvlist(zc, config);
1705                 nvlist_free(config);
1706 
1707                 /*
1708                  * The config may be present even if 'error' is non-zero.
1709                  * In this case we return success, and preserve the real errno
1710                  * in 'zc_cookie'.
1711                  */
1712                 zc->zc_cookie = error;
1713         } else {
1714                 ret = error;
1715         }
1716 
1717         return (ret);
1718 }
1719 
1720 /*
1721  * Try to import the given pool, returning pool stats as appropriate so that
1722  * user land knows which devices are available and overall pool health.
1723  */
1724 static int
1725 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1726 {
1727         nvlist_t *tryconfig, *config;
1728         int error;
1729 
1730         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1731             zc->zc_iflags, &tryconfig)) != 0)
1732                 return (error);
1733 
1734         config = spa_tryimport(tryconfig);
1735 
1736         nvlist_free(tryconfig);
1737 
1738         if (config == NULL)
1739                 return (SET_ERROR(EINVAL));
1740 
1741         error = put_nvlist(zc, config);
1742         nvlist_free(config);
1743 
1744         return (error);
1745 }
1746 
1747 /*
1748  * inputs:
1749  * zc_name              name of the pool
1750  * zc_cookie            scan func (pool_scan_func_t)
1751  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1752  */
1753 static int
1754 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1755 {
1756         spa_t *spa;
1757         int error;
1758 
1759         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1760                 return (error);
1761 
1762         if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1763                 return (SET_ERROR(EINVAL));
1764 
1765         if (zc->zc_flags == POOL_SCRUB_PAUSE)
1766                 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1767         else if (zc->zc_cookie == POOL_SCAN_NONE)
1768                 error = spa_scan_stop(spa);
1769         else
1770                 error = spa_scan(spa, zc->zc_cookie);
1771 
1772         spa_close(spa, FTAG);
1773 
1774         return (error);
1775 }
1776 
1777 static int
1778 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1779 {
1780         spa_t *spa;
1781         int error;
1782 
1783         error = spa_open(zc->zc_name, &spa, FTAG);
1784         if (error == 0) {
1785                 spa_freeze(spa);
1786                 spa_close(spa, FTAG);
1787         }
1788         return (error);
1789 }
1790 
1791 static int
1792 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1793 {
1794         spa_t *spa;
1795         int error;
1796 
1797         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1798                 return (error);
1799 
1800         if (zc->zc_cookie < spa_version(spa) ||
1801             !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1802                 spa_close(spa, FTAG);
1803                 return (SET_ERROR(EINVAL));
1804         }
1805 
1806         spa_upgrade(spa, zc->zc_cookie);
1807         spa_close(spa, FTAG);
1808 
1809         return (error);
1810 }
1811 
1812 static int
1813 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1814 {
1815         spa_t *spa;
1816         char *hist_buf;
1817         uint64_t size;
1818         int error;
1819 
1820         if ((size = zc->zc_history_len) == 0)
1821                 return (SET_ERROR(EINVAL));
1822 
1823         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1824                 return (error);
1825 
1826         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1827                 spa_close(spa, FTAG);
1828                 return (SET_ERROR(ENOTSUP));
1829         }
1830 
1831         hist_buf = kmem_alloc(size, KM_SLEEP);
1832         if ((error = spa_history_get(spa, &zc->zc_history_offset,
1833             &zc->zc_history_len, hist_buf)) == 0) {
1834                 error = ddi_copyout(hist_buf,
1835                     (void *)(uintptr_t)zc->zc_history,
1836                     zc->zc_history_len, zc->zc_iflags);
1837         }
1838 
1839         spa_close(spa, FTAG);
1840         kmem_free(hist_buf, size);
1841         return (error);
1842 }
1843 
1844 static int
1845 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1846 {
1847         spa_t *spa;
1848         int error;
1849 
1850         error = spa_open(zc->zc_name, &spa, FTAG);
1851         if (error == 0) {
1852                 error = spa_change_guid(spa);
1853                 spa_close(spa, FTAG);
1854         }
1855         return (error);
1856 }
1857 
1858 static int
1859 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1860 {
1861         return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1862 }
1863 
1864 /*
1865  * inputs:
1866  * zc_name              name of filesystem
1867  * zc_obj               object to find
1868  *
1869  * outputs:
1870  * zc_value             name of object
1871  */
1872 static int
1873 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1874 {
1875         objset_t *os;
1876         int error;
1877 
1878         /* XXX reading from objset not owned */
1879         if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1880             FTAG, &os)) != 0)
1881                 return (error);
1882         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1883                 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1884                 return (SET_ERROR(EINVAL));
1885         }
1886         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1887             sizeof (zc->zc_value));
1888         dmu_objset_rele_flags(os, B_TRUE, FTAG);
1889 
1890         return (error);
1891 }
1892 
1893 /*
1894  * inputs:
1895  * zc_name              name of filesystem
1896  * zc_obj               object to find
1897  *
1898  * outputs:
1899  * zc_stat              stats on object
1900  * zc_value             path to object
1901  */
1902 static int
1903 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1904 {
1905         objset_t *os;
1906         int error;
1907 
1908         /* XXX reading from objset not owned */
1909         if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1910             FTAG, &os)) != 0)
1911                 return (error);
1912         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1913                 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1914                 return (SET_ERROR(EINVAL));
1915         }
1916         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1917             sizeof (zc->zc_value));
1918         dmu_objset_rele_flags(os, B_TRUE, FTAG);
1919 
1920         return (error);
1921 }
1922 
1923 static int
1924 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1925 {
1926         spa_t *spa;
1927         int error;
1928         nvlist_t *config, **l2cache, **spares;
1929         uint_t nl2cache = 0, nspares = 0;
1930 
1931         error = spa_open(zc->zc_name, &spa, FTAG);
1932         if (error != 0)
1933                 return (error);
1934 
1935         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1936             zc->zc_iflags, &config);
1937         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1938             &l2cache, &nl2cache);
1939 
1940         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1941             &spares, &nspares);
1942 
1943         /*
1944          * A root pool with concatenated devices is not supported.
1945          * Thus, can not add a device to a root pool.
1946          *
1947          * Intent log device can not be added to a rootpool because
1948          * during mountroot, zil is replayed, a seperated log device
1949          * can not be accessed during the mountroot time.
1950          *
1951          * l2cache and spare devices are ok to be added to a rootpool.
1952          */
1953         if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1954                 nvlist_free(config);
1955                 spa_close(spa, FTAG);
1956                 return (SET_ERROR(EDOM));
1957         }
1958 
1959         if (error == 0) {
1960                 error = spa_vdev_add(spa, config);
1961                 nvlist_free(config);
1962         }
1963         spa_close(spa, FTAG);
1964         return (error);
1965 }
1966 
1967 /*
1968  * inputs:
1969  * zc_name              name of the pool
1970  * zc_guid              guid of vdev to remove
1971  * zc_cookie            cancel removal
1972  */
1973 static int
1974 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1975 {
1976         spa_t *spa;
1977         int error;
1978 
1979         error = spa_open(zc->zc_name, &spa, FTAG);
1980         if (error != 0)
1981                 return (error);
1982         if (zc->zc_cookie != 0) {
1983                 error = spa_vdev_remove_cancel(spa);
1984         } else {
1985                 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1986         }
1987         spa_close(spa, FTAG);
1988         return (error);
1989 }
1990 
1991 static int
1992 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1993 {
1994         spa_t *spa;
1995         int error;
1996         vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1997 
1998         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1999                 return (error);
2000         switch (zc->zc_cookie) {
2001         case VDEV_STATE_ONLINE:
2002                 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2003                 break;
2004 
2005         case VDEV_STATE_OFFLINE:
2006                 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2007                 break;
2008 
2009         case VDEV_STATE_FAULTED:
2010                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2011                     zc->zc_obj != VDEV_AUX_EXTERNAL)
2012                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2013 
2014                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2015                 break;
2016 
2017         case VDEV_STATE_DEGRADED:
2018                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2019                     zc->zc_obj != VDEV_AUX_EXTERNAL)
2020                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2021 
2022                 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2023                 break;
2024 
2025         default:
2026                 error = SET_ERROR(EINVAL);
2027         }
2028         zc->zc_cookie = newstate;
2029         spa_close(spa, FTAG);
2030         return (error);
2031 }
2032 
2033 static int
2034 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2035 {
2036         spa_t *spa;
2037         int replacing = zc->zc_cookie;
2038         nvlist_t *config;
2039         int error;
2040 
2041         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2042                 return (error);
2043 
2044         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2045             zc->zc_iflags, &config)) == 0) {
2046                 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2047                 nvlist_free(config);
2048         }
2049 
2050         spa_close(spa, FTAG);
2051         return (error);
2052 }
2053 
2054 static int
2055 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2056 {
2057         spa_t *spa;
2058         int error;
2059 
2060         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2061                 return (error);
2062 
2063         error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2064 
2065         spa_close(spa, FTAG);
2066         return (error);
2067 }
2068 
2069 static int
2070 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2071 {
2072         spa_t *spa;
2073         nvlist_t *config, *props = NULL;
2074         int error;
2075         boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2076 
2077         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2078                 return (error);
2079 
2080         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2081             zc->zc_iflags, &config)) {
2082                 spa_close(spa, FTAG);
2083                 return (error);
2084         }
2085 
2086         if (zc->zc_nvlist_src_size != 0 && (error =
2087             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2088             zc->zc_iflags, &props))) {
2089                 spa_close(spa, FTAG);
2090                 nvlist_free(config);
2091                 return (error);
2092         }
2093 
2094         error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2095 
2096         spa_close(spa, FTAG);
2097 
2098         nvlist_free(config);
2099         nvlist_free(props);
2100 
2101         return (error);
2102 }
2103 
2104 static int
2105 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2106 {
2107         spa_t *spa;
2108         char *path = zc->zc_value;
2109         uint64_t guid = zc->zc_guid;
2110         int error;
2111 
2112         error = spa_open(zc->zc_name, &spa, FTAG);
2113         if (error != 0)
2114                 return (error);
2115 
2116         error = spa_vdev_setpath(spa, guid, path);
2117         spa_close(spa, FTAG);
2118         return (error);
2119 }
2120 
2121 static int
2122 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2123 {
2124         spa_t *spa;
2125         char *fru = zc->zc_value;
2126         uint64_t guid = zc->zc_guid;
2127         int error;
2128 
2129         error = spa_open(zc->zc_name, &spa, FTAG);
2130         if (error != 0)
2131                 return (error);
2132 
2133         error = spa_vdev_setfru(spa, guid, fru);
2134         spa_close(spa, FTAG);
2135         return (error);
2136 }
2137 
2138 static int
2139 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2140 {
2141         int error = 0;
2142         nvlist_t *nv;
2143 
2144         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2145 
2146         if (zc->zc_nvlist_dst != 0 &&
2147             (error = dsl_prop_get_all(os, &nv)) == 0) {
2148                 dmu_objset_stats(os, nv);
2149                 /*
2150                  * NB: zvol_get_stats() will read the objset contents,
2151                  * which we aren't supposed to do with a
2152                  * DS_MODE_USER hold, because it could be
2153                  * inconsistent.  So this is a bit of a workaround...
2154                  * XXX reading with out owning
2155                  */
2156                 if (!zc->zc_objset_stats.dds_inconsistent &&
2157                     dmu_objset_type(os) == DMU_OST_ZVOL) {
2158                         error = zvol_get_stats(os, nv);
2159                         if (error == EIO)
2160                                 return (error);
2161                         VERIFY0(error);
2162                 }
2163                 error = put_nvlist(zc, nv);
2164                 nvlist_free(nv);
2165         }
2166 
2167         return (error);
2168 }
2169 
2170 /*
2171  * inputs:
2172  * zc_name              name of filesystem
2173  * zc_nvlist_dst_size   size of buffer for property nvlist
2174  *
2175  * outputs:
2176  * zc_objset_stats      stats
2177  * zc_nvlist_dst        property nvlist
2178  * zc_nvlist_dst_size   size of property nvlist
2179  */
2180 static int
2181 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2182 {
2183         objset_t *os;
2184         int error;
2185 
2186         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2187         if (error == 0) {
2188                 error = zfs_ioc_objset_stats_impl(zc, os);
2189                 dmu_objset_rele(os, FTAG);
2190         }
2191 
2192         return (error);
2193 }
2194 
2195 /*
2196  * inputs:
2197  * zc_name              name of filesystem
2198  * zc_nvlist_dst_size   size of buffer for property nvlist
2199  *
2200  * outputs:
2201  * zc_nvlist_dst        received property nvlist
2202  * zc_nvlist_dst_size   size of received property nvlist
2203  *
2204  * Gets received properties (distinct from local properties on or after
2205  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2206  * local property values.
2207  */
2208 static int
2209 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2210 {
2211         int error = 0;
2212         nvlist_t *nv;
2213 
2214         /*
2215          * Without this check, we would return local property values if the
2216          * caller has not already received properties on or after
2217          * SPA_VERSION_RECVD_PROPS.
2218          */
2219         if (!dsl_prop_get_hasrecvd(zc->zc_name))
2220                 return (SET_ERROR(ENOTSUP));
2221 
2222         if (zc->zc_nvlist_dst != 0 &&
2223             (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2224                 error = put_nvlist(zc, nv);
2225                 nvlist_free(nv);
2226         }
2227 
2228         return (error);
2229 }
2230 
2231 static int
2232 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2233 {
2234         uint64_t value;
2235         int error;
2236 
2237         /*
2238          * zfs_get_zplprop() will either find a value or give us
2239          * the default value (if there is one).
2240          */
2241         if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2242                 return (error);
2243         VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2244         return (0);
2245 }
2246 
2247 /*
2248  * inputs:
2249  * zc_name              name of filesystem
2250  * zc_nvlist_dst_size   size of buffer for zpl property nvlist
2251  *
2252  * outputs:
2253  * zc_nvlist_dst        zpl property nvlist
2254  * zc_nvlist_dst_size   size of zpl property nvlist
2255  */
2256 static int
2257 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2258 {
2259         objset_t *os;
2260         int err;
2261 
2262         /* XXX reading without owning */
2263         if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2264                 return (err);
2265 
2266         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2267 
2268         /*
2269          * NB: nvl_add_zplprop() will read the objset contents,
2270          * which we aren't supposed to do with a DS_MODE_USER
2271          * hold, because it could be inconsistent.
2272          */
2273         if (zc->zc_nvlist_dst != 0 &&
2274             !zc->zc_objset_stats.dds_inconsistent &&
2275             dmu_objset_type(os) == DMU_OST_ZFS) {
2276                 nvlist_t *nv;
2277 
2278                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2279                 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2280                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2281                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2282                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2283                         err = put_nvlist(zc, nv);
2284                 nvlist_free(nv);
2285         } else {
2286                 err = SET_ERROR(ENOENT);
2287         }
2288         dmu_objset_rele(os, FTAG);
2289         return (err);
2290 }
2291 
2292 static boolean_t
2293 dataset_name_hidden(const char *name)
2294 {
2295         /*
2296          * Skip over datasets that are not visible in this zone,
2297          * internal datasets (which have a $ in their name), and
2298          * temporary datasets (which have a % in their name).
2299          */
2300         if (strchr(name, '$') != NULL)
2301                 return (B_TRUE);
2302         if (strchr(name, '%') != NULL)
2303                 return (B_TRUE);
2304         if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2305                 return (B_TRUE);
2306         return (B_FALSE);
2307 }
2308 
2309 /*
2310  * inputs:
2311  * zc_name              name of filesystem
2312  * zc_cookie            zap cursor
2313  * zc_nvlist_dst_size   size of buffer for property nvlist
2314  *
2315  * outputs:
2316  * zc_name              name of next filesystem
2317  * zc_cookie            zap cursor
2318  * zc_objset_stats      stats
2319  * zc_nvlist_dst        property nvlist
2320  * zc_nvlist_dst_size   size of property nvlist
2321  */
2322 static int
2323 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2324 {
2325         objset_t *os;
2326         int error;
2327         char *p;
2328         size_t orig_len = strlen(zc->zc_name);
2329 
2330 top:
2331         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2332                 if (error == ENOENT)
2333                         error = SET_ERROR(ESRCH);
2334                 return (error);
2335         }
2336 
2337         p = strrchr(zc->zc_name, '/');
2338         if (p == NULL || p[1] != '\0')
2339                 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2340         p = zc->zc_name + strlen(zc->zc_name);
2341 
2342         do {
2343                 error = dmu_dir_list_next(os,
2344                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
2345                     NULL, &zc->zc_cookie);
2346                 if (error == ENOENT)
2347                         error = SET_ERROR(ESRCH);
2348         } while (error == 0 && dataset_name_hidden(zc->zc_name));
2349         dmu_objset_rele(os, FTAG);
2350 
2351         /*
2352          * If it's an internal dataset (ie. with a '$' in its name),
2353          * don't try to get stats for it, otherwise we'll return ENOENT.
2354          */
2355         if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2356                 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2357                 if (error == ENOENT) {
2358                         /* We lost a race with destroy, get the next one. */
2359                         zc->zc_name[orig_len] = '\0';
2360                         goto top;
2361                 }
2362         }
2363         return (error);
2364 }
2365 
2366 /*
2367  * inputs:
2368  * zc_name              name of filesystem
2369  * zc_cookie            zap cursor
2370  * zc_nvlist_dst_size   size of buffer for property nvlist
2371  * zc_simple            when set, only name is requested
2372  *
2373  * outputs:
2374  * zc_name              name of next snapshot
2375  * zc_objset_stats      stats
2376  * zc_nvlist_dst        property nvlist
2377  * zc_nvlist_dst_size   size of property nvlist
2378  */
2379 static int
2380 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2381 {
2382         objset_t *os;
2383         int error;
2384 
2385         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2386         if (error != 0) {
2387                 return (error == ENOENT ? ESRCH : error);
2388         }
2389 
2390         /*
2391          * A dataset name of maximum length cannot have any snapshots,
2392          * so exit immediately.
2393          */
2394         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2395             ZFS_MAX_DATASET_NAME_LEN) {
2396                 dmu_objset_rele(os, FTAG);
2397                 return (SET_ERROR(ESRCH));
2398         }
2399 
2400         error = dmu_snapshot_list_next(os,
2401             sizeof (zc->zc_name) - strlen(zc->zc_name),
2402             zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2403             NULL);
2404 
2405         if (error == 0 && !zc->zc_simple) {
2406                 dsl_dataset_t *ds;
2407                 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2408 
2409                 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2410                 if (error == 0) {
2411                         objset_t *ossnap;
2412 
2413                         error = dmu_objset_from_ds(ds, &ossnap);
2414                         if (error == 0)
2415                                 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2416                         dsl_dataset_rele(ds, FTAG);
2417                 }
2418         } else if (error == ENOENT) {
2419                 error = SET_ERROR(ESRCH);
2420         }
2421 
2422         dmu_objset_rele(os, FTAG);
2423         /* if we failed, undo the @ that we tacked on to zc_name */
2424         if (error != 0)
2425                 *strchr(zc->zc_name, '@') = '\0';
2426         return (error);
2427 }
2428 
2429 static int
2430 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2431 {
2432         const char *propname = nvpair_name(pair);
2433         uint64_t *valary;
2434         unsigned int vallen;
2435         const char *domain;
2436         char *dash;
2437         zfs_userquota_prop_t type;
2438         uint64_t rid;
2439         uint64_t quota;
2440         zfsvfs_t *zfsvfs;
2441         int err;
2442 
2443         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2444                 nvlist_t *attrs;
2445                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2446                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2447                     &pair) != 0)
2448                         return (SET_ERROR(EINVAL));
2449         }
2450 
2451         /*
2452          * A correctly constructed propname is encoded as
2453          * userquota@<rid>-<domain>.
2454          */
2455         if ((dash = strchr(propname, '-')) == NULL ||
2456             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2457             vallen != 3)
2458                 return (SET_ERROR(EINVAL));
2459 
2460         domain = dash + 1;
2461         type = valary[0];
2462         rid = valary[1];
2463         quota = valary[2];
2464 
2465         err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2466         if (err == 0) {
2467                 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2468                 zfsvfs_rele(zfsvfs, FTAG);
2469         }
2470 
2471         return (err);
2472 }
2473 
2474 /*
2475  * If the named property is one that has a special function to set its value,
2476  * return 0 on success and a positive error code on failure; otherwise if it is
2477  * not one of the special properties handled by this function, return -1.
2478  *
2479  * XXX: It would be better for callers of the property interface if we handled
2480  * these special cases in dsl_prop.c (in the dsl layer).
2481  */
2482 static int
2483 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2484     nvpair_t *pair)
2485 {
2486         const char *propname = nvpair_name(pair);
2487         zfs_prop_t prop = zfs_name_to_prop(propname);
2488         uint64_t intval = 0;
2489         char *strval = NULL;
2490         int err = -1;
2491 
2492         if (prop == ZPROP_INVAL) {
2493                 if (zfs_prop_userquota(propname))
2494                         return (zfs_prop_set_userquota(dsname, pair));
2495                 return (-1);
2496         }
2497 
2498         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2499                 nvlist_t *attrs;
2500                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2501                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2502                     &pair) == 0);
2503         }
2504 
2505         /* all special properties are numeric except for keylocation */
2506         if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2507                 strval = fnvpair_value_string(pair);
2508         } else {
2509                 intval = fnvpair_value_uint64(pair);
2510         }
2511 
2512         switch (prop) {
2513         case ZFS_PROP_QUOTA:
2514                 err = dsl_dir_set_quota(dsname, source, intval);
2515                 break;
2516         case ZFS_PROP_REFQUOTA:
2517                 err = dsl_dataset_set_refquota(dsname, source, intval);
2518                 break;
2519         case ZFS_PROP_FILESYSTEM_LIMIT:
2520         case ZFS_PROP_SNAPSHOT_LIMIT:
2521                 if (intval == UINT64_MAX) {
2522                         /* clearing the limit, just do it */
2523                         err = 0;
2524                 } else {
2525                         err = dsl_dir_activate_fs_ss_limit(dsname);
2526                 }
2527                 /*
2528                  * Set err to -1 to force the zfs_set_prop_nvlist code down the
2529                  * default path to set the value in the nvlist.
2530                  */
2531                 if (err == 0)
2532                         err = -1;
2533                 break;
2534         case ZFS_PROP_KEYLOCATION:
2535                 err = dsl_crypto_can_set_keylocation(dsname, strval);
2536 
2537                 /*
2538                  * Set err to -1 to force the zfs_set_prop_nvlist code down the
2539                  * default path to set the value in the nvlist.
2540                  */
2541                 if (err == 0)
2542                         err = -1;
2543                 break;
2544         case ZFS_PROP_RESERVATION:
2545                 err = dsl_dir_set_reservation(dsname, source, intval);
2546                 break;
2547         case ZFS_PROP_REFRESERVATION:
2548                 err = dsl_dataset_set_refreservation(dsname, source, intval);
2549                 break;
2550         case ZFS_PROP_VOLSIZE:
2551                 err = zvol_set_volsize(dsname, intval);
2552                 break;
2553         case ZFS_PROP_VERSION:
2554         {
2555                 zfsvfs_t *zfsvfs;
2556 
2557                 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2558                         break;
2559 
2560                 err = zfs_set_version(zfsvfs, intval);
2561                 zfsvfs_rele(zfsvfs, FTAG);
2562 
2563                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2564                         zfs_cmd_t *zc;
2565 
2566                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2567                         (void) strcpy(zc->zc_name, dsname);
2568                         (void) zfs_ioc_userspace_upgrade(zc);
2569                         (void) zfs_ioc_id_quota_upgrade(zc);
2570                         kmem_free(zc, sizeof (zfs_cmd_t));
2571                 }
2572                 break;
2573         }
2574         default:
2575                 err = -1;
2576         }
2577 
2578         return (err);
2579 }
2580 
2581 /*
2582  * This function is best effort. If it fails to set any of the given properties,
2583  * it continues to set as many as it can and returns the last error
2584  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2585  * with the list of names of all the properties that failed along with the
2586  * corresponding error numbers.
2587  *
2588  * If every property is set successfully, zero is returned and errlist is not
2589  * modified.
2590  */
2591 int
2592 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2593     nvlist_t *errlist)
2594 {
2595         nvpair_t *pair;
2596         nvpair_t *propval;
2597         int rv = 0;
2598         uint64_t intval;
2599         char *strval;
2600         nvlist_t *genericnvl = fnvlist_alloc();
2601         nvlist_t *retrynvl = fnvlist_alloc();
2602 
2603 retry:
2604         pair = NULL;
2605         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2606                 const char *propname = nvpair_name(pair);
2607                 zfs_prop_t prop = zfs_name_to_prop(propname);
2608                 int err = 0;
2609 
2610                 /* decode the property value */
2611                 propval = pair;
2612                 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2613                         nvlist_t *attrs;
2614                         attrs = fnvpair_value_nvlist(pair);
2615                         if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2616                             &propval) != 0)
2617                                 err = SET_ERROR(EINVAL);
2618                 }
2619 
2620                 /* Validate value type */
2621                 if (err == 0 && source == ZPROP_SRC_INHERITED) {
2622                         /* inherited properties are expected to be booleans */
2623                         if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
2624                                 err = SET_ERROR(EINVAL);
2625                 } else if (err == 0 && prop == ZPROP_INVAL) {
2626                         if (zfs_prop_user(propname)) {
2627                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
2628                                         err = SET_ERROR(EINVAL);
2629                         } else if (zfs_prop_userquota(propname)) {
2630                                 if (nvpair_type(propval) !=
2631                                     DATA_TYPE_UINT64_ARRAY)
2632                                         err = SET_ERROR(EINVAL);
2633                         } else {
2634                                 err = SET_ERROR(EINVAL);
2635                         }
2636                 } else if (err == 0) {
2637                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2638                                 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2639                                         err = SET_ERROR(EINVAL);
2640                         } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2641                                 const char *unused;
2642 
2643                                 intval = fnvpair_value_uint64(propval);
2644 
2645                                 switch (zfs_prop_get_type(prop)) {
2646                                 case PROP_TYPE_NUMBER:
2647                                         break;
2648                                 case PROP_TYPE_STRING:
2649                                         err = SET_ERROR(EINVAL);
2650                                         break;
2651                                 case PROP_TYPE_INDEX:
2652                                         if (zfs_prop_index_to_string(prop,
2653                                             intval, &unused) != 0)
2654                                                 err = SET_ERROR(EINVAL);
2655                                         break;
2656                                 default:
2657                                         cmn_err(CE_PANIC,
2658                                             "unknown property type");
2659                                 }
2660                         } else {
2661                                 err = SET_ERROR(EINVAL);
2662                         }
2663                 }
2664 
2665                 /* Validate permissions */
2666                 if (err == 0)
2667                         err = zfs_check_settable(dsname, pair, CRED());
2668 
2669                 if (err == 0) {
2670                         if (source == ZPROP_SRC_INHERITED)
2671                                 err = -1; /* does not need special handling */
2672                         else
2673                                 err = zfs_prop_set_special(dsname, source,
2674                                     pair);
2675                         if (err == -1) {
2676                                 /*
2677                                  * For better performance we build up a list of
2678                                  * properties to set in a single transaction.
2679                                  */
2680                                 err = nvlist_add_nvpair(genericnvl, pair);
2681                         } else if (err != 0 && nvl != retrynvl) {
2682                                 /*
2683                                  * This may be a spurious error caused by
2684                                  * receiving quota and reservation out of order.
2685                                  * Try again in a second pass.
2686                                  */
2687                                 err = nvlist_add_nvpair(retrynvl, pair);
2688                         }
2689                 }
2690 
2691                 if (err != 0) {
2692                         if (errlist != NULL)
2693                                 fnvlist_add_int32(errlist, propname, err);
2694                         rv = err;
2695                 }
2696         }
2697 
2698         if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2699                 nvl = retrynvl;
2700                 goto retry;
2701         }
2702 
2703         if (!nvlist_empty(genericnvl) &&
2704             dsl_props_set(dsname, source, genericnvl) != 0) {
2705                 /*
2706                  * If this fails, we still want to set as many properties as we
2707                  * can, so try setting them individually.
2708                  */
2709                 pair = NULL;
2710                 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2711                         const char *propname = nvpair_name(pair);
2712                         int err = 0;
2713 
2714                         propval = pair;
2715                         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2716                                 nvlist_t *attrs;
2717                                 attrs = fnvpair_value_nvlist(pair);
2718                                 propval = fnvlist_lookup_nvpair(attrs,
2719                                     ZPROP_VALUE);
2720                         }
2721 
2722                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2723                                 strval = fnvpair_value_string(propval);
2724                                 err = dsl_prop_set_string(dsname, propname,
2725                                     source, strval);
2726                         } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
2727                                 err = dsl_prop_inherit(dsname, propname,
2728                                     source);
2729                         } else {
2730                                 intval = fnvpair_value_uint64(propval);
2731                                 err = dsl_prop_set_int(dsname, propname, source,
2732                                     intval);
2733                         }
2734 
2735                         if (err != 0) {
2736                                 if (errlist != NULL) {
2737                                         fnvlist_add_int32(errlist, propname,
2738                                             err);
2739                                 }
2740                                 rv = err;
2741                         }
2742                 }
2743         }
2744         nvlist_free(genericnvl);
2745         nvlist_free(retrynvl);
2746 
2747         return (rv);
2748 }
2749 
2750 /*
2751  * Check that all the properties are valid user properties.
2752  */
2753 static int
2754 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2755 {
2756         nvpair_t *pair = NULL;
2757         int error = 0;
2758 
2759         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2760                 const char *propname = nvpair_name(pair);
2761 
2762                 if (!zfs_prop_user(propname) ||
2763                     nvpair_type(pair) != DATA_TYPE_STRING)
2764                         return (SET_ERROR(EINVAL));
2765 
2766                 if (error = zfs_secpolicy_write_perms(fsname,
2767                     ZFS_DELEG_PERM_USERPROP, CRED()))
2768                         return (error);
2769 
2770                 if (strlen(propname) >= ZAP_MAXNAMELEN)
2771                         return (SET_ERROR(ENAMETOOLONG));
2772 
2773                 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2774                         return (E2BIG);
2775         }
2776         return (0);
2777 }
2778 
2779 static void
2780 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2781 {
2782         nvpair_t *pair;
2783 
2784         VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2785 
2786         pair = NULL;
2787         while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2788                 if (nvlist_exists(skipped, nvpair_name(pair)))
2789                         continue;
2790 
2791                 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2792         }
2793 }
2794 
2795 static int
2796 clear_received_props(const char *dsname, nvlist_t *props,
2797     nvlist_t *skipped)
2798 {
2799         int err = 0;
2800         nvlist_t *cleared_props = NULL;
2801         props_skip(props, skipped, &cleared_props);
2802         if (!nvlist_empty(cleared_props)) {
2803                 /*
2804                  * Acts on local properties until the dataset has received
2805                  * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2806                  */
2807                 zprop_source_t flags = (ZPROP_SRC_NONE |
2808                     (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2809                 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2810         }
2811         nvlist_free(cleared_props);
2812         return (err);
2813 }
2814 
2815 /*
2816  * inputs:
2817  * zc_name              name of filesystem
2818  * zc_value             name of property to set
2819  * zc_nvlist_src{_size} nvlist of properties to apply
2820  * zc_cookie            received properties flag
2821  *
2822  * outputs:
2823  * zc_nvlist_dst{_size} error for each unapplied received property
2824  */
2825 static int
2826 zfs_ioc_set_prop(zfs_cmd_t *zc)
2827 {
2828         nvlist_t *nvl;
2829         boolean_t received = zc->zc_cookie;
2830         zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2831             ZPROP_SRC_LOCAL);
2832         nvlist_t *errors;
2833         int error;
2834 
2835         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2836             zc->zc_iflags, &nvl)) != 0)
2837                 return (error);
2838 
2839         if (received) {
2840                 nvlist_t *origprops;
2841 
2842                 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2843                         (void) clear_received_props(zc->zc_name,
2844                             origprops, nvl);
2845                         nvlist_free(origprops);
2846                 }
2847 
2848                 error = dsl_prop_set_hasrecvd(zc->zc_name);
2849         }
2850 
2851         errors = fnvlist_alloc();
2852         if (error == 0)
2853                 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2854 
2855         if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2856                 (void) put_nvlist(zc, errors);
2857         }
2858 
2859         nvlist_free(errors);
2860         nvlist_free(nvl);
2861         return (error);
2862 }
2863 
2864 /*
2865  * inputs:
2866  * zc_name              name of filesystem
2867  * zc_value             name of property to inherit
2868  * zc_cookie            revert to received value if TRUE
2869  *
2870  * outputs:             none
2871  */
2872 static int
2873 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2874 {
2875         const char *propname = zc->zc_value;
2876         zfs_prop_t prop = zfs_name_to_prop(propname);
2877         boolean_t received = zc->zc_cookie;
2878         zprop_source_t source = (received
2879             ? ZPROP_SRC_NONE            /* revert to received value, if any */
2880             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
2881 
2882         if (received) {
2883                 nvlist_t *dummy;
2884                 nvpair_t *pair;
2885                 zprop_type_t type;
2886                 int err;
2887 
2888                 /*
2889                  * zfs_prop_set_special() expects properties in the form of an
2890                  * nvpair with type info.
2891                  */
2892                 if (prop == ZPROP_INVAL) {
2893                         if (!zfs_prop_user(propname))
2894                                 return (SET_ERROR(EINVAL));
2895 
2896                         type = PROP_TYPE_STRING;
2897                 } else if (prop == ZFS_PROP_VOLSIZE ||
2898                     prop == ZFS_PROP_VERSION) {
2899                         return (SET_ERROR(EINVAL));
2900                 } else {
2901                         type = zfs_prop_get_type(prop);
2902                 }
2903 
2904                 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2905 
2906                 switch (type) {
2907                 case PROP_TYPE_STRING:
2908                         VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2909                         break;
2910                 case PROP_TYPE_NUMBER:
2911                 case PROP_TYPE_INDEX:
2912                         VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2913                         break;
2914                 default:
2915                         nvlist_free(dummy);
2916                         return (SET_ERROR(EINVAL));
2917                 }
2918 
2919                 pair = nvlist_next_nvpair(dummy, NULL);
2920                 err = zfs_prop_set_special(zc->zc_name, source, pair);
2921                 nvlist_free(dummy);
2922                 if (err != -1)
2923                         return (err); /* special property already handled */
2924         } else {
2925                 /*
2926                  * Only check this in the non-received case. We want to allow
2927                  * 'inherit -S' to revert non-inheritable properties like quota
2928                  * and reservation to the received or default values even though
2929                  * they are not considered inheritable.
2930                  */
2931                 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2932                         return (SET_ERROR(EINVAL));
2933         }
2934 
2935         /* property name has been validated by zfs_secpolicy_inherit_prop() */
2936         return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2937 }
2938 
2939 static int
2940 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2941 {
2942         nvlist_t *props;
2943         spa_t *spa;
2944         int error;
2945         nvpair_t *pair;
2946 
2947         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2948             zc->zc_iflags, &props))
2949                 return (error);
2950 
2951         /*
2952          * If the only property is the configfile, then just do a spa_lookup()
2953          * to handle the faulted case.
2954          */
2955         pair = nvlist_next_nvpair(props, NULL);
2956         if (pair != NULL && strcmp(nvpair_name(pair),
2957             zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2958             nvlist_next_nvpair(props, pair) == NULL) {
2959                 mutex_enter(&spa_namespace_lock);
2960                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2961                         spa_configfile_set(spa, props, B_FALSE);
2962                         spa_write_cachefile(spa, B_FALSE, B_TRUE);
2963                 }
2964                 mutex_exit(&spa_namespace_lock);
2965                 if (spa != NULL) {
2966                         nvlist_free(props);
2967                         return (0);
2968                 }
2969         }
2970 
2971         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2972                 nvlist_free(props);
2973                 return (error);
2974         }
2975 
2976         error = spa_prop_set(spa, props);
2977 
2978         nvlist_free(props);
2979         spa_close(spa, FTAG);
2980 
2981         return (error);
2982 }
2983 
2984 static int
2985 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2986 {
2987         spa_t *spa;
2988         int error;
2989         nvlist_t *nvp = NULL;
2990 
2991         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2992                 /*
2993                  * If the pool is faulted, there may be properties we can still
2994                  * get (such as altroot and cachefile), so attempt to get them
2995                  * anyway.
2996                  */
2997                 mutex_enter(&spa_namespace_lock);
2998                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2999                         error = spa_prop_get(spa, &nvp);
3000                 mutex_exit(&spa_namespace_lock);
3001         } else {
3002                 error = spa_prop_get(spa, &nvp);
3003                 spa_close(spa, FTAG);
3004         }
3005 
3006         if (error == 0 && zc->zc_nvlist_dst != 0)
3007                 error = put_nvlist(zc, nvp);
3008         else
3009                 error = SET_ERROR(EFAULT);
3010 
3011         nvlist_free(nvp);
3012         return (error);
3013 }
3014 
3015 /*
3016  * inputs:
3017  * zc_name              name of filesystem
3018  * zc_nvlist_src{_size} nvlist of delegated permissions
3019  * zc_perm_action       allow/unallow flag
3020  *
3021  * outputs:             none
3022  */
3023 static int
3024 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3025 {
3026         int error;
3027         nvlist_t *fsaclnv = NULL;
3028 
3029         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3030             zc->zc_iflags, &fsaclnv)) != 0)
3031                 return (error);
3032 
3033         /*
3034          * Verify nvlist is constructed correctly
3035          */
3036         if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3037                 nvlist_free(fsaclnv);
3038                 return (SET_ERROR(EINVAL));
3039         }
3040 
3041         /*
3042          * If we don't have PRIV_SYS_MOUNT, then validate
3043          * that user is allowed to hand out each permission in
3044          * the nvlist(s)
3045          */
3046 
3047         error = secpolicy_zfs(CRED());
3048         if (error != 0) {
3049                 if (zc->zc_perm_action == B_FALSE) {
3050                         error = dsl_deleg_can_allow(zc->zc_name,
3051                             fsaclnv, CRED());
3052                 } else {
3053                         error = dsl_deleg_can_unallow(zc->zc_name,
3054                             fsaclnv, CRED());
3055                 }
3056         }
3057 
3058         if (error == 0)
3059                 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3060 
3061         nvlist_free(fsaclnv);
3062         return (error);
3063 }
3064 
3065 /*
3066  * inputs:
3067  * zc_name              name of filesystem
3068  *
3069  * outputs:
3070  * zc_nvlist_src{_size} nvlist of delegated permissions
3071  */
3072 static int
3073 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3074 {
3075         nvlist_t *nvp;
3076         int error;
3077 
3078         if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3079                 error = put_nvlist(zc, nvp);
3080                 nvlist_free(nvp);
3081         }
3082 
3083         return (error);
3084 }
3085 
3086 /* ARGSUSED */
3087 static void
3088 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3089 {
3090         zfs_creat_t *zct = arg;
3091 
3092         zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3093 }
3094 
3095 #define ZFS_PROP_UNDEFINED      ((uint64_t)-1)
3096 
3097 /*
3098  * inputs:
3099  * os                   parent objset pointer (NULL if root fs)
3100  * fuids_ok             fuids allowed in this version of the spa?
3101  * sa_ok                SAs allowed in this version of the spa?
3102  * createprops          list of properties requested by creator
3103  *
3104  * outputs:
3105  * zplprops     values for the zplprops we attach to the master node object
3106  * is_ci        true if requested file system will be purely case-insensitive
3107  *
3108  * Determine the settings for utf8only, normalization and
3109  * casesensitivity.  Specific values may have been requested by the
3110  * creator and/or we can inherit values from the parent dataset.  If
3111  * the file system is of too early a vintage, a creator can not
3112  * request settings for these properties, even if the requested
3113  * setting is the default value.  We don't actually want to create dsl
3114  * properties for these, so remove them from the source nvlist after
3115  * processing.
3116  */
3117 static int
3118 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3119     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3120     nvlist_t *zplprops, boolean_t *is_ci)
3121 {
3122         uint64_t sense = ZFS_PROP_UNDEFINED;
3123         uint64_t norm = ZFS_PROP_UNDEFINED;
3124         uint64_t u8 = ZFS_PROP_UNDEFINED;
3125 
3126         ASSERT(zplprops != NULL);
3127 
3128         if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3129                 return (SET_ERROR(EINVAL));
3130 
3131         /*
3132          * Pull out creator prop choices, if any.
3133          */
3134         if (createprops) {
3135                 (void) nvlist_lookup_uint64(createprops,
3136                     zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3137                 (void) nvlist_lookup_uint64(createprops,
3138                     zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3139                 (void) nvlist_remove_all(createprops,
3140                     zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3141                 (void) nvlist_lookup_uint64(createprops,
3142                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3143                 (void) nvlist_remove_all(createprops,
3144                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3145                 (void) nvlist_lookup_uint64(createprops,
3146                     zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3147                 (void) nvlist_remove_all(createprops,
3148                     zfs_prop_to_name(ZFS_PROP_CASE));
3149         }
3150 
3151         /*
3152          * If the zpl version requested is whacky or the file system
3153          * or pool is version is too "young" to support normalization
3154          * and the creator tried to set a value for one of the props,
3155          * error out.
3156          */
3157         if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3158             (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3159             (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3160             (zplver < ZPL_VERSION_NORMALIZATION &&
3161             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3162             sense != ZFS_PROP_UNDEFINED)))
3163                 return (SET_ERROR(ENOTSUP));
3164 
3165         /*
3166          * Put the version in the zplprops
3167          */
3168         VERIFY(nvlist_add_uint64(zplprops,
3169             zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3170 
3171         if (norm == ZFS_PROP_UNDEFINED)
3172                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3173         VERIFY(nvlist_add_uint64(zplprops,
3174             zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3175 
3176         /*
3177          * If we're normalizing, names must always be valid UTF-8 strings.
3178          */
3179         if (norm)
3180                 u8 = 1;
3181         if (u8 == ZFS_PROP_UNDEFINED)
3182                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3183         VERIFY(nvlist_add_uint64(zplprops,
3184             zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3185 
3186         if (sense == ZFS_PROP_UNDEFINED)
3187                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3188         VERIFY(nvlist_add_uint64(zplprops,
3189             zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3190 
3191         if (is_ci)
3192                 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3193 
3194         return (0);
3195 }
3196 
3197 static int
3198 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3199     nvlist_t *zplprops, boolean_t *is_ci)
3200 {
3201         boolean_t fuids_ok, sa_ok;
3202         uint64_t zplver = ZPL_VERSION;
3203         objset_t *os = NULL;
3204         char parentname[ZFS_MAX_DATASET_NAME_LEN];
3205         char *cp;
3206         spa_t *spa;
3207         uint64_t spa_vers;
3208         int error;
3209 
3210         (void) strlcpy(parentname, dataset, sizeof (parentname));
3211         cp = strrchr(parentname, '/');
3212         ASSERT(cp != NULL);
3213         cp[0] = '\0';
3214 
3215         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3216                 return (error);
3217 
3218         spa_vers = spa_version(spa);
3219         spa_close(spa, FTAG);
3220 
3221         zplver = zfs_zpl_version_map(spa_vers);
3222         fuids_ok = (zplver >= ZPL_VERSION_FUID);
3223         sa_ok = (zplver >= ZPL_VERSION_SA);
3224 
3225         /*
3226          * Open parent object set so we can inherit zplprop values.
3227          */
3228         if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3229                 return (error);
3230 
3231         error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3232             zplprops, is_ci);
3233         dmu_objset_rele(os, FTAG);
3234         return (error);
3235 }
3236 
3237 static int
3238 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3239     nvlist_t *zplprops, boolean_t *is_ci)
3240 {
3241         boolean_t fuids_ok;
3242         boolean_t sa_ok;
3243         uint64_t zplver = ZPL_VERSION;
3244         int error;
3245 
3246         zplver = zfs_zpl_version_map(spa_vers);
3247         fuids_ok = (zplver >= ZPL_VERSION_FUID);
3248         sa_ok = (zplver >= ZPL_VERSION_SA);
3249 
3250         error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3251             createprops, zplprops, is_ci);
3252         return (error);
3253 }
3254 
3255 /*
3256  * innvl: {
3257  *     "type" -> dmu_objset_type_t (int32)
3258  *     (optional) "props" -> { prop -> value }
3259  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3260  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3261  * }
3262  *
3263  * outnvl: propname -> error code (int32)
3264  */
3265 static int
3266 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3267 {
3268         int error = 0;
3269         zfs_creat_t zct = { 0 };
3270         nvlist_t *nvprops = NULL;
3271         nvlist_t *hidden_args = NULL;
3272         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3273         int32_t type32;
3274         dmu_objset_type_t type;
3275         boolean_t is_insensitive = B_FALSE;
3276         dsl_crypto_params_t *dcp = NULL;
3277 
3278         if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3279                 return (SET_ERROR(EINVAL));
3280         type = type32;
3281         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3282         (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3283 
3284         switch (type) {
3285         case DMU_OST_ZFS:
3286                 cbfunc = zfs_create_cb;
3287                 break;
3288 
3289         case DMU_OST_ZVOL:
3290                 cbfunc = zvol_create_cb;
3291                 break;
3292 
3293         default:
3294                 cbfunc = NULL;
3295                 break;
3296         }
3297         if (strchr(fsname, '@') ||
3298             strchr(fsname, '%'))
3299                 return (SET_ERROR(EINVAL));
3300 
3301         zct.zct_props = nvprops;
3302 
3303         if (cbfunc == NULL)
3304                 return (SET_ERROR(EINVAL));
3305 
3306         if (type == DMU_OST_ZVOL) {
3307                 uint64_t volsize, volblocksize;
3308 
3309                 if (nvprops == NULL)
3310                         return (SET_ERROR(EINVAL));
3311                 if (nvlist_lookup_uint64(nvprops,
3312                     zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3313                         return (SET_ERROR(EINVAL));
3314 
3315                 if ((error = nvlist_lookup_uint64(nvprops,
3316                     zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3317                     &volblocksize)) != 0 && error != ENOENT)
3318                         return (SET_ERROR(EINVAL));
3319 
3320                 if (error != 0)
3321                         volblocksize = zfs_prop_default_numeric(
3322                             ZFS_PROP_VOLBLOCKSIZE);
3323 
3324                 if ((error = zvol_check_volblocksize(
3325                     volblocksize)) != 0 ||
3326                     (error = zvol_check_volsize(volsize,
3327                     volblocksize)) != 0)
3328                         return (error);
3329         } else if (type == DMU_OST_ZFS) {
3330                 int error;
3331 
3332                 /*
3333                  * We have to have normalization and
3334                  * case-folding flags correct when we do the
3335                  * file system creation, so go figure them out
3336                  * now.
3337                  */
3338                 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3339                     NV_UNIQUE_NAME, KM_SLEEP) == 0);
3340                 error = zfs_fill_zplprops(fsname, nvprops,
3341                     zct.zct_zplprops, &is_insensitive);
3342                 if (error != 0) {
3343                         nvlist_free(zct.zct_zplprops);
3344                         return (error);
3345                 }
3346         }
3347 
3348         error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3349             hidden_args, &dcp);
3350         if (error != 0) {
3351                 nvlist_free(zct.zct_zplprops);
3352                 return (error);
3353         }
3354 
3355         error = dmu_objset_create(fsname, type,
3356             is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3357 
3358         nvlist_free(zct.zct_zplprops);
3359         dsl_crypto_params_free(dcp, !!error);
3360 
3361         /*
3362          * It would be nice to do this atomically.
3363          */
3364         if (error == 0) {
3365                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3366                     nvprops, outnvl);
3367                 if (error != 0)
3368                         (void) dsl_destroy_head(fsname);
3369         }
3370         return (error);
3371 }
3372 
3373 /*
3374  * innvl: {
3375  *     "origin" -> name of origin snapshot
3376  *     (optional) "props" -> { prop -> value }
3377  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3378  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3379  * }
3380  *
3381  * outnvl: propname -> error code (int32)
3382  */
3383 static int
3384 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3385 {
3386         int error = 0;
3387         nvlist_t *nvprops = NULL;
3388         char *origin_name;
3389 
3390         if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3391                 return (SET_ERROR(EINVAL));
3392         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3393 
3394         if (strchr(fsname, '@') ||
3395             strchr(fsname, '%'))
3396                 return (SET_ERROR(EINVAL));
3397 
3398         if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3399                 return (SET_ERROR(EINVAL));
3400 
3401         error = dmu_objset_clone(fsname, origin_name);
3402 
3403         /*
3404          * It would be nice to do this atomically.
3405          */
3406         if (error == 0) {
3407                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3408                     nvprops, outnvl);
3409                 if (error != 0)
3410                         (void) dsl_destroy_head(fsname);
3411         }
3412         return (error);
3413 }
3414 
3415 /* ARGSUSED */
3416 static int
3417 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3418 {
3419         if (strchr(fsname, '@') ||
3420             strchr(fsname, '%'))
3421                 return (SET_ERROR(EINVAL));
3422 
3423         return (dmu_objset_remap_indirects(fsname));
3424 }
3425 
3426 /*
3427  * innvl: {
3428  *     "snaps" -> { snapshot1, snapshot2 }
3429  *     (optional) "props" -> { prop -> value (string) }
3430  * }
3431  *
3432  * outnvl: snapshot -> error code (int32)
3433  */
3434 static int
3435 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3436 {
3437         nvlist_t *snaps;
3438         nvlist_t *props = NULL;
3439         int error, poollen;
3440         nvpair_t *pair;
3441 
3442         (void) nvlist_lookup_nvlist(innvl, "props", &props);
3443         if ((error = zfs_check_userprops(poolname, props)) != 0)
3444                 return (error);
3445 
3446         if (!nvlist_empty(props) &&
3447             zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3448                 return (SET_ERROR(ENOTSUP));
3449 
3450         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3451                 return (SET_ERROR(EINVAL));
3452         poollen = strlen(poolname);
3453         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3454             pair = nvlist_next_nvpair(snaps, pair)) {
3455                 const char *name = nvpair_name(pair);
3456                 const char *cp = strchr(name, '@');
3457 
3458                 /*
3459                  * The snap name must contain an @, and the part after it must
3460                  * contain only valid characters.
3461                  */
3462                 if (cp == NULL ||
3463                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3464                         return (SET_ERROR(EINVAL));
3465 
3466                 /*
3467                  * The snap must be in the specified pool.
3468                  */
3469                 if (strncmp(name, poolname, poollen) != 0 ||
3470                     (name[poollen] != '/' && name[poollen] != '@'))
3471                         return (SET_ERROR(EXDEV));
3472 
3473                 /* This must be the only snap of this fs. */
3474                 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3475                     pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3476                         if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3477                             == 0) {
3478                                 return (SET_ERROR(EXDEV));
3479                         }
3480                 }
3481         }
3482 
3483         error = dsl_dataset_snapshot(snaps, props, outnvl);
3484         return (error);
3485 }
3486 
3487 /*
3488  * innvl: "message" -> string
3489  */
3490 /* ARGSUSED */
3491 static int
3492 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3493 {
3494         char *message;
3495         spa_t *spa;
3496         int error;
3497         char *poolname;
3498 
3499         /*
3500          * The poolname in the ioctl is not set, we get it from the TSD,
3501          * which was set at the end of the last successful ioctl that allows
3502          * logging.  The secpolicy func already checked that it is set.
3503          * Only one log ioctl is allowed after each successful ioctl, so
3504          * we clear the TSD here.
3505          */
3506         poolname = tsd_get(zfs_allow_log_key);
3507         (void) tsd_set(zfs_allow_log_key, NULL);
3508         error = spa_open(poolname, &spa, FTAG);
3509         strfree(poolname);
3510         if (error != 0)
3511                 return (error);
3512 
3513         if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3514                 spa_close(spa, FTAG);
3515                 return (SET_ERROR(EINVAL));
3516         }
3517 
3518         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3519                 spa_close(spa, FTAG);
3520                 return (SET_ERROR(ENOTSUP));
3521         }
3522 
3523         error = spa_history_log(spa, message);
3524         spa_close(spa, FTAG);
3525         return (error);
3526 }
3527 
3528 /*
3529  * The dp_config_rwlock must not be held when calling this, because the
3530  * unmount may need to write out data.
3531  *
3532  * This function is best-effort.  Callers must deal gracefully if it
3533  * remains mounted (or is remounted after this call).
3534  *
3535  * Returns 0 if the argument is not a snapshot, or it is not currently a
3536  * filesystem, or we were able to unmount it.  Returns error code otherwise.
3537  */
3538 void
3539 zfs_unmount_snap(const char *snapname)
3540 {
3541         vfs_t *vfsp = NULL;
3542         zfsvfs_t *zfsvfs = NULL;
3543 
3544         if (strchr(snapname, '@') == NULL)
3545                 return;
3546 
3547         int err = getzfsvfs(snapname, &zfsvfs);
3548         if (err != 0) {
3549                 ASSERT3P(zfsvfs, ==, NULL);
3550                 return;
3551         }
3552         vfsp = zfsvfs->z_vfs;
3553 
3554         ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3555 
3556         err = vn_vfswlock(vfsp->vfs_vnodecovered);
3557         VFS_RELE(vfsp);
3558         if (err != 0)
3559                 return;
3560 
3561         /*
3562          * Always force the unmount for snapshots.
3563          */
3564         (void) dounmount(vfsp, MS_FORCE, kcred);
3565 }
3566 
/*
 * Callback-shaped wrapper around zfs_unmount_snap().  The extra argument is
 * ignored and 0 is always returned, because the unmount itself is
 * best-effort and reports no status.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
        zfs_unmount_snap(snapname);

        return (0);
}
3574 
3575 /*
3576  * When a clone is destroyed, its origin may also need to be destroyed,
3577  * in which case it must be unmounted.  This routine will do that unmount
3578  * if necessary.
3579  */
3580 void
3581 zfs_destroy_unmount_origin(const char *fsname)
3582 {
3583         int error;
3584         objset_t *os;
3585         dsl_dataset_t *ds;
3586 
3587         error = dmu_objset_hold(fsname, FTAG, &os);
3588         if (error != 0)
3589                 return;
3590         ds = dmu_objset_ds(os);
3591         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3592                 char originname[ZFS_MAX_DATASET_NAME_LEN];
3593                 dsl_dataset_name(ds->ds_prev, originname);
3594                 dmu_objset_rele(os, FTAG);
3595                 zfs_unmount_snap(originname);
3596         } else {
3597                 dmu_objset_rele(os, FTAG);
3598         }
3599 }
3600 
3601 /*
3602  * innvl: {
3603  *     "snaps" -> { snapshot1, snapshot2 }
3604  *     (optional boolean) "defer"
3605  * }
3606  *
3607  * outnvl: snapshot -> error code (int32)
3608  *
3609  */
3610 /* ARGSUSED */
3611 static int
3612 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3613 {
3614         nvlist_t *snaps;
3615         nvpair_t *pair;
3616         boolean_t defer;
3617 
3618         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3619                 return (SET_ERROR(EINVAL));
3620         defer = nvlist_exists(innvl, "defer");
3621 
3622         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3623             pair = nvlist_next_nvpair(snaps, pair)) {
3624                 zfs_unmount_snap(nvpair_name(pair));
3625         }
3626 
3627         return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3628 }
3629 
3630 /*
3631  * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3632  * All bookmarks must be in the same pool.
3633  *
3634  * innvl: {
3635  *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3636  * }
3637  *
3638  * outnvl: bookmark -> error code (int32)
3639  *
3640  */
3641 /* ARGSUSED */
3642 static int
3643 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3644 {
3645         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3646             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3647                 char *snap_name;
3648 
3649                 /*
3650                  * Verify the snapshot argument.
3651                  */
3652                 if (nvpair_value_string(pair, &snap_name) != 0)
3653                         return (SET_ERROR(EINVAL));
3654 
3655 
3656                 /* Verify that the keys (bookmarks) are unique */
3657                 for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3658                     pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3659                         if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3660                                 return (SET_ERROR(EINVAL));
3661                 }
3662         }
3663 
3664         return (dsl_bookmark_create(innvl, outnvl));
3665 }
3666 
3667 /*
3668  * innvl: {
3669  *     property 1, property 2, ...
3670  * }
3671  *
3672  * outnvl: {
3673  *     bookmark name 1 -> { property 1, property 2, ... },
3674  *     bookmark name 2 -> { property 1, property 2, ... }
3675  * }
3676  *
3677  */
3678 static int
3679 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3680 {
3681         return (dsl_get_bookmarks(fsname, innvl, outnvl));
3682 }
3683 
3684 /*
3685  * innvl: {
3686  *     bookmark name 1, bookmark name 2
3687  * }
3688  *
3689  * outnvl: bookmark -> error code (int32)
3690  *
3691  */
3692 static int
3693 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3694     nvlist_t *outnvl)
3695 {
3696         int error, poollen;
3697 
3698         poollen = strlen(poolname);
3699         for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3700             pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3701                 const char *name = nvpair_name(pair);
3702                 const char *cp = strchr(name, '#');
3703 
3704                 /*
3705                  * The bookmark name must contain an #, and the part after it
3706                  * must contain only valid characters.
3707                  */
3708                 if (cp == NULL ||
3709                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3710                         return (SET_ERROR(EINVAL));
3711 
3712                 /*
3713                  * The bookmark must be in the specified pool.
3714                  */
3715                 if (strncmp(name, poolname, poollen) != 0 ||
3716                     (name[poollen] != '/' && name[poollen] != '#'))
3717                         return (SET_ERROR(EXDEV));
3718         }
3719 
3720         error = dsl_bookmark_destroy(innvl, outnvl);
3721         return (error);
3722 }
3723 
3724 static int
3725 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3726     nvlist_t *outnvl)
3727 {
3728         char *program;
3729         uint64_t instrlimit, memlimit;
3730         boolean_t sync_flag;
3731         nvpair_t *nvarg = NULL;
3732 
3733         if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
3734                 return (EINVAL);
3735         }
3736         if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3737                 sync_flag = B_TRUE;
3738         }
3739         if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3740                 instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3741         }
3742         if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3743                 memlimit = ZCP_DEFAULT_MEMLIMIT;
3744         }
3745         if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
3746                 return (EINVAL);
3747         }
3748 
3749         if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3750                 return (EINVAL);
3751         if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3752                 return (EINVAL);
3753 
3754         return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3755             nvarg, outnvl));
3756 }
3757 
3758 /*
3759  * innvl: unused
3760  * outnvl: empty
3761  */
3762 /* ARGSUSED */
3763 static int
3764 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3765 {
3766         return (spa_checkpoint(poolname));
3767 }
3768 
3769 /*
3770  * innvl: unused
3771  * outnvl: empty
3772  */
3773 /* ARGSUSED */
3774 static int
3775 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
3776     nvlist_t *outnvl)
3777 {
3778         return (spa_checkpoint_discard(poolname));
3779 }
3780 
3781 /*
3782  * inputs:
3783  * zc_name              name of dataset to destroy
3784  * zc_defer_destroy     mark for deferred destroy
3785  *
3786  * outputs:             none
3787  */
3788 static int
3789 zfs_ioc_destroy(zfs_cmd_t *zc)
3790 {
3791         objset_t *os;
3792         dmu_objset_type_t ost;
3793         int err;
3794 
3795         err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3796         if (err != 0)
3797                 return (err);
3798         ost = dmu_objset_type(os);
3799         dmu_objset_rele(os, FTAG);
3800 
3801         if (ost == DMU_OST_ZFS)
3802                 zfs_unmount_snap(zc->zc_name);
3803 
3804         if (strchr(zc->zc_name, '@')) {
3805                 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3806         } else {
3807                 err = dsl_destroy_head(zc->zc_name);
3808                 if (err == EEXIST) {
3809                         /*
3810                          * It is possible that the given DS may have
3811                          * hidden child (%recv) datasets - "leftovers"
3812                          * resulting from the previously interrupted
3813                          * 'zfs receive'.
3814                          *
3815                          * 6 extra bytes for /%recv
3816                          */
3817                         char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
3818 
3819                         if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
3820                             zc->zc_name, recv_clone_name) >=
3821                             sizeof (namebuf))
3822                                 return (SET_ERROR(EINVAL));
3823 
3824                         /*
3825                          * Try to remove the hidden child (%recv) and after
3826                          * that try to remove the target dataset.
3827                          * If the hidden child (%recv) does not exist
3828                          * the original error (EEXIST) will be returned
3829                          */
3830                         err = dsl_destroy_head(namebuf);
3831                         if (err == 0)
3832                                 err = dsl_destroy_head(zc->zc_name);
3833                         else if (err == ENOENT)
3834                                 err = SET_ERROR(EEXIST);
3835                 }
3836         }
3837         if (ost == DMU_OST_ZVOL && err == 0)
3838                 (void) zvol_remove_minor(zc->zc_name);
3839         return (err);
3840 }
3841 
3842 /*
3843  * innvl: {
3844  *     "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
3845  *     "initialize_vdevs": { -> guids to initialize (nvlist)
3846  *         "vdev_path_1": vdev_guid_1, (uint64),
3847  *         "vdev_path_2": vdev_guid_2, (uint64),
3848  *         ...
3849  *     },
3850  * }
3851  *
3852  * outnvl: {
3853  *     "initialize_vdevs": { -> initialization errors (nvlist)
3854  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
3855  *         "vdev_path_2": errno, ... (uint64)
3856  *         ...
3857  *     }
3858  * }
3859  *
3860  * EINVAL is returned for an unknown command or if any of the provided vdev
3861  * guids have be specified with a type other than uint64.
3862  */
3863 static int
3864 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3865 {
3866         uint64_t cmd_type;
3867         if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3868             &cmd_type) != 0) {
3869                 return (SET_ERROR(EINVAL));
3870         }
3871 
3872         if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3873             cmd_type == POOL_INITIALIZE_START ||
3874             cmd_type == POOL_INITIALIZE_SUSPEND)) {
3875                 return (SET_ERROR(EINVAL));
3876         }
3877 
3878         nvlist_t *vdev_guids;
3879         if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3880             &vdev_guids) != 0) {
3881                 return (SET_ERROR(EINVAL));
3882         }
3883 
3884         for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3885             pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3886                 uint64_t vdev_guid;
3887                 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
3888                         return (SET_ERROR(EINVAL));
3889                 }
3890         }
3891 
3892         spa_t *spa;
3893         int error = spa_open(poolname, &spa, FTAG);
3894         if (error != 0)
3895                 return (error);
3896 
3897         nvlist_t *vdev_errlist = fnvlist_alloc();
3898         int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
3899             vdev_errlist);
3900 
3901         if (fnvlist_size(vdev_errlist) > 0) {
3902                 fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
3903                     vdev_errlist);
3904         }
3905         fnvlist_free(vdev_errlist);
3906 
3907         spa_close(spa, FTAG);
3908         return (total_errors > 0 ? EINVAL : 0);
3909 }
3910 
3911 /*
3912  * innvl: {
3913  *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
3914  *     "trim_vdevs": { -> guids to TRIM (nvlist)
3915  *         "vdev_path_1": vdev_guid_1, (uint64),
3916  *         "vdev_path_2": vdev_guid_2, (uint64),
3917  *         ...
3918  *     },
3919  *     "trim_rate" -> Target TRIM rate in bytes/sec.
3920  *     "trim_secure" -> Set to request a secure TRIM.
3921  * }
3922  *
3923  * outnvl: {
3924  *     "trim_vdevs": { -> TRIM errors (nvlist)
3925  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
3926  *         "vdev_path_2": errno, ... (uint64)
3927  *         ...
3928  *     }
3929  * }
3930  *
3931  * EINVAL is returned for an unknown command or if any of the provided vdev
3932  * guids have be specified with a type other than uint64.
3933  */
3934 
3935 static int
3936 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3937 {
3938         uint64_t cmd_type;
3939         if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
3940                 return (SET_ERROR(EINVAL));
3941 
3942         if (!(cmd_type == POOL_TRIM_CANCEL ||
3943             cmd_type == POOL_TRIM_START ||
3944             cmd_type == POOL_TRIM_SUSPEND)) {
3945                 return (SET_ERROR(EINVAL));
3946         }
3947 
3948         nvlist_t *vdev_guids;
3949         if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
3950                 return (SET_ERROR(EINVAL));
3951 
3952         for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3953             pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3954                 uint64_t vdev_guid;
3955                 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
3956                         return (SET_ERROR(EINVAL));
3957                 }
3958         }
3959 
3960         /* Optional, defaults to maximum rate when not provided */
3961         uint64_t rate;
3962         if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
3963                 rate = 0;
3964 
3965         /* Optional, defaults to standard TRIM when not provided */
3966         boolean_t secure;
3967         if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
3968             &secure) != 0) {
3969                 secure = B_FALSE;
3970         }
3971 
3972         spa_t *spa;
3973         int error = spa_open(poolname, &spa, FTAG);
3974         if (error != 0)
3975                 return (error);
3976 
3977         nvlist_t *vdev_errlist = fnvlist_alloc();
3978         int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
3979             rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
3980 
3981         if (fnvlist_size(vdev_errlist) > 0)
3982                 fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
3983 
3984         fnvlist_free(vdev_errlist);
3985 
3986         spa_close(spa, FTAG);
3987         return (total_errors > 0 ? EINVAL : 0);
3988 }
3989 
3990 /*
3991  * fsname is name of dataset to rollback (to most recent snapshot)
3992  *
3993  * innvl may contain name of expected target snapshot
3994  *
3995  * outnvl: "target" -> name of most recent snapshot
3996  * }
3997  */
3998 /* ARGSUSED */
3999 static int
4000 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4001 {
4002         zfsvfs_t *zfsvfs;
4003         char *target = NULL;
4004         int error;
4005 
4006         (void) nvlist_lookup_string(innvl, "target", &target);
4007         if (target != NULL) {
4008                 const char *cp = strchr(target, '@');
4009 
4010                 /*
4011                  * The snap name must contain an @, and the part after it must
4012                  * contain only valid characters.
4013                  */
4014                 if (cp == NULL ||
4015                     zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4016                         return (SET_ERROR(EINVAL));
4017         }
4018 
4019         if (getzfsvfs(fsname, &zfsvfs) == 0) {
4020                 dsl_dataset_t *ds;
4021 
4022                 ds = dmu_objset_ds(zfsvfs->z_os);
4023                 error = zfs_suspend_fs(zfsvfs);
4024                 if (error == 0) {
4025                         int resume_err;
4026 
4027                         error = dsl_dataset_rollback(fsname, target, zfsvfs,
4028                             outnvl);
4029                         resume_err = zfs_resume_fs(zfsvfs, ds);
4030                         error = error ? error : resume_err;
4031                 }
4032                 VFS_RELE(zfsvfs->z_vfs);
4033         } else {
4034                 error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4035         }
4036         return (error);
4037 }
4038 
4039 static int
4040 recursive_unmount(const char *fsname, void *arg)
4041 {
4042         const char *snapname = arg;
4043         char fullname[ZFS_MAX_DATASET_NAME_LEN];
4044 
4045         (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
4046         zfs_unmount_snap(fullname);
4047 
4048         return (0);
4049 }
4050 
4051 /*
4052  * inputs:
4053  * zc_name      old name of dataset
4054  * zc_value     new name of dataset
4055  * zc_cookie    recursive flag (only valid for snapshots)
4056  *
4057  * outputs:     none
4058  */
4059 static int
4060 zfs_ioc_rename(zfs_cmd_t *zc)
4061 {
4062         objset_t *os;
4063         dmu_objset_type_t ost;
4064         boolean_t recursive = zc->zc_cookie & 1;
4065         char *at;
4066         int err;
4067 
4068         /* "zfs rename" from and to ...%recv datasets should both fail */
4069         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4070         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4071         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4072             dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4073             strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
4074                 return (SET_ERROR(EINVAL));
4075 
4076         err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4077         if (err != 0)
4078                 return (err);
4079         ost = dmu_objset_type(os);
4080         dmu_objset_rele(os, FTAG);
4081 
4082         at = strchr(zc->zc_name, '@');
4083         if (at != NULL) {
4084                 /* snaps must be in same fs */
4085                 int error;
4086 
4087                 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4088                         return (SET_ERROR(EXDEV));
4089                 *at = '\0';
4090                 if (ost == DMU_OST_ZFS) {
4091                         error = dmu_objset_find(zc->zc_name,
4092                             recursive_unmount, at + 1,
4093                             recursive ? DS_FIND_CHILDREN : 0);
4094                         if (error != 0) {
4095                                 *at = '@';
4096                                 return (error);
4097                         }
4098                 }
4099                 error = dsl_dataset_rename_snapshot(zc->zc_name,
4100                     at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4101                 *at = '@';
4102 
4103                 return (error);
4104         } else {
4105                 if (ost == DMU_OST_ZVOL)
4106                         (void) zvol_remove_minor(zc->zc_name);
4107                 return (dsl_dir_rename(zc->zc_name, zc->zc_value));
4108         }
4109 }
4110 
4111 static int
4112 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4113 {
4114         const char *propname = nvpair_name(pair);
4115         boolean_t issnap = (strchr(dsname, '@') != NULL);
4116         zfs_prop_t prop = zfs_name_to_prop(propname);
4117         uint64_t intval;
4118         int err;
4119 
4120         if (prop == ZPROP_INVAL) {
4121                 if (zfs_prop_user(propname)) {
4122                         if (err = zfs_secpolicy_write_perms(dsname,
4123                             ZFS_DELEG_PERM_USERPROP, cr))
4124                                 return (err);
4125                         return (0);
4126                 }
4127 
4128                 if (!issnap && zfs_prop_userquota(propname)) {
4129                         const char *perm = NULL;
4130                         const char *uq_prefix =
4131                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4132                         const char *gq_prefix =
4133                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4134                         const char *uiq_prefix =
4135                             zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
4136                         const char *giq_prefix =
4137                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
4138                         const char *pq_prefix =
4139                             zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
4140                         const char *piq_prefix = zfs_userquota_prop_prefixes[\
4141                             ZFS_PROP_PROJECTOBJQUOTA];
4142 
4143                         if (strncmp(propname, uq_prefix,
4144                             strlen(uq_prefix)) == 0) {
4145                                 perm = ZFS_DELEG_PERM_USERQUOTA;
4146                         } else if (strncmp(propname, uiq_prefix,
4147                             strlen(uiq_prefix)) == 0) {
4148                                 perm = ZFS_DELEG_PERM_USEROBJQUOTA;
4149                         } else if (strncmp(propname, gq_prefix,
4150                             strlen(gq_prefix)) == 0) {
4151                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
4152                         } else if (strncmp(propname, giq_prefix,
4153                             strlen(giq_prefix)) == 0) {
4154                                 perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
4155                         } else if (strncmp(propname, pq_prefix,
4156                             strlen(pq_prefix)) == 0) {
4157                                 perm = ZFS_DELEG_PERM_PROJECTQUOTA;
4158                         } else if (strncmp(propname, piq_prefix,
4159                             strlen(piq_prefix)) == 0) {
4160                                 perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
4161                         } else {
4162                                 /* {USER|GROUP|PROJECT}USED are read-only */
4163                                 return (SET_ERROR(EINVAL));
4164                         }
4165 
4166                         if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
4167                                 return (err);
4168                         return (0);
4169                 }
4170 
4171                 return (SET_ERROR(EINVAL));
4172         }
4173 
4174         if (issnap)
4175                 return (SET_ERROR(EINVAL));
4176 
4177         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4178                 /*
4179                  * dsl_prop_get_all_impl() returns properties in this
4180                  * format.
4181                  */
4182                 nvlist_t *attrs;
4183                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4184                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4185                     &pair) == 0);
4186         }
4187 
4188         /*
4189          * Check that this value is valid for this pool version
4190          */
4191         switch (prop) {
4192         case ZFS_PROP_COMPRESSION:
4193                 /*
4194                  * If the user specified gzip compression, make sure
4195                  * the SPA supports it. We ignore any errors here since
4196                  * we'll catch them later.
4197                  */
4198                 if (nvpair_value_uint64(pair, &intval) == 0) {
4199                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
4200                             intval <= ZIO_COMPRESS_GZIP_9 &&
4201                             zfs_earlier_version(dsname,
4202                             SPA_VERSION_GZIP_COMPRESSION)) {
4203                                 return (SET_ERROR(ENOTSUP));
4204                         }
4205 
4206                         if (intval == ZIO_COMPRESS_ZLE &&
4207                             zfs_earlier_version(dsname,
4208                             SPA_VERSION_ZLE_COMPRESSION))
4209                                 return (SET_ERROR(ENOTSUP));
4210 
4211                         if (intval == ZIO_COMPRESS_LZ4) {
4212                                 spa_t *spa;
4213 
4214                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4215                                         return (err);
4216 
4217                                 if (!spa_feature_is_enabled(spa,
4218                                     SPA_FEATURE_LZ4_COMPRESS)) {
4219                                         spa_close(spa, FTAG);
4220                                         return (SET_ERROR(ENOTSUP));
4221                                 }
4222                                 spa_close(spa, FTAG);
4223                         }
4224 
4225                         /*
4226                          * If this is a bootable dataset then
4227                          * verify that the compression algorithm
4228                          * is supported for booting. We must return
4229                          * something other than ENOTSUP since it
4230                          * implies a downrev pool version.
4231                          */
4232                         if (zfs_is_bootfs(dsname) &&
4233                             !BOOTFS_COMPRESS_VALID(intval)) {
4234                                 return (SET_ERROR(ERANGE));
4235                         }
4236                 }
4237                 break;
4238 
4239         case ZFS_PROP_COPIES:
4240                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4241                         return (SET_ERROR(ENOTSUP));
4242                 break;
4243 
4244         case ZFS_PROP_RECORDSIZE:
4245                 /* Record sizes above 128k need the feature to be enabled */
4246                 if (nvpair_value_uint64(pair, &intval) == 0 &&
4247                     intval > SPA_OLD_MAXBLOCKSIZE) {
4248                         spa_t *spa;
4249 
4250                         /*
4251                          * We don't allow setting the property above 1MB,
4252                          * unless the tunable has been changed.
4253                          */
4254                         if (intval > zfs_max_recordsize ||
4255                             intval > SPA_MAXBLOCKSIZE)
4256                                 return (SET_ERROR(ERANGE));
4257 
4258                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4259                                 return (err);
4260 
4261                         if (!spa_feature_is_enabled(spa,
4262                             SPA_FEATURE_LARGE_BLOCKS)) {
4263                                 spa_close(spa, FTAG);
4264                                 return (SET_ERROR(ENOTSUP));
4265                         }
4266                         spa_close(spa, FTAG);
4267                 }
4268                 break;
4269 
4270         case ZFS_PROP_DNODESIZE:
4271                 /* Dnode sizes above 512 need the feature to be enabled */
4272                 if (nvpair_value_uint64(pair, &intval) == 0 &&
4273                     intval != ZFS_DNSIZE_LEGACY) {
4274                         spa_t *spa;
4275 
4276                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4277                                 return (err);
4278 
4279                         if (!spa_feature_is_enabled(spa,
4280                             SPA_FEATURE_LARGE_DNODE)) {
4281                                 spa_close(spa, FTAG);
4282                                 return (SET_ERROR(ENOTSUP));
4283                         }
4284                         spa_close(spa, FTAG);
4285                 }
4286                 break;
4287 
4288         case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4289                 /*
4290                  * This property could require the allocation classes
4291                  * feature to be active for setting, however we allow
4292                  * it so that tests of settable properties succeed.
4293                  * The CLI will issue a warning in this case.
4294                  */
4295                 break;
4296 
4297         case ZFS_PROP_SHARESMB:
4298                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4299                         return (SET_ERROR(ENOTSUP));
4300                 break;
4301 
4302         case ZFS_PROP_ACLINHERIT:
4303                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4304                     nvpair_value_uint64(pair, &intval) == 0) {
4305                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
4306                             zfs_earlier_version(dsname,
4307                             SPA_VERSION_PASSTHROUGH_X))
4308                                 return (SET_ERROR(ENOTSUP));
4309                 }
4310                 break;
4311 
4312         case ZFS_PROP_CHECKSUM:
4313         case ZFS_PROP_DEDUP:
4314         {
4315                 spa_feature_t feature;
4316                 spa_t *spa;
4317 
4318                 /* dedup feature version checks */
4319                 if (prop == ZFS_PROP_DEDUP &&
4320                     zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4321                         return (SET_ERROR(ENOTSUP));
4322 
4323                 if (nvpair_value_uint64(pair, &intval) != 0)
4324                         return (SET_ERROR(EINVAL));
4325 
4326                 /* check prop value is enabled in features */
4327                 feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4328                 if (feature == SPA_FEATURE_NONE)
4329                         break;
4330 
4331                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4332                         return (err);
4333 
4334                 if (!spa_feature_is_enabled(spa, feature)) {
4335                         spa_close(spa, FTAG);
4336                         return (SET_ERROR(ENOTSUP));
4337                 }
4338                 spa_close(spa, FTAG);
4339                 break;
4340         }
4341         }
4342 
4343         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4344 }
4345 
4346 /*
4347  * Checks for a race condition to make sure we don't increment a feature flag
4348  * multiple times.
4349  */
4350 static int
4351 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4352 {
4353         spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4354         spa_feature_t *featurep = arg;
4355 
4356         if (!spa_feature_is_active(spa, *featurep))
4357                 return (0);
4358         else
4359                 return (SET_ERROR(EBUSY));
4360 }
4361 
4362 /*
4363  * The callback invoked on feature activation in the sync task caused by
4364  * zfs_prop_activate_feature.
4365  */
4366 static void
4367 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4368 {
4369         spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4370         spa_feature_t *featurep = arg;
4371 
4372         spa_feature_incr(spa, *featurep, tx);
4373 }
4374 
4375 /*
4376  * Activates a feature on a pool in response to a property setting. This
4377  * creates a new sync task which modifies the pool to reflect the feature
4378  * as being active.
4379  */
4380 static int
4381 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4382 {
4383         int err;
4384 
4385         /* EBUSY here indicates that the feature is already active */
4386         err = dsl_sync_task(spa_name(spa),
4387             zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4388             &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4389 
4390         if (err != 0 && err != EBUSY)
4391                 return (err);
4392         else
4393                 return (0);
4394 }
4395 
4396 /*
4397  * Removes properties from the given props list that fail permission checks
4398  * needed to clear them and to restore them in case of a receive error. For each
4399  * property, make sure we have both set and inherit permissions.
4400  *
4401  * Returns the first error encountered if any permission checks fail. If the
4402  * caller provides a non-NULL errlist, it also gives the complete list of names
4403  * of all the properties that failed a permission check along with the
4404  * corresponding error numbers. The caller is responsible for freeing the
4405  * returned errlist.
4406  *
4407  * If every property checks out successfully, zero is returned and the list
4408  * pointed at by errlist is NULL.
4409  */
4410 static int
4411 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4412 {
4413         zfs_cmd_t *zc;
4414         nvpair_t *pair, *next_pair;
4415         nvlist_t *errors;
4416         int err, rv = 0;
4417 
4418         if (props == NULL)
4419                 return (0);
4420 
4421         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4422 
4423         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4424         (void) strcpy(zc->zc_name, dataset);
4425         pair = nvlist_next_nvpair(props, NULL);
4426         while (pair != NULL) {
4427                 next_pair = nvlist_next_nvpair(props, pair);
4428 
4429                 (void) strcpy(zc->zc_value, nvpair_name(pair));
4430                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4431                     (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4432                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4433                         VERIFY(nvlist_add_int32(errors,
4434                             zc->zc_value, err) == 0);
4435                 }
4436                 pair = next_pair;
4437         }
4438         kmem_free(zc, sizeof (zfs_cmd_t));
4439 
4440         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4441                 nvlist_free(errors);
4442                 errors = NULL;
4443         } else {
4444                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
4445         }
4446 
4447         if (errlist == NULL)
4448                 nvlist_free(errors);
4449         else
4450                 *errlist = errors;
4451 
4452         return (rv);
4453 }
4454 
4455 static boolean_t
4456 propval_equals(nvpair_t *p1, nvpair_t *p2)
4457 {
4458         if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4459                 /* dsl_prop_get_all_impl() format */
4460                 nvlist_t *attrs;
4461                 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4462                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4463                     &p1) == 0);
4464         }
4465 
4466         if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4467                 nvlist_t *attrs;
4468                 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4469                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4470                     &p2) == 0);
4471         }
4472 
4473         if (nvpair_type(p1) != nvpair_type(p2))
4474                 return (B_FALSE);
4475 
4476         if (nvpair_type(p1) == DATA_TYPE_STRING) {
4477                 char *valstr1, *valstr2;
4478 
4479                 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4480                 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4481                 return (strcmp(valstr1, valstr2) == 0);
4482         } else {
4483                 uint64_t intval1, intval2;
4484 
4485                 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4486                 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4487                 return (intval1 == intval2);
4488         }
4489 }
4490 
4491 /*
4492  * Remove properties from props if they are not going to change (as determined
4493  * by comparison with origprops). Remove them from origprops as well, since we
4494  * do not need to clear or restore properties that won't change.
4495  */
4496 static void
4497 props_reduce(nvlist_t *props, nvlist_t *origprops)
4498 {
4499         nvpair_t *pair, *next_pair;
4500 
4501         if (origprops == NULL)
4502                 return; /* all props need to be received */
4503 
4504         pair = nvlist_next_nvpair(props, NULL);
4505         while (pair != NULL) {
4506                 const char *propname = nvpair_name(pair);
4507                 nvpair_t *match;
4508 
4509                 next_pair = nvlist_next_nvpair(props, pair);
4510 
4511                 if ((nvlist_lookup_nvpair(origprops, propname,
4512                     &match) != 0) || !propval_equals(pair, match))
4513                         goto next; /* need to set received value */
4514 
4515                 /* don't clear the existing received value */
4516                 (void) nvlist_remove_nvpair(origprops, match);
4517                 /* don't bother receiving the property */
4518                 (void) nvlist_remove_nvpair(props, pair);
4519 next:
4520                 pair = next_pair;
4521         }
4522 }
4523 
4524 /*
4525  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4526  * For example, refquota cannot be set until after the receipt of a dataset,
4527  * because in replication streams, an older/earlier snapshot may exceed the
4528  * refquota.  We want to receive the older/earlier snapshot, but setting
4529  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4530  * the older/earlier snapshot from being received (with EDQUOT).
4531  *
4532  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4533  *
4534  * libzfs will need to be judicious handling errors encountered by props
4535  * extracted by this function.
4536  */
4537 static nvlist_t *
4538 extract_delay_props(nvlist_t *props)
4539 {
4540         nvlist_t *delayprops;
4541         nvpair_t *nvp, *tmp;
4542         static const zfs_prop_t delayable[] = {
4543                 ZFS_PROP_REFQUOTA,
4544                 ZFS_PROP_KEYLOCATION,
4545                 0
4546         };
4547         int i;
4548 
4549         VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4550 
4551         for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4552             nvp = nvlist_next_nvpair(props, nvp)) {
4553                 /*
4554                  * strcmp() is safe because zfs_prop_to_name() always returns
4555                  * a bounded string.
4556                  */
4557                 for (i = 0; delayable[i] != 0; i++) {
4558                         if (strcmp(zfs_prop_to_name(delayable[i]),
4559                             nvpair_name(nvp)) == 0) {
4560                                 break;
4561                         }
4562                 }
4563                 if (delayable[i] != 0) {
4564                         tmp = nvlist_prev_nvpair(props, nvp);
4565                         VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4566                         VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4567                         nvp = tmp;
4568                 }
4569         }
4570 
4571         if (nvlist_empty(delayprops)) {
4572                 nvlist_free(delayprops);
4573                 delayprops = NULL;
4574         }
4575         return (delayprops);
4576 }
4577 
#ifdef  DEBUG
/*
 * Debug-only error injection switch.  When set, zfs_ioc_recv_impl() clears it
 * and forces its error to 1 just before the "restore the original props"
 * handling.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4581 
/*
 * Common implementation of the receive ioctl: begins the receive, applies
 * the received and local properties around it, streams the data in from
 * input_fd, ends the receive, and on failure attempts to put the dataset's
 * properties back the way they were.
 *
 * tofs, tosnap, origin, force, resumable, begin_record and hidden_args are
 * handed to dmu_recv_begin().  recvprops are applied with
 * ZPROP_SRC_RECEIVED.  In localprops, boolean-typed pairs are "-x"
 * properties (applied with ZPROP_SRC_INHERITED) and all other pairs are
 * "-o property=value" settings (applied with ZPROP_SRC_LOCAL).  Properties
 * picked out by extract_delay_props() are only set once the receive has
 * ended successfully.
 *
 * On return *read_bytes is the number of bytes consumed from input_fd and
 * *errflags carries ZPROP_ERR_NOCLEAR / ZPROP_ERR_NORESTORE as applicable.
 *
 * nvlist 'errors' is always allocated. It will contain descriptions of
 * encountered errors, if any. It's the caller's responsibility to free.
 *
 * Returns the receive error if there was one, otherwise the error (if any)
 * from dsl_prop_set_hasrecvd().
 */
static int
zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
    nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
    boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record,
    int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
    uint64_t *action_handle, nvlist_t **errors)
{
        dmu_recv_cookie_t drc;
        int error = 0;
        int props_error = 0;
        offset_t off;
        nvlist_t *local_delayprops = NULL;
        nvlist_t *recv_delayprops = NULL;
        nvlist_t *origprops = NULL; /* existing properties */
        nvlist_t *origrecvd = NULL; /* existing received properties */
        boolean_t first_recvd_props = B_FALSE;
        file_t *input_fp;

        /* Outputs are initialized before anything can fail. */
        *read_bytes = 0;
        *errflags = 0;
        *errors = fnvlist_alloc();

        input_fp = getf(input_fd);
        if (input_fp == NULL)
                return (SET_ERROR(EBADF));

        error = dmu_recv_begin(tofs, tosnap, begin_record, force,
            resumable, localprops, hidden_args, origin, &drc);
        if (error != 0)
                goto out;

        /*
         * Set properties before we receive the stream so that they are applied
         * to the new data. Note that we must call dmu_recv_stream() if
         * dmu_recv_begin() succeeds.
         */
        if (recvprops != NULL && !drc.drc_newfs) {
                if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
                    SPA_VERSION_RECVD_PROPS &&
                    !dsl_prop_get_hasrecvd(tofs))
                        first_recvd_props = B_TRUE;

                /*
                 * If new received properties are supplied, they are to
                 * completely replace the existing received properties,
                 * so stash away the existing ones.
                 */
                if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
                        nvlist_t *errlist = NULL;
                        /*
                         * Don't bother writing a property if its value won't
                         * change (and avoid the unnecessary security checks).
                         *
                         * The first receive after SPA_VERSION_RECVD_PROPS is a
                         * special case where we blow away all local properties
                         * regardless.
                         */
                        if (!first_recvd_props)
                                props_reduce(recvprops, origrecvd);
                        if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
                                (void) nvlist_merge(*errors, errlist, 0);
                        nvlist_free(errlist);

                        if (clear_received_props(tofs, origrecvd,
                            first_recvd_props ? NULL : recvprops) != 0)
                                *errflags |= ZPROP_ERR_NOCLEAR;
                } else {
                        *errflags |= ZPROP_ERR_NOCLEAR;
                }
        }

        /*
         * Stash away existing properties so we can restore them on error unless
         * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
         * case "origrecvd" will take care of that.
         */
        if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
                objset_t *os;
                if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
                        if (dsl_prop_get_all(os, &origprops) != 0) {
                                *errflags |= ZPROP_ERR_NOCLEAR;
                        }
                        dmu_objset_rele(os, FTAG);
                } else {
                        *errflags |= ZPROP_ERR_NOCLEAR;
                }
        }

        if (recvprops != NULL) {
                props_error = dsl_prop_set_hasrecvd(tofs);

                if (props_error == 0) {
                        /* Delayed props are held back until after recv_end. */
                        recv_delayprops = extract_delay_props(recvprops);
                        (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
                            recvprops, *errors);
                }
        }

        if (localprops != NULL) {
                nvlist_t *oprops = fnvlist_alloc();
                nvlist_t *xprops = fnvlist_alloc();
                nvpair_t *nvp = NULL;

                /* Split localprops into "-o" settings and "-x" exclusions. */
                while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
                        if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
                                /* -x property */
                                const char *name = nvpair_name(nvp);
                                zfs_prop_t prop = zfs_name_to_prop(name);
                                if (prop != ZPROP_INVAL) {
                                        if (!zfs_prop_inheritable(prop))
                                                continue;
                                } else if (!zfs_prop_user(name))
                                        continue;
                                fnvlist_add_boolean(xprops, name);
                        } else {
                                /* -o property=value */
                                fnvlist_add_nvpair(oprops, nvp);
                        }
                }

                local_delayprops = extract_delay_props(oprops);
                (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
                    oprops, *errors);
                (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
                    xprops, *errors);

                nvlist_free(oprops);
                nvlist_free(xprops);
        }

        /* Read the stream starting at the file's current offset. */
        off = input_fp->f_offset;
        error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
            action_handle);

        if (error == 0) {
                zfsvfs_t *zfsvfs = NULL;

                if (getzfsvfs(tofs, &zfsvfs) == 0) {
                        /* online recv */
                        dsl_dataset_t *ds;
                        int end_err;

                        ds = dmu_objset_ds(zfsvfs->z_os);
                        error = zfs_suspend_fs(zfsvfs);
                        /*
                         * If the suspend fails, then the recv_end will
                         * likely also fail, and clean up after itself.
                         */
                        end_err = dmu_recv_end(&drc, zfsvfs);
                        if (error == 0)
                                error = zfs_resume_fs(zfsvfs, ds);
                        /* A suspend/resume error wins over end_err. */
                        error = error ? error : end_err;
                        VFS_RELE(zfsvfs->z_vfs);
                } else {
                        error = dmu_recv_end(&drc, NULL);
                }

                /* Set delayed properties now, after we're done receiving. */
                if (recv_delayprops != NULL && error == 0) {
                        (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
                            recv_delayprops, *errors);
                }
                if (local_delayprops != NULL && error == 0) {
                        (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
                            local_delayprops, *errors);
                }
        }

        /*
         * Merge delayed props back in with initial props, in case
         * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
         * we have to make sure clear_received_props() includes
         * the delayed properties).
         *
         * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
         * using ASSERT() will be just like a VERIFY.
         */
        if (recv_delayprops != NULL) {
                ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
                nvlist_free(recv_delayprops);
        }
        if (local_delayprops != NULL) {
                ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
                nvlist_free(local_delayprops);
        }

        /*
         * Report how far the stream was consumed, then try to move the file
         * offset up to that point.
         */
        *read_bytes = off - input_fp->f_offset;
        if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
                input_fp->f_offset = off;

#ifdef  DEBUG
        if (zfs_ioc_recv_inject_err) {
                zfs_ioc_recv_inject_err = B_FALSE;
                error = 1;
        }
#endif

        /*
         * On error, restore the original props.
         */
        if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
                if (clear_received_props(tofs, recvprops, NULL) != 0) {
                        /*
                         * We failed to clear the received properties.
                         * Since we may have left a $recvd value on the
                         * system, we can't clear the $hasrecvd flag.
                         */
                        *errflags |= ZPROP_ERR_NORESTORE;
                } else if (first_recvd_props) {
                        dsl_prop_unset_hasrecvd(tofs);
                }

                if (origrecvd == NULL && !drc.drc_newfs) {
                        /* We failed to stash the original properties. */
                        *errflags |= ZPROP_ERR_NORESTORE;
                }

                /*
                 * dsl_props_set() will not convert RECEIVED to LOCAL on or
                 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
                 * explicitly if we're restoring local properties cleared in the
                 * first new-style receive.
                 */
                if (origrecvd != NULL &&
                    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
                    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
                    origrecvd, NULL) != 0) {
                        /*
                         * We stashed the original properties but failed to
                         * restore them.
                         */
                        *errflags |= ZPROP_ERR_NORESTORE;
                }
        }
        if (error != 0 && localprops != NULL && !drc.drc_newfs &&
            !first_recvd_props) {
                nvlist_t *setprops;
                nvlist_t *inheritprops;
                nvpair_t *nvp;

                if (origprops == NULL) {
                        /* We failed to stash the original properties. */
                        *errflags |= ZPROP_ERR_NORESTORE;
                        goto out;
                }

                /* Restore original props */
                setprops = fnvlist_alloc();
                inheritprops = fnvlist_alloc();
                nvp = NULL;
                while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
                        const char *name = nvpair_name(nvp);
                        const char *source;
                        nvlist_t *attrs;

                        if (!nvlist_exists(origprops, name)) {
                                /*
                                 * Property was not present or was explicitly
                                 * inherited before the receive, restore this.
                                 */
                                fnvlist_add_boolean(inheritprops, name);
                                continue;
                        }
                        attrs = fnvlist_lookup_nvlist(origprops, name);
                        source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);

                        /* Skip received properties */
                        if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
                                continue;

                        if (strcmp(source, tofs) == 0) {
                                /* Property was locally set */
                                fnvlist_add_nvlist(setprops, name, attrs);
                        } else {
                                /* Property was implicitly inherited */
                                fnvlist_add_boolean(inheritprops, name);
                        }
                }

                if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
                    NULL) != 0)
                        *errflags |= ZPROP_ERR_NORESTORE;
                if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
                    NULL) != 0)
                        *errflags |= ZPROP_ERR_NORESTORE;

                nvlist_free(setprops);
                nvlist_free(inheritprops);
        }
out:
        releasef(input_fd);
        nvlist_free(origrecvd);
        nvlist_free(origprops);

        /* A receive error takes precedence over the $hasrecvd error. */
        if (error == 0)
                error = props_error;

        return (error);
}
4885 
4886 /*
4887  * inputs:
4888  * zc_name              name of containing filesystem
4889  * zc_nvlist_src{_size} nvlist of received properties to apply
4890  * zc_nvlist_conf{_size} nvlist of local properties to apply
4891  * zc_history_offset{_len} nvlist of hidden args { "wkeydata" -> value }
4892  * zc_value             name of snapshot to create
4893  * zc_string            name of clone origin (if DRR_FLAG_CLONE)
4894  * zc_cookie            file descriptor to recv from
4895  * zc_begin_record      the BEGIN record of the stream (not byteswapped)
4896  * zc_guid              force flag
4897  * zc_cleanup_fd        cleanup-on-exit file descriptor
4898  * zc_action_handle     handle for this guid/ds mapping (or zero on first call)
4899  * zc_resumable         if data is incomplete assume sender will resume
4900  *
4901  * outputs:
4902  * zc_cookie            number of bytes read
4903  * zc_nvlist_dst{_size} error for each unapplied received property
4904  * zc_obj               zprop_errflags_t
4905  * zc_action_handle     handle for this guid/ds mapping
4906  */
4907 static int
4908 zfs_ioc_recv(zfs_cmd_t *zc)
4909 {
4910         dmu_replay_record_t begin_record;
4911         nvlist_t *errors = NULL;
4912         nvlist_t *recvdprops = NULL;
4913         nvlist_t *localprops = NULL;
4914         nvlist_t *hidden_args = NULL;
4915         char *origin = NULL;
4916         char *tosnap;
4917         char tofs[ZFS_MAX_DATASET_NAME_LEN];
4918         int error = 0;
4919 
4920         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4921             strchr(zc->zc_value, '@') == NULL ||
4922             strchr(zc->zc_value, '%'))
4923                 return (SET_ERROR(EINVAL));
4924 
4925         (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
4926         tosnap = strchr(tofs, '@');
4927         *tosnap++ = '\0';
4928 
4929         if (zc->zc_nvlist_src != 0 &&
4930             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4931             zc->zc_iflags, &recvdprops)) != 0)
4932                 return (error);
4933 
4934         if (zc->zc_nvlist_conf != 0 &&
4935             (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
4936             zc->zc_iflags, &localprops)) != 0)
4937                 return (error);
4938 
4939         if (zc->zc_history_offset != 0 &&
4940             (error = get_nvlist(zc->zc_history_offset, zc->zc_history_len,
4941             zc->zc_iflags, &hidden_args)) != 0)
4942                 return (error);
4943 
4944         if (zc->zc_string[0])
4945                 origin = zc->zc_string;
4946 
4947         begin_record.drr_type = DRR_BEGIN;
4948         begin_record.drr_payloadlen = zc->zc_begin_record.drr_payloadlen;
4949         begin_record.drr_u.drr_begin = zc->zc_begin_record.drr_u.drr_begin;
4950 
4951         error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
4952             hidden_args, zc->zc_guid, zc->zc_resumable, zc->zc_cookie,
4953             &begin_record, zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
4954             &zc->zc_action_handle, &errors);
4955         nvlist_free(recvdprops);
4956         nvlist_free(localprops);
4957 
4958         /*
4959          * Now that all props, initial and delayed, are set, report the prop
4960          * errors to the caller.
4961          */
4962         if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
4963             (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4964             put_nvlist(zc, errors) != 0)) {
4965                 /*
4966                  * Caller made zc->zc_nvlist_dst less than the minimum expected
4967                  * size or supplied an invalid address.
4968                  */
4969                 error = SET_ERROR(EINVAL);
4970         }
4971 
4972         nvlist_free(errors);
4973 
4974         return (error);
4975 }
4976 
4977 /*
4978  * inputs:
4979  * zc_name      name of snapshot to send
4980  * zc_cookie    file descriptor to send stream to
4981  * zc_obj       fromorigin flag (mutually exclusive with zc_fromobj)
4982  * zc_sendobj   objsetid of snapshot to send
4983  * zc_fromobj   objsetid of incremental fromsnap (may be zero)
4984  * zc_guid      if set, estimate size of stream only.  zc_cookie is ignored.
4985  *              output size in zc_objset_type.
4986  * zc_flags     lzc_send_flags
4987  *
4988  * outputs:
4989  * zc_objset_type       estimated size, if zc_guid is set
4990  */
4991 static int
4992 zfs_ioc_send(zfs_cmd_t *zc)
4993 {
4994         int error;
4995         offset_t off;
4996         boolean_t estimate = (zc->zc_guid != 0);
4997         boolean_t embedok = (zc->zc_flags & 0x1);
4998         boolean_t large_block_ok = (zc->zc_flags & 0x2);
4999         boolean_t compressok = (zc->zc_flags & 0x4);
5000         boolean_t rawok = (zc->zc_flags & 0x8);
5001 
5002         if (zc->zc_obj != 0) {
5003                 dsl_pool_t *dp;
5004                 dsl_dataset_t *tosnap;
5005 
5006                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5007                 if (error != 0)
5008                         return (error);
5009 
5010                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5011                 if (error != 0) {
5012                         dsl_pool_rele(dp, FTAG);
5013                         return (error);
5014                 }
5015 
5016                 if (dsl_dir_is_clone(tosnap->ds_dir))
5017                         zc->zc_fromobj =
5018                             dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5019                 dsl_dataset_rele(tosnap, FTAG);
5020                 dsl_pool_rele(dp, FTAG);
5021         }
5022 
5023         if (estimate) {
5024                 dsl_pool_t *dp;
5025                 dsl_dataset_t *tosnap;
5026                 dsl_dataset_t *fromsnap = NULL;
5027 
5028                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5029                 if (error != 0)
5030                         return (error);
5031 
5032                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
5033                     FTAG, &tosnap);
5034                 if (error != 0) {
5035                         dsl_pool_rele(dp, FTAG);
5036                         return (error);
5037                 }
5038 
5039                 if (zc->zc_fromobj != 0) {
5040                         error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5041                             FTAG, &fromsnap);
5042                         if (error != 0) {
5043                                 dsl_dataset_rele(tosnap, FTAG);
5044                                 dsl_pool_rele(dp, FTAG);
5045                                 return (error);
5046                         }
5047                 }
5048 
5049                 error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
5050                     &zc->zc_objset_type);
5051 
5052                 if (fromsnap != NULL)
5053                         dsl_dataset_rele(fromsnap, FTAG);
5054                 dsl_dataset_rele(tosnap, FTAG);
5055                 dsl_pool_rele(dp, FTAG);
5056         } else {
5057                 file_t *fp = getf(zc->zc_cookie);
5058                 if (fp == NULL)
5059                         return (SET_ERROR(EBADF));
5060 
5061                 off = fp->f_offset;
5062                 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5063                     zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
5064                     zc->zc_cookie, fp->f_vnode, &off);
5065 
5066                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5067                         fp->f_offset = off;
5068                 releasef(zc->zc_cookie);
5069         }
5070         return (error);
5071 }
5072 
5073 /*
5074  * inputs:
5075  * zc_name      name of snapshot on which to report progress
5076  * zc_cookie    file descriptor of send stream
5077  *
5078  * outputs:
5079  * zc_cookie    number of bytes written in send stream thus far
5080  */
5081 static int
5082 zfs_ioc_send_progress(zfs_cmd_t *zc)
5083 {
5084         dsl_pool_t *dp;
5085         dsl_dataset_t *ds;
5086         dmu_sendarg_t *dsp = NULL;
5087         int error;
5088 
5089         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5090         if (error != 0)
5091                 return (error);
5092 
5093         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5094         if (error != 0) {
5095                 dsl_pool_rele(dp, FTAG);
5096                 return (error);
5097         }
5098 
5099         mutex_enter(&ds->ds_sendstream_lock);
5100 
5101         /*
5102          * Iterate over all the send streams currently active on this dataset.
5103          * If there's one which matches the specified file descriptor _and_ the
5104          * stream was started by the current process, return the progress of
5105          * that stream.
5106          */
5107         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5108             dsp = list_next(&ds->ds_sendstreams, dsp)) {
5109                 if (dsp->dsa_outfd == zc->zc_cookie &&
5110                     dsp->dsa_proc == curproc)
5111                         break;
5112         }
5113 
5114         if (dsp != NULL)
5115                 zc->zc_cookie = *(dsp->dsa_off);
5116         else
5117                 error = SET_ERROR(ENOENT);
5118 
5119         mutex_exit(&ds->ds_sendstream_lock);
5120         dsl_dataset_rele(ds, FTAG);
5121         dsl_pool_rele(dp, FTAG);
5122         return (error);
5123 }
5124 
5125 static int
5126 zfs_ioc_inject_fault(zfs_cmd_t *zc)
5127 {
5128         int id, error;
5129 
5130         error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5131             &zc->zc_inject_record);
5132 
5133         if (error == 0)
5134                 zc->zc_guid = (uint64_t)id;
5135 
5136         return (error);
5137 }
5138 
5139 static int
5140 zfs_ioc_clear_fault(zfs_cmd_t *zc)
5141 {
5142         return (zio_clear_fault((int)zc->zc_guid));
5143 }
5144 
5145 static int
5146 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5147 {
5148         int id = (int)zc->zc_guid;
5149         int error;
5150 
5151         error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5152             &zc->zc_inject_record);
5153 
5154         zc->zc_guid = id;
5155 
5156         return (error);
5157 }
5158 
5159 static int
5160 zfs_ioc_error_log(zfs_cmd_t *zc)
5161 {
5162         spa_t *spa;
5163         int error;
5164         size_t count = (size_t)zc->zc_nvlist_dst_size;
5165 
5166         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5167                 return (error);
5168 
5169         error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5170             &count);
5171         if (error == 0)
5172                 zc->zc_nvlist_dst_size = count;
5173         else
5174                 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5175 
5176         spa_close(spa, FTAG);
5177 
5178         return (error);
5179 }
5180 
5181 static int
5182 zfs_ioc_clear(zfs_cmd_t *zc)
5183 {
5184         spa_t *spa;
5185         vdev_t *vd;
5186         int error;
5187 
5188         /*
5189          * On zpool clear we also fix up missing slogs
5190          */
5191         mutex_enter(&spa_namespace_lock);
5192         spa = spa_lookup(zc->zc_name);
5193         if (spa == NULL) {
5194                 mutex_exit(&spa_namespace_lock);
5195                 return (SET_ERROR(EIO));
5196         }
5197         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5198                 /* we need to let spa_open/spa_load clear the chains */
5199                 spa_set_log_state(spa, SPA_LOG_CLEAR);
5200         }
5201         spa->spa_last_open_failed = 0;
5202         mutex_exit(&spa_namespace_lock);
5203 
5204         if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5205                 error = spa_open(zc->zc_name, &spa, FTAG);
5206         } else {
5207                 nvlist_t *policy;
5208                 nvlist_t *config = NULL;
5209 
5210                 if (zc->zc_nvlist_src == 0)
5211                         return (SET_ERROR(EINVAL));
5212 
5213                 if ((error = get_nvlist(zc->zc_nvlist_src,
5214                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5215                         error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5216                             policy, &config);
5217                         if (config != NULL) {
5218                                 int err;
5219 
5220                                 if ((err = put_nvlist(zc, config)) != 0)
5221                                         error = err;
5222                                 nvlist_free(config);
5223                         }
5224                         nvlist_free(policy);
5225                 }
5226         }
5227 
5228         if (error != 0)
5229                 return (error);
5230 
5231         /*
5232          * If multihost is enabled, resuming I/O is unsafe as another
5233          * host may have imported the pool.
5234          */
5235         if (spa_multihost(spa) && spa_suspended(spa))
5236                 return (SET_ERROR(EINVAL));
5237 
5238         spa_vdev_state_enter(spa, SCL_NONE);
5239 
5240         if (zc->zc_guid == 0) {
5241                 vd = NULL;
5242         } else {
5243                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5244                 if (vd == NULL) {
5245                         (void) spa_vdev_state_exit(spa, NULL, ENODEV);
5246                         spa_close(spa, FTAG);
5247                         return (SET_ERROR(ENODEV));
5248                 }
5249         }
5250 
5251         vdev_clear(spa, vd);
5252 
5253         (void) spa_vdev_state_exit(spa, NULL, 0);
5254 
5255         /*
5256          * Resume any suspended I/Os.
5257          */
5258         if (zio_resume(spa) != 0)
5259                 error = SET_ERROR(EIO);
5260 
5261         spa_close(spa, FTAG);
5262 
5263         return (error);
5264 }
5265 
5266 static int
5267 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
5268 {
5269         spa_t *spa;
5270         int error;
5271 
5272         error = spa_open(zc->zc_name, &spa, FTAG);
5273         if (error != 0)
5274                 return (error);
5275 
5276         spa_vdev_state_enter(spa, SCL_NONE);
5277 
5278         /*
5279          * If a resilver is already in progress then set the
5280          * spa_scrub_reopen flag to B_TRUE so that we don't restart
5281          * the scan as a side effect of the reopen. Otherwise, let
5282          * vdev_open() decided if a resilver is required.
5283          */
5284         spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
5285         vdev_reopen(spa->spa_root_vdev);
5286         spa->spa_scrub_reopen = B_FALSE;
5287 
5288         (void) spa_vdev_state_exit(spa, NULL, 0);
5289         spa_close(spa, FTAG);
5290         return (0);
5291 }
5292 /*
5293  * inputs:
5294  * zc_name      name of filesystem
5295  *
5296  * outputs:
5297  * zc_string    name of conflicting snapshot, if there is one
5298  */
5299 static int
5300 zfs_ioc_promote(zfs_cmd_t *zc)
5301 {
5302         dsl_pool_t *dp;
5303         dsl_dataset_t *ds, *ods;
5304         char origin[ZFS_MAX_DATASET_NAME_LEN];
5305         char *cp;
5306         int error;
5307 
5308         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5309         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5310             strchr(zc->zc_name, '%'))
5311                 return (SET_ERROR(EINVAL));
5312 
5313         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5314         if (error != 0)
5315                 return (error);
5316 
5317         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5318         if (error != 0) {
5319                 dsl_pool_rele(dp, FTAG);
5320                 return (error);
5321         }
5322 
5323         if (!dsl_dir_is_clone(ds->ds_dir)) {
5324                 dsl_dataset_rele(ds, FTAG);
5325                 dsl_pool_rele(dp, FTAG);
5326                 return (SET_ERROR(EINVAL));
5327         }
5328 
5329         error = dsl_dataset_hold_obj(dp,
5330             dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5331         if (error != 0) {
5332                 dsl_dataset_rele(ds, FTAG);
5333                 dsl_pool_rele(dp, FTAG);
5334                 return (error);
5335         }
5336 
5337         dsl_dataset_name(ods, origin);
5338         dsl_dataset_rele(ods, FTAG);
5339         dsl_dataset_rele(ds, FTAG);
5340         dsl_pool_rele(dp, FTAG);
5341 
5342         /*
5343          * We don't need to unmount *all* the origin fs's snapshots, but
5344          * it's easier.
5345          */
5346         cp = strchr(origin, '@');
5347         if (cp)
5348                 *cp = '\0';
5349         (void) dmu_objset_find(origin,
5350             zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5351         return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5352 }
5353 
5354 /*
5355  * Retrieve a single {user|group|project}{used|quota}@... property.
5356  *
5357  * inputs:
5358  * zc_name      name of filesystem
5359  * zc_objset_type zfs_userquota_prop_t
5360  * zc_value     domain name (eg. "S-1-234-567-89")
5361  * zc_guid      RID/UID/GID
5362  *
5363  * outputs:
5364  * zc_cookie    property value
5365  */
5366 static int
5367 zfs_ioc_userspace_one(zfs_cmd_t *zc)
5368 {
5369         zfsvfs_t *zfsvfs;
5370         int error;
5371 
5372         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5373                 return (SET_ERROR(EINVAL));
5374 
5375         error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5376         if (error != 0)
5377                 return (error);
5378 
5379         error = zfs_userspace_one(zfsvfs,
5380             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5381         zfsvfs_rele(zfsvfs, FTAG);
5382 
5383         return (error);
5384 }
5385 
5386 /*
5387  * inputs:
5388  * zc_name              name of filesystem
5389  * zc_cookie            zap cursor
5390  * zc_objset_type       zfs_userquota_prop_t
5391  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5392  *
5393  * outputs:
5394  * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
5395  * zc_cookie    zap cursor
5396  */
5397 static int
5398 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5399 {
5400         zfsvfs_t *zfsvfs;
5401         int bufsize = zc->zc_nvlist_dst_size;
5402 
5403         if (bufsize <= 0)
5404                 return (SET_ERROR(ENOMEM));
5405 
5406         int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5407         if (error != 0)
5408                 return (error);
5409 
5410         void *buf = kmem_alloc(bufsize, KM_SLEEP);
5411 
5412         error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5413             buf, &zc->zc_nvlist_dst_size);
5414 
5415         if (error == 0) {
5416                 error = xcopyout(buf,
5417                     (void *)(uintptr_t)zc->zc_nvlist_dst,
5418                     zc->zc_nvlist_dst_size);
5419         }
5420         kmem_free(buf, bufsize);
5421         zfsvfs_rele(zfsvfs, FTAG);
5422 
5423         return (error);
5424 }
5425 
5426 /*
5427  * inputs:
5428  * zc_name              name of filesystem
5429  *
5430  * outputs:
5431  * none
5432  */
5433 static int
5434 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5435 {
5436         objset_t *os;
5437         int error = 0;
5438         zfsvfs_t *zfsvfs;
5439 
5440         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5441                 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5442                         /*
5443                          * If userused is not enabled, it may be because the
5444                          * objset needs to be closed & reopened (to grow the
5445                          * objset_phys_t).  Suspend/resume the fs will do that.
5446                          */
5447                         dsl_dataset_t *ds, *newds;
5448 
5449                         ds = dmu_objset_ds(zfsvfs->z_os);
5450                         error = zfs_suspend_fs(zfsvfs);
5451                         if (error == 0) {
5452                                 dmu_objset_refresh_ownership(ds, &newds,
5453                                     B_TRUE, zfsvfs);
5454                                 error = zfs_resume_fs(zfsvfs, newds);
5455                         }
5456                 }
5457                 if (error == 0)
5458                         error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5459                 VFS_RELE(zfsvfs->z_vfs);
5460         } else {
5461                 /* XXX kind of reading contents without owning */
5462                 error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
5463                 if (error != 0)
5464                         return (error);
5465 
5466                 error = dmu_objset_userspace_upgrade(os);
5467                 dmu_objset_rele_flags(os, B_TRUE, FTAG);
5468         }
5469 
5470         return (error);
5471 }
5472 
5473 /*
5474  * inputs:
5475  * zc_name              name of filesystem
5476  *
5477  * outputs:
5478  * none
5479  */
5480 static int
5481 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
5482 {
5483         objset_t *os;
5484         int error;
5485 
5486         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5487         if (error != 0)
5488                 return (error);
5489 
5490         dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
5491         dsl_pool_rele(dmu_objset_pool(os), FTAG);
5492 
5493         if (dmu_objset_userobjspace_upgradable(os) ||
5494             dmu_objset_projectquota_upgradable(os)) {
5495                 mutex_enter(&os->os_upgrade_lock);
5496                 if (os->os_upgrade_id == 0) {
5497                         /* clear potential error code and retry */
5498                         os->os_upgrade_status = 0;
5499                         mutex_exit(&os->os_upgrade_lock);
5500 
5501                         dmu_objset_id_quota_upgrade(os);
5502                 } else {
5503                         mutex_exit(&os->os_upgrade_lock);
5504                 }
5505 
5506                 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
5507                 error = os->os_upgrade_status;
5508         }
5509 
5510         dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
5511         dsl_dataset_rele(dmu_objset_ds(os), FTAG);
5512 
5513         return (error);
5514 }
5515 
/*
 * We don't want to have a hard dependency against some special symbols
 * in sharefs, nfs, and smbsrv.  Determine them if needed when the first
 * file system is shared.  None of sharefs, nfs, or smbsrv is an
 * unloadable module.
 */

/*
 * Entry points resolved at run time with ddi_modsym():
 * znfsexport_fs is "nfs_export" from fs/nfs, zshare_fs is "sharefs_impl"
 * from fs/sharefs, and zsmbexport_fs is "smb_server_share" from
 * drv/smbsrv.  Each stays NULL until its lookup has succeeded.
 */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/*
 * Set to 1 by zfs_ioc_share() once the NFS (resp. SMB) module, its
 * export symbol and the sharefs support have all been resolved.
 */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/*
 * Handles returned by ddi_modopen() for the modules above, and the lock
 * held while the handles and function pointers are being set up.
 */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
kmutex_t zfs_share_lock;
5534 
5535 static int
5536 zfs_init_sharefs()
5537 {
5538         int error;
5539 
5540         ASSERT(MUTEX_HELD(&zfs_share_lock));
5541         /* Both NFS and SMB shares also require sharetab support. */
5542         if (sharefs_mod == NULL && ((sharefs_mod =
5543             ddi_modopen("fs/sharefs",
5544             KRTLD_MODE_FIRST, &error)) == NULL)) {
5545                 return (SET_ERROR(ENOSYS));
5546         }
5547         if (zshare_fs == NULL && ((zshare_fs =
5548             (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
5549             ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
5550                 return (SET_ERROR(ENOSYS));
5551         }
5552         return (0);
5553 }
5554 
5555 static int
5556 zfs_ioc_share(zfs_cmd_t *zc)
5557 {
5558         int error;
5559         int opcode;
5560 
5561         switch (zc->zc_share.z_sharetype) {
5562         case ZFS_SHARE_NFS:
5563         case ZFS_UNSHARE_NFS:
5564                 if (zfs_nfsshare_inited == 0) {
5565                         mutex_enter(&zfs_share_lock);
5566                         if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5567                             KRTLD_MODE_FIRST, &error)) == NULL)) {
5568                                 mutex_exit(&zfs_share_lock);
5569                                 return (SET_ERROR(ENOSYS));
5570                         }
5571                         if (znfsexport_fs == NULL &&
5572                             ((znfsexport_fs = (int (*)(void *))
5573                             ddi_modsym(nfs_mod,
5574                             "nfs_export", &error)) == NULL)) {
5575                                 mutex_exit(&zfs_share_lock);
5576                                 return (SET_ERROR(ENOSYS));
5577                         }
5578                         error = zfs_init_sharefs();
5579                         if (error != 0) {
5580                                 mutex_exit(&zfs_share_lock);
5581                                 return (SET_ERROR(ENOSYS));
5582                         }
5583                         zfs_nfsshare_inited = 1;
5584                         mutex_exit(&zfs_share_lock);
5585                 }
5586                 break;
5587         case ZFS_SHARE_SMB:
5588         case ZFS_UNSHARE_SMB:
5589                 if (zfs_smbshare_inited == 0) {
5590                         mutex_enter(&zfs_share_lock);
5591                         if (smbsrv_mod == NULL && ((smbsrv_mod =
5592                             ddi_modopen("drv/smbsrv",
5593                             KRTLD_MODE_FIRST, &error)) == NULL)) {
5594                                 mutex_exit(&zfs_share_lock);
5595                                 return (SET_ERROR(ENOSYS));
5596                         }
5597                         if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5598                             (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5599                             "smb_server_share", &error)) == NULL)) {
5600                                 mutex_exit(&zfs_share_lock);
5601                                 return (SET_ERROR(ENOSYS));
5602                         }
5603                         error = zfs_init_sharefs();
5604                         if (error != 0) {
5605                                 mutex_exit(&zfs_share_lock);
5606                                 return (SET_ERROR(ENOSYS));
5607                         }
5608                         zfs_smbshare_inited = 1;
5609                         mutex_exit(&zfs_share_lock);
5610                 }
5611                 break;
5612         default:
5613                 return (SET_ERROR(EINVAL));
5614         }
5615 
5616         switch (zc->zc_share.z_sharetype) {
5617         case ZFS_SHARE_NFS:
5618         case ZFS_UNSHARE_NFS:
5619                 if (error =
5620                     znfsexport_fs((void *)
5621                     (uintptr_t)zc->zc_share.z_exportdata))
5622                         return (error);
5623                 break;
5624         case ZFS_SHARE_SMB:
5625         case ZFS_UNSHARE_SMB:
5626                 if (error = zsmbexport_fs((void *)
5627                     (uintptr_t)zc->zc_share.z_exportdata,
5628                     zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5629                     B_TRUE: B_FALSE)) {
5630                         return (error);
5631                 }
5632                 break;
5633         }
5634 
5635         opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5636             zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5637             SHAREFS_ADD : SHAREFS_REMOVE;
5638 
5639         /*
5640          * Add or remove share from sharetab
5641          */
5642         error = zshare_fs(opcode,
5643             (void *)(uintptr_t)zc->zc_share.z_sharedata,
5644             zc->zc_share.z_sharemax);
5645 
5646         return (error);
5647 
5648 }
5649 
/*
 * One-entry ACL naming ACE_EVERYONE with ACE_ALL_PERMS.  zfs_ioc_smb_acl()
 * passes it to VOP_CREATE() as the ACL of files created for
 * ZFS_SMB_ACL_ADD.
 * NOTE(review): the trailing 0 is presumably the "allow" ACE type --
 * confirm against the ace_t definition.
 */
ace_t full_access[] = {
        {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
5653 
5654 /*
5655  * inputs:
5656  * zc_name              name of containing filesystem
5657  * zc_obj               object # beyond which we want next in-use object #
5658  *
5659  * outputs:
5660  * zc_obj               next in-use object #
5661  */
5662 static int
5663 zfs_ioc_next_obj(zfs_cmd_t *zc)
5664 {
5665         objset_t *os = NULL;
5666         int error;
5667 
5668         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5669         if (error != 0)
5670                 return (error);
5671 
5672         error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5673             dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5674 
5675         dmu_objset_rele(os, FTAG);
5676         return (error);
5677 }
5678 
5679 /*
5680  * inputs:
5681  * zc_name              name of filesystem
5682  * zc_value             prefix name for snapshot
5683  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
5684  *
5685  * outputs:
5686  * zc_value             short name of new snapshot
5687  */
5688 static int
5689 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5690 {
5691         char *snap_name;
5692         char *hold_name;
5693         int error;
5694         minor_t minor;
5695 
5696         error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5697         if (error != 0)
5698                 return (error);
5699 
5700         snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5701             (u_longlong_t)ddi_get_lbolt64());
5702         hold_name = kmem_asprintf("%%%s", zc->zc_value);
5703 
5704         error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5705             hold_name);
5706         if (error == 0)
5707                 (void) strcpy(zc->zc_value, snap_name);
5708         strfree(snap_name);
5709         strfree(hold_name);
5710         zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5711         return (error);
5712 }
5713 
5714 /*
5715  * inputs:
5716  * zc_name              name of "to" snapshot
5717  * zc_value             name of "from" snapshot
5718  * zc_cookie            file descriptor to write diff data on
5719  *
5720  * outputs:
5721  * dmu_diff_record_t's to the file descriptor
5722  */
5723 static int
5724 zfs_ioc_diff(zfs_cmd_t *zc)
5725 {
5726         file_t *fp;
5727         offset_t off;
5728         int error;
5729 
5730         fp = getf(zc->zc_cookie);
5731         if (fp == NULL)
5732                 return (SET_ERROR(EBADF));
5733 
5734         off = fp->f_offset;
5735 
5736         error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5737 
5738         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5739                 fp->f_offset = off;
5740         releasef(zc->zc_cookie);
5741 
5742         return (error);
5743 }
5744 
5745 /*
5746  * Remove all ACL files in shares dir
5747  */
5748 static int
5749 zfs_smb_acl_purge(znode_t *dzp)
5750 {
5751         zap_cursor_t    zc;
5752         zap_attribute_t zap;
5753         zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5754         int error;
5755 
5756         for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5757             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5758             zap_cursor_advance(&zc)) {
5759                 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5760                     NULL, 0)) != 0)
5761                         break;
5762         }
5763         zap_cursor_fini(&zc);
5764         return (error);
5765 }
5766 
5767 static int
5768 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5769 {
5770         vnode_t *vp;
5771         znode_t *dzp;
5772         vnode_t *resourcevp = NULL;
5773         znode_t *sharedir;
5774         zfsvfs_t *zfsvfs;
5775         nvlist_t *nvlist;
5776         char *src, *target;
5777         vattr_t vattr;
5778         vsecattr_t vsec;
5779         int error = 0;
5780 
5781         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5782             NO_FOLLOW, NULL, &vp)) != 0)
5783                 return (error);
5784 
5785         /* Now make sure mntpnt and dataset are ZFS */
5786 
5787         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5788             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5789             zc->zc_name) != 0)) {
5790                 VN_RELE(vp);
5791                 return (SET_ERROR(EINVAL));
5792         }
5793 
5794         dzp = VTOZ(vp);
5795         zfsvfs = dzp->z_zfsvfs;
5796         ZFS_ENTER(zfsvfs);
5797 
5798         /*
5799          * Create share dir if its missing.
5800          */
5801         mutex_enter(&zfsvfs->z_lock);
5802         if (zfsvfs->z_shares_dir == 0) {
5803                 dmu_tx_t *tx;
5804 
5805                 tx = dmu_tx_create(zfsvfs->z_os);
5806                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5807                     ZFS_SHARES_DIR);
5808                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5809                 error = dmu_tx_assign(tx, TXG_WAIT);
5810                 if (error != 0) {
5811                         dmu_tx_abort(tx);
5812                 } else {
5813                         error = zfs_create_share_dir(zfsvfs, tx);
5814                         dmu_tx_commit(tx);
5815                 }
5816                 if (error != 0) {
5817                         mutex_exit(&zfsvfs->z_lock);
5818                         VN_RELE(vp);
5819                         ZFS_EXIT(zfsvfs);
5820                         return (error);
5821                 }
5822         }
5823         mutex_exit(&zfsvfs->z_lock);
5824 
5825         ASSERT(zfsvfs->z_shares_dir);
5826         if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5827                 VN_RELE(vp);
5828                 ZFS_EXIT(zfsvfs);
5829                 return (error);
5830         }
5831 
5832         switch (zc->zc_cookie) {
5833         case ZFS_SMB_ACL_ADD:
5834                 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5835                 vattr.va_type = VREG;
5836                 vattr.va_mode = S_IFREG|0777;
5837                 vattr.va_uid = 0;
5838                 vattr.va_gid = 0;
5839 
5840                 vsec.vsa_mask = VSA_ACE;
5841                 vsec.vsa_aclentp = &full_access;
5842                 vsec.vsa_aclentsz = sizeof (full_access);
5843                 vsec.vsa_aclcnt = 1;
5844 
5845                 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5846                     &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5847                 if (resourcevp)
5848                         VN_RELE(resourcevp);
5849                 break;
5850 
5851         case ZFS_SMB_ACL_REMOVE:
5852                 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5853                     NULL, 0);
5854                 break;
5855 
5856         case ZFS_SMB_ACL_RENAME:
5857                 if ((error = get_nvlist(zc->zc_nvlist_src,
5858                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5859                         VN_RELE(vp);
5860                         VN_RELE(ZTOV(sharedir));
5861                         ZFS_EXIT(zfsvfs);
5862                         return (error);
5863                 }
5864                 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5865                     nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5866                     &target)) {
5867                         VN_RELE(vp);
5868                         VN_RELE(ZTOV(sharedir));
5869                         ZFS_EXIT(zfsvfs);
5870                         nvlist_free(nvlist);
5871                         return (error);
5872                 }
5873                 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5874                     kcred, NULL, 0);
5875                 nvlist_free(nvlist);
5876                 break;
5877 
5878         case ZFS_SMB_ACL_PURGE:
5879                 error = zfs_smb_acl_purge(sharedir);
5880                 break;
5881 
5882         default:
5883                 error = SET_ERROR(EINVAL);
5884                 break;
5885         }
5886 
5887         VN_RELE(vp);
5888         VN_RELE(ZTOV(sharedir));
5889 
5890         ZFS_EXIT(zfsvfs);
5891 
5892         return (error);
5893 }
5894 
5895 /*
5896  * innvl: {
5897  *     "holds" -> { snapname -> holdname (string), ... }
5898  *     (optional) "cleanup_fd" -> fd (int32)
5899  * }
5900  *
5901  * outnvl: {
5902  *     snapname -> error value (int32)
5903  *     ...
5904  * }
5905  */
5906 /* ARGSUSED */
5907 static int
5908 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5909 {
5910         nvpair_t *pair;
5911         nvlist_t *holds;
5912         int cleanup_fd = -1;
5913         int error;
5914         minor_t minor = 0;
5915 
5916         error = nvlist_lookup_nvlist(args, "holds", &holds);
5917         if (error != 0)
5918                 return (SET_ERROR(EINVAL));
5919 
5920         /* make sure the user didn't pass us any invalid (empty) tags */
5921         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5922             pair = nvlist_next_nvpair(holds, pair)) {
5923                 char *htag;
5924 
5925                 error = nvpair_value_string(pair, &htag);
5926                 if (error != 0)
5927                         return (SET_ERROR(error));
5928 
5929                 if (strlen(htag) == 0)
5930                         return (SET_ERROR(EINVAL));
5931         }
5932 
5933         if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5934                 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5935                 if (error != 0)
5936                         return (error);
5937         }
5938 
5939         error = dsl_dataset_user_hold(holds, minor, errlist);
5940         if (minor != 0)
5941                 zfs_onexit_fd_rele(cleanup_fd);
5942         return (error);
5943 }
5944 
5945 /*
5946  * innvl is not used.
5947  *
5948  * outnvl: {
5949  *    holdname -> time added (uint64 seconds since epoch)
5950  *    ...
5951  * }
5952  */
5953 /* ARGSUSED */
5954 static int
5955 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5956 {
5957         ASSERT3P(args, ==, NULL);
5958         return (dsl_dataset_get_holds(snapname, outnvl));
5959 }
5960 
5961 /*
5962  * innvl: {
5963  *     snapname -> { holdname, ... }
5964  *     ...
5965  * }
5966  *
5967  * outnvl: {
5968  *     snapname -> error value (int32)
5969  *     ...
5970  * }
5971  */
5972 /* ARGSUSED */
5973 static int
5974 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5975 {
5976         return (dsl_dataset_user_release(holds, errlist));
5977 }
5978 
5979 /*
5980  * inputs:
5981  * zc_name              name of new filesystem or snapshot
5982  * zc_value             full name of old snapshot
5983  *
5984  * outputs:
5985  * zc_cookie            space in bytes
5986  * zc_objset_type       compressed space in bytes
5987  * zc_perm_action       uncompressed space in bytes
5988  */
5989 static int
5990 zfs_ioc_space_written(zfs_cmd_t *zc)
5991 {
5992         int error;
5993         dsl_pool_t *dp;
5994         dsl_dataset_t *new, *old;
5995 
5996         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5997         if (error != 0)
5998                 return (error);
5999         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6000         if (error != 0) {
6001                 dsl_pool_rele(dp, FTAG);
6002                 return (error);
6003         }
6004         error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6005         if (error != 0) {
6006                 dsl_dataset_rele(new, FTAG);
6007                 dsl_pool_rele(dp, FTAG);
6008                 return (error);
6009         }
6010 
6011         error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
6012             &zc->zc_objset_type, &zc->zc_perm_action);
6013         dsl_dataset_rele(old, FTAG);
6014         dsl_dataset_rele(new, FTAG);
6015         dsl_pool_rele(dp, FTAG);
6016         return (error);
6017 }
6018 
6019 /*
6020  * innvl: {
6021  *     "firstsnap" -> snapshot name
6022  * }
6023  *
6024  * outnvl: {
6025  *     "used" -> space in bytes
6026  *     "compressed" -> compressed space in bytes
6027  *     "uncompressed" -> uncompressed space in bytes
6028  * }
6029  */
6030 static int
6031 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6032 {
6033         int error;
6034         dsl_pool_t *dp;
6035         dsl_dataset_t *new, *old;
6036         char *firstsnap;
6037         uint64_t used, comp, uncomp;
6038 
6039         if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
6040                 return (SET_ERROR(EINVAL));
6041 
6042         error = dsl_pool_hold(lastsnap, FTAG, &dp);
6043         if (error != 0)
6044                 return (error);
6045 
6046         error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6047         if (error == 0 && !new->ds_is_snapshot) {
6048                 dsl_dataset_rele(new, FTAG);
6049                 error = SET_ERROR(EINVAL);
6050         }
6051         if (error != 0) {
6052                 dsl_pool_rele(dp, FTAG);
6053                 return (error);
6054         }
6055         error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6056         if (error == 0 && !old->ds_is_snapshot) {
6057                 dsl_dataset_rele(old, FTAG);
6058                 error = SET_ERROR(EINVAL);
6059         }
6060         if (error != 0) {
6061                 dsl_dataset_rele(new, FTAG);
6062                 dsl_pool_rele(dp, FTAG);
6063                 return (error);
6064         }
6065 
6066         error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6067         dsl_dataset_rele(old, FTAG);
6068         dsl_dataset_rele(new, FTAG);
6069         dsl_pool_rele(dp, FTAG);
6070         fnvlist_add_uint64(outnvl, "used", used);
6071         fnvlist_add_uint64(outnvl, "compressed", comp);
6072         fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6073         return (error);
6074 }
6075 
6076 /*
6077  * innvl: {
6078  *     "fd" -> file descriptor to write stream to (int32)
6079  *     (optional) "fromsnap" -> full snap name to send an incremental from
6080  *     (optional) "largeblockok" -> (value ignored)
6081  *         indicates that blocks > 128KB are permitted
6082  *     (optional) "embedok" -> (value ignored)
6083  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6084  *     (optional) "compressok" -> (value ignored)
6085  *         presence indicates compressed DRR_WRITE records are permitted
6086  *     (optional) "rawok" -> (value ignored)
6087  *         presence indicates raw encrypted records should be used.
6088  *     (optional) "resume_object" and "resume_offset" -> (uint64)
6089  *         if present, resume send stream from specified object and offset.
6090  * }
6091  *
6092  * outnvl is unused
6093  */
6094 /* ARGSUSED */
6095 static int
6096 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6097 {
6098         int error;
6099         offset_t off;
6100         char *fromname = NULL;
6101         int fd;
6102         boolean_t largeblockok;
6103         boolean_t embedok;
6104         boolean_t compressok;
6105         boolean_t rawok;
6106         uint64_t resumeobj = 0;
6107         uint64_t resumeoff = 0;
6108 
6109         error = nvlist_lookup_int32(innvl, "fd", &fd);
6110         if (error != 0)
6111                 return (SET_ERROR(EINVAL));
6112 
6113         (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6114 
6115         largeblockok = nvlist_exists(innvl, "largeblockok");
6116         embedok = nvlist_exists(innvl, "embedok");
6117         compressok = nvlist_exists(innvl, "compressok");
6118         rawok = nvlist_exists(innvl, "rawok");
6119 
6120         (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6121         (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6122 
6123         file_t *fp = getf(fd);
6124         if (fp == NULL)
6125                 return (SET_ERROR(EBADF));
6126 
6127         off = fp->f_offset;
6128         error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
6129             rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
6130 
6131         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
6132                 fp->f_offset = off;
6133         releasef(fd);
6134         return (error);
6135 }
6136 
6137 /*
6138  * Determine approximately how large a zfs send stream will be -- the number
6139  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
6140  *
6141  * innvl: {
6142  *     (optional) "from" -> full snap or bookmark name to send an incremental
6143  *                          from
6144  *     (optional) "largeblockok" -> (value ignored)
6145  *         indicates that blocks > 128KB are permitted
6146  *     (optional) "embedok" -> (value ignored)
6147  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6148  *     (optional) "compressok" -> (value ignored)
6149  *         presence indicates compressed DRR_WRITE records are permitted
6150  * }
6151  *
6152  * outnvl: {
6153  *     "space" -> bytes of space (uint64)
6154  * }
6155  */
6156 static int
6157 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6158 {
6159         dsl_pool_t *dp;
6160         dsl_dataset_t *tosnap;
6161         int error;
6162         char *fromname;
6163         boolean_t compressok;
6164         boolean_t rawok;
6165         uint64_t space;
6166 
6167         error = dsl_pool_hold(snapname, FTAG, &dp);
6168         if (error != 0)
6169                 return (error);
6170 
6171         error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6172         if (error != 0) {
6173                 dsl_pool_rele(dp, FTAG);
6174                 return (error);
6175         }
6176 
6177         compressok = nvlist_exists(innvl, "compressok");
6178         rawok = nvlist_exists(innvl, "rawok");
6179 
6180         error = nvlist_lookup_string(innvl, "from", &fromname);
6181         if (error == 0) {
6182                 if (strchr(fromname, '@') != NULL) {
6183                         /*
6184                          * If from is a snapshot, hold it and use the more
6185                          * efficient dmu_send_estimate to estimate send space
6186                          * size using deadlists.
6187                          */
6188                         dsl_dataset_t *fromsnap;
6189                         error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6190                         if (error != 0)
6191                                 goto out;
6192                         error = dmu_send_estimate(tosnap, fromsnap,
6193                             compressok || rawok, &space);
6194                         dsl_dataset_rele(fromsnap, FTAG);
6195                 } else if (strchr(fromname, '#') != NULL) {
6196                         /*
6197                          * If from is a bookmark, fetch the creation TXG of the
6198                          * snapshot it was created from and use that to find
6199                          * blocks that were born after it.
6200                          */
6201                         zfs_bookmark_phys_t frombm;
6202 
6203                         error = dsl_bookmark_lookup(dp, fromname, tosnap,
6204                             &frombm);
6205                         if (error != 0)
6206                                 goto out;
6207                         error = dmu_send_estimate_from_txg(tosnap,
6208                             frombm.zbm_creation_txg, compressok || rawok,
6209                             &space);
6210                 } else {
6211                         /*
6212                          * from is not properly formatted as a snapshot or
6213                          * bookmark
6214                          */
6215                         error = SET_ERROR(EINVAL);
6216                         goto out;
6217                 }
6218         } else {
6219                 /*
6220                  * If estimating the size of a full send, use dmu_send_estimate.
6221                  */
6222                 error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
6223                     &space);
6224         }
6225 
6226         fnvlist_add_uint64(outnvl, "space", space);
6227 
6228 out:
6229         dsl_dataset_rele(tosnap, FTAG);
6230         dsl_pool_rele(dp, FTAG);
6231         return (error);
6232 }
6233 
6234 /*
6235  * Sync the currently open TXG to disk for the specified pool.
6236  * This is somewhat similar to 'zfs_sync()'.
6237  * For cases that do not result in error this ioctl will wait for
6238  * the currently open TXG to commit before returning back to the caller.
6239  *
6240  * innvl: {
6241  *  "force" -> when true, force uberblock update even if there is no dirty data.
6242  *             In addition this will cause the vdev configuration to be written
6243  *             out including updating the zpool cache file. (boolean_t)
6244  * }
6245  *
6246  * onvl is unused
6247  */
6248 /* ARGSUSED */
6249 static int
6250 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
6251 {
6252         int err;
6253         boolean_t force;
6254         spa_t *spa;
6255 
6256         if ((err = spa_open(pool, &spa, FTAG)) != 0)
6257                 return (err);
6258 
6259         force = fnvlist_lookup_boolean_value(innvl, "force");
6260         if (force) {
6261                 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
6262                 vdev_config_dirty(spa->spa_root_vdev);
6263                 spa_config_exit(spa, SCL_CONFIG, FTAG);
6264         }
6265         txg_wait_synced(spa_get_dsl(spa), 0);
6266 
6267         spa_close(spa, FTAG);
6268 
6269         return (err);
6270 }
6271 
6272 /*
6273  * Load a user's wrapping key into the kernel.
6274  * innvl: {
6275  *     "hidden_args" -> { "wkeydata" -> value }
6276  *         raw uint8_t array of encryption wrapping key data (32 bytes)
6277  *     (optional) "noop" -> (value ignored)
6278  *         presence indicated key should only be verified, not loaded
6279  * }
6280  */
6281 /* ARGSUSED */
6282 static int
6283 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6284 {
6285         int ret = 0;
6286         dsl_crypto_params_t *dcp = NULL;
6287         nvlist_t *hidden_args;
6288         boolean_t noop = nvlist_exists(innvl, "noop");
6289 
6290         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6291                 ret = SET_ERROR(EINVAL);
6292                 goto error;
6293         }
6294 
6295         ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6296         if (ret != 0) {
6297                 ret = SET_ERROR(EINVAL);
6298                 goto error;
6299         }
6300 
6301         ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
6302             hidden_args, &dcp);
6303         if (ret != 0)
6304                 goto error;
6305 
6306         ret = spa_keystore_load_wkey(dsname, dcp, noop);
6307         if (ret != 0)
6308                 goto error;
6309 
6310         dsl_crypto_params_free(dcp, noop);
6311 
6312         return (0);
6313 
6314 error:
6315         dsl_crypto_params_free(dcp, B_TRUE);
6316         return (ret);
6317 }
6318 
6319 /*
6320  * Unload a user's wrapping key from the kernel.
6321  * Both innvl and outnvl are unused.
6322  */
6323 /* ARGSUSED */
6324 static int
6325 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6326 {
6327         int ret = 0;
6328 
6329         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6330                 ret = (SET_ERROR(EINVAL));
6331                 goto out;
6332         }
6333 
6334         ret = spa_keystore_unload_wkey(dsname);
6335         if (ret != 0)
6336                 goto out;
6337 
6338 out:
6339         return (ret);
6340 }
6341 
6342 /*
6343  * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
6344  * keylocation, pbkdf2salt, and  pbkdf2iters properties can also be specified
6345  * here to change how the key is derived in userspace.
6346  *
6347  * innvl: {
6348  *    "hidden_args" (optional) -> { "wkeydata" -> value }
6349  *         raw uint8_t array of new encryption wrapping key data (32 bytes)
6350  *    "props" (optional) -> { prop -> value }
6351  * }
6352  *
6353  * outnvl is unused
6354  */
6355 /* ARGSUSED */
6356 static int
6357 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6358 {
6359         int ret;
6360         uint64_t cmd = DCP_CMD_NONE;
6361         dsl_crypto_params_t *dcp = NULL;
6362         nvlist_t *args = NULL, *hidden_args = NULL;
6363 
6364         if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6365                 ret = (SET_ERROR(EINVAL));
6366                 goto error;
6367         }
6368 
6369         (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
6370         (void) nvlist_lookup_nvlist(innvl, "props", &args);
6371         (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6372 
6373         ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
6374         if (ret != 0)
6375                 goto error;
6376 
6377         ret = spa_keystore_change_key(dsname, dcp);
6378         if (ret != 0)
6379                 goto error;
6380 
6381         dsl_crypto_params_free(dcp, B_FALSE);
6382 
6383         return (0);
6384 
6385 error:
6386         dsl_crypto_params_free(dcp, B_TRUE);
6387         return (ret);
6388 }
6389 
/*
 * Table of registered ioctl handlers.  The registration helpers below index
 * it by (ioc - ZFS_IOC_FIRST).
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6391 
6392 static void
6393 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6394     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6395     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6396 {
6397         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6398 
6399         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6400         ASSERT3U(ioc, <, ZFS_IOC_LAST);
6401         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6402         ASSERT3P(vec->zvec_func, ==, NULL);
6403 
6404         vec->zvec_legacy_func = func;
6405         vec->zvec_secpolicy = secpolicy;
6406         vec->zvec_namecheck = namecheck;
6407         vec->zvec_allow_log = log_history;
6408         vec->zvec_pool_check = pool_check;
6409 }
6410 
6411 /*
6412  * See the block comment at the beginning of this file for details on
6413  * each argument to this function.
6414  */
6415 static void
6416 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
6417     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6418     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
6419     boolean_t allow_log)
6420 {
6421         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6422 
6423         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6424         ASSERT3U(ioc, <, ZFS_IOC_LAST);
6425         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6426         ASSERT3P(vec->zvec_func, ==, NULL);
6427 
6428         /* if we are logging, the name must be valid */
6429         ASSERT(!allow_log || namecheck != NO_NAME);
6430 
6431         vec->zvec_name = name;
6432         vec->zvec_func = func;
6433         vec->zvec_secpolicy = secpolicy;
6434         vec->zvec_namecheck = namecheck;
6435         vec->zvec_pool_check = pool_check;
6436         vec->zvec_smush_outnvlist = smush_outnvlist;
6437         vec->zvec_allow_log = allow_log;
6438 }
6439 
6440 static void
6441 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6442     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
6443     zfs_ioc_poolcheck_t pool_check)
6444 {
6445         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6446             POOL_NAME, log_history, pool_check);
6447 }
6448 
6449 static void
6450 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6451     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
6452 {
6453         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6454             DATASET_NAME, B_FALSE, pool_check);
6455 }
6456 
6457 static void
6458 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6459 {
6460         zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6461             POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6462 }
6463 
6464 static void
6465 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6466     zfs_secpolicy_func_t *secpolicy)
6467 {
6468         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6469             NO_NAME, B_FALSE, POOL_CHECK_NONE);
6470 }
6471 
6472 static void
6473 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6474     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6475 {
6476         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6477             DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6478 }
6479 
6480 static void
6481 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6482 {
6483         zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6484             zfs_secpolicy_read);
6485 }
6486 
6487 static void
6488 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6489     zfs_secpolicy_func_t *secpolicy)
6490 {
6491         zfs_ioctl_register_legacy(ioc, func, secpolicy,
6492             DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6493 }
6494 
/*
 * Fill in zfs_ioc_vec by registering ioctl handlers through the
 * zfs_ioctl_register*() helpers.  Each registration binds an ioctl number
 * to its handler, security policy, name check, pool status checks and
 * logging behaviour.  The helpers assert that a slot is registered at most
 * once.
 */
static void
zfs_ioctl_init(void)
{
        /* IOCTLS registered through zfs_ioctl_register() */

        zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
            zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
            zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

        zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
            zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
            zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
            zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("create", ZFS_IOC_CREATE,
            zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("clone", ZFS_IOC_CLONE,
            zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("remap", ZFS_IOC_REMAP,
            zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

        zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
            zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("hold", ZFS_IOC_HOLD,
            zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
        zfs_ioctl_register("release", ZFS_IOC_RELEASE,
            zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
            zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
            zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

        zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
            zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
            zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
            zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
            POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
            zfs_ioc_channel_program, zfs_secpolicy_config,
            POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
            B_TRUE);

        zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
            zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("zpool_discard_checkpoint",
            ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
            zfs_secpolicy_config, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
            zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
            zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
            zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

        /* Wrapping-key management for encrypted datasets. */
        zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
            zfs_ioc_load_key, zfs_secpolicy_load_key,
            DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
        zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
            zfs_ioc_unload_key, zfs_secpolicy_load_key,
            DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
        zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
            zfs_ioc_change_key, zfs_secpolicy_change_key,
            DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
            B_TRUE, B_TRUE);

        /* IOCTLS that use the legacy function signature */

        zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
            zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

        zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
            zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
        zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
            zfs_ioc_pool_scan);
        zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
            zfs_ioc_pool_upgrade);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
            zfs_ioc_vdev_add);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
            zfs_ioc_vdev_remove);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
            zfs_ioc_vdev_set_state);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
            zfs_ioc_vdev_attach);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
            zfs_ioc_vdev_detach);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
            zfs_ioc_vdev_setpath);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
            zfs_ioc_vdev_setfru);
        zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
            zfs_ioc_pool_set_props);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
            zfs_ioc_vdev_split);
        zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
            zfs_ioc_pool_reguid);

        /* Registered with NO_NAME, no logging and POOL_CHECK_NONE. */
        zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
            zfs_ioc_pool_configs, zfs_secpolicy_none);
        zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
            zfs_ioc_pool_tryimport, zfs_secpolicy_config);
        zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
            zfs_ioc_inject_fault, zfs_secpolicy_inject);
        zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
            zfs_ioc_clear_fault, zfs_secpolicy_inject);
        zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
            zfs_ioc_inject_list_next, zfs_secpolicy_inject);

        /*
         * pool destroy, and export don't log the history as part of
         * zfsdev_ioctl, but rather zfs_ioc_pool_export
         * does the logging of those commands.
         */
        zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
            zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
            zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

        zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
            zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
            zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

        zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
            zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
        zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
            zfs_ioc_dsobj_to_dsname,
            zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
            zfs_ioc_pool_get_history,
            zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

        zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
            zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

        zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
            zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
            zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

        /* Dataset reads using the zfs_secpolicy_read policy. */
        zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
            zfs_ioc_space_written);
        zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
            zfs_ioc_objset_recvd_props);
        zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
            zfs_ioc_next_obj);
        zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
            zfs_ioc_get_fsacl);
        zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
            zfs_ioc_objset_stats);
        zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
            zfs_ioc_objset_zplprops);
        zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
            zfs_ioc_dataset_list_next);
        zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
            zfs_ioc_snapshot_list_next);
        zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
            zfs_ioc_send_progress);

        /* Dataset reads that supply their own security policy. */
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
            zfs_ioc_diff, zfs_secpolicy_diff);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
            zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
            zfs_ioc_obj_to_path, zfs_secpolicy_diff);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
            zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
            zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
            zfs_ioc_send, zfs_secpolicy_send);

        /* Dataset modifications: logged, suspended and read-only checked. */
        zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
            zfs_secpolicy_none);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
            zfs_secpolicy_destroy);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
            zfs_secpolicy_rename);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
            zfs_secpolicy_recv);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
            zfs_secpolicy_promote);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
            zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
            zfs_secpolicy_set_fsacl);

        /* Dataset ioctls that are never logged. */
        zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
            zfs_secpolicy_share, POOL_CHECK_NONE);
        zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
            zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
        zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
            zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
        zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
            zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
6732 
6733 int
6734 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6735     zfs_ioc_poolcheck_t check)
6736 {
6737         spa_t *spa;
6738         int error;
6739 
6740         ASSERT(type == POOL_NAME || type == DATASET_NAME);
6741 
6742         if (check & POOL_CHECK_NONE)
6743                 return (0);
6744 
6745         error = spa_open(name, &spa, FTAG);
6746         if (error == 0) {
6747                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6748                         error = SET_ERROR(EAGAIN);
6749                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6750                         error = SET_ERROR(EROFS);
6751                 spa_close(spa, FTAG);
6752         }
6753         return (error);
6754 }
6755 
6756 /*
6757  * Find a free minor number.
6758  */
6759 minor_t
6760 zfsdev_minor_alloc(void)
6761 {
6762         static minor_t last_minor;
6763         minor_t m;
6764 
6765         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6766 
6767         for (m = last_minor + 1; m != last_minor; m++) {
6768                 if (m > ZFSDEV_MAX_MINOR)
6769                         m = 1;
6770                 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6771                         last_minor = m;
6772                         return (m);
6773                 }
6774         }
6775 
6776         return (0);
6777 }
6778 
6779 static int
6780 zfs_ctldev_init(dev_t *devp)
6781 {
6782         minor_t minor;
6783         zfs_soft_state_t *zs;
6784 
6785         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6786         ASSERT(getminor(*devp) == 0);
6787 
6788         minor = zfsdev_minor_alloc();
6789         if (minor == 0)
6790                 return (SET_ERROR(ENXIO));
6791 
6792         if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6793                 return (SET_ERROR(EAGAIN));
6794 
6795         *devp = makedevice(getemajor(*devp), minor);
6796 
6797         zs = ddi_get_soft_state(zfsdev_state, minor);
6798         zs->zss_type = ZSST_CTLDEV;
6799         zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6800 
6801         return (0);
6802 }
6803 
/*
 * Tear down a control-device instance: destroy its on-exit data 'zo' and
 * then free the soft state of 'minor'.  The caller must hold
 * zfsdev_state_lock.
 */
static void
zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
{
        ASSERT(MUTEX_HELD(&zfsdev_state_lock));

        /* Destroy the on-exit data before releasing the soft state slot. */
        zfs_onexit_destroy(zo);
        ddi_soft_state_free(zfsdev_state, minor);
}
6812 
6813 void *
6814 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6815 {
6816         zfs_soft_state_t *zp;
6817 
6818         zp = ddi_get_soft_state(zfsdev_state, minor);
6819         if (zp == NULL || zp->zss_type != which)
6820                 return (NULL);
6821 
6822         return (zp->zss_data);
6823 }
6824 
6825 static int
6826 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
6827 {
6828         int error = 0;
6829 
6830         if (getminor(*devp) != 0)
6831                 return (zvol_open(devp, flag, otyp, cr));
6832 
6833         /* This is the control device. Allocate a new minor if requested. */
6834         if (flag & FEXCL) {
6835                 mutex_enter(&zfsdev_state_lock);
6836                 error = zfs_ctldev_init(devp);
6837                 mutex_exit(&zfsdev_state_lock);
6838         }
6839 
6840         return (error);
6841 }
6842 
6843 static int
6844 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
6845 {
6846         zfs_onexit_t *zo;
6847         minor_t minor = getminor(dev);
6848 
6849         if (minor == 0)
6850                 return (0);
6851 
6852         mutex_enter(&zfsdev_state_lock);
6853         zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6854         if (zo == NULL) {
6855                 mutex_exit(&zfsdev_state_lock);
6856                 return (zvol_close(dev, flag, otyp, cr));
6857         }
6858         zfs_ctldev_destroy(zo, minor);
6859         mutex_exit(&zfsdev_state_lock);
6860 
6861         return (0);
6862 }
6863 
6864 static int
6865 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
6866 {
6867         zfs_cmd_t *zc;
6868         uint_t vecnum;
6869         int error, rc, len;
6870         minor_t minor = getminor(dev);
6871         const zfs_ioc_vec_t *vec;
6872         char *saved_poolname = NULL;
6873         nvlist_t *innvl = NULL;
6874 
6875         if (minor != 0 &&
6876             zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
6877                 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
6878 
6879         vecnum = cmd - ZFS_IOC_FIRST;
6880         ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
6881 
6882         if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6883                 return (SET_ERROR(EINVAL));
6884         vec = &zfs_ioc_vec[vecnum];
6885 
6886         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
6887 
6888         error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6889         if (error != 0) {
6890                 error = SET_ERROR(EFAULT);
6891                 goto out;
6892         }
6893 
6894         zc->zc_iflags = flag & FKIOCTL;
6895         if (zc->zc_nvlist_src_size != 0) {
6896                 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6897                     zc->zc_iflags, &innvl);
6898                 if (error != 0)
6899                         goto out;
6900         }
6901 
6902         /*
6903          * Ensure that all pool/dataset names are valid before we pass down to
6904          * the lower layers.
6905          */
6906         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6907         switch (vec->zvec_namecheck) {
6908         case POOL_NAME:
6909                 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6910                         error = SET_ERROR(EINVAL);
6911                 else
6912                         error = pool_status_check(zc->zc_name,
6913                             vec->zvec_namecheck, vec->zvec_pool_check);
6914                 break;
6915 
6916         case DATASET_NAME:
6917                 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6918                         error = SET_ERROR(EINVAL);
6919                 else
6920                         error = pool_status_check(zc->zc_name,
6921                             vec->zvec_namecheck, vec->zvec_pool_check);
6922                 break;
6923 
6924         case NO_NAME:
6925                 break;
6926         }
6927 
6928 
6929         if (error == 0)
6930                 error = vec->zvec_secpolicy(zc, innvl, cr);
6931 
6932         if (error != 0)
6933                 goto out;
6934 
6935         /* legacy ioctls can modify zc_name */
6936         len = strcspn(zc->zc_name, "/@#") + 1;
6937         saved_poolname = kmem_alloc(len, KM_SLEEP);
6938         (void) strlcpy(saved_poolname, zc->zc_name, len);
6939 
6940         if (vec->zvec_func != NULL) {
6941                 nvlist_t *outnvl;
6942                 int puterror = 0;
6943                 spa_t *spa;
6944                 nvlist_t *lognv = NULL;
6945 
6946                 ASSERT(vec->zvec_legacy_func == NULL);
6947 
6948                 /*
6949                  * Add the innvl to the lognv before calling the func,
6950                  * in case the func changes the innvl.
6951                  */
6952                 if (vec->zvec_allow_log) {
6953                         lognv = fnvlist_alloc();
6954                         fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6955                             vec->zvec_name);
6956                         if (!nvlist_empty(innvl)) {
6957                                 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6958                                     innvl);
6959                         }
6960                 }
6961 
6962                 outnvl = fnvlist_alloc();
6963                 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6964 
6965                 /*
6966                  * Some commands can partially execute, modify state, and still
6967                  * return an error.  In these cases, attempt to record what
6968                  * was modified.
6969                  */
6970                 if ((error == 0 ||
6971                     (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
6972                     vec->zvec_allow_log &&
6973                     spa_open(zc->zc_name, &spa, FTAG) == 0) {
6974                         if (!nvlist_empty(outnvl)) {
6975                                 fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6976                                     outnvl);
6977                         }
6978                         if (error != 0) {
6979                                 fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
6980                                     error);
6981                         }
6982                         (void) spa_history_log_nvl(spa, lognv);
6983                         spa_close(spa, FTAG);
6984                 }
6985                 fnvlist_free(lognv);
6986 
6987                 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6988                         int smusherror = 0;
6989                         if (vec->zvec_smush_outnvlist) {
6990                                 smusherror = nvlist_smush(outnvl,
6991                                     zc->zc_nvlist_dst_size);
6992                         }
6993                         if (smusherror == 0)
6994                                 puterror = put_nvlist(zc, outnvl);
6995                 }
6996 
6997                 if (puterror != 0)
6998                         error = puterror;
6999 
7000                 nvlist_free(outnvl);
7001         } else {
7002                 error = vec->zvec_legacy_func(zc);
7003         }
7004 
7005 out:
7006         nvlist_free(innvl);
7007         rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
7008         if (error == 0 && rc != 0)
7009                 error = SET_ERROR(EFAULT);
7010         if (error == 0 && vec->zvec_allow_log) {
7011                 char *s = tsd_get(zfs_allow_log_key);
7012                 if (s != NULL)
7013                         strfree(s);
7014                 (void) tsd_set(zfs_allow_log_key, saved_poolname);
7015         } else {
7016                 if (saved_poolname != NULL)
7017                         strfree(saved_poolname);
7018         }
7019 
7020         kmem_free(zc, sizeof (zfs_cmd_t));
7021         return (error);
7022 }
7023 
7024 static int
7025 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
7026 {
7027         if (cmd != DDI_ATTACH)
7028                 return (DDI_FAILURE);
7029 
7030         if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
7031             DDI_PSEUDO, 0) == DDI_FAILURE)
7032                 return (DDI_FAILURE);
7033 
7034         zfs_dip = dip;
7035 
7036         ddi_report_dev(dip);
7037 
7038         return (DDI_SUCCESS);
7039 }
7040 
7041 static int
7042 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7043 {
7044         if (spa_busy() || zfs_busy() || zvol_busy())
7045                 return (DDI_FAILURE);
7046 
7047         if (cmd != DDI_DETACH)
7048                 return (DDI_FAILURE);
7049 
7050         zfs_dip = NULL;
7051 
7052         ddi_prop_remove_all(dip);
7053         ddi_remove_minor_node(dip, NULL);
7054 
7055         return (DDI_SUCCESS);
7056 }
7057 
7058 /*ARGSUSED*/
7059 static int
7060 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
7061 {
7062         switch (infocmd) {
7063         case DDI_INFO_DEVT2DEVINFO:
7064                 *result = zfs_dip;
7065                 return (DDI_SUCCESS);
7066 
7067         case DDI_INFO_DEVT2INSTANCE:
7068                 *result = (void *)0;
7069                 return (DDI_SUCCESS);
7070         }
7071 
7072         return (DDI_FAILURE);
7073 }
7074 
7075 /*
7076  * OK, so this is a little weird.
7077  *
7078  * /dev/zfs is the control node, i.e. minor 0.
7079  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
7080  *
7081  * /dev/zfs has basically nothing to do except serve up ioctls,
7082  * so most of the standard driver entry points are in zvol.c.
7083  */
7084 static struct cb_ops zfs_cb_ops = {
7085         zfsdev_open,    /* open */
7086         zfsdev_close,   /* close */
7087         zvol_strategy,  /* strategy */
7088         nodev,          /* print */
7089         zvol_dump,      /* dump */
7090         zvol_read,      /* read */
7091         zvol_write,     /* write */
7092         zfsdev_ioctl,   /* ioctl */
7093         nodev,          /* devmap */
7094         nodev,          /* mmap */
7095         nodev,          /* segmap */
7096         nochpoll,       /* poll */
7097         ddi_prop_op,    /* prop_op */
7098         NULL,           /* streamtab */
7099         D_NEW | D_MP | D_64BIT,         /* Driver compatibility flag */
7100         CB_REV,         /* version */
7101         nodev,          /* async read */
7102         nodev,          /* async write */
7103 };
7104 
7105 static struct dev_ops zfs_dev_ops = {
7106         DEVO_REV,       /* version */
7107         0,              /* refcnt */
7108         zfs_info,       /* info */
7109         nulldev,        /* identify */
7110         nulldev,        /* probe */
7111         zfs_attach,     /* attach */
7112         zfs_detach,     /* detach */
7113         nodev,          /* reset */
7114         &zfs_cb_ops,        /* driver operations */
7115         NULL,           /* no bus operations */
7116         NULL,           /* power */
7117         ddi_quiesce_not_needed, /* quiesce */
7118 };
7119 
7120 static struct modldrv zfs_modldrv = {
7121         &mod_driverops,
7122         "ZFS storage pool",
7123         &zfs_dev_ops
7124 };
7125 
7126 static struct modlinkage modlinkage = {
7127         MODREV_1,
7128         (void *)&zfs_modlfs,
7129         (void *)&zfs_modldrv,
7130         NULL
7131 };
7132 
/*
 * Destructor registered for zfs_allow_log_key: the stored value is a pool
 * name string, which is released with strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
        strfree((char *)arg);
}
7139 
7140 int
7141 _init(void)
7142 {
7143         int error;
7144 
7145         spa_init(FREAD | FWRITE);
7146         zfs_init();
7147         zvol_init();
7148         zfs_ioctl_init();
7149 
7150         if ((error = mod_install(&modlinkage)) != 0) {
7151                 zvol_fini();
7152                 zfs_fini();
7153                 spa_fini();
7154                 return (error);
7155         }
7156 
7157         tsd_create(&zfs_fsyncer_key, NULL);
7158         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7159         tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7160 
7161         error = ldi_ident_from_mod(&modlinkage, &zfs_li);
7162         ASSERT(error == 0);
7163         mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
7164 
7165         return (0);
7166 }
7167 
7168 int
7169 _fini(void)
7170 {
7171         int error;
7172 
7173         if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
7174                 return (SET_ERROR(EBUSY));
7175 
7176         if ((error = mod_remove(&modlinkage)) != 0)
7177                 return (error);
7178 
7179         zvol_fini();
7180         zfs_fini();
7181         spa_fini();
7182         if (zfs_nfsshare_inited)
7183                 (void) ddi_modclose(nfs_mod);
7184         if (zfs_smbshare_inited)
7185                 (void) ddi_modclose(smbsrv_mod);
7186         if (zfs_nfsshare_inited || zfs_smbshare_inited)
7187                 (void) ddi_modclose(sharefs_mod);
7188 
7189         tsd_destroy(&zfs_fsyncer_key);
7190         ldi_ident_release(zfs_li);
7191         zfs_li = NULL;
7192         mutex_destroy(&zfs_share_lock);
7193 
7194         return (error);
7195 }
7196 
7197 int
7198 _info(struct modinfo *modinfop)
7199 {
7200         return (mod_info(&modlinkage, modinfop));
7201 }