1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/types.h>
  26 #include <sys/param.h>
  27 #include <sys/errno.h>
  28 #include <sys/uio.h>
  29 #include <sys/buf.h>
  30 #include <sys/modctl.h>
  31 #include <sys/open.h>
  32 #include <sys/file.h>
  33 #include <sys/kmem.h>
  34 #include <sys/conf.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/stat.h>
  37 #include <sys/zfs_ioctl.h>
  38 #include <sys/zfs_vfsops.h>
  39 #include <sys/zfs_znode.h>
  40 #include <sys/zap.h>
  41 #include <sys/spa.h>
  42 #include <sys/spa_impl.h>
  43 #include <sys/vdev.h>
  44 #include <sys/priv_impl.h>
  45 #include <sys/dmu.h>
  46 #include <sys/dsl_dir.h>
  47 #include <sys/dsl_dataset.h>
  48 #include <sys/dsl_prop.h>
  49 #include <sys/dsl_deleg.h>
  50 #include <sys/dmu_objset.h>
  51 #include <sys/ddi.h>
  52 #include <sys/sunddi.h>
  53 #include <sys/sunldi.h>
  54 #include <sys/policy.h>
  55 #include <sys/zone.h>
  56 #include <sys/nvpair.h>
  57 #include <sys/pathname.h>
  58 #include <sys/mount.h>
  59 #include <sys/sdt.h>
  60 #include <sys/fs/zfs.h>
  61 #include <sys/zfs_ctldir.h>
  62 #include <sys/zfs_dir.h>
  63 #include <sys/zfs_onexit.h>
  64 #include <sys/zvol.h>
  65 #include <sys/dsl_scan.h>
  66 #include <sharefs/share.h>
  67 #include <sys/dmu_objset.h>
  68 
  69 #include "zfs_namecheck.h"
  70 #include "zfs_prop.h"
  71 #include "zfs_deleg.h"
  72 #include "zfs_comutil.h"
  73 
/* Filesystem module linkage; defined outside this file. */
extern struct modlfs zfs_modlfs;

/* ZFS setup and teardown entry points; defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/* LDI identity handle for this driver; starts out NULL. */
ldi_ident_t zfs_li = NULL;
/* Device-info node pointer; presumably filled in at attach time -- confirm. */
dev_info_t *zfs_dip;
  81 
/* Shape of an ioctl handler: receives the command block, returns an int. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/*
 * Shape of a security-policy check: receives the command block and the
 * caller's credentials.  The policy functions in this file return 0 to
 * allow the operation and an errno value to refuse it.
 */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
  84 
/*
 * Kind of name an ioctl carries in its command block; stored in
 * zfs_ioc_vec_t.zvec_namecheck.
 * NOTE(review): presumably selects which name validation the dispatcher
 * applies -- confirm against the dispatch code.
 */
typedef enum {
        NO_NAME,
        POOL_NAME,
        DATASET_NAME
} zfs_ioc_namecheck_t;
  90 
/*
 * Pool-check flags; each value is a distinct single bit.  Stored in
 * zfs_ioc_vec_t.zvec_pool_check.
 * NOTE(review): presumably names pool states (suspended, read-only) that
 * the dispatcher tests before running an ioctl -- confirm.
 */
typedef enum {
        POOL_CHECK_NONE         = 1 << 0,
        POOL_CHECK_SUSPENDED    = 1 << 1,
        POOL_CHECK_READONLY     = 1 << 2
} zfs_ioc_poolcheck_t;
  96 
/*
 * Per-ioctl descriptor: bundles the handler with its security-policy check
 * and the validation attributes for that ioctl.
 */
typedef struct zfs_ioc_vec {
        zfs_ioc_func_t          *zvec_func;             /* ioctl handler */
        zfs_secpolicy_func_t    *zvec_secpolicy;        /* permission check */
        zfs_ioc_namecheck_t     zvec_namecheck;         /* kind of name used */
        /* presumably: record the command in pool history -- confirm */
        boolean_t               zvec_his_log;
        zfs_ioc_poolcheck_t     zvec_pool_check;        /* POOL_CHECK_* flags */
} zfs_ioc_vec_t;
 104 
/*
 * This array is indexed by zfs_userquota_prop_t.
 *
 * Maps each user/group space property to the delegated permission name
 * that grants access to it.  zfs_secpolicy_userspace_one() and
 * zfs_secpolicy_userspace_many() reject indices at or above
 * ZFS_NUM_USERQUOTA_PROPS before indexing, so the number of entries here
 * must equal that constant.
 */
static const char *userquota_perms[] = {
        ZFS_DELEG_PERM_USERUSED,
        ZFS_DELEG_PERM_USERQUOTA,
        ZFS_DELEG_PERM_GROUPUSED,
        ZFS_DELEG_PERM_GROUPQUOTA,
};
 112 
/*
 * Forward declarations for routines defined later in this file.  All are
 * file-local except zfs_set_prop_nvlist(), which has external linkage.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
 121 
 122 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 123 void
 124 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 125 {
 126         const char *newfile;
 127         char buf[512];
 128         va_list adx;
 129 
 130         /*
 131          * Get rid of annoying "../common/" prefix to filename.
 132          */
 133         newfile = strrchr(file, '/');
 134         if (newfile != NULL) {
 135                 newfile = newfile + 1; /* Get rid of leading / */
 136         } else {
 137                 newfile = file;
 138         }
 139 
 140         va_start(adx, fmt);
 141         (void) vsnprintf(buf, sizeof (buf), fmt, adx);
 142         va_end(adx);
 143 
 144         /*
 145          * To get this data, use the zfs-dprintf probe as so:
 146          * dtrace -q -n 'zfs-dprintf \
 147          *      /stringof(arg0) == "dbuf.c"/ \
 148          *      {printf("%s: %s", stringof(arg1), stringof(arg3))}'
 149          * arg0 = file name
 150          * arg1 = function name
 151          * arg2 = line number
 152          * arg3 = message
 153          */
 154         DTRACE_PROBE4(zfs__dprintf,
 155             char *, newfile, char *, func, int, line, char *, buf);
 156 }
 157 
/*
 * Release a history buffer obtained from history_str_get().  The size
 * passed to kmem_free() must match the HIS_MAX_RECORD_LEN used when the
 * buffer was allocated.
 */
static void
history_str_free(char *buf)
{
        kmem_free(buf, HIS_MAX_RECORD_LEN);
}
 163 
 164 static char *
 165 history_str_get(zfs_cmd_t *zc)
 166 {
 167         char *buf;
 168 
 169         if (zc->zc_history == NULL)
 170                 return (NULL);
 171 
 172         buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 173         if (copyinstr((void *)(uintptr_t)zc->zc_history,
 174             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
 175                 history_str_free(buf);
 176                 return (NULL);
 177         }
 178 
 179         buf[HIS_MAX_RECORD_LEN -1] = '\0';
 180 
 181         return (buf);
 182 }
 183 
 184 /*
 185  * Check to see if the named dataset is currently defined as bootable
 186  */
 187 static boolean_t
 188 zfs_is_bootfs(const char *name)
 189 {
 190         objset_t *os;
 191 
 192         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 193                 boolean_t ret;
 194                 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
 195                 dmu_objset_rele(os, FTAG);
 196                 return (ret);
 197         }
 198         return (B_FALSE);
 199 }
 200 
 201 /*
 202  * zfs_earlier_version
 203  *
 204  *      Return non-zero if the spa version is less than requested version.
 205  */
 206 static int
 207 zfs_earlier_version(const char *name, int version)
 208 {
 209         spa_t *spa;
 210 
 211         if (spa_open(name, &spa, FTAG) == 0) {
 212                 if (spa_version(spa) < version) {
 213                         spa_close(spa, FTAG);
 214                         return (1);
 215                 }
 216                 spa_close(spa, FTAG);
 217         }
 218         return (0);
 219 }
 220 
 221 /*
 222  * zpl_earlier_version
 223  *
 224  * Return TRUE if the ZPL version is less than requested version.
 225  */
 226 static boolean_t
 227 zpl_earlier_version(const char *name, int version)
 228 {
 229         objset_t *os;
 230         boolean_t rc = B_TRUE;
 231 
 232         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 233                 uint64_t zplversion;
 234 
 235                 if (dmu_objset_type(os) != DMU_OST_ZFS) {
 236                         dmu_objset_rele(os, FTAG);
 237                         return (B_TRUE);
 238                 }
 239                 /* XXX reading from non-owned objset */
 240                 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 241                         rc = zplversion < version;
 242                 dmu_objset_rele(os, FTAG);
 243         }
 244         return (rc);
 245 }
 246 
 247 static void
 248 zfs_log_history(zfs_cmd_t *zc)
 249 {
 250         spa_t *spa;
 251         char *buf;
 252 
 253         if ((buf = history_str_get(zc)) == NULL)
 254                 return;
 255 
 256         if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 257                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 258                         (void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
 259                 spa_close(spa, FTAG);
 260         }
 261         history_str_free(buf);
 262 }
 263 
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * Neither argument is examined; the result is always 0 (allowed).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
{
        return (0);
}
 274 
 275 /*
 276  * Policy for dataset read operations (list children, get statistics).  Requires
 277  * no privileges, but must be visible in the local zone.
 278  */
 279 /* ARGSUSED */
 280 static int
 281 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
 282 {
 283         if (INGLOBALZONE(curproc) ||
 284             zone_dataset_visible(zc->zc_name, NULL))
 285                 return (0);
 286 
 287         return (ENOENT);
 288 }
 289 
 290 static int
 291 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 292 {
 293         int writable = 1;
 294 
 295         /*
 296          * The dataset must be visible by this zone -- check this first
 297          * so they don't see EPERM on something they shouldn't know about.
 298          */
 299         if (!INGLOBALZONE(curproc) &&
 300             !zone_dataset_visible(dataset, &writable))
 301                 return (ENOENT);
 302 
 303         if (INGLOBALZONE(curproc)) {
 304                 /*
 305                  * If the fs is zoned, only root can access it from the
 306                  * global zone.
 307                  */
 308                 if (secpolicy_zfs(cr) && zoned)
 309                         return (EPERM);
 310         } else {
 311                 /*
 312                  * If we are in a local zone, the 'zoned' property must be set.
 313                  */
 314                 if (!zoned)
 315                         return (EPERM);
 316 
 317                 /* must be writable by this zone */
 318                 if (!writable)
 319                         return (EPERM);
 320         }
 321         return (0);
 322 }
 323 
 324 static int
 325 zfs_dozonecheck(const char *dataset, cred_t *cr)
 326 {
 327         uint64_t zoned;
 328 
 329         if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
 330                 return (ENOENT);
 331 
 332         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 333 }
 334 
 335 static int
 336 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 337 {
 338         uint64_t zoned;
 339 
 340         rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
 341         if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
 342                 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 343                 return (ENOENT);
 344         }
 345         rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 346 
 347         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 348 }
 349 
 350 int
 351 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 352 {
 353         int error;
 354 
 355         error = zfs_dozonecheck(name, cr);
 356         if (error == 0) {
 357                 error = secpolicy_zfs(cr);
 358                 if (error)
 359                         error = dsl_deleg_access(name, perm, cr);
 360         }
 361         return (error);
 362 }
 363 
 364 int
 365 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
 366     const char *perm, cred_t *cr)
 367 {
 368         int error;
 369 
 370         error = zfs_dozonecheck_ds(name, ds, cr);
 371         if (error == 0) {
 372                 error = secpolicy_zfs(cr);
 373                 if (error)
 374                         error = dsl_deleg_access_impl(ds, perm, cr);
 375         }
 376         return (error);
 377 }
 378 
 379 /*
 380  * Policy for setting the security label property.
 381  *
 382  * Returns 0 for success, non-zero for access and other errors.
 383  */
 384 static int
 385 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 386 {
 387         char            ds_hexsl[MAXNAMELEN];
 388         bslabel_t       ds_sl, new_sl;
 389         boolean_t       new_default = FALSE;
 390         uint64_t        zoned;
 391         int             needed_priv = -1;
 392         int             error;
 393 
 394         /* First get the existing dataset label. */
 395         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 396             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 397         if (error)
 398                 return (EPERM);
 399 
 400         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 401                 new_default = TRUE;
 402 
 403         /* The label must be translatable */
 404         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 405                 return (EINVAL);
 406 
 407         /*
 408          * In a non-global zone, disallow attempts to set a label that
 409          * doesn't match that of the zone; otherwise no other checks
 410          * are needed.
 411          */
 412         if (!INGLOBALZONE(curproc)) {
 413                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 414                         return (EPERM);
 415                 return (0);
 416         }
 417 
 418         /*
 419          * For global-zone datasets (i.e., those whose zoned property is
 420          * "off", verify that the specified new label is valid for the
 421          * global zone.
 422          */
 423         if (dsl_prop_get_integer(name,
 424             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 425                 return (EPERM);
 426         if (!zoned) {
 427                 if (zfs_check_global_label(name, strval) != 0)
 428                         return (EPERM);
 429         }
 430 
 431         /*
 432          * If the existing dataset label is nondefault, check if the
 433          * dataset is mounted (label cannot be changed while mounted).
 434          * Get the zfsvfs; if there isn't one, then the dataset isn't
 435          * mounted (or isn't a dataset, doesn't exist, ...).
 436          */
 437         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 438                 objset_t *os;
 439                 static char *setsl_tag = "setsl_tag";
 440 
 441                 /*
 442                  * Try to own the dataset; abort if there is any error,
 443                  * (e.g., already mounted, in use, or other error).
 444                  */
 445                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
 446                     setsl_tag, &os);
 447                 if (error)
 448                         return (EPERM);
 449 
 450                 dmu_objset_disown(os, setsl_tag);
 451 
 452                 if (new_default) {
 453                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 454                         goto out_check;
 455                 }
 456 
 457                 if (hexstr_to_label(strval, &new_sl) != 0)
 458                         return (EPERM);
 459 
 460                 if (blstrictdom(&ds_sl, &new_sl))
 461                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 462                 else if (blstrictdom(&new_sl, &ds_sl))
 463                         needed_priv = PRIV_FILE_UPGRADE_SL;
 464         } else {
 465                 /* dataset currently has a default label */
 466                 if (!new_default)
 467                         needed_priv = PRIV_FILE_UPGRADE_SL;
 468         }
 469 
 470 out_check:
 471         if (needed_priv != -1)
 472                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 473         return (0);
 474 }
 475 
 476 static int
 477 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
 478     cred_t *cr)
 479 {
 480         char *strval;
 481 
 482         /*
 483          * Check permissions for special properties.
 484          */
 485         switch (prop) {
 486         case ZFS_PROP_ZONED:
 487                 /*
 488                  * Disallow setting of 'zoned' from within a local zone.
 489                  */
 490                 if (!INGLOBALZONE(curproc))
 491                         return (EPERM);
 492                 break;
 493 
 494         case ZFS_PROP_QUOTA:
 495                 if (!INGLOBALZONE(curproc)) {
 496                         uint64_t zoned;
 497                         char setpoint[MAXNAMELEN];
 498                         /*
 499                          * Unprivileged users are allowed to modify the
 500                          * quota on things *under* (ie. contained by)
 501                          * the thing they own.
 502                          */
 503                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
 504                             setpoint))
 505                                 return (EPERM);
 506                         if (!zoned || strlen(dsname) <= strlen(setpoint))
 507                                 return (EPERM);
 508                 }
 509                 break;
 510 
 511         case ZFS_PROP_MLSLABEL:
 512                 if (!is_system_labeled())
 513                         return (EPERM);
 514 
 515                 if (nvpair_value_string(propval, &strval) == 0) {
 516                         int err;
 517 
 518                         err = zfs_set_slabel_policy(dsname, strval, CRED());
 519                         if (err != 0)
 520                                 return (err);
 521                 }
 522                 break;
 523         }
 524 
 525         return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 526 }
 527 
 528 int
 529 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
 530 {
 531         int error;
 532 
 533         error = zfs_dozonecheck(zc->zc_name, cr);
 534         if (error)
 535                 return (error);
 536 
 537         /*
 538          * permission to set permissions will be evaluated later in
 539          * dsl_deleg_can_allow()
 540          */
 541         return (0);
 542 }
 543 
/*
 * Policy for rollback: the caller must pass the write-permission check on
 * zc->zc_name for ZFS_DELEG_PERM_ROLLBACK.
 */
int
zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
{
        return (zfs_secpolicy_write_perms(zc->zc_name,
            ZFS_DELEG_PERM_ROLLBACK, cr));
}
 550 
 551 int
 552 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
 553 {
 554         spa_t *spa;
 555         dsl_pool_t *dp;
 556         dsl_dataset_t *ds;
 557         char *cp;
 558         int error;
 559 
 560         /*
 561          * Generate the current snapshot name from the given objsetid, then
 562          * use that name for the secpolicy/zone checks.
 563          */
 564         cp = strchr(zc->zc_name, '@');
 565         if (cp == NULL)
 566                 return (EINVAL);
 567         error = spa_open(zc->zc_name, &spa, FTAG);
 568         if (error)
 569                 return (error);
 570 
 571         dp = spa_get_dsl(spa);
 572         rw_enter(&dp->dp_config_rwlock, RW_READER);
 573         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 574         rw_exit(&dp->dp_config_rwlock);
 575         spa_close(spa, FTAG);
 576         if (error)
 577                 return (error);
 578 
 579         dsl_dataset_name(ds, zc->zc_name);
 580 
 581         error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 582             ZFS_DELEG_PERM_SEND, cr);
 583         dsl_dataset_rele(ds, FTAG);
 584 
 585         return (error);
 586 }
 587 
 588 static int
 589 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
 590 {
 591         vnode_t *vp;
 592         int error;
 593 
 594         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
 595             NO_FOLLOW, NULL, &vp)) != 0)
 596                 return (error);
 597 
 598         /* Now make sure mntpnt and dataset are ZFS */
 599 
 600         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
 601             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 602             zc->zc_name) != 0)) {
 603                 VN_RELE(vp);
 604                 return (EPERM);
 605         }
 606 
 607         VN_RELE(vp);
 608         return (dsl_deleg_access(zc->zc_name,
 609             ZFS_DELEG_PERM_SHARE, cr));
 610 }
 611 
 612 int
 613 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
 614 {
 615         if (!INGLOBALZONE(curproc))
 616                 return (EPERM);
 617 
 618         if (secpolicy_nfs(cr) == 0) {
 619                 return (0);
 620         } else {
 621                 return (zfs_secpolicy_deleg_share(zc, cr));
 622         }
 623 }
 624 
 625 int
 626 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
 627 {
 628         if (!INGLOBALZONE(curproc))
 629                 return (EPERM);
 630 
 631         if (secpolicy_smb(cr) == 0) {
 632                 return (0);
 633         } else {
 634                 return (zfs_secpolicy_deleg_share(zc, cr));
 635         }
 636 }
 637 
 638 static int
 639 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 640 {
 641         char *cp;
 642 
 643         /*
 644          * Remove the @bla or /bla from the end of the name to get the parent.
 645          */
 646         (void) strncpy(parent, datasetname, parentsize);
 647         cp = strrchr(parent, '@');
 648         if (cp != NULL) {
 649                 cp[0] = '\0';
 650         } else {
 651                 cp = strrchr(parent, '/');
 652                 if (cp == NULL)
 653                         return (ENOENT);
 654                 cp[0] = '\0';
 655         }
 656 
 657         return (0);
 658 }
 659 
 660 int
 661 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 662 {
 663         int error;
 664 
 665         if ((error = zfs_secpolicy_write_perms(name,
 666             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 667                 return (error);
 668 
 669         return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
 670 }
 671 
/*
 * Policy for destroy: applies zfs_secpolicy_destroy_perms() to the dataset
 * named in zc->zc_name.
 */
static int
zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
{
        return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
}
 677 
 678 /*
 679  * Destroying snapshots with delegated permissions requires
 680  * descendent mount and destroy permissions.
 681  * Reassemble the full filesystem@snap name so dsl_deleg_access()
 682  * can do the correct permission check.
 683  *
 684  * Since this routine is used when doing a recursive destroy of snapshots
 685  * and destroying snapshots requires descendent permissions, a successfull
 686  * check of the top level snapshot applies to snapshots of all descendent
 687  * datasets as well.
 688  *
 689  * The target snapshot may not exist when doing a recursive destroy.
 690  * In this case fallback to permissions of the parent dataset.
 691  */
 692 static int
 693 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
 694 {
 695         int error;
 696         char *dsname;
 697 
 698         dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
 699 
 700         error = zfs_secpolicy_destroy_perms(dsname, cr);
 701 
 702         if (error == ENOENT)
 703                 error = zfs_secpolicy_destroy_perms(zc->zc_name, cr);
 704 
 705         strfree(dsname);
 706         return (error);
 707 }
 708 
 709 int
 710 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 711 {
 712         char    parentname[MAXNAMELEN];
 713         int     error;
 714 
 715         if ((error = zfs_secpolicy_write_perms(from,
 716             ZFS_DELEG_PERM_RENAME, cr)) != 0)
 717                 return (error);
 718 
 719         if ((error = zfs_secpolicy_write_perms(from,
 720             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 721                 return (error);
 722 
 723         if ((error = zfs_get_parent(to, parentname,
 724             sizeof (parentname))) != 0)
 725                 return (error);
 726 
 727         if ((error = zfs_secpolicy_write_perms(parentname,
 728             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 729                 return (error);
 730 
 731         if ((error = zfs_secpolicy_write_perms(parentname,
 732             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 733                 return (error);
 734 
 735         return (error);
 736 }
 737 
/*
 * Policy for rename: zc->zc_name is the current name and zc->zc_value the
 * new one; both are handed to zfs_secpolicy_rename_perms().
 */
static int
zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
{
        return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
}
 743 
 744 static int
 745 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
 746 {
 747         char    parentname[MAXNAMELEN];
 748         objset_t *clone;
 749         int error;
 750 
 751         error = zfs_secpolicy_write_perms(zc->zc_name,
 752             ZFS_DELEG_PERM_PROMOTE, cr);
 753         if (error)
 754                 return (error);
 755 
 756         error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
 757 
 758         if (error == 0) {
 759                 dsl_dataset_t *pclone = NULL;
 760                 dsl_dir_t *dd;
 761                 dd = clone->os_dsl_dataset->ds_dir;
 762 
 763                 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 764                 error = dsl_dataset_hold_obj(dd->dd_pool,
 765                     dd->dd_phys->dd_origin_obj, FTAG, &pclone);
 766                 rw_exit(&dd->dd_pool->dp_config_rwlock);
 767                 if (error) {
 768                         dmu_objset_rele(clone, FTAG);
 769                         return (error);
 770                 }
 771 
 772                 error = zfs_secpolicy_write_perms(zc->zc_name,
 773                     ZFS_DELEG_PERM_MOUNT, cr);
 774 
 775                 dsl_dataset_name(pclone, parentname);
 776                 dmu_objset_rele(clone, FTAG);
 777                 dsl_dataset_rele(pclone, FTAG);
 778                 if (error == 0)
 779                         error = zfs_secpolicy_write_perms(parentname,
 780                             ZFS_DELEG_PERM_PROMOTE, cr);
 781         }
 782         return (error);
 783 }
 784 
 785 static int
 786 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
 787 {
 788         int error;
 789 
 790         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 791             ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
 792                 return (error);
 793 
 794         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 795             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 796                 return (error);
 797 
 798         return (zfs_secpolicy_write_perms(zc->zc_name,
 799             ZFS_DELEG_PERM_CREATE, cr));
 800 }
 801 
/*
 * Snapshotting 'name' requires the write-permission check to pass for
 * ZFS_DELEG_PERM_SNAPSHOT.
 */
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
        return (zfs_secpolicy_write_perms(name,
            ZFS_DELEG_PERM_SNAPSHOT, cr));
}
 808 
/*
 * Policy for snapshot: applies zfs_secpolicy_snapshot_perms() to the
 * dataset named in zc->zc_name.
 */
static int
zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
{

        return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
}
 815 
 816 static int
 817 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
 818 {
 819         char    parentname[MAXNAMELEN];
 820         int     error;
 821 
 822         if ((error = zfs_get_parent(zc->zc_name, parentname,
 823             sizeof (parentname))) != 0)
 824                 return (error);
 825 
 826         if (zc->zc_value[0] != '\0') {
 827                 if ((error = zfs_secpolicy_write_perms(zc->zc_value,
 828                     ZFS_DELEG_PERM_CLONE, cr)) != 0)
 829                         return (error);
 830         }
 831 
 832         if ((error = zfs_secpolicy_write_perms(parentname,
 833             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 834                 return (error);
 835 
 836         error = zfs_secpolicy_write_perms(parentname,
 837             ZFS_DELEG_PERM_MOUNT, cr);
 838 
 839         return (error);
 840 }
 841 
 842 static int
 843 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
 844 {
 845         int error;
 846 
 847         error = secpolicy_fs_unmount(cr, NULL);
 848         if (error) {
 849                 error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
 850         }
 851         return (error);
 852 }
 853 
 854 /*
 855  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
 856  * SYS_CONFIG privilege, which is not available in a local zone.
 857  */
 858 /* ARGSUSED */
 859 static int
 860 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
 861 {
 862         if (secpolicy_sys_config(cr, B_FALSE) != 0)
 863                 return (EPERM);
 864 
 865         return (0);
 866 }
 867 
 868 /*
 869  * Policy for object to name lookups.
 870  */
 871 /* ARGSUSED */
 872 static int
 873 zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)
 874 {
 875         int error;
 876 
 877         if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
 878                 return (0);
 879 
 880         error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
 881         return (error);
 882 }
 883 
/*
 * Policy for fault injection.  Requires all privileges.
 *
 * The decision is delegated entirely to secpolicy_zinject(); the command
 * block is not examined.
 */
/* ARGSUSED */
static int
zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
{
        return (secpolicy_zinject(cr));
}
 893 
 894 static int
 895 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
 896 {
 897         zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
 898 
 899         if (prop == ZPROP_INVAL) {
 900                 if (!zfs_prop_user(zc->zc_value))
 901                         return (EINVAL);
 902                 return (zfs_secpolicy_write_perms(zc->zc_name,
 903                     ZFS_DELEG_PERM_USERPROP, cr));
 904         } else {
 905                 return (zfs_secpolicy_setprop(zc->zc_name, prop,
 906                     NULL, cr));
 907         }
 908 }
 909 
 910 static int
 911 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
 912 {
 913         int err = zfs_secpolicy_read(zc, cr);
 914         if (err)
 915                 return (err);
 916 
 917         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 918                 return (EINVAL);
 919 
 920         if (zc->zc_value[0] == 0) {
 921                 /*
 922                  * They are asking about a posix uid/gid.  If it's
 923                  * themself, allow it.
 924                  */
 925                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
 926                     zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
 927                         if (zc->zc_guid == crgetuid(cr))
 928                                 return (0);
 929                 } else {
 930                         if (groupmember(zc->zc_guid, cr))
 931                                 return (0);
 932                 }
 933         }
 934 
 935         return (zfs_secpolicy_write_perms(zc->zc_name,
 936             userquota_perms[zc->zc_objset_type], cr));
 937 }
 938 
 939 static int
 940 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
 941 {
 942         int err = zfs_secpolicy_read(zc, cr);
 943         if (err)
 944                 return (err);
 945 
 946         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 947                 return (EINVAL);
 948 
 949         return (zfs_secpolicy_write_perms(zc->zc_name,
 950             userquota_perms[zc->zc_objset_type], cr));
 951 }
 952 
/*
 * Policy for the userspace upgrade: treated as setting ZFS_PROP_VERSION on
 * zc->zc_name, with no property value supplied.
 */
static int
zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
{
        return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
            NULL, cr));
}
 959 
/*
 * Policy for hold: the caller must pass the write-permission check on
 * zc->zc_name for ZFS_DELEG_PERM_HOLD.
 */
static int
zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
{
        return (zfs_secpolicy_write_perms(zc->zc_name,
            ZFS_DELEG_PERM_HOLD, cr));
}
 966 
/*
 * Policy for release: the caller must pass the write-permission check on
 * zc->zc_name for ZFS_DELEG_PERM_RELEASE.
 */
static int
zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
{
        return (zfs_secpolicy_write_perms(zc->zc_name,
            ZFS_DELEG_PERM_RELEASE, cr));
}
 973 
 974 /*
 975  * Policy for allowing temporary snapshots to be taken or released
 976  */
 977 static int
 978 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)
 979 {
 980         /*
 981          * A temporary snapshot is the same as a snapshot,
 982          * hold, destroy and release all rolled into one.
 983          * Delegated diff alone is sufficient that we allow this.
 984          */
 985         int error;
 986 
 987         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 988             ZFS_DELEG_PERM_DIFF, cr)) == 0)
 989                 return (0);
 990 
 991         error = zfs_secpolicy_snapshot(zc, cr);
 992         if (!error)
 993                 error = zfs_secpolicy_hold(zc, cr);
 994         if (!error)
 995                 error = zfs_secpolicy_release(zc, cr);
 996         if (!error)
 997                 error = zfs_secpolicy_destroy(zc, cr);
 998         return (error);
 999 }
1000 
1001 /*
1002  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1003  */
1004 static int
1005 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1006 {
1007         char *packed;
1008         int error;
1009         nvlist_t *list = NULL;
1010 
1011         /*
1012          * Read in and unpack the user-supplied nvlist.
1013          */
1014         if (size == 0)
1015                 return (EINVAL);
1016 
1017         packed = kmem_alloc(size, KM_SLEEP);
1018 
1019         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1020             iflag)) != 0) {
1021                 kmem_free(packed, size);
1022                 return (error);
1023         }
1024 
1025         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1026                 kmem_free(packed, size);
1027                 return (error);
1028         }
1029 
1030         kmem_free(packed, size);
1031 
1032         *nvp = list;
1033         return (0);
1034 }
1035 
1036 static int
1037 fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
1038 {
1039         size_t size;
1040 
1041         VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1042 
1043         if (size > zc->zc_nvlist_dst_size) {
1044                 nvpair_t *more_errors;
1045                 int n = 0;
1046 
1047                 if (zc->zc_nvlist_dst_size < 1024)
1048                         return (ENOMEM);
1049 
1050                 VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
1051                 more_errors = nvlist_prev_nvpair(*errors, NULL);
1052 
1053                 do {
1054                         nvpair_t *pair = nvlist_prev_nvpair(*errors,
1055                             more_errors);
1056                         VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
1057                         n++;
1058                         VERIFY(nvlist_size(*errors, &size,
1059                             NV_ENCODE_NATIVE) == 0);
1060                 } while (size > zc->zc_nvlist_dst_size);
1061 
1062                 VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
1063                 VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
1064                 ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1065                 ASSERT(size <= zc->zc_nvlist_dst_size);
1066         }
1067 
1068         return (0);
1069 }
1070 
1071 static int
1072 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1073 {
1074         char *packed = NULL;
1075         int error = 0;
1076         size_t size;
1077 
1078         VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
1079 
1080         if (size > zc->zc_nvlist_dst_size) {
1081                 error = ENOMEM;
1082         } else {
1083                 packed = kmem_alloc(size, KM_SLEEP);
1084                 VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
1085                     KM_SLEEP) == 0);
1086                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1087                     size, zc->zc_iflags) != 0)
1088                         error = EFAULT;
1089                 kmem_free(packed, size);
1090         }
1091 
1092         zc->zc_nvlist_dst_size = size;
1093         return (error);
1094 }
1095 
1096 static int
1097 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1098 {
1099         objset_t *os;
1100         int error;
1101 
1102         error = dmu_objset_hold(dsname, FTAG, &os);
1103         if (error)
1104                 return (error);
1105         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1106                 dmu_objset_rele(os, FTAG);
1107                 return (EINVAL);
1108         }
1109 
1110         mutex_enter(&os->os_user_ptr_lock);
1111         *zfvp = dmu_objset_get_user(os);
1112         if (*zfvp) {
1113                 VFS_HOLD((*zfvp)->z_vfs);
1114         } else {
1115                 error = ESRCH;
1116         }
1117         mutex_exit(&os->os_user_ptr_lock);
1118         dmu_objset_rele(os, FTAG);
1119         return (error);
1120 }
1121 
1122 /*
1123  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1124  * case its z_vfs will be NULL, and it will be opened as the owner.
1125  */
1126 static int
1127 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1128 {
1129         int error = 0;
1130 
1131         if (getzfsvfs(name, zfvp) != 0)
1132                 error = zfsvfs_create(name, zfvp);
1133         if (error == 0) {
1134                 rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1135                     RW_READER, tag);
1136                 if ((*zfvp)->z_unmounted) {
1137                         /*
1138                          * XXX we could probably try again, since the unmounting
1139                          * thread should be just about to disassociate the
1140                          * objset from the zfsvfs.
1141                          */
1142                         rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1143                         return (EBUSY);
1144                 }
1145         }
1146         return (error);
1147 }
1148 
1149 static void
1150 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1151 {
1152         rrw_exit(&zfsvfs->z_teardown_lock, tag);
1153 
1154         if (zfsvfs->z_vfs) {
1155                 VFS_RELE(zfsvfs->z_vfs);
1156         } else {
1157                 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1158                 zfsvfs_free(zfsvfs);
1159         }
1160 }
1161 
1162 static int
1163 zfs_ioc_pool_create(zfs_cmd_t *zc)
1164 {
1165         int error;
1166         nvlist_t *config, *props = NULL;
1167         nvlist_t *rootprops = NULL;
1168         nvlist_t *zplprops = NULL;
1169         char *buf;
1170 
1171         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1172             zc->zc_iflags, &config))
1173                 return (error);
1174 
1175         if (zc->zc_nvlist_src_size != 0 && (error =
1176             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1177             zc->zc_iflags, &props))) {
1178                 nvlist_free(config);
1179                 return (error);
1180         }
1181 
1182         if (props) {
1183                 nvlist_t *nvl = NULL;
1184                 uint64_t version = SPA_VERSION;
1185 
1186                 (void) nvlist_lookup_uint64(props,
1187                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1188                 if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
1189                         error = EINVAL;
1190                         goto pool_props_bad;
1191                 }
1192                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1193                 if (nvl) {
1194                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1195                         if (error != 0) {
1196                                 nvlist_free(config);
1197                                 nvlist_free(props);
1198                                 return (error);
1199                         }
1200                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1201                 }
1202                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1203                 error = zfs_fill_zplprops_root(version, rootprops,
1204                     zplprops, NULL);
1205                 if (error)
1206                         goto pool_props_bad;
1207         }
1208 
1209         buf = history_str_get(zc);
1210 
1211         error = spa_create(zc->zc_name, config, props, buf, zplprops);
1212 
1213         /*
1214          * Set the remaining root properties
1215          */
1216         if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1217             ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1218                 (void) spa_destroy(zc->zc_name);
1219 
1220         if (buf != NULL)
1221                 history_str_free(buf);
1222 
1223 pool_props_bad:
1224         nvlist_free(rootprops);
1225         nvlist_free(zplprops);
1226         nvlist_free(config);
1227         nvlist_free(props);
1228 
1229         return (error);
1230 }
1231 
1232 static int
1233 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1234 {
1235         int error;
1236         zfs_log_history(zc);
1237         error = spa_destroy(zc->zc_name);
1238         if (error == 0)
1239                 zvol_remove_minors(zc->zc_name);
1240         return (error);
1241 }
1242 
1243 static int
1244 zfs_ioc_pool_import(zfs_cmd_t *zc)
1245 {
1246         nvlist_t *config, *props = NULL;
1247         uint64_t guid;
1248         int error;
1249 
1250         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1251             zc->zc_iflags, &config)) != 0)
1252                 return (error);
1253 
1254         if (zc->zc_nvlist_src_size != 0 && (error =
1255             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1256             zc->zc_iflags, &props))) {
1257                 nvlist_free(config);
1258                 return (error);
1259         }
1260 
1261         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1262             guid != zc->zc_guid)
1263                 error = EINVAL;
1264         else
1265                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1266 
1267         if (zc->zc_nvlist_dst != 0) {
1268                 int err;
1269 
1270                 if ((err = put_nvlist(zc, config)) != 0)
1271                         error = err;
1272         }
1273 
1274         nvlist_free(config);
1275 
1276         if (props)
1277                 nvlist_free(props);
1278 
1279         return (error);
1280 }
1281 
1282 static int
1283 zfs_ioc_pool_export(zfs_cmd_t *zc)
1284 {
1285         int error;
1286         boolean_t force = (boolean_t)zc->zc_cookie;
1287         boolean_t hardforce = (boolean_t)zc->zc_guid;
1288 
1289         zfs_log_history(zc);
1290         error = spa_export(zc->zc_name, NULL, force, hardforce);
1291         if (error == 0)
1292                 zvol_remove_minors(zc->zc_name);
1293         return (error);
1294 }
1295 
1296 static int
1297 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1298 {
1299         nvlist_t *configs;
1300         int error;
1301 
1302         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1303                 return (EEXIST);
1304 
1305         error = put_nvlist(zc, configs);
1306 
1307         nvlist_free(configs);
1308 
1309         return (error);
1310 }
1311 
1312 static int
1313 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1314 {
1315         nvlist_t *config;
1316         int error;
1317         int ret = 0;
1318 
1319         error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1320             sizeof (zc->zc_value));
1321 
1322         if (config != NULL) {
1323                 ret = put_nvlist(zc, config);
1324                 nvlist_free(config);
1325 
1326                 /*
1327                  * The config may be present even if 'error' is non-zero.
1328                  * In this case we return success, and preserve the real errno
1329                  * in 'zc_cookie'.
1330                  */
1331                 zc->zc_cookie = error;
1332         } else {
1333                 ret = error;
1334         }
1335 
1336         return (ret);
1337 }
1338 
1339 /*
1340  * Try to import the given pool, returning pool stats as appropriate so that
1341  * user land knows which devices are available and overall pool health.
1342  */
1343 static int
1344 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1345 {
1346         nvlist_t *tryconfig, *config;
1347         int error;
1348 
1349         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1350             zc->zc_iflags, &tryconfig)) != 0)
1351                 return (error);
1352 
1353         config = spa_tryimport(tryconfig);
1354 
1355         nvlist_free(tryconfig);
1356 
1357         if (config == NULL)
1358                 return (EINVAL);
1359 
1360         error = put_nvlist(zc, config);
1361         nvlist_free(config);
1362 
1363         return (error);
1364 }
1365 
1366 /*
1367  * inputs:
1368  * zc_name              name of the pool
1369  * zc_cookie            scan func (pool_scan_func_t)
1370  */
1371 static int
1372 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1373 {
1374         spa_t *spa;
1375         int error;
1376 
1377         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1378                 return (error);
1379 
1380         if (zc->zc_cookie == POOL_SCAN_NONE)
1381                 error = spa_scan_stop(spa);
1382         else
1383                 error = spa_scan(spa, zc->zc_cookie);
1384 
1385         spa_close(spa, FTAG);
1386 
1387         return (error);
1388 }
1389 
1390 static int
1391 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1392 {
1393         spa_t *spa;
1394         int error;
1395 
1396         error = spa_open(zc->zc_name, &spa, FTAG);
1397         if (error == 0) {
1398                 spa_freeze(spa);
1399                 spa_close(spa, FTAG);
1400         }
1401         return (error);
1402 }
1403 
1404 static int
1405 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1406 {
1407         spa_t *spa;
1408         int error;
1409 
1410         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1411                 return (error);
1412 
1413         if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1414                 spa_close(spa, FTAG);
1415                 return (EINVAL);
1416         }
1417 
1418         spa_upgrade(spa, zc->zc_cookie);
1419         spa_close(spa, FTAG);
1420 
1421         return (error);
1422 }
1423 
1424 static int
1425 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1426 {
1427         spa_t *spa;
1428         char *hist_buf;
1429         uint64_t size;
1430         int error;
1431 
1432         if ((size = zc->zc_history_len) == 0)
1433                 return (EINVAL);
1434 
1435         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1436                 return (error);
1437 
1438         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1439                 spa_close(spa, FTAG);
1440                 return (ENOTSUP);
1441         }
1442 
1443         hist_buf = kmem_alloc(size, KM_SLEEP);
1444         if ((error = spa_history_get(spa, &zc->zc_history_offset,
1445             &zc->zc_history_len, hist_buf)) == 0) {
1446                 error = ddi_copyout(hist_buf,
1447                     (void *)(uintptr_t)zc->zc_history,
1448                     zc->zc_history_len, zc->zc_iflags);
1449         }
1450 
1451         spa_close(spa, FTAG);
1452         kmem_free(hist_buf, size);
1453         return (error);
1454 }
1455 
1456 static int
1457 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1458 {
1459         int error;
1460 
1461         if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1462                 return (error);
1463 
1464         return (0);
1465 }
1466 
1467 /*
1468  * inputs:
1469  * zc_name              name of filesystem
1470  * zc_obj               object to find
1471  *
1472  * outputs:
1473  * zc_value             name of object
1474  */
1475 static int
1476 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1477 {
1478         objset_t *os;
1479         int error;
1480 
1481         /* XXX reading from objset not owned */
1482         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1483                 return (error);
1484         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1485                 dmu_objset_rele(os, FTAG);
1486                 return (EINVAL);
1487         }
1488         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1489             sizeof (zc->zc_value));
1490         dmu_objset_rele(os, FTAG);
1491 
1492         return (error);
1493 }
1494 
1495 /*
1496  * inputs:
1497  * zc_name              name of filesystem
1498  * zc_obj               object to find
1499  *
1500  * outputs:
1501  * zc_stat              stats on object
1502  * zc_value             path to object
1503  */
1504 static int
1505 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1506 {
1507         objset_t *os;
1508         int error;
1509 
1510         /* XXX reading from objset not owned */
1511         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1512                 return (error);
1513         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1514                 dmu_objset_rele(os, FTAG);
1515                 return (EINVAL);
1516         }
1517         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1518             sizeof (zc->zc_value));
1519         dmu_objset_rele(os, FTAG);
1520 
1521         return (error);
1522 }
1523 
1524 static int
1525 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1526 {
1527         spa_t *spa;
1528         int error;
1529         nvlist_t *config, **l2cache, **spares;
1530         uint_t nl2cache = 0, nspares = 0;
1531 
1532         error = spa_open(zc->zc_name, &spa, FTAG);
1533         if (error != 0)
1534                 return (error);
1535 
1536         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1537             zc->zc_iflags, &config);
1538         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1539             &l2cache, &nl2cache);
1540 
1541         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1542             &spares, &nspares);
1543 
1544         /*
1545          * A root pool with concatenated devices is not supported.
1546          * Thus, can not add a device to a root pool.
1547          *
1548          * Intent log device can not be added to a rootpool because
1549          * during mountroot, zil is replayed, a seperated log device
1550          * can not be accessed during the mountroot time.
1551          *
1552          * l2cache and spare devices are ok to be added to a rootpool.
1553          */
1554         if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1555                 nvlist_free(config);
1556                 spa_close(spa, FTAG);
1557                 return (EDOM);
1558         }
1559 
1560         if (error == 0) {
1561                 error = spa_vdev_add(spa, config);
1562                 nvlist_free(config);
1563         }
1564         spa_close(spa, FTAG);
1565         return (error);
1566 }
1567 
1568 /*
1569  * inputs:
1570  * zc_name              name of the pool
1571  * zc_nvlist_conf       nvlist of devices to remove
1572  * zc_cookie            to stop the remove?
1573  */
1574 static int
1575 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1576 {
1577         spa_t *spa;
1578         int error;
1579 
1580         error = spa_open(zc->zc_name, &spa, FTAG);
1581         if (error != 0)
1582                 return (error);
1583         error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1584         spa_close(spa, FTAG);
1585         return (error);
1586 }
1587 
1588 static int
1589 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1590 {
1591         spa_t *spa;
1592         int error;
1593         vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1594 
1595         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1596                 return (error);
1597         switch (zc->zc_cookie) {
1598         case VDEV_STATE_ONLINE:
1599                 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1600                 break;
1601 
1602         case VDEV_STATE_OFFLINE:
1603                 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1604                 break;
1605 
1606         case VDEV_STATE_FAULTED:
1607                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1608                     zc->zc_obj != VDEV_AUX_EXTERNAL)
1609                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1610 
1611                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1612                 break;
1613 
1614         case VDEV_STATE_DEGRADED:
1615                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1616                     zc->zc_obj != VDEV_AUX_EXTERNAL)
1617                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1618 
1619                 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1620                 break;
1621 
1622         default:
1623                 error = EINVAL;
1624         }
1625         zc->zc_cookie = newstate;
1626         spa_close(spa, FTAG);
1627         return (error);
1628 }
1629 
1630 static int
1631 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1632 {
1633         spa_t *spa;
1634         int replacing = zc->zc_cookie;
1635         nvlist_t *config;
1636         int error;
1637 
1638         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1639                 return (error);
1640 
1641         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1642             zc->zc_iflags, &config)) == 0) {
1643                 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1644                 nvlist_free(config);
1645         }
1646 
1647         spa_close(spa, FTAG);
1648         return (error);
1649 }
1650 
1651 static int
1652 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1653 {
1654         spa_t *spa;
1655         int error;
1656 
1657         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1658                 return (error);
1659 
1660         error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1661 
1662         spa_close(spa, FTAG);
1663         return (error);
1664 }
1665 
1666 static int
1667 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1668 {
1669         spa_t *spa;
1670         nvlist_t *config, *props = NULL;
1671         int error;
1672         boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1673 
1674         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1675                 return (error);
1676 
1677         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1678             zc->zc_iflags, &config)) {
1679                 spa_close(spa, FTAG);
1680                 return (error);
1681         }
1682 
1683         if (zc->zc_nvlist_src_size != 0 && (error =
1684             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1685             zc->zc_iflags, &props))) {
1686                 spa_close(spa, FTAG);
1687                 nvlist_free(config);
1688                 return (error);
1689         }
1690 
1691         error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1692 
1693         spa_close(spa, FTAG);
1694 
1695         nvlist_free(config);
1696         nvlist_free(props);
1697 
1698         return (error);
1699 }
1700 
1701 static int
1702 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1703 {
1704         spa_t *spa;
1705         char *path = zc->zc_value;
1706         uint64_t guid = zc->zc_guid;
1707         int error;
1708 
1709         error = spa_open(zc->zc_name, &spa, FTAG);
1710         if (error != 0)
1711                 return (error);
1712 
1713         error = spa_vdev_setpath(spa, guid, path);
1714         spa_close(spa, FTAG);
1715         return (error);
1716 }
1717 
1718 static int
1719 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1720 {
1721         spa_t *spa;
1722         char *fru = zc->zc_value;
1723         uint64_t guid = zc->zc_guid;
1724         int error;
1725 
1726         error = spa_open(zc->zc_name, &spa, FTAG);
1727         if (error != 0)
1728                 return (error);
1729 
1730         error = spa_vdev_setfru(spa, guid, fru);
1731         spa_close(spa, FTAG);
1732         return (error);
1733 }
1734 
1735 static int
1736 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1737 {
1738         int error = 0;
1739         nvlist_t *nv;
1740 
1741         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1742 
1743         if (zc->zc_nvlist_dst != 0 &&
1744             (error = dsl_prop_get_all(os, &nv)) == 0) {
1745                 dmu_objset_stats(os, nv);
1746                 /*
1747                  * NB: zvol_get_stats() will read the objset contents,
1748                  * which we aren't supposed to do with a
1749                  * DS_MODE_USER hold, because it could be
1750                  * inconsistent.  So this is a bit of a workaround...
1751                  * XXX reading with out owning
1752                  */
1753                 if (!zc->zc_objset_stats.dds_inconsistent) {
1754                         if (dmu_objset_type(os) == DMU_OST_ZVOL)
1755                                 VERIFY(zvol_get_stats(os, nv) == 0);
1756                 }
1757                 error = put_nvlist(zc, nv);
1758                 nvlist_free(nv);
1759         }
1760 
1761         return (error);
1762 }
1763 
1764 /*
1765  * inputs:
1766  * zc_name              name of filesystem
1767  * zc_nvlist_dst_size   size of buffer for property nvlist
1768  *
1769  * outputs:
1770  * zc_objset_stats      stats
1771  * zc_nvlist_dst        property nvlist
1772  * zc_nvlist_dst_size   size of property nvlist
1773  */
1774 static int
1775 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1776 {
1777         objset_t *os = NULL;
1778         int error;
1779 
1780         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1781                 return (error);
1782 
1783         error = zfs_ioc_objset_stats_impl(zc, os);
1784 
1785         dmu_objset_rele(os, FTAG);
1786 
1787         return (error);
1788 }
1789 
1790 /*
1791  * inputs:
1792  * zc_name              name of filesystem
1793  * zc_nvlist_dst_size   size of buffer for property nvlist
1794  *
1795  * outputs:
1796  * zc_nvlist_dst        received property nvlist
1797  * zc_nvlist_dst_size   size of received property nvlist
1798  *
1799  * Gets received properties (distinct from local properties on or after
1800  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1801  * local property values.
1802  */
1803 static int
1804 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1805 {
1806         objset_t *os = NULL;
1807         int error;
1808         nvlist_t *nv;
1809 
1810         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1811                 return (error);
1812 
1813         /*
1814          * Without this check, we would return local property values if the
1815          * caller has not already received properties on or after
1816          * SPA_VERSION_RECVD_PROPS.
1817          */
1818         if (!dsl_prop_get_hasrecvd(os)) {
1819                 dmu_objset_rele(os, FTAG);
1820                 return (ENOTSUP);
1821         }
1822 
1823         if (zc->zc_nvlist_dst != 0 &&
1824             (error = dsl_prop_get_received(os, &nv)) == 0) {
1825                 error = put_nvlist(zc, nv);
1826                 nvlist_free(nv);
1827         }
1828 
1829         dmu_objset_rele(os, FTAG);
1830         return (error);
1831 }
1832 
1833 static int
1834 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1835 {
1836         uint64_t value;
1837         int error;
1838 
1839         /*
1840          * zfs_get_zplprop() will either find a value or give us
1841          * the default value (if there is one).
1842          */
1843         if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1844                 return (error);
1845         VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1846         return (0);
1847 }
1848 
1849 /*
1850  * inputs:
1851  * zc_name              name of filesystem
1852  * zc_nvlist_dst_size   size of buffer for zpl property nvlist
1853  *
1854  * outputs:
1855  * zc_nvlist_dst        zpl property nvlist
1856  * zc_nvlist_dst_size   size of zpl property nvlist
1857  */
1858 static int
1859 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1860 {
1861         objset_t *os;
1862         int err;
1863 
1864         /* XXX reading without owning */
1865         if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1866                 return (err);
1867 
1868         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1869 
1870         /*
1871          * NB: nvl_add_zplprop() will read the objset contents,
1872          * which we aren't supposed to do with a DS_MODE_USER
1873          * hold, because it could be inconsistent.
1874          */
1875         if (zc->zc_nvlist_dst != NULL &&
1876             !zc->zc_objset_stats.dds_inconsistent &&
1877             dmu_objset_type(os) == DMU_OST_ZFS) {
1878                 nvlist_t *nv;
1879 
1880                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1881                 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1882                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1883                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1884                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1885                         err = put_nvlist(zc, nv);
1886                 nvlist_free(nv);
1887         } else {
1888                 err = ENOENT;
1889         }
1890         dmu_objset_rele(os, FTAG);
1891         return (err);
1892 }
1893 
1894 static boolean_t
1895 dataset_name_hidden(const char *name)
1896 {
1897         /*
1898          * Skip over datasets that are not visible in this zone,
1899          * internal datasets (which have a $ in their name), and
1900          * temporary datasets (which have a % in their name).
1901          */
1902         if (strchr(name, '$') != NULL)
1903                 return (B_TRUE);
1904         if (strchr(name, '%') != NULL)
1905                 return (B_TRUE);
1906         if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1907                 return (B_TRUE);
1908         return (B_FALSE);
1909 }
1910 
1911 /*
1912  * inputs:
1913  * zc_name              name of filesystem
1914  * zc_cookie            zap cursor
1915  * zc_nvlist_dst_size   size of buffer for property nvlist
1916  *
1917  * outputs:
1918  * zc_name              name of next filesystem
1919  * zc_cookie            zap cursor
1920  * zc_objset_stats      stats
1921  * zc_nvlist_dst        property nvlist
1922  * zc_nvlist_dst_size   size of property nvlist
1923  */
1924 static int
1925 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1926 {
1927         objset_t *os;
1928         int error;
1929         char *p;
1930         size_t orig_len = strlen(zc->zc_name);
1931 
1932 top:
1933         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1934                 if (error == ENOENT)
1935                         error = ESRCH;
1936                 return (error);
1937         }
1938 
1939         p = strrchr(zc->zc_name, '/');
1940         if (p == NULL || p[1] != '\0')
1941                 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1942         p = zc->zc_name + strlen(zc->zc_name);
1943 
1944         /*
1945          * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1946          * but is not declared void because its called by dmu_objset_find().
1947          */
1948         if (zc->zc_cookie == 0) {
1949                 uint64_t cookie = 0;
1950                 int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1951 
1952                 while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
1953                         (void) dmu_objset_prefetch(p, NULL);
1954         }
1955 
1956         do {
1957                 error = dmu_dir_list_next(os,
1958                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
1959                     NULL, &zc->zc_cookie);
1960                 if (error == ENOENT)
1961                         error = ESRCH;
1962         } while (error == 0 && dataset_name_hidden(zc->zc_name) &&
1963             !(zc->zc_iflags & FKIOCTL));
1964         dmu_objset_rele(os, FTAG);
1965 
1966         /*
1967          * If it's an internal dataset (ie. with a '$' in its name),
1968          * don't try to get stats for it, otherwise we'll return ENOENT.
1969          */
1970         if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
1971                 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1972                 if (error == ENOENT) {
1973                         /* We lost a race with destroy, get the next one. */
1974                         zc->zc_name[orig_len] = '\0';
1975                         goto top;
1976                 }
1977         }
1978         return (error);
1979 }
1980 
1981 /*
1982  * inputs:
1983  * zc_name              name of filesystem
1984  * zc_cookie            zap cursor
1985  * zc_nvlist_dst_size   size of buffer for property nvlist
1986  *
1987  * outputs:
1988  * zc_name              name of next snapshot
1989  * zc_objset_stats      stats
1990  * zc_nvlist_dst        property nvlist
1991  * zc_nvlist_dst_size   size of property nvlist
1992  */
1993 static int
1994 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1995 {
1996         objset_t *os;
1997         int error;
1998 
1999 top:
2000         if (zc->zc_cookie == 0)
2001                 (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
2002                     NULL, DS_FIND_SNAPSHOTS);
2003 
2004         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2005         if (error)
2006                 return (error == ENOENT ? ESRCH : error);
2007 
2008         /*
2009          * A dataset name of maximum length cannot have any snapshots,
2010          * so exit immediately.
2011          */
2012         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2013                 dmu_objset_rele(os, FTAG);
2014                 return (ESRCH);
2015         }
2016 
2017         error = dmu_snapshot_list_next(os,
2018             sizeof (zc->zc_name) - strlen(zc->zc_name),
2019             zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2020             NULL);
2021 
2022         if (error == 0) {
2023                 dsl_dataset_t *ds;
2024                 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2025 
2026                 /*
2027                  * Since we probably don't have a hold on this snapshot,
2028                  * it's possible that the objsetid could have been destroyed
2029                  * and reused for a new objset. It's OK if this happens during
2030                  * a zfs send operation, since the new createtxg will be
2031                  * beyond the range we're interested in.
2032                  */
2033                 rw_enter(&dp->dp_config_rwlock, RW_READER);
2034                 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2035                 rw_exit(&dp->dp_config_rwlock);
2036                 if (error) {
2037                         if (error == ENOENT) {
2038                                 /* Racing with destroy, get the next one. */
2039                                 *strchr(zc->zc_name, '@') = '\0';
2040                                 dmu_objset_rele(os, FTAG);
2041                                 goto top;
2042                         }
2043                 } else {
2044                         objset_t *ossnap;
2045 
2046                         error = dmu_objset_from_ds(ds, &ossnap);
2047                         if (error == 0)
2048                                 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2049                         dsl_dataset_rele(ds, FTAG);
2050                 }
2051         } else if (error == ENOENT) {
2052                 error = ESRCH;
2053         }
2054 
2055         dmu_objset_rele(os, FTAG);
2056         /* if we failed, undo the @ that we tacked on to zc_name */
2057         if (error)
2058                 *strchr(zc->zc_name, '@') = '\0';
2059         return (error);
2060 }
2061 
2062 static int
2063 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2064 {
2065         const char *propname = nvpair_name(pair);
2066         uint64_t *valary;
2067         unsigned int vallen;
2068         const char *domain;
2069         char *dash;
2070         zfs_userquota_prop_t type;
2071         uint64_t rid;
2072         uint64_t quota;
2073         zfsvfs_t *zfsvfs;
2074         int err;
2075 
2076         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2077                 nvlist_t *attrs;
2078                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2079                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2080                     &pair) != 0)
2081                         return (EINVAL);
2082         }
2083 
2084         /*
2085          * A correctly constructed propname is encoded as
2086          * userquota@<rid>-<domain>.
2087          */
2088         if ((dash = strchr(propname, '-')) == NULL ||
2089             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2090             vallen != 3)
2091                 return (EINVAL);
2092 
2093         domain = dash + 1;
2094         type = valary[0];
2095         rid = valary[1];
2096         quota = valary[2];
2097 
2098         err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2099         if (err == 0) {
2100                 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2101                 zfsvfs_rele(zfsvfs, FTAG);
2102         }
2103 
2104         return (err);
2105 }
2106 
2107 /*
2108  * If the named property is one that has a special function to set its value,
2109  * return 0 on success and a positive error code on failure; otherwise if it is
2110  * not one of the special properties handled by this function, return -1.
2111  *
2112  * XXX: It would be better for callers of the property interface if we handled
2113  * these special cases in dsl_prop.c (in the dsl layer).
2114  */
2115 static int
2116 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2117     nvpair_t *pair)
2118 {
2119         const char *propname = nvpair_name(pair);
2120         zfs_prop_t prop = zfs_name_to_prop(propname);
2121         uint64_t intval;
2122         int err;
2123 
2124         if (prop == ZPROP_INVAL) {
2125                 if (zfs_prop_userquota(propname))
2126                         return (zfs_prop_set_userquota(dsname, pair));
2127                 return (-1);
2128         }
2129 
2130         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2131                 nvlist_t *attrs;
2132                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2133                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2134                     &pair) == 0);
2135         }
2136 
2137         if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2138                 return (-1);
2139 
2140         VERIFY(0 == nvpair_value_uint64(pair, &intval));
2141 
2142         switch (prop) {
2143         case ZFS_PROP_QUOTA:
2144                 err = dsl_dir_set_quota(dsname, source, intval);
2145                 break;
2146         case ZFS_PROP_REFQUOTA:
2147                 err = dsl_dataset_set_quota(dsname, source, intval);
2148                 break;
2149         case ZFS_PROP_RESERVATION:
2150                 err = dsl_dir_set_reservation(dsname, source, intval);
2151                 break;
2152         case ZFS_PROP_REFRESERVATION:
2153                 err = dsl_dataset_set_reservation(dsname, source, intval);
2154                 break;
2155         case ZFS_PROP_VOLSIZE:
2156                 err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
2157                     intval);
2158                 break;
2159         case ZFS_PROP_VERSION:
2160         {
2161                 zfsvfs_t *zfsvfs;
2162 
2163                 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2164                         break;
2165 
2166                 err = zfs_set_version(zfsvfs, intval);
2167                 zfsvfs_rele(zfsvfs, FTAG);
2168 
2169                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2170                         zfs_cmd_t *zc;
2171 
2172                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2173                         (void) strcpy(zc->zc_name, dsname);
2174                         (void) zfs_ioc_userspace_upgrade(zc);
2175                         kmem_free(zc, sizeof (zfs_cmd_t));
2176                 }
2177                 break;
2178         }
2179 
2180         default:
2181                 err = -1;
2182         }
2183 
2184         return (err);
2185 }
2186 
2187 /*
2188  * This function is best effort. If it fails to set any of the given properties,
2189  * it continues to set as many as it can and returns the first error
2190  * encountered. If the caller provides a non-NULL errlist, it also gives the
2191  * complete list of names of all the properties it failed to set along with the
2192  * corresponding error numbers. The caller is responsible for freeing the
2193  * returned errlist.
2194  *
2195  * If every property is set successfully, zero is returned and the list pointed
2196  * at by errlist is NULL.
2197  */
2198 int
2199 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2200     nvlist_t **errlist)
2201 {
2202         nvpair_t *pair;
2203         nvpair_t *propval;
2204         int rv = 0;
2205         uint64_t intval;
2206         char *strval;
2207         nvlist_t *genericnvl;
2208         nvlist_t *errors;
2209         nvlist_t *retrynvl;
2210 
2211         VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2212         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2213         VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2214 
2215 retry:
2216         pair = NULL;
2217         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2218                 const char *propname = nvpair_name(pair);
2219                 zfs_prop_t prop = zfs_name_to_prop(propname);
2220                 int err = 0;
2221 
2222                 /* decode the property value */
2223                 propval = pair;
2224                 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2225                         nvlist_t *attrs;
2226                         VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2227                         if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2228                             &propval) != 0)
2229                                 err = EINVAL;
2230                 }
2231 
2232                 /* Validate value type */
2233                 if (err == 0 && prop == ZPROP_INVAL) {
2234                         if (zfs_prop_user(propname)) {
2235                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
2236                                         err = EINVAL;
2237                         } else if (zfs_prop_userquota(propname)) {
2238                                 if (nvpair_type(propval) !=
2239                                     DATA_TYPE_UINT64_ARRAY)
2240                                         err = EINVAL;
2241                         }
2242                 } else if (err == 0) {
2243                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2244                                 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2245                                         err = EINVAL;
2246                         } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2247                                 const char *unused;
2248 
2249                                 VERIFY(nvpair_value_uint64(propval,
2250                                     &intval) == 0);
2251 
2252                                 switch (zfs_prop_get_type(prop)) {
2253                                 case PROP_TYPE_NUMBER:
2254                                         break;
2255                                 case PROP_TYPE_STRING:
2256                                         err = EINVAL;
2257                                         break;
2258                                 case PROP_TYPE_INDEX:
2259                                         if (zfs_prop_index_to_string(prop,
2260                                             intval, &unused) != 0)
2261                                                 err = EINVAL;
2262                                         break;
2263                                 default:
2264                                         cmn_err(CE_PANIC,
2265                                             "unknown property type");
2266                                 }
2267                         } else {
2268                                 err = EINVAL;
2269                         }
2270                 }
2271 
2272                 /* Validate permissions */
2273                 if (err == 0)
2274                         err = zfs_check_settable(dsname, pair, CRED());
2275 
2276                 if (err == 0) {
2277                         err = zfs_prop_set_special(dsname, source, pair);
2278                         if (err == -1) {
2279                                 /*
2280                                  * For better performance we build up a list of
2281                                  * properties to set in a single transaction.
2282                                  */
2283                                 err = nvlist_add_nvpair(genericnvl, pair);
2284                         } else if (err != 0 && nvl != retrynvl) {
2285                                 /*
2286                                  * This may be a spurious error caused by
2287                                  * receiving quota and reservation out of order.
2288                                  * Try again in a second pass.
2289                                  */
2290                                 err = nvlist_add_nvpair(retrynvl, pair);
2291                         }
2292                 }
2293 
2294                 if (err != 0)
2295                         VERIFY(nvlist_add_int32(errors, propname, err) == 0);
2296         }
2297 
2298         if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2299                 nvl = retrynvl;
2300                 goto retry;
2301         }
2302 
2303         if (!nvlist_empty(genericnvl) &&
2304             dsl_props_set(dsname, source, genericnvl) != 0) {
2305                 /*
2306                  * If this fails, we still want to set as many properties as we
2307                  * can, so try setting them individually.
2308                  */
2309                 pair = NULL;
2310                 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2311                         const char *propname = nvpair_name(pair);
2312                         int err = 0;
2313 
2314                         propval = pair;
2315                         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2316                                 nvlist_t *attrs;
2317                                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2318                                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2319                                     &propval) == 0);
2320                         }
2321 
2322                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2323                                 VERIFY(nvpair_value_string(propval,
2324                                     &strval) == 0);
2325                                 err = dsl_prop_set(dsname, propname, source, 1,
2326                                     strlen(strval) + 1, strval);
2327                         } else {
2328                                 VERIFY(nvpair_value_uint64(propval,
2329                                     &intval) == 0);
2330                                 err = dsl_prop_set(dsname, propname, source, 8,
2331                                     1, &intval);
2332                         }
2333 
2334                         if (err != 0) {
2335                                 VERIFY(nvlist_add_int32(errors, propname,
2336                                     err) == 0);
2337                         }
2338                 }
2339         }
2340         nvlist_free(genericnvl);
2341         nvlist_free(retrynvl);
2342 
2343         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
2344                 nvlist_free(errors);
2345                 errors = NULL;
2346         } else {
2347                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
2348         }
2349 
2350         if (errlist == NULL)
2351                 nvlist_free(errors);
2352         else
2353                 *errlist = errors;
2354 
2355         return (rv);
2356 }
2357 
2358 /*
2359  * Check that all the properties are valid user properties.
2360  */
2361 static int
2362 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2363 {
2364         nvpair_t *pair = NULL;
2365         int error = 0;
2366 
2367         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2368                 const char *propname = nvpair_name(pair);
2369                 char *valstr;
2370 
2371                 if (!zfs_prop_user(propname) ||
2372                     nvpair_type(pair) != DATA_TYPE_STRING)
2373                         return (EINVAL);
2374 
2375                 if (error = zfs_secpolicy_write_perms(fsname,
2376                     ZFS_DELEG_PERM_USERPROP, CRED()))
2377                         return (error);
2378 
2379                 if (strlen(propname) >= ZAP_MAXNAMELEN)
2380                         return (ENAMETOOLONG);
2381 
2382                 VERIFY(nvpair_value_string(pair, &valstr) == 0);
2383                 if (strlen(valstr) >= ZAP_MAXVALUELEN)
2384                         return (E2BIG);
2385         }
2386         return (0);
2387 }
2388 
2389 static void
2390 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2391 {
2392         nvpair_t *pair;
2393 
2394         VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2395 
2396         pair = NULL;
2397         while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2398                 if (nvlist_exists(skipped, nvpair_name(pair)))
2399                         continue;
2400 
2401                 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2402         }
2403 }
2404 
2405 static int
2406 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2407     nvlist_t *skipped)
2408 {
2409         int err = 0;
2410         nvlist_t *cleared_props = NULL;
2411         props_skip(props, skipped, &cleared_props);
2412         if (!nvlist_empty(cleared_props)) {
2413                 /*
2414                  * Acts on local properties until the dataset has received
2415                  * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2416                  */
2417                 zprop_source_t flags = (ZPROP_SRC_NONE |
2418                     (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2419                 err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2420         }
2421         nvlist_free(cleared_props);
2422         return (err);
2423 }
2424 
2425 /*
2426  * inputs:
2427  * zc_name              name of filesystem
2428  * zc_value             name of property to set
2429  * zc_nvlist_src{_size} nvlist of properties to apply
2430  * zc_cookie            received properties flag
2431  *
2432  * outputs:
2433  * zc_nvlist_dst{_size} error for each unapplied received property
2434  */
2435 static int
2436 zfs_ioc_set_prop(zfs_cmd_t *zc)
2437 {
2438         nvlist_t *nvl;
2439         boolean_t received = zc->zc_cookie;
2440         zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2441             ZPROP_SRC_LOCAL);
2442         nvlist_t *errors = NULL;
2443         int error;
2444 
2445         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2446             zc->zc_iflags, &nvl)) != 0)
2447                 return (error);
2448 
2449         if (received) {
2450                 nvlist_t *origprops;
2451                 objset_t *os;
2452 
2453                 if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2454                         if (dsl_prop_get_received(os, &origprops) == 0) {
2455                                 (void) clear_received_props(os,
2456                                     zc->zc_name, origprops, nvl);
2457                                 nvlist_free(origprops);
2458                         }
2459 
2460                         dsl_prop_set_hasrecvd(os);
2461                         dmu_objset_rele(os, FTAG);
2462                 }
2463         }
2464 
2465         error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2466 
2467         if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2468                 (void) put_nvlist(zc, errors);
2469         }
2470 
2471         nvlist_free(errors);
2472         nvlist_free(nvl);
2473         return (error);
2474 }
2475 
2476 /*
2477  * inputs:
2478  * zc_name              name of filesystem
2479  * zc_value             name of property to inherit
2480  * zc_cookie            revert to received value if TRUE
2481  *
2482  * outputs:             none
2483  */
2484 static int
2485 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2486 {
2487         const char *propname = zc->zc_value;
2488         zfs_prop_t prop = zfs_name_to_prop(propname);
2489         boolean_t received = zc->zc_cookie;
2490         zprop_source_t source = (received
2491             ? ZPROP_SRC_NONE            /* revert to received value, if any */
2492             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
2493 
2494         if (received) {
2495                 nvlist_t *dummy;
2496                 nvpair_t *pair;
2497                 zprop_type_t type;
2498                 int err;
2499 
2500                 /*
2501                  * zfs_prop_set_special() expects properties in the form of an
2502                  * nvpair with type info.
2503                  */
2504                 if (prop == ZPROP_INVAL) {
2505                         if (!zfs_prop_user(propname))
2506                                 return (EINVAL);
2507 
2508                         type = PROP_TYPE_STRING;
2509                 } else if (prop == ZFS_PROP_VOLSIZE ||
2510                     prop == ZFS_PROP_VERSION) {
2511                         return (EINVAL);
2512                 } else {
2513                         type = zfs_prop_get_type(prop);
2514                 }
2515 
2516                 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2517 
2518                 switch (type) {
2519                 case PROP_TYPE_STRING:
2520                         VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2521                         break;
2522                 case PROP_TYPE_NUMBER:
2523                 case PROP_TYPE_INDEX:
2524                         VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2525                         break;
2526                 default:
2527                         nvlist_free(dummy);
2528                         return (EINVAL);
2529                 }
2530 
2531                 pair = nvlist_next_nvpair(dummy, NULL);
2532                 err = zfs_prop_set_special(zc->zc_name, source, pair);
2533                 nvlist_free(dummy);
2534                 if (err != -1)
2535                         return (err); /* special property already handled */
2536         } else {
2537                 /*
2538                  * Only check this in the non-received case. We want to allow
2539                  * 'inherit -S' to revert non-inheritable properties like quota
2540                  * and reservation to the received or default values even though
2541                  * they are not considered inheritable.
2542                  */
2543                 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2544                         return (EINVAL);
2545         }
2546 
2547         /* the property name has been validated by zfs_secpolicy_inherit() */
2548         return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2549 }
2550 
2551 static int
2552 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2553 {
2554         nvlist_t *props;
2555         spa_t *spa;
2556         int error;
2557         nvpair_t *pair;
2558 
2559         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2560             zc->zc_iflags, &props))
2561                 return (error);
2562 
2563         /*
2564          * If the only property is the configfile, then just do a spa_lookup()
2565          * to handle the faulted case.
2566          */
2567         pair = nvlist_next_nvpair(props, NULL);
2568         if (pair != NULL && strcmp(nvpair_name(pair),
2569             zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2570             nvlist_next_nvpair(props, pair) == NULL) {
2571                 mutex_enter(&spa_namespace_lock);
2572                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2573                         spa_configfile_set(spa, props, B_FALSE);
2574                         spa_config_sync(spa, B_FALSE, B_TRUE);
2575                 }
2576                 mutex_exit(&spa_namespace_lock);
2577                 if (spa != NULL) {
2578                         nvlist_free(props);
2579                         return (0);
2580                 }
2581         }
2582 
2583         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2584                 nvlist_free(props);
2585                 return (error);
2586         }
2587 
2588         error = spa_prop_set(spa, props);
2589 
2590         nvlist_free(props);
2591         spa_close(spa, FTAG);
2592 
2593         return (error);
2594 }
2595 
2596 static int
2597 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2598 {
2599         spa_t *spa;
2600         int error;
2601         nvlist_t *nvp = NULL;
2602 
2603         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2604                 /*
2605                  * If the pool is faulted, there may be properties we can still
2606                  * get (such as altroot and cachefile), so attempt to get them
2607                  * anyway.
2608                  */
2609                 mutex_enter(&spa_namespace_lock);
2610                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2611                         error = spa_prop_get(spa, &nvp);
2612                 mutex_exit(&spa_namespace_lock);
2613         } else {
2614                 error = spa_prop_get(spa, &nvp);
2615                 spa_close(spa, FTAG);
2616         }
2617 
2618         if (error == 0 && zc->zc_nvlist_dst != NULL)
2619                 error = put_nvlist(zc, nvp);
2620         else
2621                 error = EFAULT;
2622 
2623         nvlist_free(nvp);
2624         return (error);
2625 }
2626 
2627 /*
2628  * inputs:
2629  * zc_name              name of filesystem
2630  * zc_nvlist_src{_size} nvlist of delegated permissions
2631  * zc_perm_action       allow/unallow flag
2632  *
2633  * outputs:             none
2634  */
2635 static int
2636 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2637 {
2638         int error;
2639         nvlist_t *fsaclnv = NULL;
2640 
2641         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2642             zc->zc_iflags, &fsaclnv)) != 0)
2643                 return (error);
2644 
2645         /*
2646          * Verify nvlist is constructed correctly
2647          */
2648         if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2649                 nvlist_free(fsaclnv);
2650                 return (EINVAL);
2651         }
2652 
2653         /*
2654          * If we don't have PRIV_SYS_MOUNT, then validate
2655          * that user is allowed to hand out each permission in
2656          * the nvlist(s)
2657          */
2658 
2659         error = secpolicy_zfs(CRED());
2660         if (error) {
2661                 if (zc->zc_perm_action == B_FALSE) {
2662                         error = dsl_deleg_can_allow(zc->zc_name,
2663                             fsaclnv, CRED());
2664                 } else {
2665                         error = dsl_deleg_can_unallow(zc->zc_name,
2666                             fsaclnv, CRED());
2667                 }
2668         }
2669 
2670         if (error == 0)
2671                 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2672 
2673         nvlist_free(fsaclnv);
2674         return (error);
2675 }
2676 
2677 /*
2678  * inputs:
2679  * zc_name              name of filesystem
2680  *
2681  * outputs:
2682  * zc_nvlist_src{_size} nvlist of delegated permissions
2683  */
2684 static int
2685 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2686 {
2687         nvlist_t *nvp;
2688         int error;
2689 
2690         if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2691                 error = put_nvlist(zc, nvp);
2692                 nvlist_free(nvp);
2693         }
2694 
2695         return (error);
2696 }
2697 
2698 /*
2699  * Search the vfs list for a specified resource.  Returns a pointer to it
2700  * or NULL if no suitable entry is found. The caller of this routine
2701  * is responsible for releasing the returned vfs pointer.
2702  */
2703 static vfs_t *
2704 zfs_get_vfs(const char *resource)
2705 {
2706         struct vfs *vfsp;
2707         struct vfs *vfs_found = NULL;
2708 
2709         vfs_list_read_lock();
2710         vfsp = rootvfs;
2711         do {
2712                 if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2713                         VFS_HOLD(vfsp);
2714                         vfs_found = vfsp;
2715                         break;
2716                 }
2717                 vfsp = vfsp->vfs_next;
2718         } while (vfsp != rootvfs);
2719         vfs_list_unlock();
2720         return (vfs_found);
2721 }
2722 
2723 /* ARGSUSED */
2724 static void
2725 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2726 {
2727         zfs_creat_t *zct = arg;
2728 
2729         zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2730 }
2731 
2732 #define ZFS_PROP_UNDEFINED      ((uint64_t)-1)
2733 
2734 /*
2735  * inputs:
2736  * createprops          list of properties requested by creator
2737  * default_zplver       zpl version to use if unspecified in createprops
2738  * fuids_ok             fuids allowed in this version of the spa?
2739  * os                   parent objset pointer (NULL if root fs)
2740  *
2741  * outputs:
2742  * zplprops     values for the zplprops we attach to the master node object
2743  * is_ci        true if requested file system will be purely case-insensitive
2744  *
2745  * Determine the settings for utf8only, normalization and
2746  * casesensitivity.  Specific values may have been requested by the
2747  * creator and/or we can inherit values from the parent dataset.  If
2748  * the file system is of too early a vintage, a creator can not
2749  * request settings for these properties, even if the requested
2750  * setting is the default value.  We don't actually want to create dsl
2751  * properties for these, so remove them from the source nvlist after
2752  * processing.
2753  */
2754 static int
2755 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2756     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2757     nvlist_t *zplprops, boolean_t *is_ci)
2758 {
2759         uint64_t sense = ZFS_PROP_UNDEFINED;
2760         uint64_t norm = ZFS_PROP_UNDEFINED;
2761         uint64_t u8 = ZFS_PROP_UNDEFINED;
2762 
2763         ASSERT(zplprops != NULL);
2764 
2765         /*
2766          * Pull out creator prop choices, if any.
2767          */
2768         if (createprops) {
2769                 (void) nvlist_lookup_uint64(createprops,
2770                     zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2771                 (void) nvlist_lookup_uint64(createprops,
2772                     zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2773                 (void) nvlist_remove_all(createprops,
2774                     zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2775                 (void) nvlist_lookup_uint64(createprops,
2776                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2777                 (void) nvlist_remove_all(createprops,
2778                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2779                 (void) nvlist_lookup_uint64(createprops,
2780                     zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2781                 (void) nvlist_remove_all(createprops,
2782                     zfs_prop_to_name(ZFS_PROP_CASE));
2783         }
2784 
2785         /*
2786          * If the zpl version requested is whacky or the file system
2787          * or pool is version is too "young" to support normalization
2788          * and the creator tried to set a value for one of the props,
2789          * error out.
2790          */
2791         if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2792             (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2793             (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2794             (zplver < ZPL_VERSION_NORMALIZATION &&
2795             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2796             sense != ZFS_PROP_UNDEFINED)))
2797                 return (ENOTSUP);
2798 
2799         /*
2800          * Put the version in the zplprops
2801          */
2802         VERIFY(nvlist_add_uint64(zplprops,
2803             zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2804 
2805         if (norm == ZFS_PROP_UNDEFINED)
2806                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2807         VERIFY(nvlist_add_uint64(zplprops,
2808             zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2809 
2810         /*
2811          * If we're normalizing, names must always be valid UTF-8 strings.
2812          */
2813         if (norm)
2814                 u8 = 1;
2815         if (u8 == ZFS_PROP_UNDEFINED)
2816                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2817         VERIFY(nvlist_add_uint64(zplprops,
2818             zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2819 
2820         if (sense == ZFS_PROP_UNDEFINED)
2821                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2822         VERIFY(nvlist_add_uint64(zplprops,
2823             zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2824 
2825         if (is_ci)
2826                 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
2827 
2828         return (0);
2829 }
2830 
2831 static int
2832 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2833     nvlist_t *zplprops, boolean_t *is_ci)
2834 {
2835         boolean_t fuids_ok, sa_ok;
2836         uint64_t zplver = ZPL_VERSION;
2837         objset_t *os = NULL;
2838         char parentname[MAXNAMELEN];
2839         char *cp;
2840         spa_t *spa;
2841         uint64_t spa_vers;
2842         int error;
2843 
2844         (void) strlcpy(parentname, dataset, sizeof (parentname));
2845         cp = strrchr(parentname, '/');
2846         ASSERT(cp != NULL);
2847         cp[0] = '\0';
2848 
2849         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2850                 return (error);
2851 
2852         spa_vers = spa_version(spa);
2853         spa_close(spa, FTAG);
2854 
2855         zplver = zfs_zpl_version_map(spa_vers);
2856         fuids_ok = (zplver >= ZPL_VERSION_FUID);
2857         sa_ok = (zplver >= ZPL_VERSION_SA);
2858 
2859         /*
2860          * Open parent object set so we can inherit zplprop values.
2861          */
2862         if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2863                 return (error);
2864 
2865         error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2866             zplprops, is_ci);
2867         dmu_objset_rele(os, FTAG);
2868         return (error);
2869 }
2870 
2871 static int
2872 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2873     nvlist_t *zplprops, boolean_t *is_ci)
2874 {
2875         boolean_t fuids_ok;
2876         boolean_t sa_ok;
2877         uint64_t zplver = ZPL_VERSION;
2878         int error;
2879 
2880         zplver = zfs_zpl_version_map(spa_vers);
2881         fuids_ok = (zplver >= ZPL_VERSION_FUID);
2882         sa_ok = (zplver >= ZPL_VERSION_SA);
2883 
2884         error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2885             createprops, zplprops, is_ci);
2886         return (error);
2887 }
2888 
2889 /*
2890  * inputs:
2891  * zc_objset_type       type of objset to create (fs vs zvol)
2892  * zc_name              name of new objset
2893  * zc_value             name of snapshot to clone from (may be empty)
2894  * zc_nvlist_src{_size} nvlist of properties to apply
2895  *
2896  * outputs: none
2897  */
2898 static int
2899 zfs_ioc_create(zfs_cmd_t *zc)
2900 {
2901         objset_t *clone;
2902         int error = 0;
2903         zfs_creat_t zct;
2904         nvlist_t *nvprops = NULL;
2905         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2906         dmu_objset_type_t type = zc->zc_objset_type;
2907 
2908         switch (type) {
2909 
2910         case DMU_OST_ZFS:
2911                 cbfunc = zfs_create_cb;
2912                 break;
2913 
2914         case DMU_OST_ZVOL:
2915                 cbfunc = zvol_create_cb;
2916                 break;
2917 
2918         default:
2919                 cbfunc = NULL;
2920                 break;
2921         }
2922         if (strchr(zc->zc_name, '@') ||
2923             strchr(zc->zc_name, '%'))
2924                 return (EINVAL);
2925 
2926         if (zc->zc_nvlist_src != NULL &&
2927             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2928             zc->zc_iflags, &nvprops)) != 0)
2929                 return (error);
2930 
2931         zct.zct_zplprops = NULL;
2932         zct.zct_props = nvprops;
2933 
2934         if (zc->zc_value[0] != '\0') {
2935                 /*
2936                  * We're creating a clone of an existing snapshot.
2937                  */
2938                 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2939                 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2940                         nvlist_free(nvprops);
2941                         return (EINVAL);
2942                 }
2943 
2944                 error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2945                 if (error) {
2946                         nvlist_free(nvprops);
2947                         return (error);
2948                 }
2949 
2950                 error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2951                 dmu_objset_rele(clone, FTAG);
2952                 if (error) {
2953                         nvlist_free(nvprops);
2954                         return (error);
2955                 }
2956         } else {
2957                 boolean_t is_insensitive = B_FALSE;
2958 
2959                 if (cbfunc == NULL) {
2960                         nvlist_free(nvprops);
2961                         return (EINVAL);
2962                 }
2963 
2964                 if (type == DMU_OST_ZVOL) {
2965                         uint64_t volsize, volblocksize;
2966 
2967                         if (nvprops == NULL ||
2968                             nvlist_lookup_uint64(nvprops,
2969                             zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2970                             &volsize) != 0) {
2971                                 nvlist_free(nvprops);
2972                                 return (EINVAL);
2973                         }
2974 
2975                         if ((error = nvlist_lookup_uint64(nvprops,
2976                             zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2977                             &volblocksize)) != 0 && error != ENOENT) {
2978                                 nvlist_free(nvprops);
2979                                 return (EINVAL);
2980                         }
2981 
2982                         if (error != 0)
2983                                 volblocksize = zfs_prop_default_numeric(
2984                                     ZFS_PROP_VOLBLOCKSIZE);
2985 
2986                         if ((error = zvol_check_volblocksize(
2987                             volblocksize)) != 0 ||
2988                             (error = zvol_check_volsize(volsize,
2989                             volblocksize)) != 0) {
2990                                 nvlist_free(nvprops);
2991                                 return (error);
2992                         }
2993                 } else if (type == DMU_OST_ZFS) {
2994                         int error;
2995 
2996                         /*
2997                          * We have to have normalization and
2998                          * case-folding flags correct when we do the
2999                          * file system creation, so go figure them out
3000                          * now.
3001                          */
3002                         VERIFY(nvlist_alloc(&zct.zct_zplprops,
3003                             NV_UNIQUE_NAME, KM_SLEEP) == 0);
3004                         error = zfs_fill_zplprops(zc->zc_name, nvprops,
3005                             zct.zct_zplprops, &is_insensitive);
3006                         if (error != 0) {
3007                                 nvlist_free(nvprops);
3008                                 nvlist_free(zct.zct_zplprops);
3009                                 return (error);
3010                         }
3011                 }
3012                 error = dmu_objset_create(zc->zc_name, type,
3013                     is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3014                 nvlist_free(zct.zct_zplprops);
3015         }
3016 
3017         /*
3018          * It would be nice to do this atomically.
3019          */
3020         if (error == 0) {
3021                 error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
3022                     nvprops, NULL);
3023                 if (error != 0)
3024                         (void) dmu_objset_destroy(zc->zc_name, B_FALSE);
3025         }
3026         nvlist_free(nvprops);
3027         return (error);
3028 }
3029 
3030 /*
3031  * inputs:
3032  * zc_name      name of filesystem
3033  * zc_value     short name of snapshot
3034  * zc_cookie    recursive flag
3035  * zc_nvlist_src[_size] property list
3036  *
3037  * outputs:
3038  * zc_value     short snapname (i.e. part after the '@')
3039  */
3040 static int
3041 zfs_ioc_snapshot(zfs_cmd_t *zc)
3042 {
3043         nvlist_t *nvprops = NULL;
3044         int error;
3045         boolean_t recursive = zc->zc_cookie;
3046 
3047         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3048                 return (EINVAL);
3049 
3050         if (zc->zc_nvlist_src != NULL &&
3051             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3052             zc->zc_iflags, &nvprops)) != 0)
3053                 return (error);
3054 
3055         error = zfs_check_userprops(zc->zc_name, nvprops);
3056         if (error)
3057                 goto out;
3058 
3059         if (!nvlist_empty(nvprops) &&
3060             zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
3061                 error = ENOTSUP;
3062                 goto out;
3063         }
3064 
3065         error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL,
3066             nvprops, recursive, B_FALSE, -1);
3067 
3068 out:
3069         nvlist_free(nvprops);
3070         return (error);
3071 }
3072 
3073 int
3074 zfs_unmount_snap(const char *name, void *arg)
3075 {
3076         vfs_t *vfsp = NULL;
3077 
3078         if (arg) {
3079                 char *snapname = arg;
3080                 char *fullname = kmem_asprintf("%s@%s", name, snapname);
3081                 vfsp = zfs_get_vfs(fullname);
3082                 strfree(fullname);
3083         } else if (strchr(name, '@')) {
3084                 vfsp = zfs_get_vfs(name);
3085         }
3086 
3087         if (vfsp) {
3088                 /*
3089                  * Always force the unmount for snapshots.
3090                  */
3091                 int flag = MS_FORCE;
3092                 int err;
3093 
3094                 if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
3095                         VFS_RELE(vfsp);
3096                         return (err);
3097                 }
3098                 VFS_RELE(vfsp);
3099                 if ((err = dounmount(vfsp, flag, kcred)) != 0)
3100                         return (err);
3101         }
3102         return (0);
3103 }
3104 
3105 /*
3106  * inputs:
3107  * zc_name              name of filesystem
3108  * zc_value             short name of snapshot
3109  * zc_defer_destroy     mark for deferred destroy
3110  *
3111  * outputs:     none
3112  */
3113 static int
3114 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
3115 {
3116         int err;
3117 
3118         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3119                 return (EINVAL);
3120         err = dmu_objset_find(zc->zc_name,
3121             zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
3122         if (err)
3123                 return (err);
3124         return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
3125             zc->zc_defer_destroy));
3126 }
3127 
3128 /*
3129  * inputs:
3130  * zc_name              name of dataset to destroy
3131  * zc_objset_type       type of objset
3132  * zc_defer_destroy     mark for deferred destroy
3133  *
3134  * outputs:             none
3135  */
3136 static int
3137 zfs_ioc_destroy(zfs_cmd_t *zc)
3138 {
3139         int err;
3140         if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
3141                 err = zfs_unmount_snap(zc->zc_name, NULL);
3142                 if (err)
3143                         return (err);
3144         }
3145 
3146         err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
3147         if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3148                 (void) zvol_remove_minor(zc->zc_name);
3149         return (err);
3150 }
3151 
3152 /*
3153  * inputs:
3154  * zc_name      name of dataset to rollback (to most recent snapshot)
3155  *
3156  * outputs:     none
3157  */
3158 static int
3159 zfs_ioc_rollback(zfs_cmd_t *zc)
3160 {
3161         dsl_dataset_t *ds, *clone;
3162         int error;
3163         zfsvfs_t *zfsvfs;
3164         char *clone_name;
3165 
3166         error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
3167         if (error)
3168                 return (error);
3169 
3170         /* must not be a snapshot */
3171         if (dsl_dataset_is_snapshot(ds)) {
3172                 dsl_dataset_rele(ds, FTAG);
3173                 return (EINVAL);
3174         }
3175 
3176         /* must have a most recent snapshot */
3177         if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
3178                 dsl_dataset_rele(ds, FTAG);
3179                 return (EINVAL);
3180         }
3181 
3182         /*
3183          * Create clone of most recent snapshot.
3184          */
3185         clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
3186         error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
3187         if (error)
3188                 goto out;
3189 
3190         error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
3191         if (error)
3192                 goto out;
3193 
3194         /*
3195          * Do clone swap.
3196          */
3197         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3198                 error = zfs_suspend_fs(zfsvfs);
3199                 if (error == 0) {
3200                         int resume_err;
3201 
3202                         if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3203                                 error = dsl_dataset_clone_swap(clone, ds,
3204                                     B_TRUE);
3205                                 dsl_dataset_disown(ds, FTAG);
3206                                 ds = NULL;
3207                         } else {
3208                                 error = EBUSY;
3209                         }
3210                         resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
3211                         error = error ? error : resume_err;
3212                 }
3213                 VFS_RELE(zfsvfs->z_vfs);
3214         } else {
3215                 if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3216                         error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
3217                         dsl_dataset_disown(ds, FTAG);
3218                         ds = NULL;
3219                 } else {
3220                         error = EBUSY;
3221                 }
3222         }
3223 
3224         /*
3225          * Destroy clone (which also closes it).
3226          */
3227         (void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
3228 
3229 out:
3230         strfree(clone_name);
3231         if (ds)
3232                 dsl_dataset_rele(ds, FTAG);
3233         return (error);
3234 }
3235 
3236 /*
3237  * inputs:
3238  * zc_name      old name of dataset
3239  * zc_value     new name of dataset
3240  * zc_cookie    recursive flag (only valid for snapshots)
3241  *
3242  * outputs:     none
3243  */
3244 static int
3245 zfs_ioc_rename(zfs_cmd_t *zc)
3246 {
3247         boolean_t recursive = zc->zc_cookie & 1;
3248 
3249         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3250         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3251             strchr(zc->zc_value, '%'))
3252                 return (EINVAL);
3253 
3254         /*
3255          * Unmount snapshot unless we're doing a recursive rename,
3256          * in which case the dataset code figures out which snapshots
3257          * to unmount.
3258          */
3259         if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3260             zc->zc_objset_type == DMU_OST_ZFS) {
3261                 int err = zfs_unmount_snap(zc->zc_name, NULL);
3262                 if (err)
3263                         return (err);
3264         }
3265         if (zc->zc_objset_type == DMU_OST_ZVOL)
3266                 (void) zvol_remove_minor(zc->zc_name);
3267         return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3268 }
3269 
3270 static int
3271 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3272 {
3273         const char *propname = nvpair_name(pair);
3274         boolean_t issnap = (strchr(dsname, '@') != NULL);
3275         zfs_prop_t prop = zfs_name_to_prop(propname);
3276         uint64_t intval;
3277         int err;
3278 
3279         if (prop == ZPROP_INVAL) {
3280                 if (zfs_prop_user(propname)) {
3281                         if (err = zfs_secpolicy_write_perms(dsname,
3282                             ZFS_DELEG_PERM_USERPROP, cr))
3283                                 return (err);
3284                         return (0);
3285                 }
3286 
3287                 if (!issnap && zfs_prop_userquota(propname)) {
3288                         const char *perm = NULL;
3289                         const char *uq_prefix =
3290                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3291                         const char *gq_prefix =
3292                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3293 
3294                         if (strncmp(propname, uq_prefix,
3295                             strlen(uq_prefix)) == 0) {
3296                                 perm = ZFS_DELEG_PERM_USERQUOTA;
3297                         } else if (strncmp(propname, gq_prefix,
3298                             strlen(gq_prefix)) == 0) {
3299                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
3300                         } else {
3301                                 /* USERUSED and GROUPUSED are read-only */
3302                                 return (EINVAL);
3303                         }
3304 
3305                         if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3306                                 return (err);
3307                         return (0);
3308                 }
3309 
3310                 return (EINVAL);
3311         }
3312 
3313         if (issnap)
3314                 return (EINVAL);
3315 
3316         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3317                 /*
3318                  * dsl_prop_get_all_impl() returns properties in this
3319                  * format.
3320                  */
3321                 nvlist_t *attrs;
3322                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3323                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3324                     &pair) == 0);
3325         }
3326 
3327         /*
3328          * Check that this value is valid for this pool version
3329          */
3330         switch (prop) {
3331         case ZFS_PROP_COMPRESSION:
3332                 /*
3333                  * If the user specified gzip compression, make sure
3334                  * the SPA supports it. We ignore any errors here since
3335                  * we'll catch them later.
3336                  */
3337                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3338                     nvpair_value_uint64(pair, &intval) == 0) {
3339                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
3340                             intval <= ZIO_COMPRESS_GZIP_9 &&
3341                             zfs_earlier_version(dsname,
3342                             SPA_VERSION_GZIP_COMPRESSION)) {
3343                                 return (ENOTSUP);
3344                         }
3345 
3346                         if (intval == ZIO_COMPRESS_ZLE &&
3347                             zfs_earlier_version(dsname,
3348                             SPA_VERSION_ZLE_COMPRESSION))
3349                                 return (ENOTSUP);
3350 
3351                         /*
3352                          * If this is a bootable dataset then
3353                          * verify that the compression algorithm
3354                          * is supported for booting. We must return
3355                          * something other than ENOTSUP since it
3356                          * implies a downrev pool version.
3357                          */
3358                         if (zfs_is_bootfs(dsname) &&
3359                             !BOOTFS_COMPRESS_VALID(intval)) {
3360                                 return (ERANGE);
3361                         }
3362                 }
3363                 break;
3364 
3365         case ZFS_PROP_COPIES:
3366                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3367                         return (ENOTSUP);
3368                 break;
3369 
3370         case ZFS_PROP_DEDUP:
3371                 if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3372                         return (ENOTSUP);
3373                 break;
3374 
3375         case ZFS_PROP_SHARESMB:
3376                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3377                         return (ENOTSUP);
3378                 break;
3379 
3380         case ZFS_PROP_ACLINHERIT:
3381                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3382                     nvpair_value_uint64(pair, &intval) == 0) {
3383                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
3384                             zfs_earlier_version(dsname,
3385                             SPA_VERSION_PASSTHROUGH_X))
3386                                 return (ENOTSUP);
3387                 }
3388                 break;
3389         }
3390 
3391         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3392 }
3393 
3394 /*
3395  * Removes properties from the given props list that fail permission checks
3396  * needed to clear them and to restore them in case of a receive error. For each
3397  * property, make sure we have both set and inherit permissions.
3398  *
3399  * Returns the first error encountered if any permission checks fail. If the
3400  * caller provides a non-NULL errlist, it also gives the complete list of names
3401  * of all the properties that failed a permission check along with the
3402  * corresponding error numbers. The caller is responsible for freeing the
3403  * returned errlist.
3404  *
3405  * If every property checks out successfully, zero is returned and the list
3406  * pointed at by errlist is NULL.
3407  */
3408 static int
3409 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3410 {
3411         zfs_cmd_t *zc;
3412         nvpair_t *pair, *next_pair;
3413         nvlist_t *errors;
3414         int err, rv = 0;
3415 
3416         if (props == NULL)
3417                 return (0);
3418 
3419         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3420 
3421         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3422         (void) strcpy(zc->zc_name, dataset);
3423         pair = nvlist_next_nvpair(props, NULL);
3424         while (pair != NULL) {
3425                 next_pair = nvlist_next_nvpair(props, pair);
3426 
3427                 (void) strcpy(zc->zc_value, nvpair_name(pair));
3428                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3429                     (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3430                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3431                         VERIFY(nvlist_add_int32(errors,
3432                             zc->zc_value, err) == 0);
3433                 }
3434                 pair = next_pair;
3435         }
3436         kmem_free(zc, sizeof (zfs_cmd_t));
3437 
3438         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3439                 nvlist_free(errors);
3440                 errors = NULL;
3441         } else {
3442                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
3443         }
3444 
3445         if (errlist == NULL)
3446                 nvlist_free(errors);
3447         else
3448                 *errlist = errors;
3449 
3450         return (rv);
3451 }
3452 
3453 static boolean_t
3454 propval_equals(nvpair_t *p1, nvpair_t *p2)
3455 {
3456         if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3457                 /* dsl_prop_get_all_impl() format */
3458                 nvlist_t *attrs;
3459                 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3460                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3461                     &p1) == 0);
3462         }
3463 
3464         if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3465                 nvlist_t *attrs;
3466                 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3467                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3468                     &p2) == 0);
3469         }
3470 
3471         if (nvpair_type(p1) != nvpair_type(p2))
3472                 return (B_FALSE);
3473 
3474         if (nvpair_type(p1) == DATA_TYPE_STRING) {
3475                 char *valstr1, *valstr2;
3476 
3477                 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3478                 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3479                 return (strcmp(valstr1, valstr2) == 0);
3480         } else {
3481                 uint64_t intval1, intval2;
3482 
3483                 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3484                 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3485                 return (intval1 == intval2);
3486         }
3487 }
3488 
3489 /*
3490  * Remove properties from props if they are not going to change (as determined
3491  * by comparison with origprops). Remove them from origprops as well, since we
3492  * do not need to clear or restore properties that won't change.
3493  */
3494 static void
3495 props_reduce(nvlist_t *props, nvlist_t *origprops)
3496 {
3497         nvpair_t *pair, *next_pair;
3498 
3499         if (origprops == NULL)
3500                 return; /* all props need to be received */
3501 
3502         pair = nvlist_next_nvpair(props, NULL);
3503         while (pair != NULL) {
3504                 const char *propname = nvpair_name(pair);
3505                 nvpair_t *match;
3506 
3507                 next_pair = nvlist_next_nvpair(props, pair);
3508 
3509                 if ((nvlist_lookup_nvpair(origprops, propname,
3510                     &match) != 0) || !propval_equals(pair, match))
3511                         goto next; /* need to set received value */
3512 
3513                 /* don't clear the existing received value */
3514                 (void) nvlist_remove_nvpair(origprops, match);
3515                 /* don't bother receiving the property */
3516                 (void) nvlist_remove_nvpair(props, pair);
3517 next:
3518                 pair = next_pair;
3519         }
3520 }
3521 
3522 #ifdef  DEBUG
3523 static boolean_t zfs_ioc_recv_inject_err;
3524 #endif
3525 
3526 /*
3527  * inputs:
3528  * zc_name              name of containing filesystem
3529  * zc_nvlist_src{_size} nvlist of properties to apply
3530  * zc_value             name of snapshot to create
3531  * zc_string            name of clone origin (if DRR_FLAG_CLONE)
3532  * zc_cookie            file descriptor to recv from
3533  * zc_begin_record      the BEGIN record of the stream (not byteswapped)
3534  * zc_guid              force flag
3535  * zc_cleanup_fd        cleanup-on-exit file descriptor
3536  * zc_action_handle     handle for this guid/ds mapping (or zero on first call)
3537  *
3538  * outputs:
3539  * zc_cookie            number of bytes read
3540  * zc_nvlist_dst{_size} error for each unapplied received property
3541  * zc_obj               zprop_errflags_t
3542  * zc_action_handle     handle for this guid/ds mapping
3543  */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
        file_t *fp;
        objset_t *os;
        dmu_recv_cookie_t drc;
        boolean_t force = (boolean_t)zc->zc_guid;
        int fd;
        int error = 0;
        /* Set if the error list can't be handed back; see the "out" label. */
        int props_error = 0;
        /* Per-property errors, copied out to zc_nvlist_dst via put_nvlist(). */
        nvlist_t *errors;
        offset_t off;
        nvlist_t *props = NULL; /* sent properties */
        nvlist_t *origprops = NULL; /* existing properties */
        objset_t *origin = NULL;
        char *tosnap;
        char tofs[ZFS_MAXNAMELEN];
        boolean_t first_recvd_props = B_FALSE;

        /*
         * zc_value must pass dataset_namecheck(), must contain a '@'
         * (fs@snap form) and must not contain a '%'.
         */
        if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
            strchr(zc->zc_value, '@') == NULL ||
            strchr(zc->zc_value, '%'))
                return (EINVAL);

        /* Split "fs@snap" in place: tofs holds the fs, tosnap the snap name. */
        (void) strcpy(tofs, zc->zc_value);
        tosnap = strchr(tofs, '@');
        *tosnap++ = '\0';

        /* The property nvlist is optional; props stays NULL if none is given. */
        if (zc->zc_nvlist_src != NULL &&
            (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
            zc->zc_iflags, &props)) != 0)
                return (error);

        /* zc_cookie carries the descriptor the stream is read from. */
        fd = zc->zc_cookie;
        fp = getf(fd);
        if (fp == NULL) {
                nvlist_free(props);
                return (EBADF);
        }

        VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);

        /*
         * Only when properties were sent and tofs can already be held:
         * stash its current received properties so they can be cleared
         * now and restored if the receive fails.
         */
        if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
                if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
                    !dsl_prop_get_hasrecvd(os)) {
                        first_recvd_props = B_TRUE;
                }

                /*
                 * If new received properties are supplied, they are to
                 * completely replace the existing received properties, so stash
                 * away the existing ones.
                 */
                if (dsl_prop_get_received(os, &origprops) == 0) {
                        nvlist_t *errlist = NULL;
                        /*
                         * Don't bother writing a property if its value won't
                         * change (and avoid the unnecessary security checks).
                         *
                         * The first receive after SPA_VERSION_RECVD_PROPS is a
                         * special case where we blow away all local properties
                         * regardless.
                         */
                        if (!first_recvd_props)
                                props_reduce(props, origprops);
                        if (zfs_check_clearable(tofs, origprops,
                            &errlist) != 0)
                                (void) nvlist_merge(errors, errlist, 0);
                        nvlist_free(errlist);
                }

                dmu_objset_rele(os, FTAG);
        }

        /* A non-empty zc_string names the origin; hold it for recv_begin. */
        if (zc->zc_string[0]) {
                error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
                if (error)
                        goto out;
        }

        /*
         * The origin hold is dropped as soon as dmu_recv_begin() returns,
         * whether or not it succeeded.  A failure here jumps to "out",
         * which skips the property restore section below.
         */
        error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
            &zc->zc_begin_record, force, origin, &drc);
        if (origin)
                dmu_objset_rele(origin, FTAG);
        if (error)
                goto out;

        /*
         * Set properties before we receive the stream so that they are applied
         * to the new data. Note that we must call dmu_recv_stream() if
         * dmu_recv_begin() succeeds.
         */
        if (props) {
                nvlist_t *errlist;

                if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
                        if (drc.drc_newfs) {
                                if (spa_version(os->os_spa) >=
                                    SPA_VERSION_RECVD_PROPS)
                                        first_recvd_props = B_TRUE;
                        } else if (origprops != NULL) {
                                if (clear_received_props(os, tofs, origprops,
                                    first_recvd_props ? NULL : props) != 0)
                                        zc->zc_obj |= ZPROP_ERR_NOCLEAR;
                        } else {
                                /* Existing fs, but nothing was stashed. */
                                zc->zc_obj |= ZPROP_ERR_NOCLEAR;
                        }
                        dsl_prop_set_hasrecvd(os);
                } else if (!drc.drc_newfs) {
                        zc->zc_obj |= ZPROP_ERR_NOCLEAR;
                }

                /* Per-property failures are collected, not treated as fatal. */
                (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
                    props, &errlist);
                (void) nvlist_merge(errors, errlist, 0);
                nvlist_free(errlist);
        }

        if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
                /*
                 * Caller made zc->zc_nvlist_dst less than the minimum expected
                 * size or supplied an invalid address.
                 */
                props_error = EINVAL;
        }

        /* Receive starting at the file's current offset. */
        off = fp->f_offset;
        error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
            &zc->zc_action_handle);

        if (error == 0) {
                zfsvfs_t *zfsvfs = NULL;

                if (getzfsvfs(tofs, &zfsvfs) == 0) {
                        /* online recv */
                        int end_err;

                        error = zfs_suspend_fs(zfsvfs);
                        /*
                         * If the suspend fails, then the recv_end will
                         * likely also fail, and clean up after itself.
                         */
                        end_err = dmu_recv_end(&drc);
                        if (error == 0)
                                error = zfs_resume_fs(zfsvfs, tofs);
                        /* A suspend/resume error wins over end_err. */
                        error = error ? error : end_err;
                        VFS_RELE(zfsvfs->z_vfs);
                } else {
                        error = dmu_recv_end(&drc);
                }
        }

        /*
         * zc_cookie is reused as an output: the difference between the
         * offset left by dmu_recv_stream() and the file's starting offset.
         * The file offset itself is only advanced if VOP_SEEK() accepts it.
         */
        zc->zc_cookie = off - fp->f_offset;
        if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                fp->f_offset = off;

#ifdef  DEBUG
        /* One-shot forced failure, used to exercise the restore path below. */
        if (zfs_ioc_recv_inject_err) {
                zfs_ioc_recv_inject_err = B_FALSE;
                error = 1;
        }
#endif
        /*
         * On error, restore the original props.
         */
        if (error && props) {
                if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
                        if (clear_received_props(os, tofs, props, NULL) != 0) {
                                /*
                                 * We failed to clear the received properties.
                                 * Since we may have left a $recvd value on the
                                 * system, we can't clear the $hasrecvd flag.
                                 */
                                zc->zc_obj |= ZPROP_ERR_NORESTORE;
                        } else if (first_recvd_props) {
                                dsl_prop_unset_hasrecvd(os);
                        }
                        dmu_objset_rele(os, FTAG);
                } else if (!drc.drc_newfs) {
                        /* We failed to clear the received properties. */
                        zc->zc_obj |= ZPROP_ERR_NORESTORE;
                }

                if (origprops == NULL && !drc.drc_newfs) {
                        /* We failed to stash the original properties. */
                        zc->zc_obj |= ZPROP_ERR_NORESTORE;
                }

                /*
                 * dsl_props_set() will not convert RECEIVED to LOCAL on or
                 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
                 * explicitly if we're restoring local properties cleared in the
                 * first new-style receive.
                 */
                if (origprops != NULL &&
                    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
                    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
                    origprops, NULL) != 0) {
                        /*
                         * We stashed the original properties but failed to
                         * restore them.
                         */
                        zc->zc_obj |= ZPROP_ERR_NORESTORE;
                }
        }
out:
        /* Common exit: free the nvlists and drop the getf() hold on fd. */
        nvlist_free(props);
        nvlist_free(origprops);
        nvlist_free(errors);
        releasef(fd);

        /* The error-list failure is reported only if nothing else failed. */
        if (error == 0)
                error = props_error;

        return (error);
}
3760 
3761 /*
3762  * inputs:
3763  * zc_name      name of snapshot to send
3764  * zc_cookie    file descriptor to send stream to
3765  * zc_obj       fromorigin flag (mutually exclusive with zc_fromobj)
3766  * zc_sendobj   objsetid of snapshot to send
3767  * zc_fromobj   objsetid of incremental fromsnap (may be zero)
3768  *
3769  * outputs: none
3770  */
3771 static int
3772 zfs_ioc_send(zfs_cmd_t *zc)
3773 {
3774         objset_t *fromsnap = NULL;
3775         objset_t *tosnap;
3776         file_t *fp;
3777         int error;
3778         offset_t off;
3779         dsl_dataset_t *ds;
3780         dsl_dataset_t *dsfrom = NULL;
3781         spa_t *spa;
3782         dsl_pool_t *dp;
3783 
3784         error = spa_open(zc->zc_name, &spa, FTAG);
3785         if (error)
3786                 return (error);
3787 
3788         dp = spa_get_dsl(spa);
3789         rw_enter(&dp->dp_config_rwlock, RW_READER);
3790         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
3791         rw_exit(&dp->dp_config_rwlock);
3792         if (error) {
3793                 spa_close(spa, FTAG);
3794                 return (error);
3795         }
3796 
3797         error = dmu_objset_from_ds(ds, &tosnap);
3798         if (error) {
3799                 dsl_dataset_rele(ds, FTAG);
3800                 spa_close(spa, FTAG);
3801                 return (error);
3802         }
3803 
3804         if (zc->zc_fromobj != 0) {
3805                 rw_enter(&dp->dp_config_rwlock, RW_READER);
3806                 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
3807                 rw_exit(&dp->dp_config_rwlock);
3808                 spa_close(spa, FTAG);
3809                 if (error) {
3810                         dsl_dataset_rele(ds, FTAG);
3811                         return (error);
3812                 }
3813                 error = dmu_objset_from_ds(dsfrom, &fromsnap);
3814                 if (error) {
3815                         dsl_dataset_rele(dsfrom, FTAG);
3816                         dsl_dataset_rele(ds, FTAG);
3817                         return (error);
3818                 }
3819         } else {
3820                 spa_close(spa, FTAG);
3821         }
3822 
3823         fp = getf(zc->zc_cookie);
3824         if (fp == NULL) {
3825                 dsl_dataset_rele(ds, FTAG);
3826                 if (dsfrom)
3827                         dsl_dataset_rele(dsfrom, FTAG);
3828                 return (EBADF);
3829         }
3830 
3831         off = fp->f_offset;
3832         error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
3833 
3834         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3835                 fp->f_offset = off;
3836         releasef(zc->zc_cookie);
3837         if (dsfrom)
3838                 dsl_dataset_rele(dsfrom, FTAG);
3839         dsl_dataset_rele(ds, FTAG);
3840         return (error);
3841 }
3842 
3843 static int
3844 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3845 {
3846         int id, error;
3847 
3848         error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3849             &zc->zc_inject_record);
3850 
3851         if (error == 0)
3852                 zc->zc_guid = (uint64_t)id;
3853 
3854         return (error);
3855 }
3856 
3857 static int
3858 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3859 {
3860         return (zio_clear_fault((int)zc->zc_guid));
3861 }
3862 
3863 static int
3864 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3865 {
3866         int id = (int)zc->zc_guid;
3867         int error;
3868 
3869         error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3870             &zc->zc_inject_record);
3871 
3872         zc->zc_guid = id;
3873 
3874         return (error);
3875 }
3876 
3877 static int
3878 zfs_ioc_error_log(zfs_cmd_t *zc)
3879 {
3880         spa_t *spa;
3881         int error;
3882         size_t count = (size_t)zc->zc_nvlist_dst_size;
3883 
3884         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
3885                 return (error);
3886 
3887         error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
3888             &count);
3889         if (error == 0)
3890                 zc->zc_nvlist_dst_size = count;
3891         else
3892                 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
3893 
3894         spa_close(spa, FTAG);
3895 
3896         return (error);
3897 }
3898 
3899 static int
3900 zfs_ioc_clear(zfs_cmd_t *zc)
3901 {
3902         spa_t *spa;
3903         vdev_t *vd;
3904         int error;
3905 
3906         /*
3907          * On zpool clear we also fix up missing slogs
3908          */
3909         mutex_enter(&spa_namespace_lock);
3910         spa = spa_lookup(zc->zc_name);
3911         if (spa == NULL) {
3912                 mutex_exit(&spa_namespace_lock);
3913                 return (EIO);
3914         }
3915         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
3916                 /* we need to let spa_open/spa_load clear the chains */
3917                 spa_set_log_state(spa, SPA_LOG_CLEAR);
3918         }
3919         spa->spa_last_open_failed = 0;
3920         mutex_exit(&spa_namespace_lock);
3921 
3922         if (zc->zc_cookie & ZPOOL_NO_REWIND) {
3923                 error = spa_open(zc->zc_name, &spa, FTAG);
3924         } else {
3925                 nvlist_t *policy;
3926                 nvlist_t *config = NULL;
3927 
3928                 if (zc->zc_nvlist_src == NULL)
3929                         return (EINVAL);
3930 
3931                 if ((error = get_nvlist(zc->zc_nvlist_src,
3932                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
3933                         error = spa_open_rewind(zc->zc_name, &spa, FTAG,
3934                             policy, &config);
3935                         if (config != NULL) {
3936                                 int err;
3937 
3938                                 if ((err = put_nvlist(zc, config)) != 0)
3939                                         error = err;
3940                                 nvlist_free(config);
3941                         }
3942                         nvlist_free(policy);
3943                 }
3944         }
3945 
3946         if (error)
3947                 return (error);
3948 
3949         spa_vdev_state_enter(spa, SCL_NONE);
3950 
3951         if (zc->zc_guid == 0) {
3952                 vd = NULL;
3953         } else {
3954                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
3955                 if (vd == NULL) {
3956                         (void) spa_vdev_state_exit(spa, NULL, ENODEV);
3957                         spa_close(spa, FTAG);
3958                         return (ENODEV);
3959                 }
3960         }
3961 
3962         vdev_clear(spa, vd);
3963 
3964         (void) spa_vdev_state_exit(spa, NULL, 0);
3965 
3966         /*
3967          * Resume any suspended I/Os.
3968          */
3969         if (zio_resume(spa) != 0)
3970                 error = EIO;
3971 
3972         spa_close(spa, FTAG);
3973 
3974         return (error);
3975 }
3976 
3977 /*
3978  * inputs:
3979  * zc_name      name of filesystem
3980  * zc_value     name of origin snapshot
3981  *
3982  * outputs:
3983  * zc_string    name of conflicting snapshot, if there is one
3984  */
3985 static int
3986 zfs_ioc_promote(zfs_cmd_t *zc)
3987 {
3988         char *cp;
3989 
3990         /*
3991          * We don't need to unmount *all* the origin fs's snapshots, but
3992          * it's easier.
3993          */
3994         cp = strchr(zc->zc_value, '@');
3995         if (cp)
3996                 *cp = '\0';
3997         (void) dmu_objset_find(zc->zc_value,
3998             zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
3999         return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4000 }
4001 
4002 /*
4003  * Retrieve a single {user|group}{used|quota}@... property.
4004  *
4005  * inputs:
4006  * zc_name      name of filesystem
4007  * zc_objset_type zfs_userquota_prop_t
4008  * zc_value     domain name (eg. "S-1-234-567-89")
4009  * zc_guid      RID/UID/GID
4010  *
4011  * outputs:
4012  * zc_cookie    property value
4013  */
4014 static int
4015 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4016 {
4017         zfsvfs_t *zfsvfs;
4018         int error;
4019 
4020         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4021                 return (EINVAL);
4022 
4023         error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4024         if (error)
4025                 return (error);
4026 
4027         error = zfs_userspace_one(zfsvfs,
4028             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4029         zfsvfs_rele(zfsvfs, FTAG);
4030 
4031         return (error);
4032 }
4033 
4034 /*
4035  * inputs:
4036  * zc_name              name of filesystem
4037  * zc_cookie            zap cursor
4038  * zc_objset_type       zfs_userquota_prop_t
4039  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4040  *
4041  * outputs:
4042  * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
4043  * zc_cookie    zap cursor
4044  */
4045 static int
4046 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4047 {
4048         zfsvfs_t *zfsvfs;
4049         int bufsize = zc->zc_nvlist_dst_size;
4050 
4051         if (bufsize <= 0)
4052                 return (ENOMEM);
4053 
4054         int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4055         if (error)
4056                 return (error);
4057 
4058         void *buf = kmem_alloc(bufsize, KM_SLEEP);
4059 
4060         error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4061             buf, &zc->zc_nvlist_dst_size);
4062 
4063         if (error == 0) {
4064                 error = xcopyout(buf,
4065                     (void *)(uintptr_t)zc->zc_nvlist_dst,
4066                     zc->zc_nvlist_dst_size);
4067         }
4068         kmem_free(buf, bufsize);
4069         zfsvfs_rele(zfsvfs, FTAG);
4070 
4071         return (error);
4072 }
4073 
4074 /*
4075  * inputs:
4076  * zc_name              name of filesystem
4077  *
4078  * outputs:
4079  * none
4080  */
4081 static int
4082 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4083 {
4084         objset_t *os;
4085         int error = 0;
4086         zfsvfs_t *zfsvfs;
4087 
4088         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4089                 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4090                         /*
4091                          * If userused is not enabled, it may be because the
4092                          * objset needs to be closed & reopened (to grow the
4093                          * objset_phys_t).  Suspend/resume the fs will do that.
4094                          */
4095                         error = zfs_suspend_fs(zfsvfs);
4096                         if (error == 0)
4097                                 error = zfs_resume_fs(zfsvfs, zc->zc_name);
4098                 }
4099                 if (error == 0)
4100                         error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4101                 VFS_RELE(zfsvfs->z_vfs);
4102         } else {
4103                 /* XXX kind of reading contents without owning */
4104                 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4105                 if (error)
4106                         return (error);
4107 
4108                 error = dmu_objset_userspace_upgrade(os);
4109                 dmu_objset_rele(os, FTAG);
4110         }
4111 
4112         return (error);
4113 }
4114 
/*
 * We don't want to have a hard dependency
 * against some special symbols in sharefs,
 * nfs, and smbsrv.  Determine them if needed when
 * the first file system is shared.
 * None of sharefs, nfs or smbsrv are unloadable modules.
 */
/*
 * Entry points looked up with ddi_modsym() on first use; see zfs_ioc_share()
 * and zfs_init_sharefs().  Each stays NULL until it has been resolved.
 */
int (*znfsexport_fs)(void *arg);        /* "nfs_export" from fs/nfs */
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
                                        /* "sharefs_impl" from fs/sharefs */
int (*zsmbexport_fs)(void *arg, boolean_t add_share);
                                        /* "smb_server_share" from drv/smbsrv */

/*
 * Set to 1 by zfs_ioc_share() once the module, its export symbol and the
 * sharefs entry point have all been resolved for that share type.
 */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Handles returned by ddi_modopen(); NULL until the module has been opened. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and entry points above are being resolved. */
kmutex_t zfs_share_lock;
4133 
4134 static int
4135 zfs_init_sharefs()
4136 {
4137         int error;
4138 
4139         ASSERT(MUTEX_HELD(&zfs_share_lock));
4140         /* Both NFS and SMB shares also require sharetab support. */
4141         if (sharefs_mod == NULL && ((sharefs_mod =
4142             ddi_modopen("fs/sharefs",
4143             KRTLD_MODE_FIRST, &error)) == NULL)) {
4144                 return (ENOSYS);
4145         }
4146         if (zshare_fs == NULL && ((zshare_fs =
4147             (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4148             ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4149                 return (ENOSYS);
4150         }
4151         return (0);
4152 }
4153 
4154 static int
4155 zfs_ioc_share(zfs_cmd_t *zc)
4156 {
4157         int error;
4158         int opcode;
4159 
4160         switch (zc->zc_share.z_sharetype) {
4161         case ZFS_SHARE_NFS:
4162         case ZFS_UNSHARE_NFS:
4163                 if (zfs_nfsshare_inited == 0) {
4164                         mutex_enter(&zfs_share_lock);
4165                         if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4166                             KRTLD_MODE_FIRST, &error)) == NULL)) {
4167                                 mutex_exit(&zfs_share_lock);
4168                                 return (ENOSYS);
4169                         }
4170                         if (znfsexport_fs == NULL &&
4171                             ((znfsexport_fs = (int (*)(void *))
4172                             ddi_modsym(nfs_mod,
4173                             "nfs_export", &error)) == NULL)) {
4174                                 mutex_exit(&zfs_share_lock);
4175                                 return (ENOSYS);
4176                         }
4177                         error = zfs_init_sharefs();
4178                         if (error) {
4179                                 mutex_exit(&zfs_share_lock);
4180                                 return (ENOSYS);
4181                         }
4182                         zfs_nfsshare_inited = 1;
4183                         mutex_exit(&zfs_share_lock);
4184                 }
4185                 break;
4186         case ZFS_SHARE_SMB:
4187         case ZFS_UNSHARE_SMB:
4188                 if (zfs_smbshare_inited == 0) {
4189                         mutex_enter(&zfs_share_lock);
4190                         if (smbsrv_mod == NULL && ((smbsrv_mod =
4191                             ddi_modopen("drv/smbsrv",
4192                             KRTLD_MODE_FIRST, &error)) == NULL)) {
4193                                 mutex_exit(&zfs_share_lock);
4194                                 return (ENOSYS);
4195                         }
4196                         if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4197                             (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4198                             "smb_server_share", &error)) == NULL)) {
4199                                 mutex_exit(&zfs_share_lock);
4200                                 return (ENOSYS);
4201                         }
4202                         error = zfs_init_sharefs();
4203                         if (error) {
4204                                 mutex_exit(&zfs_share_lock);
4205                                 return (ENOSYS);
4206                         }
4207                         zfs_smbshare_inited = 1;
4208                         mutex_exit(&zfs_share_lock);
4209                 }
4210                 break;
4211         default:
4212                 return (EINVAL);
4213         }
4214 
4215         switch (zc->zc_share.z_sharetype) {
4216         case ZFS_SHARE_NFS:
4217         case ZFS_UNSHARE_NFS:
4218                 if (error =
4219                     znfsexport_fs((void *)
4220                     (uintptr_t)zc->zc_share.z_exportdata))
4221                         return (error);
4222                 break;
4223         case ZFS_SHARE_SMB:
4224         case ZFS_UNSHARE_SMB:
4225                 if (error = zsmbexport_fs((void *)
4226                     (uintptr_t)zc->zc_share.z_exportdata,
4227                     zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4228                     B_TRUE: B_FALSE)) {
4229                         return (error);
4230                 }
4231                 break;
4232         }
4233 
4234         opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4235             zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4236             SHAREFS_ADD : SHAREFS_REMOVE;
4237 
4238         /*
4239          * Add or remove share from sharetab
4240          */
4241         error = zshare_fs(opcode,
4242             (void *)(uintptr_t)zc->zc_share.z_sharedata,
4243             zc->zc_share.z_sharemax);
4244 
4245         return (error);
4246 
4247 }
4248 
/*
 * Single-entry ACE list that zfs_ioc_smb_acl() passes to VOP_CREATE() as the
 * ACL for each file it creates in the shares directory.
 * NOTE(review): the initializer presumably follows ace_t's
 * {who, access mask, flags, type} order -- confirm against the ace_t
 * definition.
 */
ace_t full_access[] = {
        {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4252 
4253 /*
4254  * inputs:
4255  * zc_name              name of containing filesystem
4256  * zc_obj               object # beyond which we want next in-use object #
4257  *
4258  * outputs:
4259  * zc_obj               next in-use object #
4260  */
4261 static int
4262 zfs_ioc_next_obj(zfs_cmd_t *zc)
4263 {
4264         objset_t *os = NULL;
4265         int error;
4266 
4267         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4268         if (error)
4269                 return (error);
4270 
4271         error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4272             os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
4273 
4274         dmu_objset_rele(os, FTAG);
4275         return (error);
4276 }
4277 
4278 /*
4279  * inputs:
4280  * zc_name              name of filesystem
4281  * zc_value             prefix name for snapshot
4282  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
4283  *
4284  * outputs:
4285  */
4286 static int
4287 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
4288 {
4289         char *snap_name;
4290         int error;
4291 
4292         snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
4293             (u_longlong_t)ddi_get_lbolt64());
4294 
4295         if (strlen(snap_name) >= MAXNAMELEN) {
4296                 strfree(snap_name);
4297                 return (E2BIG);
4298         }
4299 
4300         error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name,
4301             NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd);
4302         if (error != 0) {
4303                 strfree(snap_name);
4304                 return (error);
4305         }
4306 
4307         (void) strcpy(zc->zc_value, snap_name);
4308         strfree(snap_name);
4309         return (0);
4310 }
4311 
4312 /*
4313  * inputs:
4314  * zc_name              name of "to" snapshot
4315  * zc_value             name of "from" snapshot
4316  * zc_cookie            file descriptor to write diff data on
4317  *
4318  * outputs:
4319  * dmu_diff_record_t's to the file descriptor
4320  */
4321 static int
4322 zfs_ioc_diff(zfs_cmd_t *zc)
4323 {
4324         objset_t *fromsnap;
4325         objset_t *tosnap;
4326         file_t *fp;
4327         offset_t off;
4328         int error;
4329 
4330         error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
4331         if (error)
4332                 return (error);
4333 
4334         error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
4335         if (error) {
4336                 dmu_objset_rele(tosnap, FTAG);
4337                 return (error);
4338         }
4339 
4340         fp = getf(zc->zc_cookie);
4341         if (fp == NULL) {
4342                 dmu_objset_rele(fromsnap, FTAG);
4343                 dmu_objset_rele(tosnap, FTAG);
4344                 return (EBADF);
4345         }
4346 
4347         off = fp->f_offset;
4348 
4349         error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off);
4350 
4351         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4352                 fp->f_offset = off;
4353         releasef(zc->zc_cookie);
4354 
4355         dmu_objset_rele(fromsnap, FTAG);
4356         dmu_objset_rele(tosnap, FTAG);
4357         return (error);
4358 }
4359 
4360 /*
4361  * Remove all ACL files in shares dir
4362  */
4363 static int
4364 zfs_smb_acl_purge(znode_t *dzp)
4365 {
4366         zap_cursor_t    zc;
4367         zap_attribute_t zap;
4368         zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4369         int error;
4370 
4371         for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4372             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4373             zap_cursor_advance(&zc)) {
4374                 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4375                     NULL, 0)) != 0)
4376                         break;
4377         }
4378         zap_cursor_fini(&zc);
4379         return (error);
4380 }
4381 
4382 static int
4383 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4384 {
4385         vnode_t *vp;
4386         znode_t *dzp;
4387         vnode_t *resourcevp = NULL;
4388         znode_t *sharedir;
4389         zfsvfs_t *zfsvfs;
4390         nvlist_t *nvlist;
4391         char *src, *target;
4392         vattr_t vattr;
4393         vsecattr_t vsec;
4394         int error = 0;
4395 
4396         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4397             NO_FOLLOW, NULL, &vp)) != 0)
4398                 return (error);
4399 
4400         /* Now make sure mntpnt and dataset are ZFS */
4401 
4402         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4403             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4404             zc->zc_name) != 0)) {
4405                 VN_RELE(vp);
4406                 return (EINVAL);
4407         }
4408 
4409         dzp = VTOZ(vp);
4410         zfsvfs = dzp->z_zfsvfs;
4411         ZFS_ENTER(zfsvfs);
4412 
4413         /*
4414          * Create share dir if its missing.
4415          */
4416         mutex_enter(&zfsvfs->z_lock);
4417         if (zfsvfs->z_shares_dir == 0) {
4418                 dmu_tx_t *tx;
4419 
4420                 tx = dmu_tx_create(zfsvfs->z_os);
4421                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4422                     ZFS_SHARES_DIR);
4423                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4424                 error = dmu_tx_assign(tx, TXG_WAIT);
4425                 if (error) {
4426                         dmu_tx_abort(tx);
4427                 } else {
4428                         error = zfs_create_share_dir(zfsvfs, tx);
4429                         dmu_tx_commit(tx);
4430                 }
4431                 if (error) {
4432                         mutex_exit(&zfsvfs->z_lock);
4433                         VN_RELE(vp);
4434                         ZFS_EXIT(zfsvfs);
4435                         return (error);
4436                 }
4437         }
4438         mutex_exit(&zfsvfs->z_lock);
4439 
4440         ASSERT(zfsvfs->z_shares_dir);
4441         if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4442                 VN_RELE(vp);
4443                 ZFS_EXIT(zfsvfs);
4444                 return (error);
4445         }
4446 
4447         switch (zc->zc_cookie) {
4448         case ZFS_SMB_ACL_ADD:
4449                 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4450                 vattr.va_type = VREG;
4451                 vattr.va_mode = S_IFREG|0777;
4452                 vattr.va_uid = 0;
4453                 vattr.va_gid = 0;
4454 
4455                 vsec.vsa_mask = VSA_ACE;
4456                 vsec.vsa_aclentp = &full_access;
4457                 vsec.vsa_aclentsz = sizeof (full_access);
4458                 vsec.vsa_aclcnt = 1;
4459 
4460                 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4461                     &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4462                 if (resourcevp)
4463                         VN_RELE(resourcevp);
4464                 break;
4465 
4466         case ZFS_SMB_ACL_REMOVE:
4467                 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4468                     NULL, 0);
4469                 break;
4470 
4471         case ZFS_SMB_ACL_RENAME:
4472                 if ((error = get_nvlist(zc->zc_nvlist_src,
4473                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4474                         VN_RELE(vp);
4475                         ZFS_EXIT(zfsvfs);
4476                         return (error);
4477                 }
4478                 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4479                     nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4480                     &target)) {
4481                         VN_RELE(vp);
4482                         VN_RELE(ZTOV(sharedir));
4483                         ZFS_EXIT(zfsvfs);
4484                         nvlist_free(nvlist);
4485                         return (error);
4486                 }
4487                 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4488                     kcred, NULL, 0);
4489                 nvlist_free(nvlist);
4490                 break;
4491 
4492         case ZFS_SMB_ACL_PURGE:
4493                 error = zfs_smb_acl_purge(sharedir);
4494                 break;
4495 
4496         default:
4497                 error = EINVAL;
4498                 break;
4499         }
4500 
4501         VN_RELE(vp);
4502         VN_RELE(ZTOV(sharedir));
4503 
4504         ZFS_EXIT(zfsvfs);
4505 
4506         return (error);
4507 }
4508 
4509 /*
4510  * inputs:
4511  * zc_name              name of filesystem
4512  * zc_value             short name of snap
4513  * zc_string            user-supplied tag for this hold
4514  * zc_cookie            recursive flag
4515  * zc_temphold          set if hold is temporary
4516  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
4517  * zc_sendobj           if non-zero, the objid for zc_name@zc_value
4518  * zc_createtxg         if zc_sendobj is non-zero, snap must have zc_createtxg
4519  *
4520  * outputs:             none
4521  */
4522 static int
4523 zfs_ioc_hold(zfs_cmd_t *zc)
4524 {
4525         boolean_t recursive = zc->zc_cookie;
4526         spa_t *spa;
4527         dsl_pool_t *dp;
4528         dsl_dataset_t *ds;
4529         int error;
4530         minor_t minor = 0;
4531 
4532         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4533                 return (EINVAL);
4534 
4535         if (zc->zc_sendobj == 0) {
4536                 return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4537                     zc->zc_string, recursive, zc->zc_temphold,
4538                     zc->zc_cleanup_fd));
4539         }
4540 
4541         if (recursive)
4542                 return (EINVAL);
4543 
4544         error = spa_open(zc->zc_name, &spa, FTAG);
4545         if (error)
4546                 return (error);
4547 
4548         dp = spa_get_dsl(spa);
4549         rw_enter(&dp->dp_config_rwlock, RW_READER);
4550         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4551         rw_exit(&dp->dp_config_rwlock);
4552         spa_close(spa, FTAG);
4553         if (error)
4554                 return (error);
4555 
4556         /*
4557          * Until we have a hold on this snapshot, it's possible that
4558          * zc_sendobj could've been destroyed and reused as part
4559          * of a later txg.  Make sure we're looking at the right object.
4560          */
4561         if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
4562                 dsl_dataset_rele(ds, FTAG);
4563                 return (ENOENT);
4564         }
4565 
4566         if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
4567                 error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4568                 if (error) {
4569                         dsl_dataset_rele(ds, FTAG);
4570                         return (error);
4571                 }
4572         }
4573 
4574         error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
4575             zc->zc_temphold);
4576         if (minor != 0) {
4577                 if (error == 0) {
4578                         dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
4579                             minor);
4580                 }
4581                 zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4582         }
4583         dsl_dataset_rele(ds, FTAG);
4584 
4585         return (error);
4586 }
4587 
4588 /*
4589  * inputs:
4590  * zc_name      name of dataset from which we're releasing a user hold
4591  * zc_value     short name of snap
4592  * zc_string    user-supplied tag for this hold
4593  * zc_cookie    recursive flag
4594  *
4595  * outputs:     none
4596  */
4597 static int
4598 zfs_ioc_release(zfs_cmd_t *zc)
4599 {
4600         boolean_t recursive = zc->zc_cookie;
4601 
4602         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4603                 return (EINVAL);
4604 
4605         return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4606             zc->zc_string, recursive));
4607 }
4608 
4609 /*
4610  * inputs:
4611  * zc_name              name of filesystem
4612  *
4613  * outputs:
4614  * zc_nvlist_src{_size} nvlist of snapshot holds
4615  */
4616 static int
4617 zfs_ioc_get_holds(zfs_cmd_t *zc)
4618 {
4619         nvlist_t *nvp;
4620         int error;
4621 
4622         if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4623                 error = put_nvlist(zc, nvp);
4624                 nvlist_free(nvp);
4625         }
4626 
4627         return (error);
4628 }
4629 
/*
 * Dispatch table for the /dev/zfs ioctls.  zfsdev_ioctl() indexes this
 * array with (cmd - ZFS_IOC), so the position of each entry is significant
 * and must match the ioctl numbering; never reorder or insert in the middle.
 *
 * Each entry lists, in order:
 *   - the ioctl handler,
 *   - the security policy callback (skipped for FKIOCTL callers),
 *   - the kind of name expected in zc_name (POOL_NAME, DATASET_NAME or
 *     NO_NAME), validated before the handler runs,
 *   - whether a successful call is recorded with zfs_log_history(),
 *   - the pool state checks passed to pool_status_check().
 *
 * pool create, destroy, and export don't log the history as part of
 * zfsdev_ioctl (their log flag is B_FALSE); rather, zfs_ioc_pool_create
 * and zfs_ioc_pool_export do the logging of those commands themselves.
 *
 * NOTE(review): zfs_ioc_pool_freeze pairs NO_NAME with POOL_CHECK_READONLY,
 * but zfsdev_ioctl() only calls pool_status_check() for POOL_NAME and
 * DATASET_NAME entries, so that check appears never to run -- confirm intent.
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
        { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
            POOL_CHECK_READONLY },
        { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_setfru,  zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE,
            POOL_CHECK_NONE },
        { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_NONE },
        { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
            B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME,
            B_FALSE, POOL_CHECK_NONE },
        { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME,
            B_FALSE, POOL_CHECK_NONE },
        { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
            DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
            B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED }
};
4753 
4754 int
4755 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
4756     zfs_ioc_poolcheck_t check)
4757 {
4758         spa_t *spa;
4759         int error;
4760 
4761         ASSERT(type == POOL_NAME || type == DATASET_NAME);
4762 
4763         if (check & POOL_CHECK_NONE)
4764                 return (0);
4765 
4766         error = spa_open(name, &spa, FTAG);
4767         if (error == 0) {
4768                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
4769                         error = EAGAIN;
4770                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
4771                         error = EROFS;
4772                 spa_close(spa, FTAG);
4773         }
4774         return (error);
4775 }
4776 
4777 /*
4778  * Find a free minor number.
4779  */
4780 minor_t
4781 zfsdev_minor_alloc(void)
4782 {
4783         static minor_t last_minor;
4784         minor_t m;
4785 
4786         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4787 
4788         for (m = last_minor + 1; m != last_minor; m++) {
4789                 if (m > ZFSDEV_MAX_MINOR)
4790                         m = 1;
4791                 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
4792                         last_minor = m;
4793                         return (m);
4794                 }
4795         }
4796 
4797         return (0);
4798 }
4799 
4800 static int
4801 zfs_ctldev_init(dev_t *devp)
4802 {
4803         minor_t minor;
4804         zfs_soft_state_t *zs;
4805 
4806         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4807         ASSERT(getminor(*devp) == 0);
4808 
4809         minor = zfsdev_minor_alloc();
4810         if (minor == 0)
4811                 return (ENXIO);
4812 
4813         if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
4814                 return (EAGAIN);
4815 
4816         *devp = makedevice(getemajor(*devp), minor);
4817 
4818         zs = ddi_get_soft_state(zfsdev_state, minor);
4819         zs->zss_type = ZSST_CTLDEV;
4820         zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
4821 
4822         return (0);
4823 }
4824 
4825 static void
4826 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
4827 {
4828         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4829 
4830         zfs_onexit_destroy(zo);
4831         ddi_soft_state_free(zfsdev_state, minor);
4832 }
4833 
4834 void *
4835 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
4836 {
4837         zfs_soft_state_t *zp;
4838 
4839         zp = ddi_get_soft_state(zfsdev_state, minor);
4840         if (zp == NULL || zp->zss_type != which)
4841                 return (NULL);
4842 
4843         return (zp->zss_data);
4844 }
4845 
4846 static int
4847 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
4848 {
4849         int error = 0;
4850 
4851         if (getminor(*devp) != 0)
4852                 return (zvol_open(devp, flag, otyp, cr));
4853 
4854         /* This is the control device. Allocate a new minor if requested. */
4855         if (flag & FEXCL) {
4856                 mutex_enter(&zfsdev_state_lock);
4857                 error = zfs_ctldev_init(devp);
4858                 mutex_exit(&zfsdev_state_lock);
4859         }
4860 
4861         return (error);
4862 }
4863 
4864 static int
4865 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
4866 {
4867         zfs_onexit_t *zo;
4868         minor_t minor = getminor(dev);
4869 
4870         if (minor == 0)
4871                 return (0);
4872 
4873         mutex_enter(&zfsdev_state_lock);
4874         zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
4875         if (zo == NULL) {
4876                 mutex_exit(&zfsdev_state_lock);
4877                 return (zvol_close(dev, flag, otyp, cr));
4878         }
4879         zfs_ctldev_destroy(zo, minor);
4880         mutex_exit(&zfsdev_state_lock);
4881 
4882         return (0);
4883 }
4884 
4885 static int
4886 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
4887 {
4888         zfs_cmd_t *zc;
4889         uint_t vec;
4890         int error, rc;
4891         minor_t minor = getminor(dev);
4892 
4893         if (minor != 0 &&
4894             zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
4895                 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
4896 
4897         vec = cmd - ZFS_IOC;
4898         ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
4899 
4900         if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
4901                 return (EINVAL);
4902 
4903         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
4904 
4905         error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
4906         if (error != 0)
4907                 error = EFAULT;
4908 
4909         if ((error == 0) && !(flag & FKIOCTL))
4910                 error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
4911 
4912         /*
4913          * Ensure that all pool/dataset names are valid before we pass down to
4914          * the lower layers.
4915          */
4916         if (error == 0) {
4917                 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4918                 zc->zc_iflags = flag & FKIOCTL;
4919                 switch (zfs_ioc_vec[vec].zvec_namecheck) {
4920                 case POOL_NAME:
4921                         if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
4922                                 error = EINVAL;
4923                         error = pool_status_check(zc->zc_name,
4924                             zfs_ioc_vec[vec].zvec_namecheck,
4925                             zfs_ioc_vec[vec].zvec_pool_check);
4926                         break;
4927 
4928                 case DATASET_NAME:
4929                         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
4930                                 error = EINVAL;
4931                         error = pool_status_check(zc->zc_name,
4932                             zfs_ioc_vec[vec].zvec_namecheck,
4933                             zfs_ioc_vec[vec].zvec_pool_check);
4934                         break;
4935 
4936                 case NO_NAME:
4937                         break;
4938                 }
4939         }
4940 
4941         if (error == 0)
4942                 error = zfs_ioc_vec[vec].zvec_func(zc);
4943 
4944         rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
4945         if (error == 0) {
4946                 if (rc != 0)
4947                         error = EFAULT;
4948                 if (zfs_ioc_vec[vec].zvec_his_log)
4949                         zfs_log_history(zc);
4950         }
4951 
4952         kmem_free(zc, sizeof (zfs_cmd_t));
4953         return (error);
4954 }
4955 
4956 static int
4957 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4958 {
4959         if (cmd != DDI_ATTACH)
4960                 return (DDI_FAILURE);
4961 
4962         if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
4963             DDI_PSEUDO, 0) == DDI_FAILURE)
4964                 return (DDI_FAILURE);
4965 
4966         zfs_dip = dip;
4967 
4968         ddi_report_dev(dip);
4969 
4970         return (DDI_SUCCESS);
4971 }
4972 
4973 static int
4974 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4975 {
4976         if (spa_busy() || zfs_busy() || zvol_busy())
4977                 return (DDI_FAILURE);
4978 
4979         if (cmd != DDI_DETACH)
4980                 return (DDI_FAILURE);
4981 
4982         zfs_dip = NULL;
4983 
4984         ddi_prop_remove_all(dip);
4985         ddi_remove_minor_node(dip, NULL);
4986 
4987         return (DDI_SUCCESS);
4988 }
4989 
4990 /*ARGSUSED*/
4991 static int
4992 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
4993 {
4994         switch (infocmd) {
4995         case DDI_INFO_DEVT2DEVINFO:
4996                 *result = zfs_dip;
4997                 return (DDI_SUCCESS);
4998 
4999         case DDI_INFO_DEVT2INSTANCE:
5000                 *result = (void *)0;
5001                 return (DDI_SUCCESS);
5002         }
5003 
5004         return (DDI_FAILURE);
5005 }
5006 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * Only open, close and ioctl are implemented in this file; each of them
 * looks at the minor number and either handles the control device itself
 * or forwards to the corresponding zvol_* routine.  Note that an exclusive
 * (FEXCL) open of /dev/zfs is re-pointed by zfsdev_open() at a freshly
 * allocated non-zero minor carrying ZSST_CTLDEV soft state, so a non-zero
 * minor is not necessarily a zvol.
 */
static struct cb_ops zfs_cb_ops = {
        zfsdev_open,    /* open */
        zfsdev_close,   /* close */
        zvol_strategy,  /* strategy */
        nodev,          /* print */
        zvol_dump,      /* dump */
        zvol_read,      /* read */
        zvol_write,     /* write */
        zfsdev_ioctl,   /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        nodev,          /* segmap */
        nochpoll,       /* poll */
        ddi_prop_op,    /* prop_op */
        NULL,           /* streamtab */
        D_NEW | D_MP | D_64BIT,         /* Driver compatibility flag */
        CB_REV,         /* version */
        nodev,          /* async read */
        nodev,          /* async write */
};
5036 
/*
 * Device operations for the zfs driver: wires up the getinfo, attach and
 * detach entry points defined above together with the character/block
 * entry points in zfs_cb_ops.  No bus or power operations are provided.
 */
static struct dev_ops zfs_dev_ops = {
        DEVO_REV,       /* version */
        0,              /* refcnt */
        zfs_info,       /* info */
        nulldev,        /* identify */
        nulldev,        /* probe */
        zfs_attach,     /* attach */
        zfs_detach,     /* detach */
        nodev,          /* reset */
        &zfs_cb_ops,        /* driver operations */
        NULL,           /* no bus operations */
        NULL,           /* power */
        ddi_quiesce_not_needed, /* quiesce */
};
5051 
/*
 * Driver linkage: identifies this module as a device driver, gives it a
 * descriptive name, and points at its dev_ops.
 */
static struct modldrv zfs_modldrv = {
        &mod_driverops,
        "ZFS storage pool",
        &zfs_dev_ops
};
5057 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info().
 * It is a NULL-terminated list with two entries: zfs_modlfs (defined
 * outside this section; presumably the filesystem linkage) and the driver
 * linkage zfs_modldrv.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&zfs_modlfs,
        (void *)&zfs_modldrv,
        NULL
};
5064 
5065 
/*
 * Thread-specific-data keys.  Both are created with tsd_create() in
 * _init(); rrw_tsd_key is defined in another source file.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
5068 
5069 int
5070 _init(void)
5071 {
5072         int error;
5073 
5074         spa_init(FREAD | FWRITE);
5075         zfs_init();
5076         zvol_init();
5077 
5078         if ((error = mod_install(&modlinkage)) != 0) {
5079                 zvol_fini();
5080                 zfs_fini();
5081                 spa_fini();
5082                 return (error);
5083         }
5084 
5085         tsd_create(&zfs_fsyncer_key, NULL);
5086         tsd_create(&rrw_tsd_key, NULL);
5087 
5088         error = ldi_ident_from_mod(&modlinkage, &zfs_li);
5089         ASSERT(error == 0);
5090         mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5091 
5092         return (0);
5093 }
5094 
5095 int
5096 _fini(void)
5097 {
5098         int error;
5099 
5100         if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
5101                 return (EBUSY);
5102 
5103         if ((error = mod_remove(&modlinkage)) != 0)
5104                 return (error);
5105 
5106         zvol_fini();
5107         zfs_fini();
5108         spa_fini();
5109         if (zfs_nfsshare_inited)
5110                 (void) ddi_modclose(nfs_mod);
5111         if (zfs_smbshare_inited)
5112                 (void) ddi_modclose(smbsrv_mod);
5113         if (zfs_nfsshare_inited || zfs_smbshare_inited)
5114                 (void) ddi_modclose(sharefs_mod);
5115 
5116         tsd_destroy(&zfs_fsyncer_key);
5117         ldi_ident_release(zfs_li);
5118         zfs_li = NULL;
5119         mutex_destroy(&zfs_share_lock);
5120 
5121         return (error);
5122 }
5123 
5124 int
5125 _info(struct modinfo *modinfop)
5126 {
5127         return (mod_info(&modlinkage, modinfop));
5128 }