4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  24  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  26  * Copyright (c) 2014 Integros [integros.com]
  27  * Copyright 2017 Joyent, Inc.
  28  * Copyright 2017 RackTop Systems.
  29  */
  30 
  31 /*
  32  * The objective of this program is to provide a DMU/ZAP/SPA stress test
  33  * that runs entirely in userland, is easy to use, and easy to extend.
  34  *
  35  * The overall design of the ztest program is as follows:
  36  *
  37  * (1) For each major functional area (e.g. adding vdevs to a pool,
  38  *     creating and destroying datasets, reading and writing objects, etc)
  39  *     we have a simple routine to test that functionality.  These
  40  *     individual routines do not have to do anything "stressful".
  41  *
  42  * (2) We turn these simple functionality tests into a stress test by
  43  *     running them all in parallel, with as many threads as desired,
  44  *     and spread across as many datasets, objects, and vdevs as desired.
 
 
  96 #include <sys/stat.h>
  97 #include <sys/time.h>
  98 #include <sys/wait.h>
  99 #include <sys/mman.h>
 100 #include <sys/resource.h>
 101 #include <sys/zio.h>
 102 #include <sys/zil.h>
 103 #include <sys/zil_impl.h>
 104 #include <sys/vdev_impl.h>
 105 #include <sys/vdev_file.h>
 106 #include <sys/spa_impl.h>
 107 #include <sys/metaslab_impl.h>
 108 #include <sys/dsl_prop.h>
 109 #include <sys/dsl_dataset.h>
 110 #include <sys/dsl_destroy.h>
 111 #include <sys/dsl_scan.h>
 112 #include <sys/zio_checksum.h>
 113 #include <sys/refcount.h>
 114 #include <sys/zfeature.h>
 115 #include <sys/dsl_userhold.h>
 116 #include <sys/abd.h>
 117 #include <stdio.h>
 118 #include <stdio_ext.h>
 119 #include <stdlib.h>
 120 #include <unistd.h>
 121 #include <signal.h>
 122 #include <umem.h>
 123 #include <dlfcn.h>
 124 #include <ctype.h>
 125 #include <math.h>
 126 #include <sys/fs/zfs.h>
 127 #include <libnvpair.h>
 128 #include <libcmdutils.h>
 129 
 130 static int ztest_fd_data = -1;
 131 static int ztest_fd_rand = -1;
 132 
 133 typedef struct ztest_shared_hdr {
 134         uint64_t        zh_hdr_size;
 135         uint64_t        zh_opts_size;
 136         uint64_t        zh_size;
 137         uint64_t        zh_stats_size;
 138         uint64_t        zh_stats_count;
 139         uint64_t        zh_ds_size;
 140         uint64_t        zh_ds_count;
 141 } ztest_shared_hdr_t;
 142 
 143 static ztest_shared_hdr_t *ztest_shared_hdr;
 144 
 145 typedef struct ztest_shared_opts {
 146         char zo_pool[ZFS_MAX_DATASET_NAME_LEN];
 147         char zo_dir[ZFS_MAX_DATASET_NAME_LEN];
 148         char zo_alt_ztest[MAXNAMELEN];
 149         char zo_alt_libpath[MAXNAMELEN];
 
 311 #define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c])
 312 
 313 /*
 314  * Note: these aren't static because we want dladdr() to work.
 315  */
 316 ztest_func_t ztest_dmu_read_write;
 317 ztest_func_t ztest_dmu_write_parallel;
 318 ztest_func_t ztest_dmu_object_alloc_free;
 319 ztest_func_t ztest_dmu_commit_callbacks;
 320 ztest_func_t ztest_zap;
 321 ztest_func_t ztest_zap_parallel;
 322 ztest_func_t ztest_zil_commit;
 323 ztest_func_t ztest_zil_remount;
 324 ztest_func_t ztest_dmu_read_write_zcopy;
 325 ztest_func_t ztest_dmu_objset_create_destroy;
 326 ztest_func_t ztest_dmu_prealloc;
 327 ztest_func_t ztest_fzap;
 328 ztest_func_t ztest_dmu_snapshot_create_destroy;
 329 ztest_func_t ztest_dsl_prop_get_set;
 330 ztest_func_t ztest_spa_prop_get_set;
 331 ztest_func_t ztest_spa_create_destroy;
 332 ztest_func_t ztest_fault_inject;
 333 ztest_func_t ztest_ddt_repair;
 334 ztest_func_t ztest_dmu_snapshot_hold;
 335 ztest_func_t ztest_spa_rename;
 336 ztest_func_t ztest_scrub;
 337 ztest_func_t ztest_dsl_dataset_promote_busy;
 338 ztest_func_t ztest_vdev_attach_detach;
 339 ztest_func_t ztest_vdev_LUN_growth;
 340 ztest_func_t ztest_vdev_add_remove;
 341 ztest_func_t ztest_vdev_aux_add_remove;
 342 ztest_func_t ztest_split_pool;
 343 ztest_func_t ztest_reguid;
 344 ztest_func_t ztest_spa_upgrade;
 345 ztest_func_t ztest_device_removal;
 346 ztest_func_t ztest_remap_blocks;
 347 
 348 uint64_t zopt_always = 0ULL * NANOSEC;          /* all the time */
 349 uint64_t zopt_incessant = 1ULL * NANOSEC / 10;  /* every 1/10 second */
 350 uint64_t zopt_often = 1ULL * NANOSEC;           /* every second */
 351 uint64_t zopt_sometimes = 10ULL * NANOSEC;      /* every 10 seconds */
 352 uint64_t zopt_rarely = 60ULL * NANOSEC;         /* every 60 seconds */
 353 
 354 ztest_info_t ztest_info[] = {
 355         { ztest_dmu_read_write,                 1,      &zopt_always        },
 356         { ztest_dmu_write_parallel,             10,     &zopt_always        },
 357         { ztest_dmu_object_alloc_free,          1,      &zopt_always        },
 358         { ztest_dmu_commit_callbacks,           1,      &zopt_always        },
 359         { ztest_zap,                            30,     &zopt_always        },
 360         { ztest_zap_parallel,                   100,    &zopt_always        },
 361         { ztest_split_pool,                     1,      &zopt_always        },
 362         { ztest_zil_commit,                     1,      &zopt_incessant     },
 363         { ztest_zil_remount,                    1,      &zopt_sometimes     },
 364         { ztest_dmu_read_write_zcopy,           1,      &zopt_often },
 365         { ztest_dmu_objset_create_destroy,      1,      &zopt_often },
 366         { ztest_dsl_prop_get_set,               1,      &zopt_often },
 367         { ztest_spa_prop_get_set,               1,      &zopt_sometimes     },
 368 #if 0
 369         { ztest_dmu_prealloc,                   1,      &zopt_sometimes     },
 370 #endif
 371         { ztest_fzap,                           1,      &zopt_sometimes     },
 372         { ztest_dmu_snapshot_create_destroy,    1,      &zopt_sometimes     },
 373         { ztest_spa_create_destroy,             1,      &zopt_sometimes     },
 374         { ztest_fault_inject,                   1,      &zopt_sometimes     },
 375         { ztest_ddt_repair,                     1,      &zopt_sometimes     },
 376         { ztest_dmu_snapshot_hold,              1,      &zopt_sometimes     },
 377         { ztest_reguid,                         1,      &zopt_rarely        },
 378         { ztest_spa_rename,                     1,      &zopt_rarely        },
 379         { ztest_scrub,                          1,      &zopt_rarely        },
 380         { ztest_spa_upgrade,                    1,      &zopt_rarely        },
 381         { ztest_dsl_dataset_promote_busy,       1,      &zopt_rarely        },
 382         { ztest_vdev_attach_detach,             1,      &zopt_sometimes     },
 383         { ztest_vdev_LUN_growth,                1,      &zopt_rarely        },
 384         { ztest_vdev_add_remove,                1,
 385             &ztest_opts.zo_vdevtime                         },
 386         { ztest_vdev_aux_add_remove,            1,
 387             &ztest_opts.zo_vdevtime                         },
 388         { ztest_device_removal,                 1,      &zopt_sometimes     },
 389         { ztest_remap_blocks,                   1,      &zopt_sometimes }
 390 };
 391 
 392 #define ZTEST_FUNCS     (sizeof (ztest_info) / sizeof (ztest_info_t))
 393 
 394 /*
 395  * The following struct is used to hold a list of uncalled commit callbacks.
 396  * The callbacks are ordered by txg number.
 397  */
 398 typedef struct ztest_cb_list {
 399         kmutex_t zcl_callbacks_lock;
 400         list_t  zcl_callbacks;
 401 } ztest_cb_list_t;
 402 
 403 /*
 404  * Stuff we need to share writably between parent and child.
 405  */
 406 typedef struct ztest_shared {
 407         boolean_t       zs_do_init;
 408         hrtime_t        zs_proc_start;
 409         hrtime_t        zs_proc_stop;
 
 417         uint64_t        zs_space;
 418         uint64_t        zs_splits;
 419         uint64_t        zs_mirrors;
 420         uint64_t        zs_metaslab_sz;
 421         uint64_t        zs_metaslab_df_alloc_threshold;
 422         uint64_t        zs_guid;
 423 } ztest_shared_t;
 424 
 425 #define ID_PARALLEL     -1ULL
 426 
 427 static char ztest_dev_template[] = "%s/%s.%llua";
 428 static char ztest_aux_template[] = "%s/%s.%s.%llu";
 429 ztest_shared_t *ztest_shared;
 430 
 431 static spa_t *ztest_spa = NULL;
 432 static ztest_ds_t *ztest_ds;
 433 
 434 static kmutex_t ztest_vdev_lock;
 435 
 436 /*
 437  * The ztest_name_lock protects the pool and dataset namespace used by
 438  * the individual tests. To modify the namespace, consumers must grab
 439  * this lock as writer. Grabbing the lock as reader will ensure that the
 440  * namespace does not change while the lock is held.
 441  */
 442 static krwlock_t ztest_name_lock;
 443 
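/*
 * Illustrative usage of the locking protocol described above (a sketch
 * only, mirroring how the tests later in this file take the lock; not
 * additional test code):
 *
 *	rw_enter(&ztest_name_lock, RW_READER);	(pin the namespace)
 *	... operate on existing pool/dataset names ...
 *	rw_exit(&ztest_name_lock);
 *
 *	rw_enter(&ztest_name_lock, RW_WRITER);	(change the namespace)
 *	... create, rename, or destroy pools/datasets ...
 *	rw_exit(&ztest_name_lock);
 */
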
 444 static boolean_t ztest_dump_core = B_TRUE;
 445 static boolean_t ztest_exiting;
 446 
 447 /* Global commit callback list */
 448 static ztest_cb_list_t zcl;
 449 
 450 enum ztest_object {
 451         ZTEST_META_DNODE = 0,
 452         ZTEST_DIROBJ,
 453         ZTEST_OBJECTS
 454 };
 455 
 456 static void usage(boolean_t) __NORETURN;
 
 772                             zo->zo_alt_ztest);
 773                 } else if (0 != access(zo->zo_alt_libpath, X_OK)) {
 774                         ztest_dump_core = B_FALSE;
 775                         fatal(B_TRUE, "invalid alternate lib directory %s",
 776                             zo->zo_alt_libpath);
 777                 }
 778 
 779                 umem_free(cmd, MAXPATHLEN);
 780                 umem_free(realaltdir, MAXPATHLEN);
 781         }
 782 }
 783 
 784 static void
 785 ztest_kill(ztest_shared_t *zs)
 786 {
 787         zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
 788         zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
 789 
 790         /*
 791          * Before we kill off ztest, make sure that the config is updated.
 792          * See comment above spa_write_cachefile().
 793          */
 794         mutex_enter(&spa_namespace_lock);
 795         spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE);
 796         mutex_exit(&spa_namespace_lock);
 797 
 798         zfs_dbgmsg_print(FTAG);
 799         (void) kill(getpid(), SIGKILL);
 800 }
 801 
 802 static uint64_t
 803 ztest_random(uint64_t range)
 804 {
 805         uint64_t r;
 806 
 807         ASSERT3S(ztest_fd_rand, >=, 0);
 808 
 809         if (range == 0)
 810                 return (0);
 811 
 812         if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
 813                 fatal(1, "short read from /dev/urandom");
 814 
 815         return (r % range);
 816 }
 817 
 818 /* ARGSUSED */
 819 static void
 820 ztest_record_enospc(const char *s)
 821 {
 822         ztest_shared->zs_enospc_count++;
 823 }
 824 
 825 static uint64_t
 826 ztest_get_ashift(void)
 827 {
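        /*
         * If no ashift was specified on the command line, pick one at
         * random between SPA_MINBLOCKSHIFT and SPA_MINBLOCKSHIFT + 4,
         * i.e. simulated sector sizes from 512 bytes up to 8K.
         */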
 828         if (ztest_opts.zo_ashift == 0)
 829                 return (SPA_MINBLOCKSHIFT + ztest_random(5));
 830         return (ztest_opts.zo_ashift);
 831 }
 832 
 833 static nvlist_t *
 834 make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
 835 {
 836         char pathbuf[MAXPATHLEN];
 837         uint64_t vdev;
 838         nvlist_t *file;
 839 
 840         if (ashift == 0)
 841                 ashift = ztest_get_ashift();
 842 
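        /*
         * If the caller didn't supply a path, synthesize one: aux devices
         * (spares/l2cache) use the aux template and the shared zs_vdev_aux
         * counter, while ordinary leaves use the dev template and the next
         * leaf index.
         */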
 843         if (path == NULL) {
 844                 path = pathbuf;
 845 
 846                 if (aux != NULL) {
 847                         vdev = ztest_shared->zs_vdev_aux;
 848                         (void) snprintf(path, sizeof (pathbuf),
 849                             ztest_aux_template, ztest_opts.zo_dir,
 850                             pool == NULL ? ztest_opts.zo_pool : pool,
 851                             aux, vdev);
 852                 } else {
 853                         vdev = ztest_shared->zs_vdev_next_leaf++;
 854                         (void) snprintf(path, sizeof (pathbuf),
 855                             ztest_dev_template, ztest_opts.zo_dir,
 856                             pool == NULL ? ztest_opts.zo_pool : pool, vdev);
 857                 }
 858         }
 859 
 860         if (size != 0) {
 861                 int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
 862                 if (fd == -1)
 863                         fatal(1, "can't open %s", path);
 864                 if (ftruncate(fd, size) != 0)
 865                         fatal(1, "can't ftruncate %s", path);
 866                 (void) close(fd);
 867         }
 868 
 869         VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
 870         VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
 871         VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
 872         VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
 873 
 874         return (file);
 875 }
 876 
 877 static nvlist_t *
 878 make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
 879     uint64_t ashift, int r)
 880 {
 881         nvlist_t *raidz, **child;
 882         int c;
 883 
 884         if (r < 2)
 885                 return (make_vdev_file(path, aux, pool, size, ashift));
 886         child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);
 887 
 888         for (c = 0; c < r; c++)
 889                 child[c] = make_vdev_file(path, aux, pool, size, ashift);
 890 
 891         VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
 892         VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
 893             VDEV_TYPE_RAIDZ) == 0);
 894         VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
 895             ztest_opts.zo_raidz_parity) == 0);
 896         VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
 897             child, r) == 0);
 898 
 899         for (c = 0; c < r; c++)
 900                 nvlist_free(child[c]);
 901 
 902         umem_free(child, r * sizeof (nvlist_t *));
 903 
 904         return (raidz);
 905 }
 906 
 907 static nvlist_t *
 908 make_vdev_mirror(char *path, char *aux, char *pool, size_t size,
 909     uint64_t ashift, int r, int m)
 910 {
 911         nvlist_t *mirror, **child;
 912         int c;
 913 
 914         if (m < 1)
 915                 return (make_vdev_raidz(path, aux, pool, size, ashift, r));
 916 
 917         child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
 918 
 919         for (c = 0; c < m; c++)
 920                 child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r);
 921 
 922         VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
 923         VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
 924             VDEV_TYPE_MIRROR) == 0);
 925         VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
 926             child, m) == 0);
 927 
 928         for (c = 0; c < m; c++)
 929                 nvlist_free(child[c]);
 930 
 931         umem_free(child, m * sizeof (nvlist_t *));
 932 
 933         return (mirror);
 934 }
 935 
 936 static nvlist_t *
 937 make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift,
 938     int log, int r, int m, int t)
 939 {
 940         nvlist_t *root, **child;
 941         int c;
 942 
 943         ASSERT(t > 0);
 944 
 945         child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
 946 
 947         for (c = 0; c < t; c++) {
 948                 child[c] = make_vdev_mirror(path, aux, pool, size, ashift,
 949                     r, m);
 950                 VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
 951                     log) == 0);
 952         }
 953 
 954         VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
 955         VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
 956         VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
 957             child, t) == 0);
 958 
 959         for (c = 0; c < t; c++)
 960                 nvlist_free(child[c]);
 961 
 962         umem_free(child, t * sizeof (nvlist_t *));
 963 
 964         return (root);
 965 }
 966 
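/*
 * In the make_vdev_*() helpers above, "r" is the raidz width, "m" is the
 * mirror count, and "t" is the number of top-level vdevs; size 0 means
 * "don't create or truncate backing files" and ashift 0 means "pick one
 * at random".  A typical call (this mirrors how ztest_init() builds its
 * initial pool later in this file):
 *
 *	nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size,
 *	    0, 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
 */
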
 967 /*
 968  * Find a random spa version. Returns back a random spa version in the
 969  * range [initial_version, SPA_VERSION_FEATURES].
 970  */
 971 static uint64_t
 972 ztest_random_spa_version(uint64_t initial_version)
 973 {
 974         uint64_t version = initial_version;
 975 
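        /*
         * The "+ 2" below lets the draw land one step past
         * SPA_VERSION_BEFORE_FEATURES; any such value is then promoted to
         * SPA_VERSION_FEATURES, so feature-flag pools are exercised too.
         * For example, with initial_version == SPA_VERSION_BEFORE_FEATURES,
         * ztest_random(2) returns 0 or 1, giving either the legacy version
         * or a feature-flags pool.
         */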
 976         if (version <= SPA_VERSION_BEFORE_FEATURES) {
 977                 version = version +
 978                     ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 2);
 979         }
 980 
 981         if (version > SPA_VERSION_BEFORE_FEATURES)
 982                 version = SPA_VERSION_FEATURES;
 983 
 984         ASSERT(SPA_VERSION_IS_SUPPORTED(version));
 985         return (version);
 986 }
 987 
 
1002 
1003 static int
1004 ztest_random_ibshift(void)
1005 {
1006         return (DN_MIN_INDBLKSHIFT +
1007             ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1));
1008 }
1009 
1010 static uint64_t
1011 ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
1012 {
1013         uint64_t top;
1014         vdev_t *rvd = spa->spa_root_vdev;
1015         vdev_t *tvd;
1016 
1017         ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
1018 
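        /*
         * Keep drawing until we land on a usable top-level vdev: it must
         * be concrete (not an indirect vdev left behind by device removal),
         * must not be a log device unless log_ok, and must have an
         * initialized metaslab group.
         */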
1019         do {
1020                 top = ztest_random(rvd->vdev_children);
1021                 tvd = rvd->vdev_child[top];
1022         } while (!vdev_is_concrete(tvd) || (tvd->vdev_islog && !log_ok) ||
1023             tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
1024 
1025         return (top);
1026 }
1027 
1028 static uint64_t
1029 ztest_random_dsl_prop(zfs_prop_t prop)
1030 {
1031         uint64_t value;
1032 
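        /*
         * Draw random legal values for the property, retrying if the draw
         * would turn checksums off (ztest's verification and fault-injection
         * tests presume checksums remain enabled).
         */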
1033         do {
1034                 value = zfs_prop_random_value(prop, ztest_random(-1ULL));
1035         } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF);
1036 
1037         return (value);
1038 }
1039 
1040 static int
1041 ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
1042     boolean_t inherit)
 
1817         NULL,                   /* TX_RMDIR */
1818         NULL,                   /* TX_LINK */
1819         NULL,                   /* TX_RENAME */
1820         ztest_replay_write,     /* TX_WRITE */
1821         ztest_replay_truncate,  /* TX_TRUNCATE */
1822         ztest_replay_setattr,   /* TX_SETATTR */
1823         NULL,                   /* TX_ACL */
1824         NULL,                   /* TX_CREATE_ACL */
1825         NULL,                   /* TX_CREATE_ATTR */
1826         NULL,                   /* TX_CREATE_ACL_ATTR */
1827         NULL,                   /* TX_MKDIR_ACL */
1828         NULL,                   /* TX_MKDIR_ATTR */
1829         NULL,                   /* TX_MKDIR_ACL_ATTR */
1830         NULL,                   /* TX_WRITE2 */
1831 };
1832 
1833 /*
1834  * ZIL get_data callbacks
1835  */
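
/*
 * Rough outline of the contract: when zil_commit() needs the data for a
 * TX_WRITE record that wasn't copied into the log, it calls
 * ztest_get_data().  For small writes (buf != NULL) the data is simply
 * read back into the log record's buffer; for indirect writes we hold the
 * dbuf and dmu_sync() the block so the log record can point at its final
 * on-disk location.  ztest_get_done() is the completion hook that drops
 * the range/object locks and releases the dbuf.
 */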
1836 
1837 static void
1838 ztest_get_done(zgd_t *zgd, int error)
1839 {
1840         ztest_ds_t *zd = zgd->zgd_private;
1841         uint64_t object = zgd->zgd_rl->rl_object;
1842 
1843         if (zgd->zgd_db)
1844                 dmu_buf_rele(zgd->zgd_db, zgd);
1845 
1846         ztest_range_unlock(zgd->zgd_rl);
1847         ztest_object_unlock(zd, object);
1848 
1849         if (error == 0 && zgd->zgd_bp)
1850                 zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
1851 
1852         umem_free(zgd, sizeof (*zgd));
1853 }
1854 
1855 static int
1856 ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
1857     zio_t *zio)
1858 {
1859         ztest_ds_t *zd = arg;
1860         objset_t *os = zd->zd_os;
1861         uint64_t object = lr->lr_foid;
1862         uint64_t offset = lr->lr_offset;
1863         uint64_t size = lr->lr_length;
1864         uint64_t txg = lr->lr_common.lrc_txg;
1865         uint64_t crtxg;
1866         dmu_object_info_t doi;
1867         dmu_buf_t *db;
1868         zgd_t *zgd;
1869         int error;
1870 
1871         ASSERT3P(lwb, !=, NULL);
 
2359 
2360         rw_exit(&zd->zd_zilog_lock);
2361         mutex_exit(&zd->zd_dirobj_lock);
2362 }
2363 
2364 /*
2365  * Verify that we can't destroy an active pool, create an existing pool,
2366  * or create a pool with a bad vdev spec.
2367  */
2368 /* ARGSUSED */
2369 void
2370 ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
2371 {
2372         ztest_shared_opts_t *zo = &ztest_opts;
2373         spa_t *spa;
2374         nvlist_t *nvroot;
2375 
2376         /*
2377          * Attempt to create using a bad file.
2378          */
2379         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
2380         VERIFY3U(ENOENT, ==,
2381             spa_create("ztest_bad_file", nvroot, NULL, NULL));
2382         nvlist_free(nvroot);
2383 
2384         /*
2385          * Attempt to create using a bad mirror.
2386          */
2387         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
2388         VERIFY3U(ENOENT, ==,
2389             spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
2390         nvlist_free(nvroot);
2391 
2392         /*
2393          * Attempt to create an existing pool.  It shouldn't matter
2394          * what's in the nvroot; we should fail with EEXIST.
2395          */
2396         rw_enter(&ztest_name_lock, RW_READER);
2397         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
2398         VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
2399         nvlist_free(nvroot);
2400         VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
2401         VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
2402         spa_close(spa, FTAG);
2403 
2404         rw_exit(&ztest_name_lock);
2405 }
2406 
2407 /* ARGSUSED */
2408 void
2409 ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
2410 {
2411         spa_t *spa;
2412         uint64_t initial_version = SPA_VERSION_INITIAL;
2413         uint64_t version, newversion;
2414         nvlist_t *nvroot, *props;
2415         char *name;
2416 
2417         mutex_enter(&ztest_vdev_lock);
2418         name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
2419 
2420         /*
2421          * Clean up from previous runs.
2422          */
2423         (void) spa_destroy(name);
2424 
2425         nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
2426             0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
2427 
2428         /*
2429          * If we're configuring a RAIDZ device then make sure that the
2430          * initial version is capable of supporting that feature.
2431          */
2432         switch (ztest_opts.zo_raidz_parity) {
2433         case 0:
2434         case 1:
2435                 initial_version = SPA_VERSION_INITIAL;
2436                 break;
2437         case 2:
2438                 initial_version = SPA_VERSION_RAIDZ2;
2439                 break;
2440         case 3:
2441                 initial_version = SPA_VERSION_RAIDZ3;
2442                 break;
2443         }
2444 
2445         /*
2446          * Create a pool with a spa version that can be upgraded. Pick
 
2550                  * and destroying a dataset. Removing the slog will
2551                  * grab a reference on the dataset which may cause
2552                  * dmu_objset_destroy() to fail with EBUSY thus
2553                  * leaving the dataset in an inconsistent state.
2554                  */
2555                 rw_enter(&ztest_name_lock, RW_WRITER);
2556                 error = spa_vdev_remove(spa, guid, B_FALSE);
2557                 rw_exit(&ztest_name_lock);
2558 
2559                 if (error && error != EEXIST)
2560                         fatal(0, "spa_vdev_remove() = %d", error);
2561         } else {
2562                 spa_config_exit(spa, SCL_VDEV, FTAG);
2563 
2564                 /*
2565                  * Make 1/4 of the devices be log devices.
2566                  */
2567                 nvroot = make_vdev_root(NULL, NULL, NULL,
2568                     ztest_opts.zo_vdev_size, 0,
2569                     ztest_random(4) == 0, ztest_opts.zo_raidz,
2570                     zs->zs_mirrors, 1);
2571 
2572                 error = spa_vdev_add(spa, nvroot);
2573                 nvlist_free(nvroot);
2574 
2575                 if (error == ENOSPC)
2576                         ztest_record_enospc("spa_vdev_add");
2577                 else if (error != 0)
2578                         fatal(0, "spa_vdev_add() = %d", error);
2579         }
2580 
2581         mutex_exit(&ztest_vdev_lock);
2582 }
2583 
2584 /*
2585  * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
2586  */
2587 /* ARGSUSED */
2588 void
2589 ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
2590 {
 
2625                             ztest_opts.zo_dir, ztest_opts.zo_pool, aux,
2626                             zs->zs_vdev_aux);
2627                         for (c = 0; c < sav->sav_count; c++)
2628                                 if (strcmp(sav->sav_vdevs[c]->vdev_path,
2629                                     path) == 0)
2630                                         break;
2631                         if (c == sav->sav_count &&
2632                             vdev_lookup_by_path(rvd, path) == NULL)
2633                                 break;
2634                         zs->zs_vdev_aux++;
2635                 }
2636         }
2637 
2638         spa_config_exit(spa, SCL_VDEV, FTAG);
2639 
2640         if (guid == 0) {
2641                 /*
2642                  * Add a new device.
2643                  */
2644                 nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
2645                     (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
2646                 error = spa_vdev_add(spa, nvroot);
2647                 if (error != 0)
2648                         fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
2649                 nvlist_free(nvroot);
2650         } else {
2651                 /*
2652                  * Remove an existing device.  Sometimes, dirty its
2653                  * vdev state first to make sure we handle removal
2654                  * of devices that have pending state changes.
2655                  */
2656                 if (ztest_random(2) == 0)
2657                         (void) vdev_online(spa, guid, 0, NULL);
2658 
2659                 error = spa_vdev_remove(spa, guid, B_FALSE);
2660                 if (error != 0 && error != EBUSY)
2661                         fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
2662         }
2663 
2664         mutex_exit(&ztest_vdev_lock);
2665 }
 
2771         spa_t *spa = ztest_spa;
2772         spa_aux_vdev_t *sav = &spa->spa_spares;
2773         vdev_t *rvd = spa->spa_root_vdev;
2774         vdev_t *oldvd, *newvd, *pvd;
2775         nvlist_t *root;
2776         uint64_t leaves;
2777         uint64_t leaf, top;
2778         uint64_t ashift = ztest_get_ashift();
2779         uint64_t oldguid, pguid;
2780         uint64_t oldsize, newsize;
2781         char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
2782         int replacing;
2783         int oldvd_has_siblings = B_FALSE;
2784         int newvd_is_spare = B_FALSE;
2785         int oldvd_is_log;
2786         int error, expected_error;
2787 
2788         mutex_enter(&ztest_vdev_lock);
2789         leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
2790 
2791         spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
2792 
2793         /*
2794          * If a vdev is in the process of being removed, its removal may
2795          * finish while we are in progress, leading to an unexpected error
2796          * value.  Don't bother trying to attach while we are in the middle
2797          * of removal.
2798          */
2799         if (spa->spa_vdev_removal != NULL) {
2800                 spa_config_exit(spa, SCL_ALL, FTAG);
2801                 mutex_exit(&ztest_vdev_lock);
2802                 return;
2803         }
2804 
2805         /*
2806          * Decide whether to do an attach or a replace.
2807          */
2808         replacing = ztest_random(2);
2809 
2810         /*
2811          * Pick a random top-level vdev.
2812          */
2813         top = ztest_random_vdev_top(spa, B_TRUE);
2814 
2815         /*
2816          * Pick a random leaf within it.
2817          */
2818         leaf = ztest_random(leaves);
2819 
2820         /*
2821          * Locate this vdev.
2822          */
2823         oldvd = rvd->vdev_child[top];
2824         if (zs->zs_mirrors >= 1) {
2825                 ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
 
2836          * If we're already doing an attach or replace, oldvd may be a
2837          * mirror vdev -- in which case, pick a random child.
2838          */
2839         while (oldvd->vdev_children != 0) {
2840                 oldvd_has_siblings = B_TRUE;
2841                 ASSERT(oldvd->vdev_children >= 2);
2842                 oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
2843         }
2844 
2845         oldguid = oldvd->vdev_guid;
2846         oldsize = vdev_get_min_asize(oldvd);
2847         oldvd_is_log = oldvd->vdev_top->vdev_islog;
2848         (void) strcpy(oldpath, oldvd->vdev_path);
2849         pvd = oldvd->vdev_parent;
2850         pguid = pvd->vdev_guid;
2851 
2852         /*
2853          * If oldvd has siblings, then half of the time, detach it.
2854          */
2855         if (oldvd_has_siblings && ztest_random(2) == 0) {
2856                 spa_config_exit(spa, SCL_ALL, FTAG);
2857                 error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
2858                 if (error != 0 && error != ENODEV && error != EBUSY &&
2859                     error != ENOTSUP)
2860                         fatal(0, "detach (%s) returned %d", oldpath, error);
2861                 mutex_exit(&ztest_vdev_lock);
2862                 return;
2863         }
2864 
2865         /*
2866          * For the new vdev, choose with equal probability between the two
2867          * standard paths (ending in either 'a' or 'b') or a random hot spare.
2868          */
2869         if (sav->sav_count != 0 && ztest_random(3) == 0) {
2870                 newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
2871                 newvd_is_spare = B_TRUE;
2872                 (void) strcpy(newpath, newvd->vdev_path);
2873         } else {
2874                 (void) snprintf(newpath, sizeof (newpath), ztest_dev_template,
2875                     ztest_opts.zo_dir, ztest_opts.zo_pool,
2876                     top * leaves + leaf);
2877                 if (ztest_random(2) == 0)
2878                         newpath[strlen(newpath) - 1] = 'b';
2879                 newvd = vdev_lookup_by_path(rvd, newpath);
2880         }
2881 
2882         if (newvd) {
2883                 /*
2884                  * Reopen to ensure the vdev's asize field isn't stale.
2885                  */
2886                 vdev_reopen(newvd);
2887                 newsize = vdev_get_min_asize(newvd);
2888         } else {
2889                 /*
2890                  * Make newsize a little bigger or smaller than oldsize.
2891                  * If it's smaller, the attach should fail.
2892                  * If it's larger, and we're doing a replace,
2893                  * we should get dynamic LUN growth when we're done.
2894                  */
2895                 newsize = 10 * oldsize / (9 + ztest_random(3));
2896         }
2897 
2898         /*
2899          * If pvd is not a mirror or root, the attach should fail with ENOTSUP,
2900          * unless it's a replace; in that case any non-replacing parent is OK.
2901          *
2902          * If newvd is already part of the pool, it should fail with EBUSY.
2903          *
2904          * If newvd is too small, it should fail with EOVERFLOW.
2905          */
2906         if (pvd->vdev_ops != &vdev_mirror_ops &&
2907             pvd->vdev_ops != &vdev_root_ops && (!replacing ||
2908             pvd->vdev_ops == &vdev_replacing_ops ||
2909             pvd->vdev_ops == &vdev_spare_ops))
2910                 expected_error = ENOTSUP;
2911         else if (newvd_is_spare && (!replacing || oldvd_is_log))
2912                 expected_error = ENOTSUP;
2913         else if (newvd == oldvd)
2914                 expected_error = replacing ? 0 : EBUSY;
2915         else if (vdev_lookup_by_path(rvd, newpath) != NULL)
2916                 expected_error = EBUSY;
2917         else if (newsize < oldsize)
2918                 expected_error = EOVERFLOW;
2919         else if (ashift > oldvd->vdev_top->vdev_ashift)
2920                 expected_error = EDOM;
2921         else
2922                 expected_error = 0;
2923 
2924         spa_config_exit(spa, SCL_ALL, FTAG);
2925 
2926         /*
2927          * Build the nvlist describing newpath.
2928          */
2929         root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0,
2930             ashift, 0, 0, 0, 1);
2931 
2932         error = spa_vdev_attach(spa, oldguid, root, replacing);
2933 
2934         nvlist_free(root);
2935 
2936         /*
2937          * If our parent was the replacing vdev, but the replace completed,
2938          * then instead of failing with ENOTSUP we may either succeed,
2939          * fail with ENODEV, or fail with EOVERFLOW.
2940          */
2941         if (expected_error == ENOTSUP &&
2942             (error == 0 || error == ENODEV || error == EOVERFLOW))
2943                 expected_error = error;
2944 
2945         /*
2946          * If someone grew the LUN, the replacement may be too small.
2947          */
2948         if (error == EOVERFLOW || error == EBUSY)
2949                 expected_error = error;
2950 
2951         /* XXX workaround 6690467 */
2952         if (error != expected_error && expected_error != EBUSY) {
2953                 fatal(0, "attach (%s %llu, %s %llu, %d) "
2954                     "returned %d, expected %d",
2955                     oldpath, oldsize, newpath,
2956                     newsize, replacing, error, expected_error);
2957         }
2958 
2959         mutex_exit(&ztest_vdev_lock);
2960 }
2961 
2962 /* ARGSUSED */
2963 void
2964 ztest_device_removal(ztest_ds_t *zd, uint64_t id)
2965 {
2966         spa_t *spa = ztest_spa;
2967         vdev_t *vd;
2968         uint64_t guid;
2969 
2970         mutex_enter(&ztest_vdev_lock);
2971 
2972         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2973         vd = vdev_lookup_top(spa, ztest_random_vdev_top(spa, B_FALSE));
2974         guid = vd->vdev_guid;
2975         spa_config_exit(spa, SCL_VDEV, FTAG);
2976 
2977         (void) spa_vdev_remove(spa, guid, B_FALSE);
2978 
2979         mutex_exit(&ztest_vdev_lock);
2980 }
2981 
2982 /*
2983  * Callback function which expands the physical size of the vdev.
2984  */
2985 vdev_t *
2986 grow_vdev(vdev_t *vd, void *arg)
2987 {
2988         spa_t *spa = vd->vdev_spa;
2989         size_t *newsize = arg;
2990         size_t fsize;
2991         int fd;
2992 
2993         ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
2994         ASSERT(vd->vdev_ops->vdev_op_leaf);
2995 
2996         if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
2997                 return (vd);
2998 
2999         fsize = lseek(fd, 0, SEEK_END);
3000         (void) ftruncate(fd, *newsize);
3001 
 
3048          * still talking to the original vdev. It's possible this
3049          * vdev may have been detached/replaced while we were
3050          * trying to online it.
3051          */
3052         if (generation != spa->spa_config_generation) {
3053                 if (ztest_opts.zo_verbose >= 5) {
3054                         (void) printf("vdev configuration has changed, "
3055                             "guid %llu, state %llu, expected gen %llu, "
3056                             "got gen %llu\n",
3057                             (u_longlong_t)guid,
3058                             (u_longlong_t)tvd->vdev_state,
3059                             (u_longlong_t)generation,
3060                             (u_longlong_t)spa->spa_config_generation);
3061                 }
3062                 return (vd);
3063         }
3064         return (NULL);
3065 }
3066 
3067 /*
3068  * Traverse the vdev tree calling the supplied function.
3069  * We continue to walk the tree until we either have walked all
3070  * children or we receive a non-NULL return from the callback.
3071  * If a NULL callback is passed, then we just return back the first
3072  * leaf vdev we encounter.
3073  */
3074 vdev_t *
3075 vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
3076 {
3077         if (vd->vdev_ops->vdev_op_leaf) {
3078                 if (func == NULL)
3079                         return (vd);
3080                 else
3081                         return (func(vd, arg));
3082         }
3083 
3084         for (uint_t c = 0; c < vd->vdev_children; c++) {
3085                 vdev_t *cvd = vd->vdev_child[c];
3086                 if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL)
3087                         return (cvd);
 
3090 }
3091 
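/*
 * Typical uses of vdev_walk_tree() in this file: pass a NULL callback to
 * fetch the first leaf under a top-level vdev (as ztest_vdev_LUN_growth()
 * does below), or pass a callback such as grow_vdev() above, with the
 * desired new size as arg, to apply it to every leaf until one reports a
 * problem.
 */
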
3092 /*
3093  * Verify that dynamic LUN growth works as expected.
3094  */
3095 /* ARGSUSED */
3096 void
3097 ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
3098 {
3099         spa_t *spa = ztest_spa;
3100         vdev_t *vd, *tvd;
3101         metaslab_class_t *mc;
3102         metaslab_group_t *mg;
3103         size_t psize, newsize;
3104         uint64_t top;
3105         uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
3106 
3107         mutex_enter(&ztest_vdev_lock);
3108         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
3109 
3110         /*
3111          * If there is a vdev removal in progress, it could complete while
3112          * we are running, in which case we would not be able to verify
3113          * that the metaslab_class space increased (because it decreases
3114          * when the device removal completes).
3115          */
3116         if (spa->spa_vdev_removal != NULL) {
3117                 spa_config_exit(spa, SCL_STATE, spa);
3118                 mutex_exit(&ztest_vdev_lock);
3119                 return;
3120         }
3121 
3122         top = ztest_random_vdev_top(spa, B_TRUE);
3123 
3124         tvd = spa->spa_root_vdev->vdev_child[top];
3125         mg = tvd->vdev_mg;
3126         mc = mg->mg_class;
3127         old_ms_count = tvd->vdev_ms_count;
3128         old_class_space = metaslab_class_get_space(mc);
3129 
3130         /*
3131          * Determine the size of the first leaf vdev associated with
3132          * our top-level device.
3133          */
3134         vd = vdev_walk_tree(tvd, NULL, NULL);
3135         ASSERT3P(vd, !=, NULL);
3136         ASSERT(vd->vdev_ops->vdev_op_leaf);
3137 
3138         psize = vd->vdev_psize;
3139 
3140         /*
3141          * We only try to expand the vdev if it's healthy, less than 4x its
 
3193 
3194         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
3195 
3196         tvd = spa->spa_root_vdev->vdev_child[top];
3197         new_ms_count = tvd->vdev_ms_count;
3198         new_class_space = metaslab_class_get_space(mc);
3199 
3200         if (tvd->vdev_mg != mg || mg->mg_class != mc) {
3201                 if (ztest_opts.zo_verbose >= 5) {
3202                         (void) printf("Could not verify LUN expansion due to "
3203                             "intervening vdev offline or remove.\n");
3204                 }
3205                 spa_config_exit(spa, SCL_STATE, spa);
3206                 mutex_exit(&ztest_vdev_lock);
3207                 return;
3208         }
3209 
3210         /*
3211          * Make sure we were able to grow the vdev.
3212          */
3213         if (new_ms_count <= old_ms_count) {
3214                 fatal(0, "LUN expansion failed: ms_count %llu < %llu\n",
3215                     old_ms_count, new_ms_count);
3216         }
3217 
3218         /*
3219          * Make sure we were able to grow the pool.
3220          */
3221         if (new_class_space <= old_class_space) {
3222                 fatal(0, "LUN expansion failed: class_space %llu < %llu\n",
3223                     old_class_space, new_class_space);
3224         }
3225 
3226         if (ztest_opts.zo_verbose >= 5) {
3227                 char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ];
3228 
3229                 nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf));
3230                 nicenum(new_class_space, newnumbuf, sizeof (newnumbuf));
3231                 (void) printf("%s grew from %s to %s\n",
3232                     spa->spa_name, oldnumbuf, newnumbuf);
3233         }
3234 
3235         spa_config_exit(spa, SCL_STATE, spa);
3236         mutex_exit(&ztest_vdev_lock);
3237 }
3238 
3239 /*
3240  * Verify that dmu_objset_{create,destroy,open,close} work as expected.
3241  */
3242 /* ARGSUSED */
3243 static void
3244 ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
 
4677 ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
4678 {
4679         zfs_prop_t proplist[] = {
4680                 ZFS_PROP_CHECKSUM,
4681                 ZFS_PROP_COMPRESSION,
4682                 ZFS_PROP_COPIES,
4683                 ZFS_PROP_DEDUP
4684         };
4685 
4686         rw_enter(&ztest_name_lock, RW_READER);
4687 
4688         for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++)
4689                 (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
4690                     ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
4691 
4692         rw_exit(&ztest_name_lock);
4693 }
4694 
4695 /* ARGSUSED */
4696 void
4697 ztest_remap_blocks(ztest_ds_t *zd, uint64_t id)
4698 {
4699         rw_enter(&ztest_name_lock, RW_READER);
4700 
4701         int error = dmu_objset_remap_indirects(zd->zd_name);
4702         if (error == ENOSPC)
4703                 error = 0;
4704         ASSERT0(error);
4705 
4706         rw_exit(&ztest_name_lock);
4707 }
4708 
4709 /* ARGSUSED */
4710 void
4711 ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
4712 {
4713         nvlist_t *props = NULL;
4714 
4715         rw_enter(&ztest_name_lock, RW_READER);
4716 
4717         (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
4718             ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
4719 
4720         VERIFY0(spa_prop_get(ztest_spa, &props));
4721 
4722         if (ztest_opts.zo_verbose >= 6)
4723                 dump_nvlist(props, 4);
4724 
4725         nvlist_free(props);
4726 
4727         rw_exit(&ztest_name_lock);
4728 }
4729 
4730 static int
4731 user_release_one(const char *snapname, const char *holdname)
4732 {
4733         nvlist_t *snaps, *holds;
4734         int error;
4735 
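        /*
         * dsl_dataset_user_release() expects a two-level nvlist of the form
         * { snapshot name -> { hold tag -> boolean } }; assemble one here
         * for a single snapshot/hold pair.
         */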
4736         snaps = fnvlist_alloc();
4737         holds = fnvlist_alloc();
4738         fnvlist_add_boolean(holds, holdname);
4739         fnvlist_add_nvlist(snaps, snapname, holds);
4740         fnvlist_free(holds);
4741         error = dsl_dataset_user_release(snaps, NULL);
4742         fnvlist_free(snaps);
4743         return (error);
4744 }
4745 
4746 /*
4747  * Test snapshot hold/release and deferred destroy.
4748  */
4749 void
 
4940                 /*
4941                  * If the top-level vdev needs to be resilvered
4942                  * then we only allow faults on the device that is
4943                  * resilvering.
4944                  */
4945                 if (vd0 != NULL && maxfaults != 1 &&
4946                     (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
4947                     vd0->vdev_resilver_txg != 0)) {
4948                         /*
4949                          * Make vd0 explicitly claim to be unreadable,
4950                          * or unwriteable, or reach behind its back
4951                          * and close the underlying fd.  We can do this if
4952                          * maxfaults == 0 because we'll fail and reexecute,
4953                          * and we can do it if maxfaults >= 2 because we'll
4954                          * have enough redundancy.  If maxfaults == 1, the
4955                          * combination of this with injection of random data
4956                          * corruption below exceeds the pool's fault tolerance.
4957                          */
4958                         vdev_file_t *vf = vd0->vdev_tsd;
4959 
4960                         zfs_dbgmsg("injecting fault to vdev %llu; maxfaults=%d",
4961                             (long long)vd0->vdev_id, (int)maxfaults);
4962 
4963                         if (vf != NULL && ztest_random(3) == 0) {
4964                                 (void) close(vf->vf_vnode->v_fd);
4965                                 vf->vf_vnode->v_fd = -1;
4966                         } else if (ztest_random(2) == 0) {
4967                                 vd0->vdev_cant_read = B_TRUE;
4968                         } else {
4969                                 vd0->vdev_cant_write = B_TRUE;
4970                         }
4971                         guid0 = vd0->vdev_guid;
4972                 }
4973         } else {
4974                 /*
4975                  * Inject errors on an l2cache device.
4976                  */
4977                 spa_aux_vdev_t *sav = &spa->spa_l2cache;
4978 
4979                 if (sav->sav_count == 0) {
4980                         spa_config_exit(spa, SCL_STATE, FTAG);
4981                         rw_exit(&ztest_name_lock);
4982                         return;
 
5416 
5417         /*
5418          * Get the pool's configuration and guid.
5419          */
5420         VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
5421 
5422         /*
5423          * Kick off a scrub to tickle scrub/export races.
5424          */
5425         if (ztest_random(2) == 0)
5426                 (void) spa_scan(spa, POOL_SCAN_SCRUB);
5427 
5428         pool_guid = spa_guid(spa);
5429         spa_close(spa, FTAG);
5430 
5431         ztest_walk_pool_directory("pools before export");
5432 
5433         /*
5434          * Export it.
5435          */
5436         VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE));
5437 
5438         ztest_walk_pool_directory("pools after export");
5439 
5440         /*
5441          * Try to import it.
5442          */
5443         newconfig = spa_tryimport(config);
5444         ASSERT(newconfig != NULL);
5445         nvlist_free(newconfig);
5446 
5447         /*
5448          * Import it under the new name.
5449          */
5450         error = spa_import(newname, config, NULL, 0);
5451         if (error != 0) {
5452                 dump_nvlist(config, 0);
5453                 fatal(B_FALSE, "couldn't import pool %s as %s: error %u",
5454                     oldname, newname, error);
5455         }
5456 
 
5735 }
5736 
5737 /*
5738  * Kick off threads to run tests on all datasets in parallel.
5739  */
5740 static void
5741 ztest_run(ztest_shared_t *zs)
5742 {
5743         thread_t *tid;
5744         spa_t *spa;
5745         objset_t *os;
5746         thread_t resume_tid;
5747         int error;
5748 
5749         ztest_exiting = B_FALSE;
5750 
5751         /*
5752          * Initialize parent/child shared state.
5753          */
5754         mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
5755         rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
5756 
5757         zs->zs_thread_start = gethrtime();
5758         zs->zs_thread_stop =
5759             zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC;
5760         zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop);
5761         zs->zs_thread_kill = zs->zs_thread_stop;
5762         if (ztest_random(100) < ztest_opts.zo_killrate) {
5763                 zs->zs_thread_kill -=
5764                     ztest_random(ztest_opts.zo_passtime * NANOSEC);
5765         }
5766 
5767         mutex_init(&zcl.zcl_callbacks_lock, NULL, USYNC_THREAD, NULL);
5768 
5769         list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
5770             offsetof(ztest_cb_data_t, zcd_node));
5771 
5772         /*
5773          * Open our pool.
5774          */
 
6024 
6025         timebuf[0] = '\0';
6026 
6027         if (d)
6028                 (void) sprintf(timebuf,
6029                     "%llud%02lluh%02llum%02llus", d, h, m, s);
6030         else if (h)
6031                 (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s);
6032         else if (m)
6033                 (void) sprintf(timebuf, "%llum%02llus", m, s);
6034         else
6035                 (void) sprintf(timebuf, "%llus", s);
6036 }
6037 
6038 static nvlist_t *
6039 make_random_props()
6040 {
6041         nvlist_t *props;
6042 
6043         VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
6044         if (ztest_random(2) == 0)
6045                 return (props);
6046         VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
6047 
6048         return (props);
6049 }
6050 
6051 /*
6052  * Create a storage pool with the given name and initial vdev size.
6053  * Then test spa_freeze() functionality.
6054  */
6055 static void
6056 ztest_init(ztest_shared_t *zs)
6057 {
6058         spa_t *spa;
6059         nvlist_t *nvroot, *props;
6060 
6061         mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
6062         rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
6063 
6064         kernel_init(FREAD | FWRITE);
6065 
6066         /*
6067          * Create the storage pool.
6068          */
6069         (void) spa_destroy(ztest_opts.zo_pool);
6070         ztest_shared->zs_vdev_next_leaf = 0;
6071         zs->zs_splits = 0;
6072         zs->zs_mirrors = ztest_opts.zo_mirrors;
6073         nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
6074             0, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
6075         props = make_random_props();
6076         for (int i = 0; i < SPA_FEATURES; i++) {
6077                 char buf[1024];
6078                 (void) snprintf(buf, sizeof (buf), "feature@%s",
6079                     spa_feature_table[i].fi_uname);
6080                 VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
6081         }
6082         VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
6083         nvlist_free(nvroot);
6084         nvlist_free(props);
6085 
6086         VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
6087         zs->zs_metaslab_sz =
6088             1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
6089 
6090         spa_close(spa, FTAG);
6091 
6092         kernel_fini();
6093 
6094         ztest_run_zdb(ztest_opts.zo_pool);
6095 
6096         ztest_freeze();
6097 
6098         ztest_run_zdb(ztest_opts.zo_pool);
6099 
6100         rw_destroy(&ztest_name_lock);
6101         mutex_destroy(&ztest_vdev_lock);
6102 }
6103 
6104 static void
6105 setup_data_fd(void)
6106 {
6107         static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX";
6108 
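        /*
         * mkstemp() followed by an immediate unlink() gives us an anonymous
         * temporary file: the descriptor stays valid, but the name is gone
         * and the file is reclaimed when the fd is closed.
         */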
6109         ztest_fd_data = mkstemp(ztest_name_data);
6110         ASSERT3S(ztest_fd_data, >=, 0);
6111         (void) unlink(ztest_name_data);
6112 }
6113 
6114 
6115 static int
6116 shared_data_size(ztest_shared_hdr_t *hdr)
6117 {
6118         int size;
6119 
6120         size = hdr->zh_hdr_size;
 
 
 
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  24  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  26  * Copyright (c) 2014 Integros [integros.com]
  27  * Copyright 2017 Joyent, Inc.
  28  * Copyright 2017 RackTop Systems.
  29  */
  30 
  31 /*
  32  * The objective of this program is to provide a DMU/ZAP/SPA stress test
  33  * that runs entirely in userland, is easy to use, and easy to extend.
  34  *
  35  * The overall design of the ztest program is as follows:
  36  *
  37  * (1) For each major functional area (e.g. adding vdevs to a pool,
  38  *     creating and destroying datasets, reading and writing objects, etc)
  39  *     we have a simple routine to test that functionality.  These
  40  *     individual routines do not have to do anything "stressful".
  41  *
  42  * (2) We turn these simple functionality tests into a stress test by
  43  *     running them all in parallel, with as many threads as desired,
  44  *     and spread across as many datasets, objects, and vdevs as desired.
 
 
  96 #include <sys/stat.h>
  97 #include <sys/time.h>
  98 #include <sys/wait.h>
  99 #include <sys/mman.h>
 100 #include <sys/resource.h>
 101 #include <sys/zio.h>
 102 #include <sys/zil.h>
 103 #include <sys/zil_impl.h>
 104 #include <sys/vdev_impl.h>
 105 #include <sys/vdev_file.h>
 106 #include <sys/spa_impl.h>
 107 #include <sys/metaslab_impl.h>
 108 #include <sys/dsl_prop.h>
 109 #include <sys/dsl_dataset.h>
 110 #include <sys/dsl_destroy.h>
 111 #include <sys/dsl_scan.h>
 112 #include <sys/zio_checksum.h>
 113 #include <sys/refcount.h>
 114 #include <sys/zfeature.h>
 115 #include <sys/dsl_userhold.h>
 116 #include <libzfs.h>
 117 #include <sys/abd.h>
 118 #include <stdio.h>
 119 #include <stdio_ext.h>
 120 #include <stdlib.h>
 121 #include <unistd.h>
 122 #include <signal.h>
 123 #include <umem.h>
 124 #include <dlfcn.h>
 125 #include <ctype.h>
 126 #include <math.h>
 127 #include <sys/fs/zfs.h>
 128 #include <libnvpair.h>
 129 #include <libcmdutils.h>
 130 
 131 #include <sys/special.h>
 132 
 133 static int ztest_fd_data = -1;
 134 static int ztest_fd_rand = -1;
 135 
 136 typedef struct ztest_shared_hdr {
 137         uint64_t        zh_hdr_size;
 138         uint64_t        zh_opts_size;
 139         uint64_t        zh_size;
 140         uint64_t        zh_stats_size;
 141         uint64_t        zh_stats_count;
 142         uint64_t        zh_ds_size;
 143         uint64_t        zh_ds_count;
 144 } ztest_shared_hdr_t;
 145 
 146 static ztest_shared_hdr_t *ztest_shared_hdr;
 147 
 148 typedef struct ztest_shared_opts {
 149         char zo_pool[ZFS_MAX_DATASET_NAME_LEN];
 150         char zo_dir[ZFS_MAX_DATASET_NAME_LEN];
 151         char zo_alt_ztest[MAXNAMELEN];
 152         char zo_alt_libpath[MAXNAMELEN];
 
 314 #define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c])
 315 
 316 /*
 317  * Note: these aren't static because we want dladdr() to work.
 318  */
 319 ztest_func_t ztest_dmu_read_write;
 320 ztest_func_t ztest_dmu_write_parallel;
 321 ztest_func_t ztest_dmu_object_alloc_free;
 322 ztest_func_t ztest_dmu_commit_callbacks;
 323 ztest_func_t ztest_zap;
 324 ztest_func_t ztest_zap_parallel;
 325 ztest_func_t ztest_zil_commit;
 326 ztest_func_t ztest_zil_remount;
 327 ztest_func_t ztest_dmu_read_write_zcopy;
 328 ztest_func_t ztest_dmu_objset_create_destroy;
 329 ztest_func_t ztest_dmu_prealloc;
 330 ztest_func_t ztest_fzap;
 331 ztest_func_t ztest_dmu_snapshot_create_destroy;
 332 ztest_func_t ztest_dsl_prop_get_set;
 333 ztest_func_t ztest_spa_prop_get_set;
 334 ztest_func_t ztest_vdev_prop_get_set;
 335 ztest_func_t ztest_cos_prop_get_set;
 336 ztest_func_t ztest_spa_create_destroy;
 337 ztest_func_t ztest_fault_inject;
 338 ztest_func_t ztest_ddt_repair;
 339 ztest_func_t ztest_dmu_snapshot_hold;
 340 ztest_func_t ztest_spa_rename;
 341 ztest_func_t ztest_scrub;
 342 ztest_func_t ztest_dsl_dataset_promote_busy;
 343 ztest_func_t ztest_vdev_attach_detach;
 344 ztest_func_t ztest_vdev_LUN_growth;
 345 ztest_func_t ztest_vdev_add_remove;
 346 ztest_func_t ztest_vdev_aux_add_remove;
 347 ztest_func_t ztest_split_pool;
 348 ztest_func_t ztest_reguid;
 349 ztest_func_t ztest_spa_upgrade;
 350 
 351 uint64_t zopt_always = 0ULL * NANOSEC;          /* all the time */
 352 uint64_t zopt_incessant = 1ULL * NANOSEC / 10;  /* every 1/10 second */
 353 uint64_t zopt_often = 1ULL * NANOSEC;           /* every second */
 354 uint64_t zopt_sometimes = 10ULL * NANOSEC;      /* every 10 seconds */
 355 uint64_t zopt_rarely = 60ULL * NANOSEC;         /* every 60 seconds */
 356 
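     /*
      * Each entry below pairs a test function with an iteration count and a
      * pointer to the interval (in nanoseconds) that controls how often the
      * test is scheduled.
      */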
 357 ztest_info_t ztest_info[] = {
 358         { ztest_dmu_read_write,                 1,      &zopt_always        },
 359         { ztest_dmu_write_parallel,             10,     &zopt_always        },
 360         { ztest_dmu_object_alloc_free,          1,      &zopt_always        },
 361         { ztest_dmu_commit_callbacks,           1,      &zopt_always        },
 362         { ztest_zap,                            30,     &zopt_always        },
 363         { ztest_zap_parallel,                   100,    &zopt_always        },
 364         { ztest_split_pool,                     1,      &zopt_always        },
 365         { ztest_zil_commit,                     1,      &zopt_incessant     },
 366         { ztest_zil_remount,                    1,      &zopt_sometimes     },
 367         { ztest_dmu_read_write_zcopy,           1,      &zopt_often },
 368         { ztest_dmu_objset_create_destroy,      1,      &zopt_often },
 369         { ztest_dsl_prop_get_set,               1,      &zopt_often },
 370         { ztest_spa_prop_get_set,               1,      &zopt_sometimes     },
 371         { ztest_vdev_prop_get_set,              1,      &zopt_often },
 372         { ztest_cos_prop_get_set,               1,      &zopt_often },
 373 #if 0
 374         { ztest_dmu_prealloc,                   1,      &zopt_sometimes     },
 375 #endif
 376         { ztest_fzap,                           1,      &zopt_sometimes     },
 377         { ztest_dmu_snapshot_create_destroy,    1,      &zopt_sometimes     },
 378         { ztest_spa_create_destroy,             1,      &zopt_sometimes     },
 379         { ztest_fault_inject,                   1,      &zopt_sometimes     },
 380         { ztest_ddt_repair,                     1,      &zopt_sometimes     },
 381         { ztest_dmu_snapshot_hold,              1,      &zopt_sometimes     },
 382         { ztest_reguid,                         1,      &zopt_rarely        },
 383         { ztest_spa_rename,                     1,      &zopt_rarely        },
 384         { ztest_scrub,                          1,      &zopt_rarely        },
 385         { ztest_spa_upgrade,                    1,      &zopt_rarely        },
 386         { ztest_dsl_dataset_promote_busy,       1,      &zopt_rarely        },
 387         { ztest_vdev_attach_detach,             1,      &zopt_sometimes     },
 388         { ztest_vdev_LUN_growth,                1,      &zopt_rarely        },
 389         { ztest_vdev_add_remove,                1,
 390             &ztest_opts.zo_vdevtime                         },
 391         { ztest_vdev_aux_add_remove,            1,
 392             &ztest_opts.zo_vdevtime                         },
 393 };
 394 
 395 #define ZTEST_FUNCS     (sizeof (ztest_info) / sizeof (ztest_info_t))
 396 
 397 /*
 398  * The following struct is used to hold a list of uncalled commit callbacks.
 399  * The callbacks are ordered by txg number.
 400  */
 401 typedef struct ztest_cb_list {
 402         kmutex_t zcl_callbacks_lock;
 403         list_t  zcl_callbacks;
 404 } ztest_cb_list_t;
 405 
 406 /*
 407  * Stuff we need to share writably between parent and child.
 408  */
 409 typedef struct ztest_shared {
 410         boolean_t       zs_do_init;
 411         hrtime_t        zs_proc_start;
 412         hrtime_t        zs_proc_stop;
 
 420         uint64_t        zs_space;
 421         uint64_t        zs_splits;
 422         uint64_t        zs_mirrors;
 423         uint64_t        zs_metaslab_sz;
 424         uint64_t        zs_metaslab_df_alloc_threshold;
 425         uint64_t        zs_guid;
 426 } ztest_shared_t;
 427 
 428 #define ID_PARALLEL     -1ULL
 429 
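     /*
      * Path templates for file vdevs: the dev template expands to
      * "<dir>/<pool>.<N>a" (the trailing letter may be flipped to 'b' by the
      * attach/detach test); the aux template expands to
      * "<dir>/<pool>.<aux>.<N>".
      */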
 430 static char ztest_dev_template[] = "%s/%s.%llua";
 431 static char ztest_aux_template[] = "%s/%s.%s.%llu";
 432 ztest_shared_t *ztest_shared;
 433 
 434 static spa_t *ztest_spa = NULL;
 435 static ztest_ds_t *ztest_ds;
 436 
 437 static kmutex_t ztest_vdev_lock;
 438 
 439 /*
 440  * Make sure the "set/get/test" test does not interfere with other
 441  * concurrent tests on the same vdev/cos property
 442  */
 443 static kmutex_t ztest_props_lock;
 444 
 445 /*
 446  * The ztest_name_lock protects the pool and dataset namespace used by
 447  * the individual tests. To modify the namespace, consumers must grab
 448  * this lock as writer. Grabbing the lock as reader will ensure that the
 449  * namespace does not change while the lock is held.
 450  */
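     /*
      * For example, ztest_dsl_prop_get_set() holds this lock as reader while
      * setting dataset properties, whereas ztest_vdev_add_remove() takes it
      * as writer around spa_vdev_remove():
      *
      *         rw_enter(&ztest_name_lock, RW_WRITER);
      *         error = spa_vdev_remove(spa, guid, B_FALSE);
      *         rw_exit(&ztest_name_lock);
      */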
 451 static krwlock_t ztest_name_lock;
 452 
 453 static boolean_t ztest_dump_core = B_TRUE;
 454 static boolean_t ztest_exiting;
 455 
 456 /* Global commit callback list */
 457 static ztest_cb_list_t zcl;
 458 
 459 enum ztest_object {
 460         ZTEST_META_DNODE = 0,
 461         ZTEST_DIROBJ,
 462         ZTEST_OBJECTS
 463 };
 464 
 465 static void usage(boolean_t) __NORETURN;
 
 781                             zo->zo_alt_ztest);
 782                 } else if (0 != access(zo->zo_alt_libpath, X_OK)) {
 783                         ztest_dump_core = B_FALSE;
 784                         fatal(B_TRUE, "invalid alternate lib directory %s",
 785                             zo->zo_alt_libpath);
 786                 }
 787 
 788                 umem_free(cmd, MAXPATHLEN);
 789                 umem_free(realaltdir, MAXPATHLEN);
 790         }
 791 }
 792 
 793 static void
 794 ztest_kill(ztest_shared_t *zs)
 795 {
 796         zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
 797         zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
 798 
 799         /*
 800          * Before we kill off ztest, make sure that the config is updated.
 801          * See comment above spa_config_sync().
 802          */
 803         mutex_enter(&spa_namespace_lock);
 804         spa_config_sync(ztest_spa, B_FALSE, B_FALSE);
 805         mutex_exit(&spa_namespace_lock);
 806 
 807         zfs_dbgmsg_print(FTAG);
 808         (void) kill(getpid(), SIGKILL);
 809 }
 810 
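     /*
      * Return a pseudo-random number in the range [0, range), built from
      * bytes read out of /dev/urandom; a range of 0 simply returns 0.
      */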
 811 static uint64_t
 812 ztest_random(uint64_t range)
 813 {
 814         uint64_t r;
 815 
 816         ASSERT3S(ztest_fd_rand, >=, 0);
 817 
 818         if (range == 0)
 819                 return (0);
 820 
 821         if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
 822                 fatal(1, "short read from /dev/urandom");
 823 
 824         return (r % range);
 825 }
 826 
 827 /* ARGSUSED */
 828 static void
 829 ztest_record_enospc(const char *s)
 830 {
 831         ztest_shared->zs_enospc_count++;
 832 }
 833 
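     /*
      * Return the ashift specified on the command line or, if none was given,
      * a random shift in the range [SPA_MINBLOCKSHIFT, SPA_MINBLOCKSHIFT + 4].
      */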
 834 static uint64_t
 835 ztest_get_ashift(void)
 836 {
 837         if (ztest_opts.zo_ashift == 0)
 838                 return (SPA_MINBLOCKSHIFT + ztest_random(5));
 839         return (ztest_opts.zo_ashift);
 840 }
 841 
 842 static nvlist_t *
 843 make_vdev_file(char *path, char *aux, char *pool, size_t size,
 844     uint64_t ashift, boolean_t is_special)
 845 {
 846         char pathbuf[MAXPATHLEN];
 847         uint64_t vdev;
 848         nvlist_t *file;
 849 
 850         if (ashift == 0)
 851                 ashift = ztest_get_ashift();
 852 
 853         if (path == NULL) {
 854                 path = pathbuf;
 855 
 856                 if (aux != NULL) {
 857                         vdev = ztest_shared->zs_vdev_aux;
 858                         (void) snprintf(path, sizeof (pathbuf),
 859                             ztest_aux_template, ztest_opts.zo_dir,
 860                             pool == NULL ? ztest_opts.zo_pool : pool,
 861                             aux, vdev);
 862                 } else {
 863                         vdev = ztest_shared->zs_vdev_next_leaf++;
 864                         (void) snprintf(path, sizeof (pathbuf),
 865                             ztest_dev_template, ztest_opts.zo_dir,
 866                             pool == NULL ? ztest_opts.zo_pool : pool, vdev);
 867                 }
 868         }
 869 
 870         if (size != 0) {
 871                 int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
 872                 if (fd == -1)
 873                         fatal(1, "can't open %s", path);
 874                 if (ftruncate(fd, size) != 0)
 875                         fatal(1, "can't ftruncate %s to %lld", path, size);
 876                 (void) close(fd);
 877         }
 878 
 879         VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
 880         VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
 881         VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
 882         VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
 883         VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_IS_SPECIAL, is_special)
 884             == 0);
 885         return (file);
 886 }
 887 
 888 static nvlist_t *
 889 make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
 890     uint64_t ashift, int r, boolean_t is_special)
 891 {
 892         nvlist_t *raidz, **child;
 893         int c;
 894 
 895         if (r < 2)
 896                 return (make_vdev_file(path, aux, pool, size, ashift,
 897                     is_special));
 898         child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);
 899 
 900         for (c = 0; c < r; c++)
 901                 child[c] = make_vdev_file(path, aux, pool, size, ashift,
 902                     B_FALSE);
 903 
 904         VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
 905         VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
 906             VDEV_TYPE_RAIDZ) == 0);
 907         VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
 908             ztest_opts.zo_raidz_parity) == 0);
 909         VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
 910             child, r) == 0);
 911 
 912         for (c = 0; c < r; c++)
 913                 nvlist_free(child[c]);
 914 
 915         umem_free(child, r * sizeof (nvlist_t *));
 916 
 917         return (raidz);
 918 }
 919 
 920 static nvlist_t *
 921 make_vdev_mirror(char *path, char *aux, char *pool, size_t size,
 922     uint64_t ashift, int r, int m, boolean_t is_special)
 923 {
 924         nvlist_t *mirror, **child;
 925         int c;
 926 
 927         if (m < 1)
 928                 return (make_vdev_raidz(path, aux, pool, size, ashift, r,
 929                     is_special));
 930 
 931         child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
 932 
 933         for (c = 0; c < m; c++)
 934                 child[c] = make_vdev_raidz(path, aux, pool, size, ashift,
 935                     r, B_FALSE);
 936 
 937         VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
 938         VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
 939             VDEV_TYPE_MIRROR) == 0);
 940         VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
 941             child, m) == 0);
 942         VERIFY(nvlist_add_uint64(mirror, ZPOOL_CONFIG_IS_SPECIAL, is_special)
 943             == 0);
 944 
 945         for (c = 0; c < m; c++)
 946                 nvlist_free(child[c]);
 947 
 948         umem_free(child, m * sizeof (nvlist_t *));
 949 
 950         return (mirror);
 951 }
 952 
 953 static nvlist_t *
 954 make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift,
 955     int log, int r, int m, int t, boolean_t special)
 956 {
 957         nvlist_t *root, **child;
 958         int c;
 959 
 960         ASSERT(t > 0);
 961 
 962         child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
 963 
 964         for (c = 0; c < t; c++) {
 965                 child[c] = make_vdev_mirror(path, aux, pool, size, ashift,
 966                     r, m, special);
 967                 VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
 968                     log) == 0);
 969         }
 970 
 971         VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
 972         VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
 973         VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
 974             child, t) == 0);
 975 
 976         for (c = 0; c < t; c++)
 977                 nvlist_free(child[c]);
 978 
 979         umem_free(child, t * sizeof (nvlist_t *));
 980 
 981         return (root);
 982 }
 983 
 984 /*
 985  * Add special top-level vdev(s) to the vdev tree
 986  */
 987 static void
 988 add_special_vdevs(nvlist_t *root, size_t size, int r, int m, int t)
 989 {
 990         nvlist_t **child = NULL, **prev_child = NULL, **new_child = NULL;
 991         int c = 0, new = 0;
 992         unsigned int prev = 0;
 993 
 994         if ((m == 0) || (t == 0))
 995                 return;
 996 
 997         child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
 998 
 999         /*
1000          * Build the special top-level vdevs (is_special is set on each)
1001          */
1002         for (c = 0; c < t; c++) {
1003                 child[c] = make_vdev_mirror(NULL, NULL, NULL, size, 0, r, m,
1004                     B_TRUE);
1005         }
1006 
1007         /*
1008          * Extend the children's array in the root:
1009          *  - get previously added children
1010          *  - allocate a new array
1011          *  - copy the previous and new children into it
1012          *  - replace the children nvlist array with the new one
1013          */
1014         VERIFY(nvlist_lookup_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
1015             &prev_child, &prev) == 0);
1016 
1017         new = prev + t;
1018 
1019         new_child = umem_alloc(new * sizeof (nvlist_t *),
1020             UMEM_NOFAIL);
1021         for (c = 0; c < prev; c++) {
1022                 VERIFY(nvlist_dup(prev_child[c], &new_child[c], 0) == 0);
1023         }
1024         for (; c < new; c++) {
1025                 new_child[c] = child[c-prev];
1026         }
1027 
1028         VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
1029             new_child, new) == 0);
1030 
1031         /* free children */
1032         for (c = 0; c < new; c++) {
1033                 nvlist_free(new_child[c]);
1034         }
1035         umem_free(child, t * sizeof (nvlist_t *));
1036 
1037         umem_free(new_child, new * sizeof (nvlist_t *));
1038 }
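     /*
      * Usage sketch (sizes are hypothetical): append one special two-way
      * mirror of file vdevs to a root nvlist built by make_vdev_root(), e.g.
      *
      *         nvroot = make_vdev_root(NULL, NULL, NULL, size, 0, 0, 0, 0, 1,
      *             B_FALSE);
      *         add_special_vdevs(nvroot, size, 0, 2, 1);
      */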
1039 
1040 /*
1041  * Find a random spa version. Returns a random spa version in the
1042  * range [initial_version, SPA_VERSION_FEATURES].
1043  */
1044 static uint64_t
1045 ztest_random_spa_version(uint64_t initial_version)
1046 {
1047         uint64_t version = initial_version;
1048 
1049         if (version <= SPA_VERSION_BEFORE_FEATURES) {
1050                 version = version +
1051                     ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1);
1052         }
1053 
1054         if (version > SPA_VERSION_BEFORE_FEATURES)
1055                 version = SPA_VERSION_FEATURES;
1056 
1057         ASSERT(SPA_VERSION_IS_SUPPORTED(version));
1058         return (version);
1059 }
1060 
 
1075 
1076 static int
1077 ztest_random_ibshift(void)
1078 {
1079         return (DN_MIN_INDBLKSHIFT +
1080             ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1));
1081 }
1082 
1083 static uint64_t
1084 ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
1085 {
1086         uint64_t top;
1087         vdev_t *rvd = spa->spa_root_vdev;
1088         vdev_t *tvd;
1089 
1090         ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
1091 
1092         do {
1093                 top = ztest_random(rvd->vdev_children);
1094                 tvd = rvd->vdev_child[top];
1095         } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) ||
1096             tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
1097 
1098         return (top);
1099 }
1100 
1101 static uint64_t
1102 ztest_random_dsl_prop(zfs_prop_t prop)
1103 {
1104         uint64_t value;
1105 
1106         do {
1107                 value = zfs_prop_random_value(prop, ztest_random(-1ULL));
1108         } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF);
1109 
1110         return (value);
1111 }
1112 
1113 static int
1114 ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
1115     boolean_t inherit)
 
1890         NULL,                   /* TX_RMDIR */
1891         NULL,                   /* TX_LINK */
1892         NULL,                   /* TX_RENAME */
1893         ztest_replay_write,     /* TX_WRITE */
1894         ztest_replay_truncate,  /* TX_TRUNCATE */
1895         ztest_replay_setattr,   /* TX_SETATTR */
1896         NULL,                   /* TX_ACL */
1897         NULL,                   /* TX_CREATE_ACL */
1898         NULL,                   /* TX_CREATE_ATTR */
1899         NULL,                   /* TX_CREATE_ACL_ATTR */
1900         NULL,                   /* TX_MKDIR_ACL */
1901         NULL,                   /* TX_MKDIR_ATTR */
1902         NULL,                   /* TX_MKDIR_ACL_ATTR */
1903         NULL,                   /* TX_WRITE2 */
1904 };
1905 
1906 /*
1907  * ZIL get_data callbacks
1908  */
1909 
1910 /* ARGSUSED */
1911 static void
1912 ztest_get_done(zgd_t *zgd, int error)
1913 {
1914         ztest_ds_t *zd = zgd->zgd_private;
1915         uint64_t object = zgd->zgd_rl->rl_object;
1916 
1917         if (zgd->zgd_db)
1918                 dmu_buf_rele(zgd->zgd_db, zgd);
1919 
1920         ztest_range_unlock(zgd->zgd_rl);
1921         ztest_object_unlock(zd, object);
1922 
1923         umem_free(zgd, sizeof (*zgd));
1924 }
1925 
1926 static int
1927 ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
1928     zio_t *zio)
1929 {
1930         ztest_ds_t *zd = arg;
1931         objset_t *os = zd->zd_os;
1932         uint64_t object = lr->lr_foid;
1933         uint64_t offset = lr->lr_offset;
1934         uint64_t size = lr->lr_length;
1935         uint64_t txg = lr->lr_common.lrc_txg;
1936         uint64_t crtxg;
1937         dmu_object_info_t doi;
1938         dmu_buf_t *db;
1939         zgd_t *zgd;
1940         int error;
1941 
1942         ASSERT3P(lwb, !=, NULL);
 
2430 
2431         rw_exit(&zd->zd_zilog_lock);
2432         mutex_exit(&zd->zd_dirobj_lock);
2433 }
2434 
2435 /*
2436  * Verify that we can't destroy an active pool, create an existing pool,
2437  * or create a pool with a bad vdev spec.
2438  */
2439 /* ARGSUSED */
2440 void
2441 ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
2442 {
2443         ztest_shared_opts_t *zo = &ztest_opts;
2444         spa_t *spa;
2445         nvlist_t *nvroot;
2446 
2447         /*
2448          * Attempt to create using a bad file.
2449          */
2450         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1,
2451             B_FALSE);
2452         VERIFY3U(ENOENT, ==,
2453             spa_create("ztest_bad_file", nvroot, NULL, NULL));
2454         nvlist_free(nvroot);
2455 
2456         /*
2457          * Attempt to create using a bad mirror.
2458          */
2459         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1,
2460             B_FALSE);
2461         VERIFY3U(ENOENT, ==,
2462             spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
2463         nvlist_free(nvroot);
2464 
2465         /*
2466          * Attempt to create an existing pool.  It shouldn't matter
2467          * what's in the nvroot; we should fail with EEXIST.
2468          */
2469         rw_enter(&ztest_name_lock, RW_READER);
2470         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1,
2471             B_FALSE);
2472         VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
2473         nvlist_free(nvroot);
2474         VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
2475         VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
2476         spa_close(spa, FTAG);
2477 
2478         rw_exit(&ztest_name_lock);
2479 }
2480 
2481 /* ARGSUSED */
2482 void
2483 ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
2484 {
2485         spa_t *spa;
2486         uint64_t initial_version = SPA_VERSION_INITIAL;
2487         uint64_t version, newversion;
2488         nvlist_t *nvroot, *props;
2489         char *name;
2490 
2491         mutex_enter(&ztest_vdev_lock);
2492         name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
2493 
2494         /*
2495          * Clean up from previous runs.
2496          */
2497         (void) spa_destroy(name);
2498 
2499         nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
2500             0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1, B_FALSE);
2501 
2502         /*
2503          * If we're configuring a RAIDZ device then make sure that
2504          * the initial version is capable of supporting that feature.
2505          */
2506         switch (ztest_opts.zo_raidz_parity) {
2507         case 0:
2508         case 1:
2509                 initial_version = SPA_VERSION_INITIAL;
2510                 break;
2511         case 2:
2512                 initial_version = SPA_VERSION_RAIDZ2;
2513                 break;
2514         case 3:
2515                 initial_version = SPA_VERSION_RAIDZ3;
2516                 break;
2517         }
2518 
2519         /*
2520          * Create a pool with a spa version that can be upgraded. Pick
 
2624                  * and destroying a dataset. Removing the slog will
2625                  * grab a reference on the dataset which may cause
2626                  * dmu_objset_destroy() to fail with EBUSY thus
2627                  * leaving the dataset in an inconsistent state.
2628                  */
2629                 rw_enter(&ztest_name_lock, RW_WRITER);
2630                 error = spa_vdev_remove(spa, guid, B_FALSE);
2631                 rw_exit(&ztest_name_lock);
2632 
2633                 if (error && error != EEXIST)
2634                         fatal(0, "spa_vdev_remove() = %d", error);
2635         } else {
2636                 spa_config_exit(spa, SCL_VDEV, FTAG);
2637 
2638                 /*
2639                  * Make 1/4 of the devices be log devices.
2640                  */
2641                 nvroot = make_vdev_root(NULL, NULL, NULL,
2642                     ztest_opts.zo_vdev_size, 0,
2643                     ztest_random(4) == 0, ztest_opts.zo_raidz,
2644                     zs->zs_mirrors, 1, B_FALSE);
2645 
2646                 error = spa_vdev_add(spa, nvroot);
2647                 nvlist_free(nvroot);
2648 
2649                 if (error == ENOSPC)
2650                         ztest_record_enospc("spa_vdev_add");
2651                 else if (error != 0)
2652                         fatal(0, "spa_vdev_add() = %d", error);
2653         }
2654 
2655         mutex_exit(&ztest_vdev_lock);
2656 }
2657 
2658 /*
2659  * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
2660  */
2661 /* ARGSUSED */
2662 void
2663 ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
2664 {
 
2699                             ztest_opts.zo_dir, ztest_opts.zo_pool, aux,
2700                             zs->zs_vdev_aux);
2701                         for (c = 0; c < sav->sav_count; c++)
2702                                 if (strcmp(sav->sav_vdevs[c]->vdev_path,
2703                                     path) == 0)
2704                                         break;
2705                         if (c == sav->sav_count &&
2706                             vdev_lookup_by_path(rvd, path) == NULL)
2707                                 break;
2708                         zs->zs_vdev_aux++;
2709                 }
2710         }
2711 
2712         spa_config_exit(spa, SCL_VDEV, FTAG);
2713 
2714         if (guid == 0) {
2715                 /*
2716                  * Add a new device.
2717                  */
2718                 nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
2719                     (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1, B_FALSE);
2720                 error = spa_vdev_add(spa, nvroot);
2721                 if (error != 0)
2722                         fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
2723                 nvlist_free(nvroot);
2724         } else {
2725                 /*
2726                  * Remove an existing device.  Sometimes, dirty its
2727                  * vdev state first to make sure we handle removal
2728                  * of devices that have pending state changes.
2729                  */
2730                 if (ztest_random(2) == 0)
2731                         (void) vdev_online(spa, guid, 0, NULL);
2732 
2733                 error = spa_vdev_remove(spa, guid, B_FALSE);
2734                 if (error != 0 && error != EBUSY)
2735                         fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
2736         }
2737 
2738         mutex_exit(&ztest_vdev_lock);
2739 }
 
2845         spa_t *spa = ztest_spa;
2846         spa_aux_vdev_t *sav = &spa->spa_spares;
2847         vdev_t *rvd = spa->spa_root_vdev;
2848         vdev_t *oldvd, *newvd, *pvd;
2849         nvlist_t *root;
2850         uint64_t leaves;
2851         uint64_t leaf, top;
2852         uint64_t ashift = ztest_get_ashift();
2853         uint64_t oldguid, pguid;
2854         uint64_t oldsize, newsize;
2855         char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
2856         int replacing;
2857         int oldvd_has_siblings = B_FALSE;
2858         int newvd_is_spare = B_FALSE;
2859         int oldvd_is_log;
2860         int error, expected_error;
2861 
2862         mutex_enter(&ztest_vdev_lock);
2863         leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
2864 
2865         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2866 
2867         /*
2868          * Decide whether to do an attach or a replace.
2869          */
2870         replacing = ztest_random(2);
2871 
2872         /*
2873          * Pick a random top-level vdev.
2874          */
2875         top = ztest_random_vdev_top(spa, B_TRUE);
2876 
2877         /*
2878          * Pick a random leaf within it.
2879          */
2880         leaf = ztest_random(leaves);
2881 
2882         /*
2883          * Locate this vdev.
2884          */
2885         oldvd = rvd->vdev_child[top];
2886         if (zs->zs_mirrors >= 1) {
2887                 ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
 
2898          * If we're already doing an attach or replace, oldvd may be a
2899          * mirror vdev -- in which case, pick a random child.
2900          */
2901         while (oldvd->vdev_children != 0) {
2902                 oldvd_has_siblings = B_TRUE;
2903                 ASSERT(oldvd->vdev_children >= 2);
2904                 oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
2905         }
2906 
2907         oldguid = oldvd->vdev_guid;
2908         oldsize = vdev_get_min_asize(oldvd);
2909         oldvd_is_log = oldvd->vdev_top->vdev_islog;
2910         (void) strcpy(oldpath, oldvd->vdev_path);
2911         pvd = oldvd->vdev_parent;
2912         pguid = pvd->vdev_guid;
2913 
2914         /*
2915          * If oldvd has siblings, then half of the time, detach it.
2916          */
2917         if (oldvd_has_siblings && ztest_random(2) == 0) {
2918                 spa_config_exit(spa, SCL_VDEV, FTAG);
2919                 error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
2920                 if (error != 0 && error != ENODEV && error != EBUSY &&
2921                     error != ENOTSUP)
2922                         fatal(0, "detach (%s) returned %d", oldpath, error);
2923                 mutex_exit(&ztest_vdev_lock);
2924                 return;
2925         }
2926 
2927         /*
2928          * For the new vdev, choose with equal probability between the two
2929          * standard paths (ending in either 'a' or 'b') or a random hot spare.
2930          */
2931         if (sav->sav_count != 0 && ztest_random(3) == 0) {
2932                 newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
2933                 newvd_is_spare = B_TRUE;
2934                 (void) strcpy(newpath, newvd->vdev_path);
2935         } else {
2936                 (void) snprintf(newpath, sizeof (newpath), ztest_dev_template,
2937                     ztest_opts.zo_dir, ztest_opts.zo_pool,
2938                     top * leaves + leaf);
2939                 if (ztest_random(2) == 0)
2940                         newpath[strlen(newpath) - 1] = 'b';
2941                 newvd = vdev_lookup_by_path(rvd, newpath);
2942         }
2943 
2944         if (newvd) {
2945                 newsize = vdev_get_min_asize(newvd);
2946         } else {
2947                 /*
2948                  * Make newsize a little bigger or smaller than oldsize.
2949                  * If it's smaller, the attach should fail.
2950                  * If it's larger, and we're doing a replace,
2951                  * we should get dynamic LUN growth when we're done.
2952                  */
2953                 newsize = 10 * oldsize / (9 + ztest_random(3));
2954         }
2955 
2956         /*
2957          * If pvd is not a mirror or root, the attach should fail with ENOTSUP,
2958          * unless it's a replace; in that case any non-replacing parent is OK.
2959          *
2960          * If newvd is already part of the pool, it should fail with EBUSY.
2961          *
2962          * If newvd is too small, it should fail with EOVERFLOW.
2963          */
2964         if (pvd->vdev_ops != &vdev_mirror_ops &&
2965             pvd->vdev_ops != &vdev_root_ops && (!replacing ||
2966             pvd->vdev_ops == &vdev_replacing_ops ||
2967             pvd->vdev_ops == &vdev_spare_ops))
2968                 expected_error = ENOTSUP;
2969         else if (newvd_is_spare && (!replacing || oldvd_is_log))
2970                 expected_error = ENOTSUP;
2971         else if (newvd == oldvd)
2972                 expected_error = replacing ? 0 : EBUSY;
2973         else if (vdev_lookup_by_path(rvd, newpath) != NULL)
2974                 expected_error = EBUSY;
2975         else if (newsize < oldsize)
2976                 expected_error = EOVERFLOW;
2977         else if (ashift > oldvd->vdev_top->vdev_ashift)
2978                 expected_error = EDOM;
2979         else
2980                 expected_error = 0;
2981 
2982         spa_config_exit(spa, SCL_VDEV, FTAG);
2983 
2984         /*
2985          * Build the nvlist describing newpath.
2986          */
2987         root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0,
2988             ashift, 0, 0, 0, 1, replacing ? oldvd->vdev_isspecial : B_FALSE);
2989 
2990         error = spa_vdev_attach(spa, oldguid, root, replacing);
2991 
2992         nvlist_free(root);
2993 
2994         /*
2995          * If our parent was the replacing vdev, but the replace completed,
2996          * then instead of failing with ENOTSUP we may either succeed,
2997          * fail with ENODEV, or fail with EOVERFLOW.
2998          */
2999         if (expected_error == ENOTSUP &&
3000             (error == 0 || error == ENODEV || error == EOVERFLOW))
3001                 expected_error = error;
3002 
3003         /*
3004          * If someone grew the LUN, the replacement may be too small.
3005          */
3006         if (error == EOVERFLOW || error == EBUSY)
3007                 expected_error = error;
3008 
3009         /* XXX workaround 6690467 */
3010         if (error != expected_error && expected_error != EBUSY) {
3011                 fatal(0, "attach (%s %llu, %s %llu, %d) "
3012                     "returned %d, expected %d",
3013                     oldpath, oldsize, newpath,
3014                     newsize, replacing, error, expected_error);
3015         }
3016 
3017         mutex_exit(&ztest_vdev_lock);
3018 }
3019 
3020 /*
3021  * Callback function which expands the physical size of the vdev.
3022  */
3023 vdev_t *
3024 grow_vdev(vdev_t *vd, void *arg)
3025 {
3026         spa_t *spa = vd->vdev_spa;
3027         size_t *newsize = arg;
3028         size_t fsize;
3029         int fd;
3030 
3031         ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
3032         ASSERT(vd->vdev_ops->vdev_op_leaf);
3033 
3034         if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
3035                 return (vd);
3036 
3037         fsize = lseek(fd, 0, SEEK_END);
3038         (void) ftruncate(fd, *newsize);
3039 
 
3086          * still talking to the original vdev. It's possible this
3087          * vdev may have been detached/replaced while we were
3088          * trying to online it.
3089          */
3090         if (generation != spa->spa_config_generation) {
3091                 if (ztest_opts.zo_verbose >= 5) {
3092                         (void) printf("vdev configuration has changed, "
3093                             "guid %llu, state %llu, expected gen %llu, "
3094                             "got gen %llu\n",
3095                             (u_longlong_t)guid,
3096                             (u_longlong_t)tvd->vdev_state,
3097                             (u_longlong_t)generation,
3098                             (u_longlong_t)spa->spa_config_generation);
3099                 }
3100                 return (vd);
3101         }
3102         return (NULL);
3103 }
3104 
3105 /*
3106  * Callback function which checks that the given vdev is
3107  * - not part of a replacing group
3108  * - not being removed
3109  * - healthy
3110  */
3111 /* ARGSUSED */
3112 vdev_t *
3113 check_valid_vdev(vdev_t *vd, void *arg)
3114 {
3115         spa_t *spa = vd->vdev_spa;
3116 
3117         ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
3118         ASSERT(vd->vdev_ops->vdev_op_leaf);
3119 
3120         if (vd->vdev_parent->vdev_ops == &vdev_replacing_ops ||
3121             vd->vdev_removing || vd->vdev_state != VDEV_STATE_HEALTHY)
3122                 return (NULL);
3123 
3124         return (vd);
3125 }
3126 
3127 /*
3128  * Traverse the vdev tree calling the supplied function.
3129  * We continue to walk the tree until we either have walked all
3130  * children or we receive a non-NULL return from the callback.
3131  * If a NULL callback is passed, then we just return back the first
3132  * If a NULL callback is passed, then we just return the first
3133  */
3134 vdev_t *
3135 vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
3136 {
3137         if (vd->vdev_ops->vdev_op_leaf) {
3138                 if (func == NULL)
3139                         return (vd);
3140                 else
3141                         return (func(vd, arg));
3142         }
3143 
3144         for (uint_t c = 0; c < vd->vdev_children; c++) {
3145                 vdev_t *cvd = vd->vdev_child[c];
3146                 if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL)
3147                         return (cvd);
 
3150 }
3151 
3152 /*
3153  * Verify that dynamic LUN growth works as expected.
3154  */
3155 /* ARGSUSED */
3156 void
3157 ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
3158 {
3159         spa_t *spa = ztest_spa;
3160         vdev_t *vd, *tvd;
3161         metaslab_class_t *mc;
3162         metaslab_group_t *mg;
3163         size_t psize, newsize;
3164         uint64_t top;
3165         uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
3166 
3167         mutex_enter(&ztest_vdev_lock);
3168         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
3169 
3170         top = ztest_random_vdev_top(spa, B_TRUE);
3171 
3172         tvd = spa->spa_root_vdev->vdev_child[top];
3173         mg = tvd->vdev_mg;
3174         mc = mg->mg_class;
3175         old_ms_count = tvd->vdev_ms_count;
3176         old_class_space = metaslab_class_get_space(mc);
3177 
3178         /*
3179          * Determine the size of the first leaf vdev associated with
3180          * our top-level device.
3181          */
3182         vd = vdev_walk_tree(tvd, NULL, NULL);
3183         ASSERT3P(vd, !=, NULL);
3184         ASSERT(vd->vdev_ops->vdev_op_leaf);
3185 
3186         psize = vd->vdev_psize;
3187 
3188         /*
3189          * We only try to expand the vdev if it's healthy, less than 4x its
 
3241 
3242         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
3243 
3244         tvd = spa->spa_root_vdev->vdev_child[top];
3245         new_ms_count = tvd->vdev_ms_count;
3246         new_class_space = metaslab_class_get_space(mc);
3247 
3248         if (tvd->vdev_mg != mg || mg->mg_class != mc) {
3249                 if (ztest_opts.zo_verbose >= 5) {
3250                         (void) printf("Could not verify LUN expansion due to "
3251                             "intervening vdev offline or remove.\n");
3252                 }
3253                 spa_config_exit(spa, SCL_STATE, spa);
3254                 mutex_exit(&ztest_vdev_lock);
3255                 return;
3256         }
3257 
3258         /*
3259          * Make sure we were able to grow the vdev.
3260          */
3261         if (new_ms_count <= old_ms_count)
3262                 fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n",
3263                     old_ms_count, new_ms_count);
3264 
3265         /*
3266          * Make sure we were able to grow the pool.
3267          */
3268         if (new_class_space <= old_class_space)
3269                 fatal(0, "LUN expansion failed: class_space %llu <= %llu\n",
3270                     old_class_space, new_class_space);
3271 
3272         if (ztest_opts.zo_verbose >= 5) {
3273                 char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ];
3274 
3275                 nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf));
3276                 nicenum(new_class_space, newnumbuf, sizeof (newnumbuf));
3277                 (void) printf("%s grew from %s to %s\n",
3278                     spa->spa_name, oldnumbuf, newnumbuf);
3279         }
3280 
3281         spa_config_exit(spa, SCL_STATE, spa);
3282         mutex_exit(&ztest_vdev_lock);
3283 }
3284 
3285 /*
3286  * Verify that dmu_objset_{create,destroy,open,close} work as expected.
3287  */
3288 /* ARGSUSED */
3289 static void
3290 ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
 
4723 ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
4724 {
4725         zfs_prop_t proplist[] = {
4726                 ZFS_PROP_CHECKSUM,
4727                 ZFS_PROP_COMPRESSION,
4728                 ZFS_PROP_COPIES,
4729                 ZFS_PROP_DEDUP
4730         };
4731 
4732         rw_enter(&ztest_name_lock, RW_READER);
4733 
4734         for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++)
4735                 (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
4736                     ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
4737 
4738         rw_exit(&ztest_name_lock);
4739 }
4740 
4741 /* ARGSUSED */
4742 void
4743 ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
4744 {
4745         nvlist_t *props = NULL;
4746 
4747         rw_enter(&ztest_name_lock, RW_READER);
4748 
4749         (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
4750             ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
4751 
4752         VERIFY0(spa_prop_get(ztest_spa, &props));
4753 
4754         if (ztest_opts.zo_verbose >= 6)
4755                 dump_nvlist(props, 4);
4756 
4757         nvlist_free(props);
4758 
4759         rw_exit(&ztest_name_lock);
4760 }
4761 
4762 /* vdev and cos property tests */
4763 typedef enum {
4764         VDEV_PROP_UINT64,
4765         VDEV_PROP_STRING,
4766         COS_PROP_UINT64
4767 } ztest_prop_t;
4768 
4769 /* common functions */
4770 static vdev_t *
4771 ztest_get_random_vdev_leaf(spa_t *spa)
4772 {
4773         vdev_t *lvd = NULL, *tvd = NULL;
4774         uint64_t top = 0;
4775 
4776         spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
4777 
4778         for (;;) {
4779                 /* Pick a leaf of a random top-level vdev */
4780                 top = ztest_random_vdev_top(spa, B_TRUE);
4781                 tvd = spa->spa_root_vdev->vdev_child[top];
4782                 lvd = vdev_walk_tree(tvd, check_valid_vdev, NULL);
4783                 if (lvd == NULL) {
4784                         /*
4785                          * We cannot return NULL here, and there is no reason
4786                          * to crash.  Let the other threads finish their work;
4787                          * maybe next time we will find a leaf vdev.
4788                          */
4789                         spa_config_exit(spa, SCL_ALL, FTAG);
4790                         (void) poll(NULL, 0, 100);
4791                         spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
4792                         continue;
4793                 }
4794 
4795                 ASSERT(lvd->vdev_ops->vdev_op_leaf);
4796                 break;
4797         }
4798 
4799         spa_config_exit(spa, SCL_ALL, FTAG);
4800 
4801         return (lvd);
4802 }
4803 
4804 #define ZTEST_COS_NAME          "ztest_cos"
4805 
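     /*
      * Build an nvlist of the given vdev or cos properties, fill it with
      * random (uint64) or short string values, and apply it to the leaf vdev
      * (vdev properties) or to the named cos (cos properties).  Returns the
      * nvlist that was set so the caller can compare it against a subsequent
      * get, or NULL if the set failed with ENOSPC.
      */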
4806 /*ARGSUSED*/
4807 static nvlist_t *
4808 ztest_props_set(const vdev_t *lvd, const char *name, const ztest_prop_t t,
4809     const void *props, const size_t size)
4810 {
4811         spa_t *spa = ztest_spa;
4812         nvlist_t *sprops;
4813         int error = 0;
4814 
4815         VERIFY(0 == nvlist_alloc(&sprops, NV_UNIQUE_NAME, 0));
4816 
4817         for (int p = 0; p < size; p++) {
4818                 uint64_t ival;
4819                 char sval[16];
4820                 const char *pname =
4821                     (t == VDEV_PROP_UINT64 || t == VDEV_PROP_STRING) ?
4822                     vdev_prop_to_name(((vdev_prop_t *)props)[p]) :
4823                     cos_prop_to_name(((cos_prop_t *)props)[p]);
4824 
4825                 switch (t) {
4826                 case VDEV_PROP_UINT64:
4827                 case COS_PROP_UINT64:
4828                         /* a value in 1...10 is valid for all properties */
4829                         ival = ztest_random(10) + 1;
4830                         VERIFY(0 == nvlist_add_uint64(sprops, pname, ival));
4831                         break;
4832                 case VDEV_PROP_STRING:
4833                         /* use a well-known name for the cos property */
4834                         if (((vdev_prop_t *)props)[p] == VDEV_PROP_COS) {
4835                                 (void) snprintf(sval, 15, "%s", ZTEST_COS_NAME);
4836                         } else {
4837                                 /* any short string will do */
4838                                 (void) snprintf(sval, 15, "prop_value%d", p);
4839                         }
4840                         VERIFY(0 == nvlist_add_string(sprops, pname, sval));
4841                         break;
4842                 default:
4843                         /* unknown property */
4844                         error = EINVAL;
4845                         break;
4846                 }
4847         }
4848         VERIFY3U(0, ==, error);
4849 
4850         /* set the props */
4851         switch (t) {
4852         case VDEV_PROP_UINT64:
4853         case VDEV_PROP_STRING:
4854                 error = spa_vdev_prop_set(spa, lvd->vdev_guid, sprops);
4855                 break;
4856         case COS_PROP_UINT64:
4857                 error = spa_cos_prop_set(spa, name, sprops);
4858                 break;
4859         default:
4860                 error = EINVAL;
4861                 break;
4862         }
4863         if (error == ENOSPC) {
4864                 ztest_record_enospc(FTAG);
4865                 nvlist_free(sprops);
4866                 return (NULL);
4867         }
4868         ASSERT0(error);
4869         return (sprops);
4870 }
4871 
4872 static nvlist_t *
4873 ztest_props_get(const vdev_t *lvd, const char *name)
4874 {
4875         spa_t *spa = ztest_spa;
4876         nvlist_t *gprops = NULL;
4877         int error = 0;
4878 
4879         if (lvd)
4880                 error = spa_vdev_prop_get(spa, lvd->vdev_guid, &gprops);
4881         else
4882                 error = spa_cos_prop_get(spa, name, &gprops);
4883         if (error == ENOSPC) {
4884                 ztest_record_enospc(FTAG);
4885                 return (NULL);
4886         }
4887         ASSERT0(error);
4888         return (gprops);
4889 }
4890 
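     /*
      * Verify that every property in the "set" nvlist came back with an equal
      * value in the "get" nvlist, then free both lists.
      */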
4891 static void
4892 ztest_props_test(const ztest_prop_t t, const void *props, const size_t size,
4893     nvlist_t *sprops, nvlist_t *gprops)
4894 {
4895         for (int p = 0; p < size; p++) {
4896                 const char *pname =
4897                     (t == VDEV_PROP_UINT64 || t == VDEV_PROP_STRING) ?
4898                     vdev_prop_to_name(((vdev_prop_t *)props)[p]) :
4899                     cos_prop_to_name(((cos_prop_t *)props)[p]);
4900 
4901                 switch (t) {
4902                 case VDEV_PROP_UINT64:
4903                 case COS_PROP_UINT64:
4904                 {
4905                         uint64_t sival, gival;
4906                         VERIFY3U(0, ==, nvlist_lookup_uint64(sprops, pname,
4907                             &sival));
4908                         VERIFY3U(0, ==, nvlist_lookup_uint64(gprops, pname,
4909                             &gival));
4910                         VERIFY3U(gival, ==, sival);
4911                 }
4912                 break;
4913                 case VDEV_PROP_STRING:
4914                 {
4915                         char *ssval, *gsval;
4916                         VERIFY3U(0, ==, nvlist_lookup_string(sprops, pname,
4917                             &ssval));
4918                         VERIFY3U(0, ==, nvlist_lookup_string(gprops, pname,
4919                             &gsval));
4920                         VERIFY3U(0, ==, strcmp(ssval, gsval));
4921                 }
4922                 break;
4923                 default:
4924                         /* unknown property */
4925                         VERIFY(0);
4926                         break;
4927                 }
4928         }
4929 
4930         nvlist_free(sprops);
4931         nvlist_free(gprops);
4932 }
4933 
4934 static const cos_prop_t cprops_uint64[] = {
4935         COS_PROP_READ_MINACTIVE,
4936         COS_PROP_AREAD_MINACTIVE,
4937         COS_PROP_WRITE_MINACTIVE,
4938         COS_PROP_AWRITE_MINACTIVE,
4939         COS_PROP_SCRUB_MINACTIVE,
4940         COS_PROP_RESILVER_MINACTIVE,
4941         COS_PROP_READ_MAXACTIVE,
4942         COS_PROP_AREAD_MAXACTIVE,
4943         COS_PROP_WRITE_MAXACTIVE,
4944         COS_PROP_AWRITE_MAXACTIVE,
4945         COS_PROP_SCRUB_MAXACTIVE,
4946         COS_PROP_RESILVER_MAXACTIVE,
4947         COS_PROP_PREFERRED_READ
4948 };
4949 
4950 /* ARGSUSED */
4951 void
4952 ztest_cos_prop_get_set(ztest_ds_t *zd, uint64_t id)
4953 {
4954         spa_t *spa = ztest_spa;
4955         nvlist_t *sprops = NULL, *gprops = NULL, *cos_list = NULL;
4956         char cos_name[MAXCOSNAMELEN];
4957         const char *pname = NULL;
4958         char *sval = NULL;
4959         uint64_t cos_id = ztest_random(~0ULL), val = 0;
4960         vdev_t *lvd = NULL;
4961 
4962         (void) snprintf(cos_name, MAXCOSNAMELEN-1, "cos_%llu", cos_id);
4963 
4964         mutex_enter(&ztest_props_lock);
4965 
4966         VERIFY3U(0, ==, spa_alloc_cos(spa, cos_name, cos_id));
4967 
4968         sprops = ztest_props_set(NULL, cos_name,
4969             COS_PROP_UINT64, (void *)&cprops_uint64[0],
4970             sizeof (cprops_uint64) / sizeof (cprops_uint64[0]));
4971         gprops = ztest_props_get(NULL, cos_name);
4972         ztest_props_test(COS_PROP_UINT64, (void *)&cprops_uint64[0],
4973             sizeof (cprops_uint64) / sizeof (cprops_uint64[0]),
4974             sprops, gprops);
4975 
4976         VERIFY3U(0, ==, nvlist_alloc(&cos_list, NV_UNIQUE_NAME, 0));
4977         VERIFY3U(0, ==, spa_list_cos(spa, cos_list));
4978         VERIFY3U(0, ==, nvlist_lookup_uint64(cos_list, cos_name, &val));
4979         VERIFY3U(cos_id, ==, val);
4980         nvlist_free(cos_list);
4981 
4982         VERIFY3U(0, ==, spa_free_cos(spa, cos_name, B_FALSE));
4983         VERIFY3U(0, ==, nvlist_alloc(&cos_list, NV_UNIQUE_NAME, 0));
4984         VERIFY3U(0, ==, spa_list_cos(spa, cos_list));
4985         VERIFY3U(ENOENT, ==, nvlist_lookup_uint64(cos_list, cos_name, &val));
4986         nvlist_free(cos_list);
4987 
4988         /*
4989          * Forced spa_free_cos() test:
4990          * - allocate a cos, set the vdev's cos, then free it forcefully
4991          * - verify everything succeeds
4992          * - verify no cos property on vdev
4993          * - verify no cos descriptor remains
4994          */
4995         VERIFY3U(0, ==, spa_alloc_cos(spa, cos_name, cos_id));
4996 
4997         /* Make sure vdevs will stay in place */
4998         mutex_enter(&ztest_vdev_lock);
4999 
5000         lvd = ztest_get_random_vdev_leaf(spa);
5001 
5002         VERIFY(0 == nvlist_alloc(&sprops, NV_UNIQUE_NAME, 0));
5003 
5004         pname = vdev_prop_to_name(VDEV_PROP_COS);
5005         VERIFY3U(0, ==, nvlist_add_string(sprops, pname, cos_name));
5006         VERIFY3U(0, ==, spa_vdev_prop_set(spa, lvd->vdev_guid, sprops));
5007 
5008         VERIFY3U(0, ==, spa_free_cos(spa, cos_name, B_TRUE));
5009 
5010         VERIFY3U(0, ==, spa_vdev_prop_get(spa, lvd->vdev_guid, &gprops));
5011 
5012         mutex_exit(&ztest_vdev_lock);
5013 
5014         /* verify the vdev cos prop is gone */
5015         VERIFY3U(ENOENT, ==, nvlist_lookup_string(gprops, cos_name, &sval));
5016 
5017         /* verify the cos descriptor is gone */
5018         VERIFY3U(0, ==, nvlist_alloc(&cos_list, NV_UNIQUE_NAME, 0));
5019         VERIFY3U(0, ==, spa_list_cos(spa, cos_list));
5020         VERIFY3U(ENOENT, ==, nvlist_lookup_uint64(cos_list, cos_name, &val));
5021 
5022         mutex_exit(&ztest_props_lock);
5023 
5024         nvlist_free(cos_list);
5025 }
5026 
5027 /* vdev tests */
5028 static const vdev_prop_t vprops_uint64[] = {
5029         VDEV_PROP_READ_MINACTIVE,
5030         VDEV_PROP_AREAD_MINACTIVE,
5031         VDEV_PROP_WRITE_MINACTIVE,
5032         VDEV_PROP_AWRITE_MINACTIVE,
5033         VDEV_PROP_SCRUB_MINACTIVE,
5034         VDEV_PROP_RESILVER_MINACTIVE,
5035         VDEV_PROP_READ_MAXACTIVE,
5036         VDEV_PROP_AREAD_MAXACTIVE,
5037         VDEV_PROP_WRITE_MAXACTIVE,
5038         VDEV_PROP_AWRITE_MAXACTIVE,
5039         VDEV_PROP_SCRUB_MAXACTIVE,
5040         VDEV_PROP_RESILVER_MAXACTIVE,
5041         VDEV_PROP_PREFERRED_READ
5042 };
5043 static const vdev_prop_t vprops_string[] = {
5044         VDEV_PROP_COS,
5045         VDEV_PROP_SPAREGROUP
5046 };
5047 
5048 static void
5049 ztest_cos_free(spa_t *spa, vdev_t *lvd, const char *name)
5050 {
5051         nvlist_t *sprops = NULL;
5052         int error = 0;
5053         VERIFY(0 == nvlist_alloc(&sprops, NV_UNIQUE_NAME, 0));
5054         VERIFY(0 == nvlist_add_string(sprops,
5055             vdev_prop_to_name(VDEV_PROP_COS), ""));
5056         VERIFY3U(0, ==, spa_vdev_prop_set(spa, lvd->vdev_guid, sprops));
5057         /*
5058          * this can be called in cleanup code paths when we do not know
5059          * if CoS was allocated
5060          */
5061         error = spa_free_cos(spa, name, B_TRUE);
5062         if (error)
5063                 VERIFY3U(error, ==, ENOENT);
5064         nvlist_free(sprops);
5065 }
5066 
5067 /* ARGSUSED */
5068 void
5069 ztest_vdev_prop_get_set(ztest_ds_t *zd, uint64_t id)
5070 {
5071         spa_t *spa = ztest_spa;
5072         nvlist_t *sprops = NULL, *gprops = NULL;
5073         vdev_t *lvd = NULL;
5074         int error = 0;
5075         /* Make sure vdevs will stay in place */
5076         mutex_enter(&ztest_props_lock);
5077 
5078         mutex_enter(&ztest_vdev_lock);
5079 
5080         lvd = ztest_get_random_vdev_leaf(spa);
5081 
5082         /* Test uint64 properties */
5083         sprops = ztest_props_set(lvd, NULL, VDEV_PROP_UINT64,
5084             (void *)&vprops_uint64[0],
5085             sizeof (vprops_uint64) / sizeof (vprops_uint64[0]));
5086         gprops = ztest_props_get(lvd, NULL);
5087         ztest_props_test(VDEV_PROP_UINT64, (void *)&vprops_uint64[0],
5088             sizeof (vprops_uint64) / sizeof (vprops_uint64[0]), sprops, gprops);
5089 
5090         /* Test string properties */
5091         /* Allocate CoS descriptor to have vdev-set of cos succeed */
5092         error = spa_alloc_cos(spa, ZTEST_COS_NAME, 0);
5093         if (error)
5094                 VERIFY3U(error, ==, EEXIST);
5095 
5096         sprops = ztest_props_set(lvd, NULL, VDEV_PROP_STRING,
5097             (void *)&vprops_string[0],
5098             sizeof (vprops_string) / sizeof (vprops_string[0]));
5099         gprops = ztest_props_get(lvd, NULL);
5100         ztest_props_test(VDEV_PROP_STRING, (void *)&vprops_string[0],
5101             sizeof (vprops_string) / sizeof (vprops_string[0]), sprops, gprops);
5102 
5103         /* Done, free cos to avoid collisions with other tests */
5104         ztest_cos_free(spa, lvd, ZTEST_COS_NAME);
5105 
5106         mutex_exit(&ztest_vdev_lock);
5107 
5108         mutex_exit(&ztest_props_lock);
5109 }
5110 
5111 /* end vdev and cos property tests */
5112 
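     /*
      * E.g. user_release_one("pool/fs@snap", "tag") releases the user hold
      * named "tag" on that snapshot (the names here are purely illustrative).
      */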
5113 static int
5114 user_release_one(const char *snapname, const char *holdname)
5115 {
5116         nvlist_t *snaps, *holds;
5117         int error;
5118 
5119         snaps = fnvlist_alloc();
5120         holds = fnvlist_alloc();
5121         fnvlist_add_boolean(holds, holdname);
5122         fnvlist_add_nvlist(snaps, snapname, holds);
5123         fnvlist_free(holds);
5124         error = dsl_dataset_user_release(snaps, NULL);
5125         fnvlist_free(snaps);
5126         return (error);
5127 }
5128 
5129 /*
5130  * Test snapshot hold/release and deferred destroy.
5131  */
5132 void
 
5323                 /*
5324                  * If the top-level vdev needs to be resilvered
5325                  * then we only allow faults on the device that is
5326                  * resilvering.
5327                  */
5328                 if (vd0 != NULL && maxfaults != 1 &&
5329                     (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
5330                     vd0->vdev_resilver_txg != 0)) {
5331                         /*
5332                          * Make vd0 explicitly claim to be unreadable,
5333                          * or unwriteable, or reach behind its back
5334                          * and close the underlying fd.  We can do this if
5335                          * maxfaults == 0 because we'll fail and reexecute,
5336                          * and we can do it if maxfaults >= 2 because we'll
5337                          * have enough redundancy.  If maxfaults == 1, the
5338                          * combination of this with injection of random data
5339                          * corruption below exceeds the pool's fault tolerance.
5340                          */
5341                         vdev_file_t *vf = vd0->vdev_tsd;
5342 
5343                         if (vf != NULL && ztest_random(3) == 0) {
5344                                 (void) close(vf->vf_vnode->v_fd);
5345                                 vf->vf_vnode->v_fd = -1;
5346                         } else if (ztest_random(2) == 0) {
5347                                 vd0->vdev_cant_read = B_TRUE;
5348                         } else {
5349                                 vd0->vdev_cant_write = B_TRUE;
5350                         }
5351                         guid0 = vd0->vdev_guid;
5352                 }
5353         } else {
5354                 /*
5355                  * Inject errors on an l2cache device.
5356                  */
5357                 spa_aux_vdev_t *sav = &spa->spa_l2cache;
5358 
5359                 if (sav->sav_count == 0) {
5360                         spa_config_exit(spa, SCL_STATE, FTAG);
5361                         rw_exit(&ztest_name_lock);
5362                         return;
 
5796 
5797         /*
5798          * Get the pool's configuration and guid.
5799          */
5800         VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
5801 
5802         /*
5803          * Kick off a scrub to tickle scrub/export races.
5804          */
5805         if (ztest_random(2) == 0)
5806                 (void) spa_scan(spa, POOL_SCAN_SCRUB);
5807 
5808         pool_guid = spa_guid(spa);
5809         spa_close(spa, FTAG);
5810 
5811         ztest_walk_pool_directory("pools before export");
5812 
5813         /*
5814          * Export it.
5815          */
5816         VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE,
5817             B_FALSE));
5818 
5819         ztest_walk_pool_directory("pools after export");
5820 
5821         /*
5822          * Try to import it.
5823          */
5824         newconfig = spa_tryimport(config);
5825         ASSERT(newconfig != NULL);
5826         nvlist_free(newconfig);
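             /*
              * spa_tryimport() only probes the exported config and hands back
              * a descriptive nvlist; nothing is imported yet, so the real
              * import under the new name still has to happen below.
              */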
5827 
5828         /*
5829          * Import it under the new name.
5830          */
5831         error = spa_import(newname, config, NULL, 0);
5832         if (error != 0) {
5833                 dump_nvlist(config, 0);
5834                 fatal(B_FALSE, "couldn't import pool %s as %s: error %u",
5835                     oldname, newname, error);
5836         }
5837 
 
6116 }
6117 
6118 /*
6119  * Kick off threads to run tests on all datasets in parallel.
6120  */
6121 static void
6122 ztest_run(ztest_shared_t *zs)
6123 {
6124         thread_t *tid;
6125         spa_t *spa;
6126         objset_t *os;
6127         thread_t resume_tid;
6128         int error;
6129 
6130         ztest_exiting = B_FALSE;
6131 
6132         /*
6133          * Initialize parent/child shared state.
6134          */
6135         mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
6136         mutex_init(&ztest_props_lock, NULL, USYNC_THREAD, NULL);
6137         rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
6138 
6139         zs->zs_thread_start = gethrtime();
6140         zs->zs_thread_stop =
6141             zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC;
6142         zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop);
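             /*
              * With probability zo_killrate percent, pull the kill time back
              * to a random point within the last passtime interval before
              * the stop time; otherwise it stays equal to zs_thread_stop.
              */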
6143         zs->zs_thread_kill = zs->zs_thread_stop;
6144         if (ztest_random(100) < ztest_opts.zo_killrate) {
6145                 zs->zs_thread_kill -=
6146                     ztest_random(ztest_opts.zo_passtime * NANOSEC);
6147         }
6148 
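             /*
              * zcl tracks the DMU commit callbacks registered by the tests
              * so that they can be verified as they fire.
              */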
6149         mutex_init(&zcl.zcl_callbacks_lock, NULL, USYNC_THREAD, NULL);
6150 
6151         list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
6152             offsetof(ztest_cb_data_t, zcd_node));
6153 
6154         /*
6155          * Open our pool.
6156          */
 
6406 
6407         timebuf[0] = '\0';
6408 
6409         if (d)
6410                 (void) sprintf(timebuf,
6411                     "%llud%02lluh%02llum%02llus", d, h, m, s);
6412         else if (h)
6413                 (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s);
6414         else if (m)
6415                 (void) sprintf(timebuf, "%llum%02llus", m, s);
6416         else
6417                 (void) sprintf(timebuf, "%llus", s);
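             /*
              * The result looks like "1d02h03m04s", "5h06m07s", "42m10s" or
              * "9s", depending on the largest nonzero unit.
              */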
6418 }
6419 
6420 static nvlist_t *
6421 make_random_props(void)
6422 {
6423         nvlist_t *props;
6424 
6425         VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
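             /*
              * Pick one of five pool-property combinations at random:
              * defaults only, autoreplace, special vdev with small data
              * redirected to the meta device, special vdev plus autoreplace,
              * or special vdev with metadata placed on the meta device.
              */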
6426         switch (ztest_random(5)) {
6427         case 0:
6428                 break;
6429         case 1:
6430                 VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
6431                 break;
6432         case 2:
6433                 VERIFY(nvlist_add_uint64(props, "enablespecial", 1) == 0);
6434                 VERIFY(nvlist_add_uint64(props, "small_data_to_metadev", 1) ==
6435                     0);
6436                 break;
6437         case 3:
6438                 VERIFY(nvlist_add_uint64(props, "enablespecial", 1) == 0);
6439                 VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
6440                 break;
6441         case 4:
6442                 VERIFY(nvlist_add_uint64(props, "enablespecial", 1) == 0);
6443                 VERIFY(nvlist_add_uint64(props, "meta_placement", 1) == 0);
6444                 VERIFY(nvlist_add_uint64(props, "zfs_meta_to_metadev", 1) == 0);
6445                 break;
6446         }
6447 
6448         return (props);
6449 }
6450 
6451 static void
6452 set_random_ds_props(char *dsname)
6453 {
6454         uint64_t value = META_PLACEMENT_OFF;
6455 
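             /* Choose one of the three metadata-placement modes at random. */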
6456         switch (ztest_random(3)) {
6457         case 0:
6458                 break;
6459         case 1:
6460                 value = META_PLACEMENT_ON;
6461                 break;
6462         case 2:
6463                 value = META_PLACEMENT_DUAL;
6464                 break;
6465         }
6466 
6467         VERIFY(ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_ZPL_META_TO_METADEV,
6468             value, B_TRUE) == 0);
6469 }
6470 
6471 /*
6472  * Create a storage pool with the given name and initial vdev size.
6473  * Then test spa_freeze() functionality.
6474  */
6475 static void
6476 ztest_init(ztest_shared_t *zs)
6477 {
6478         spa_t *spa;
6479         nvlist_t *nvroot, *props;
6480 
6481         mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
             mutex_init(&ztest_props_lock, NULL, USYNC_THREAD, NULL);
6482         rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
6483 
6484         kernel_init(FREAD | FWRITE);
6485 
6486         /*
6487          * Create the storage pool.
6488          */
6489         (void) spa_destroy(ztest_opts.zo_pool);
6490         ztest_shared->zs_vdev_next_leaf = 0;
6491         zs->zs_splits = 0;
6492         zs->zs_mirrors = ztest_opts.zo_mirrors;
6493         nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
6494             0, ztest_opts.zo_raidz, zs->zs_mirrors, 1, B_FALSE);
6495         /*
6496          * Add special vdevs
6497          */
6498         add_special_vdevs(nvroot, ztest_opts.zo_vdev_size, ztest_opts.zo_raidz,
6499             zs->zs_mirrors, 1);
6500 
6501         props = make_random_props();
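             /*
              * Ask for every supported feature to be enabled at creation
              * time; a feature@<name> property value of 0 corresponds to
              * "enabled".
              */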
6502         for (int i = 0; i < SPA_FEATURES; i++) {
6503                 char buf[1024];
6504                 (void) snprintf(buf, sizeof (buf), "feature@%s",
6505                     spa_feature_table[i].fi_uname);
6506                 VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
6507         }
6508         VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
6509         nvlist_free(nvroot);
6510         nvlist_free(props);
6511 
6512         VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
6513         zs->zs_metaslab_sz =
6514             1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
6515 
6516         /* set props on the root dataset */
6517         set_random_ds_props(ztest_opts.zo_pool);
6518 
6519         spa_close(spa, FTAG);
6520 
6521         kernel_fini();
6522 
6523         ztest_run_zdb(ztest_opts.zo_pool);
6524 
6525         ztest_freeze();
6526 
6527         ztest_run_zdb(ztest_opts.zo_pool);
6528 
6529         rw_destroy(&ztest_name_lock);
6530         mutex_destroy(&ztest_props_lock);
6531         mutex_destroy(&ztest_vdev_lock);
6532 }
6533 
6534 static void
6535 setup_data_fd(void)
6536 {
6537         static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX";
6538 
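             /*
              * Create an anonymous temporary file: unlink it immediately so
              * it disappears once the descriptor is closed, while remaining
              * usable through ztest_fd_data until then.
              */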
6539         ztest_fd_data = mkstemp(ztest_name_data);
6540         ASSERT3S(ztest_fd_data, >=, 0);
6541         (void) unlink(ztest_name_data);
6542 }
6543 
6544 
6545 static int
6546 shared_data_size(ztest_shared_hdr_t *hdr)
6547 {
6548         int size;
6549 
6550         size = hdr->zh_hdr_size;
 