Print this page
NEX-19083 backport OS-7314 zil_commit should omit cache thrash
9962 zil_commit should omit cache thrash
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
NEX-18069 Unable to get/set VDEV_PROP_RESILVER_MAXACTIVE/VDEV_PROP_RESILVER_MINACTIVE props
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-9752 backport illumos 6950 ARC should cache compressed data
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
6950 ARC should cache compressed data
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-5802 ztest: assertion failed for thread 0xe092a240, thread-id 1173: error == 0 (0x16 == 0x0), file ../ztest.c, line 4816
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-5318 Cleanup specialclass property (obsolete, not used) and fix related meta-to-special case
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
NEX-4582 update wrc test cases for allow to use write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
5818 zfs {ref}compressratio is incorrect with 4k sector size
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Richard Elling <richard.elling@richardelling.com>
Reviewed by: Steven Hartland <killing@multiplay.co.uk>
Reviewed by: Don Brady <dev.fs.zfs@gmail.com>
Approved by: Albert Lee <trisk@omniti.com>
NEX-3079 port illumos ARC improvements
OS-115 Heap leaks related to OS-114 and SUP-577
OS-114 Heap leak when exporting/destroying pools with CoS
OS-103 handle CoS descriptor persistent references across vdev operations
OS-102 add man page info and tests for vdev/CoS properties and ZFS meta features
OS-80 support for vdev and CoS properties for the new I/O scheduler
OS-95 lint warning introduced by OS-61
Remaining fixes for the illumos merge
Make special vdev subtree topology the same as regular vdev subtree to simplify testcase setup
Fixup merge issues
Issue #38: various ztest failures
Fixup merge results
re 13748 added zpool export -c option
zpool export -c command exports specified pool while keeping its latest
configuration in the cache file for subsequent zpool import -c.
re #13232 rb4433 ztest assertion failed: oldvd->vdev_ops == &vdev_raidz_ops
re #8279 rb3915 need a mechanism to notify NMS about ZFS config changes (fix lint -courtesy of Yuri Pankov)
re #12584 rb4049 zfsxx latest code merge (fix lint - courtesy of Yuri Pankov)
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code

@@ -19,11 +19,11 @@
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Joyent, Inc.
  * Copyright 2017 RackTop Systems.
  */

@@ -111,10 +111,11 @@
 #include <sys/dsl_scan.h>
 #include <sys/zio_checksum.h>
 #include <sys/refcount.h>
 #include <sys/zfeature.h>
 #include <sys/dsl_userhold.h>
+#include <libzfs.h>
 #include <sys/abd.h>
 #include <stdio.h>
 #include <stdio_ext.h>
 #include <stdlib.h>
 #include <unistd.h>

@@ -125,10 +126,12 @@
 #include <math.h>
 #include <sys/fs/zfs.h>
 #include <libnvpair.h>
 #include <libcmdutils.h>
 
+#include <sys/special.h>
+
 static int ztest_fd_data = -1;
 static int ztest_fd_rand = -1;
 
 typedef struct ztest_shared_hdr {
         uint64_t        zh_hdr_size;

@@ -326,10 +329,12 @@
 ztest_func_t ztest_dmu_prealloc;
 ztest_func_t ztest_fzap;
 ztest_func_t ztest_dmu_snapshot_create_destroy;
 ztest_func_t ztest_dsl_prop_get_set;
 ztest_func_t ztest_spa_prop_get_set;
+ztest_func_t ztest_vdev_prop_get_set;
+ztest_func_t ztest_cos_prop_get_set;
 ztest_func_t ztest_spa_create_destroy;
 ztest_func_t ztest_fault_inject;
 ztest_func_t ztest_ddt_repair;
 ztest_func_t ztest_dmu_snapshot_hold;
 ztest_func_t ztest_spa_rename;

@@ -340,12 +345,10 @@
 ztest_func_t ztest_vdev_add_remove;
 ztest_func_t ztest_vdev_aux_add_remove;
 ztest_func_t ztest_split_pool;
 ztest_func_t ztest_reguid;
 ztest_func_t ztest_spa_upgrade;
-ztest_func_t ztest_device_removal;
-ztest_func_t ztest_remap_blocks;
 
 uint64_t zopt_always = 0ULL * NANOSEC;          /* all the time */
 uint64_t zopt_incessant = 1ULL * NANOSEC / 10;  /* every 1/10 second */
 uint64_t zopt_often = 1ULL * NANOSEC;           /* every second */
 uint64_t zopt_sometimes = 10ULL * NANOSEC;      /* every 10 seconds */

@@ -363,10 +366,12 @@
         { ztest_zil_remount,                    1,      &zopt_sometimes },
         { ztest_dmu_read_write_zcopy,           1,      &zopt_often     },
         { ztest_dmu_objset_create_destroy,      1,      &zopt_often     },
         { ztest_dsl_prop_get_set,               1,      &zopt_often     },
         { ztest_spa_prop_get_set,               1,      &zopt_sometimes },
+        { ztest_vdev_prop_get_set,              1,      &zopt_often     },
+        { ztest_cos_prop_get_set,               1,      &zopt_often     },
 #if 0
         { ztest_dmu_prealloc,                   1,      &zopt_sometimes },
 #endif
         { ztest_fzap,                           1,      &zopt_sometimes },
         { ztest_dmu_snapshot_create_destroy,    1,      &zopt_sometimes },

@@ -383,12 +388,10 @@
         { ztest_vdev_LUN_growth,                1,      &zopt_rarely    },
         { ztest_vdev_add_remove,                1,
             &ztest_opts.zo_vdevtime                             },
         { ztest_vdev_aux_add_remove,            1,
             &ztest_opts.zo_vdevtime                             },
-        { ztest_device_removal,                 1,      &zopt_sometimes },
-        { ztest_remap_blocks,                   1,      &zopt_sometimes }
 };
 
 #define ZTEST_FUNCS     (sizeof (ztest_info) / sizeof (ztest_info_t))
 
 /*

@@ -432,10 +435,16 @@
 static ztest_ds_t *ztest_ds;
 
 static kmutex_t ztest_vdev_lock;
 
 /*
+ * Make sure the "set/get/test" test does not interfere with other
+ * concurrent tests on the same vdev/cos property
+ */
+static kmutex_t ztest_props_lock;
+
+/*
  * The ztest_name_lock protects the pool and dataset namespace used by
  * the individual tests. To modify the namespace, consumers must grab
  * this lock as writer. Grabbing the lock as reader will ensure that the
  * namespace does not change while the lock is held.
  */

@@ -787,14 +796,14 @@
         zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
         zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
 
         /*
          * Before we kill off ztest, make sure that the config is updated.
-         * See comment above spa_write_cachefile().
+         * See comment above spa_config_sync().
          */
         mutex_enter(&spa_namespace_lock);
-        spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE);
+        spa_config_sync(ztest_spa, B_FALSE, B_FALSE);
         mutex_exit(&spa_namespace_lock);
 
         zfs_dbgmsg_print(FTAG);
         (void) kill(getpid(), SIGKILL);
 }

@@ -829,11 +838,12 @@
                 return (SPA_MINBLOCKSHIFT + ztest_random(5));
         return (ztest_opts.zo_ashift);
 }
 
 static nvlist_t *
-make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
+make_vdev_file(char *path, char *aux, char *pool, size_t size,
+    uint64_t ashift, boolean_t is_special)
 {
         char pathbuf[MAXPATHLEN];
         uint64_t vdev;
         nvlist_t *file;
 

@@ -860,35 +870,38 @@
         if (size != 0) {
                 int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
                 if (fd == -1)
                         fatal(1, "can't open %s", path);
                 if (ftruncate(fd, size) != 0)
-                        fatal(1, "can't ftruncate %s", path);
+                        fatal(1, "can't ftruncate %s to %lld", path, size);
                 (void) close(fd);
         }
 
         VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
         VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
         VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
         VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
-
+        VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_IS_SPECIAL, is_special)
+            == 0);
         return (file);
 }
 
 static nvlist_t *
 make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
-    uint64_t ashift, int r)
+    uint64_t ashift, int r, boolean_t is_special)
 {
         nvlist_t *raidz, **child;
         int c;
 
         if (r < 2)
-                return (make_vdev_file(path, aux, pool, size, ashift));
+                return (make_vdev_file(path, aux, pool, size, ashift,
+                    is_special));
         child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);
 
         for (c = 0; c < r; c++)
-                child[c] = make_vdev_file(path, aux, pool, size, ashift);
+                child[c] = make_vdev_file(path, aux, pool, size, ashift,
+                    B_FALSE);
 
         VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
         VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
             VDEV_TYPE_RAIDZ) == 0);
         VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,

@@ -904,28 +917,32 @@
         return (raidz);
 }
 
 static nvlist_t *
 make_vdev_mirror(char *path, char *aux, char *pool, size_t size,
-    uint64_t ashift, int r, int m)
+    uint64_t ashift, int r, int m, boolean_t is_special)
 {
         nvlist_t *mirror, **child;
         int c;
 
         if (m < 1)
-                return (make_vdev_raidz(path, aux, pool, size, ashift, r));
+                return (make_vdev_raidz(path, aux, pool, size, ashift, r,
+                    is_special));
 
         child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
 
         for (c = 0; c < m; c++)
-                child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r);
+                child[c] = make_vdev_raidz(path, aux, pool, size, ashift,
+                    r, B_FALSE);
 
         VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
         VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
             VDEV_TYPE_MIRROR) == 0);
         VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
             child, m) == 0);
+        VERIFY(nvlist_add_uint64(mirror, ZPOOL_CONFIG_IS_SPECIAL, is_special)
+            == 0);
 
         for (c = 0; c < m; c++)
                 nvlist_free(child[c]);
 
         umem_free(child, m * sizeof (nvlist_t *));

@@ -933,11 +950,11 @@
         return (mirror);
 }
 
 static nvlist_t *
 make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift,
-    int log, int r, int m, int t)
+    int log, int r, int m, int t, boolean_t special)
 {
         nvlist_t *root, **child;
         int c;
 
         ASSERT(t > 0);

@@ -944,11 +961,11 @@
 
         child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
 
         for (c = 0; c < t; c++) {
                 child[c] = make_vdev_mirror(path, aux, pool, size, ashift,
-                    r, m);
+                    r, m, special);
                 VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
                     log) == 0);
         }
 
         VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);

@@ -963,10 +980,66 @@
 
         return (root);
 }
 
 /*
+ * Append "t" special top-level vdevs to the children array of "root".
+ * Each one comes from make_vdev_mirror(), which is handed "size", "r" and
+ * "m" and told to set the special flag; nothing happens if "m" or "t" is 0.
+ */
+static void
+add_special_vdevs(nvlist_t *root, size_t size, int r, int m, int t)
+{
+        nvlist_t **special, **existing = NULL, **merged;
+        unsigned int nexisting = 0;
+        int i, total;
+
+        if (m == 0 || t == 0)
+                return;
+
+        /* Build the new top-level vdevs, each one flagged as special. */
+        special = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
+        for (i = 0; i < t; i++) {
+                special[i] = make_vdev_mirror(NULL, NULL, NULL, size, 0,
+                    r, m, B_TRUE);
+        }
+
+        /*
+         * Grow the children array of the root:
+         *  - look up the children that are already there
+         *  - allocate an array with room for the old and the new children
+         *  - duplicate the old children into it, then append the new ones
+         *  - replace the children nvlist array of the root with the result
+         */
+        VERIFY(nvlist_lookup_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
+            &existing, &nexisting) == 0);
+
+        total = nexisting + t;
+        merged = umem_alloc(total * sizeof (nvlist_t *), UMEM_NOFAIL);
+
+        for (i = 0; i < nexisting; i++) {
+                VERIFY(nvlist_dup(existing[i], &merged[i], 0) == 0);
+        }
+        for (; i < total; i++)
+                merged[i] = special[i - nexisting];
+
+        VERIFY(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
+            merged, total) == 0);
+
+        /*
+         * "merged" holds the duplicates of the old children as well as the
+         * new special vdevs, so freeing its elements releases every nvlist
+         * created here.
+         */
+        for (i = 0; i < total; i++)
+                nvlist_free(merged[i]);
+
+        umem_free(special, t * sizeof (nvlist_t *));
+        umem_free(merged, total * sizeof (nvlist_t *));
+}
+
+/*
  * Find a random spa version. Returns back a random spa version in the
  * range [initial_version, SPA_VERSION_FEATURES].
  */
 static uint64_t
 ztest_random_spa_version(uint64_t initial_version)

@@ -1017,11 +1090,11 @@
         ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
 
         do {
                 top = ztest_random(rvd->vdev_children);
                 tvd = rvd->vdev_child[top];
-        } while (!vdev_is_concrete(tvd) || (tvd->vdev_islog && !log_ok) ||
+        } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) ||
             tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
 
         return (top);
 }
 

@@ -1832,10 +1905,11 @@
 
 /*
  * ZIL get_data callbacks
  */
 
+/* ARGSUSED */
 static void
 ztest_get_done(zgd_t *zgd, int error)
 {
         ztest_ds_t *zd = zgd->zgd_private;
         uint64_t object = zgd->zgd_rl->rl_object;

@@ -1844,13 +1918,10 @@
                 dmu_buf_rele(zgd->zgd_db, zgd);
 
         ztest_range_unlock(zgd->zgd_rl);
         ztest_object_unlock(zd, object);
 
-        if (error == 0 && zgd->zgd_bp)
-                zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
-
         umem_free(zgd, sizeof (*zgd));
 }
 
 static int
 ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,

@@ -2374,29 +2445,32 @@
         nvlist_t *nvroot;
 
         /*
          * Attempt to create using a bad file.
          */
-        nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
+        nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1,
+            B_FALSE);
         VERIFY3U(ENOENT, ==,
             spa_create("ztest_bad_file", nvroot, NULL, NULL));
         nvlist_free(nvroot);
 
         /*
          * Attempt to create using a bad mirror.
          */
-        nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
+        nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1,
+            B_FALSE);
         VERIFY3U(ENOENT, ==,
             spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
         nvlist_free(nvroot);
 
         /*
          * Attempt to create an existing pool.  It shouldn't matter
          * what's in the nvroot; we should fail with EEXIST.
          */
         rw_enter(&ztest_name_lock, RW_READER);
-        nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
+        nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1,
+            B_FALSE);
         VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
         nvlist_free(nvroot);
         VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
         VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
         spa_close(spa, FTAG);

@@ -2421,11 +2495,11 @@
          * Clean up from previous runs.
          */
         (void) spa_destroy(name);
 
         nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
-            0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
+            0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1, B_FALSE);
 
         /*
          * If we're configuring a RAIDZ device then make sure that the
          * the initial version is capable of supporting that feature.
          */

@@ -2565,11 +2639,11 @@
                  * Make 1/4 of the devices be log devices.
                  */
                 nvroot = make_vdev_root(NULL, NULL, NULL,
                     ztest_opts.zo_vdev_size, 0,
                     ztest_random(4) == 0, ztest_opts.zo_raidz,
-                    zs->zs_mirrors, 1);
+                    zs->zs_mirrors, 1, B_FALSE);
 
                 error = spa_vdev_add(spa, nvroot);
                 nvlist_free(nvroot);
 
                 if (error == ENOSPC)

@@ -2640,11 +2714,11 @@
         if (guid == 0) {
                 /*
                  * Add a new device.
                  */
                 nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
-                    (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
+                    (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1, B_FALSE);
                 error = spa_vdev_add(spa, nvroot);
                 if (error != 0)
                         fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
                 nvlist_free(nvroot);
         } else {

@@ -2786,25 +2860,13 @@
         int error, expected_error;
 
         mutex_enter(&ztest_vdev_lock);
         leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
 
-        spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+        spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 
         /*
-         * If a vdev is in the process of being removed, its removal may
-         * finish while we are in progress, leading to an unexpected error
-         * value.  Don't bother trying to attach while we are in the middle
-         * of removal.
-         */
-        if (spa->spa_vdev_removal != NULL) {
-                spa_config_exit(spa, SCL_ALL, FTAG);
-                mutex_exit(&ztest_vdev_lock);
-                return;
-        }
-
-        /*
          * Decide whether to do an attach or a replace.
          */
         replacing = ztest_random(2);
 
         /*

@@ -2851,11 +2913,11 @@
 
         /*
          * If oldvd has siblings, then half of the time, detach it.
          */
         if (oldvd_has_siblings && ztest_random(2) == 0) {
-                spa_config_exit(spa, SCL_ALL, FTAG);
+                spa_config_exit(spa, SCL_VDEV, FTAG);
                 error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
                 if (error != 0 && error != ENODEV && error != EBUSY &&
                     error != ENOTSUP)
                         fatal(0, "detach (%s) returned %d", oldpath, error);
                 mutex_exit(&ztest_vdev_lock);

@@ -2878,14 +2940,10 @@
                         newpath[strlen(newpath) - 1] = 'b';
                 newvd = vdev_lookup_by_path(rvd, newpath);
         }
 
         if (newvd) {
-                /*
-                 * Reopen to ensure the vdev's asize field isn't stale.
-                 */
-                vdev_reopen(newvd);
                 newsize = vdev_get_min_asize(newvd);
         } else {
                 /*
                  * Make newsize a little bigger or smaller than oldsize.
                  * If it's smaller, the attach should fail.

@@ -2919,17 +2977,17 @@
         else if (ashift > oldvd->vdev_top->vdev_ashift)
                 expected_error = EDOM;
         else
                 expected_error = 0;
 
-        spa_config_exit(spa, SCL_ALL, FTAG);
+        spa_config_exit(spa, SCL_VDEV, FTAG);
 
         /*
          * Build the nvlist describing newpath.
          */
         root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0,
-            ashift, 0, 0, 0, 1);
+            ashift, 0, 0, 0, 1, replacing ? oldvd->vdev_isspecial : B_FALSE);
 
         error = spa_vdev_attach(spa, oldguid, root, replacing);
 
         nvlist_free(root);
 

@@ -2957,30 +3015,10 @@
         }
 
         mutex_exit(&ztest_vdev_lock);
 }
 
-/* ARGSUSED */
-void
-ztest_device_removal(ztest_ds_t *zd, uint64_t id)
-{
-        spa_t *spa = ztest_spa;
-        vdev_t *vd;
-        uint64_t guid;
-
-        mutex_enter(&ztest_vdev_lock);
-
-        spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
-        vd = vdev_lookup_top(spa, ztest_random_vdev_top(spa, B_FALSE));
-        guid = vd->vdev_guid;
-        spa_config_exit(spa, SCL_VDEV, FTAG);
-
-        (void) spa_vdev_remove(spa, guid, B_FALSE);
-
-        mutex_exit(&ztest_vdev_lock);
-}
-
 /*
  * Callback function which expands the physical size of the vdev.
  */
 vdev_t *
 grow_vdev(vdev_t *vd, void *arg)

@@ -3063,10 +3101,32 @@
         }
         return (NULL);
 }
 
 /*
+ * Callback for vdev_walk_tree(): hand back the leaf vdev "vd" only when it
+ * is healthy, is not being removed and is not a child of a replacing
+ * vdev.  Any other leaf yields NULL.  The caller is expected to hold
+ * SCL_STATE as reader (asserted below).
+ */
+/* ARGSUSED */
+vdev_t *
+check_valid_vdev(vdev_t *vd, void *arg)
+{
+        ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_READER) ==
+            SCL_STATE);
+        ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+        if (vd->vdev_parent->vdev_ops == &vdev_replacing_ops)
+                return (NULL);
+        if (vd->vdev_removing || vd->vdev_state != VDEV_STATE_HEALTHY)
+                return (NULL);
+
+        return (vd);
+}
+
+/*
  * Traverse the vdev tree calling the supplied function.
  * We continue to walk the tree until we either have walked all
  * children or we receive a non-NULL return from the callback.
  * If a NULL callback is passed, then we just return back the first
  * leaf vdev we encounter.

@@ -3105,22 +3165,10 @@
         uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
 
         mutex_enter(&ztest_vdev_lock);
         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
 
-        /*
-         * If there is a vdev removal in progress, it could complete while
-         * we are running, in which case we would not be able to verify
-         * that the metaslab_class space increased (because it decreases
-         * when the device removal completes).
-         */
-        if (spa->spa_vdev_removal != NULL) {
-                spa_config_exit(spa, SCL_STATE, FTAG);
-                mutex_exit(&ztest_vdev_lock);
-                return;
-        }
-
         top = ztest_random_vdev_top(spa, B_TRUE);
 
         tvd = spa->spa_root_vdev->vdev_child[top];
         mg = tvd->vdev_mg;
         mc = mg->mg_class;

@@ -3208,22 +3256,20 @@
         }
 
         /*
          * Make sure we were able to grow the vdev.
          */
-        if (new_ms_count <= old_ms_count) {
-                fatal(0, "LUN expansion failed: ms_count %llu < %llu\n",
+        if (new_ms_count <= old_ms_count)
+                fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n",
                     old_ms_count, new_ms_count);
-        }
 
         /*
          * Make sure we were able to grow the pool.
          */
-        if (new_class_space <= old_class_space) {
-                fatal(0, "LUN expansion failed: class_space %llu < %llu\n",
+        if (new_class_space <= old_class_space)
+                fatal(0, "LUN expansion failed: class_space %llu <= %llu\n",
                     old_class_space, new_class_space);
-        }
 
         if (ztest_opts.zo_verbose >= 5) {
                 char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ];
 
                 nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf));

@@ -4692,24 +4738,10 @@
         rw_exit(&ztest_name_lock);
 }
 
 /* ARGSUSED */
 void
-ztest_remap_blocks(ztest_ds_t *zd, uint64_t id)
-{
-        rw_enter(&ztest_name_lock, RW_READER);
-
-        int error = dmu_objset_remap_indirects(zd->zd_name);
-        if (error == ENOSPC)
-                error = 0;
-        ASSERT0(error);
-
-        rw_exit(&ztest_name_lock);
-}
-
-/* ARGSUSED */
-void
 ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
 {
         nvlist_t *props = NULL;
 
         rw_enter(&ztest_name_lock, RW_READER);

@@ -4725,10 +4757,361 @@
         nvlist_free(props);
 
         rw_exit(&ztest_name_lock);
 }
 
+/* vdev and cos property tests: the kind of property array being exercised */
+typedef enum {
+        VDEV_PROP_UINT64,       /* vdev properties with uint64 values */
+        VDEV_PROP_STRING,       /* vdev properties with string values */
+        COS_PROP_UINT64         /* cos properties with uint64 values */
+} ztest_prop_t;
+
+/* common functions */
+/*
+ * Return a leaf vdev accepted by check_valid_vdev() from a randomly chosen
+ * top-level vdev (log devices included).  Retries until one is found, so
+ * the result is never NULL.  The config lock is not held on return.
+ */
+static vdev_t *
+ztest_get_random_vdev_leaf(spa_t *spa)
+{
+        vdev_t *leaf;
+
+        spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
+
+        for (;;) {
+                uint64_t idx = ztest_random_vdev_top(spa, B_TRUE);
+                vdev_t *top = spa->spa_root_vdev->vdev_child[idx];
+
+                leaf = vdev_walk_tree(top, check_valid_vdev, NULL);
+                if (leaf != NULL)
+                        break;
+
+                /*
+                 * No usable leaf under this top-level vdev: release the
+                 * config lock for 100ms to let other threads work, retry.
+                 */
+                spa_config_exit(spa, SCL_ALL, FTAG);
+                (void) poll(NULL, 0, 100);
+                spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
+        }
+
+        ASSERT(leaf->vdev_ops->vdev_op_leaf);
+        spa_config_exit(spa, SCL_ALL, FTAG);
+        return (leaf);
+}
+
+#define ZTEST_COS_NAME          "ztest_cos"
+
+/*
+ * Build an nvlist holding a generated value for each of the "size"
+ * properties in "props" and apply it: to the vdev "lvd" when "t" is one of
+ * the VDEV_PROP_* kinds, to the cos descriptor "name" for COS_PROP_UINT64.
+ * "props" is read as an array of vdev_prop_t or cos_prop_t accordingly.
+ * Numeric values are random in 1...10; strings are short fixed names.
+ *
+ * Returns the nvlist that was set, still allocated, or NULL (with the
+ * nvlist freed) if the set operation reported ENOSPC.
+ */
+/*ARGSUSED*/
+static nvlist_t *
+ztest_props_set(const vdev_t *lvd, const char *name, const ztest_prop_t t,
+    const void *props, const size_t size)
+{
+        nvlist_t *nvl;
+        int err = 0;
+
+        VERIFY(0 == nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0));
+
+        for (int i = 0; i < size; i++) {
+                char str[16];
+                const char *pname =
+                    (t == VDEV_PROP_UINT64 || t == VDEV_PROP_STRING) ?
+                    vdev_prop_to_name(((vdev_prop_t *)props)[i]) :
+                    cos_prop_to_name(((cos_prop_t *)props)[i]);
+
+                if (t == VDEV_PROP_UINT64 || t == COS_PROP_UINT64) {
+                        /* random value in the range 1...10 */
+                        uint64_t num = ztest_random(10) + 1;
+
+                        VERIFY(0 == nvlist_add_uint64(nvl, pname, num));
+                } else if (t == VDEV_PROP_STRING) {
+                        if (((vdev_prop_t *)props)[i] == VDEV_PROP_COS) {
+                                /* the cos property gets a well known name */
+                                (void) snprintf(str, 15, "%s", ZTEST_COS_NAME);
+                        } else {
+                                /* any short string will do */
+                                (void) snprintf(str, 15, "prop_value%d", i);
+                        }
+                        VERIFY(0 == nvlist_add_string(nvl, pname, str));
+                } else {
+                        /* unknown property kind */
+                        err = EINVAL;
+                }
+        }
+        VERIFY3U(0, ==, err);
+
+        /* Apply the generated values. */
+        if (t == VDEV_PROP_UINT64 || t == VDEV_PROP_STRING)
+                err = spa_vdev_prop_set(ztest_spa, lvd->vdev_guid, nvl);
+        else if (t == COS_PROP_UINT64)
+                err = spa_cos_prop_set(ztest_spa, name, nvl);
+        else
+                err = EINVAL;
+
+        if (err != ENOSPC) {
+                ASSERT0(err);
+                return (nvl);
+        }
+
+        ztest_record_enospc(FTAG);
+        nvlist_free(nvl);
+        return (NULL);
+}
+
+/*
+ * Get props of vdev "lvd" (or cos "name" if lvd is NULL); NULL on ENOSPC.
+ */
+static nvlist_t *
+ztest_props_get(const vdev_t *lvd, const char *name)
+{
+        nvlist_t *nvl = NULL;
+        int err = (lvd != NULL) ?
+            spa_vdev_prop_get(ztest_spa, lvd->vdev_guid, &nvl) :
+            spa_cos_prop_get(ztest_spa, name, &nvl);
+
+        if (err != ENOSPC) {
+                ASSERT0(err);
+                return (nvl);
+        }
+        ztest_record_enospc(FTAG);
+        return (NULL);
+}
+
+/*
+ * Check that every property in "props" has the same value in "sprops" and
+ * "gprops", then free both.  A NULL list (ENOSPC upstream) skips the check.
+ */
+static void
+ztest_props_test(const ztest_prop_t t, const void *props, const size_t size,
+    nvlist_t *sprops, nvlist_t *gprops)
+{
+        const size_t n = (sprops != NULL && gprops != NULL) ? size : 0;
+
+        for (size_t p = 0; p < n; p++) {
+                const char *pname =
+                    (t == VDEV_PROP_UINT64 || t == VDEV_PROP_STRING) ?
+                    vdev_prop_to_name(((vdev_prop_t *)props)[p]) :
+                    cos_prop_to_name(((cos_prop_t *)props)[p]);
+                uint64_t sival, gival;
+                char *ssval, *gsval;
+
+                switch (t) {
+                case VDEV_PROP_UINT64:
+                case COS_PROP_UINT64:
+                        VERIFY3U(0, ==, nvlist_lookup_uint64(sprops, pname,
+                            &sival));
+                        VERIFY3U(0, ==, nvlist_lookup_uint64(gprops, pname,
+                            &gival));
+                        VERIFY3U(gival, ==, sival);
+                        break;
+                case VDEV_PROP_STRING:
+                        VERIFY3U(0, ==, nvlist_lookup_string(sprops, pname,
+                            &ssval));
+                        VERIFY3U(0, ==, nvlist_lookup_string(gprops, pname,
+                            &gsval));
+                        VERIFY3U(0, ==, strcmp(ssval, gsval));
+                        break;
+                default:        /* unknown property */
+                        VERIFY(0);
+                        break;
+                }
+        }
+        nvlist_free(sprops);
+        nvlist_free(gprops);
+}
+
+static const cos_prop_t cprops_uint64[] = {     /* uint64 cos props to test */
+        COS_PROP_READ_MINACTIVE,
+        COS_PROP_AREAD_MINACTIVE,
+        COS_PROP_WRITE_MINACTIVE,
+        COS_PROP_AWRITE_MINACTIVE,
+        COS_PROP_SCRUB_MINACTIVE,
+        COS_PROP_RESILVER_MINACTIVE,
+        COS_PROP_READ_MAXACTIVE,
+        COS_PROP_AREAD_MAXACTIVE,
+        COS_PROP_WRITE_MAXACTIVE,
+        COS_PROP_AWRITE_MAXACTIVE,
+        COS_PROP_SCRUB_MAXACTIVE,
+        COS_PROP_RESILVER_MAXACTIVE,
+        COS_PROP_PREFERRED_READ
+};
+
+/*
+ * Exercise class-of-service (CoS) descriptors and their properties.
+ *
+ * While holding ztest_props_lock the test:
+ *  1. allocates a CoS named "cos_<random id>", runs the uint64 properties
+ *     in cprops_uint64[] through ztest_props_set(), ztest_props_get() and
+ *     ztest_props_test();
+ *  2. checks that spa_list_cos() reports the descriptor under its name
+ *     with the expected id, frees it without force and checks that it is
+ *     no longer listed;
+ *  3. allocates the descriptor again, sets it as the cos property of a
+ *     random leaf vdev, frees it with force and checks that the
+ *     descriptor is no longer listed.
+ *
+ * Neither zd nor id is used.
+ */
+/* ARGSUSED */
+void
+ztest_cos_prop_get_set(ztest_ds_t *zd, uint64_t id)
+{
+        spa_t *spa = ztest_spa;
+        nvlist_t *sprops = NULL, *gprops = NULL, *cos_list = NULL;
+        char cos_name[MAXCOSNAMELEN];
+        const char *pname = NULL;
+        char *sval = NULL;
+        uint64_t cos_id = ztest_random(~0ULL), val = 0;
+        vdev_t *lvd = NULL;
+
+        /* %llu needs an unsigned long long argument, not a bare uint64_t */
+        (void) snprintf(cos_name, MAXCOSNAMELEN - 1, "cos_%llu",
+            (unsigned long long)cos_id);
+
+        mutex_enter(&ztest_props_lock);
+
+        VERIFY3U(0, ==, spa_alloc_cos(spa, cos_name, cos_id));
+
+        /* ztest_props_test() frees both lists before it returns */
+        sprops = ztest_props_set(NULL, cos_name,
+            COS_PROP_UINT64, (void *)&cprops_uint64[0],
+            sizeof (cprops_uint64) / sizeof (cprops_uint64[0]));
+        gprops = ztest_props_get(NULL, cos_name);
+        ztest_props_test(COS_PROP_UINT64, (void *)&cprops_uint64[0],
+            sizeof (cprops_uint64) / sizeof (cprops_uint64[0]),
+            sprops, gprops);
+        sprops = NULL;
+        gprops = NULL;
+
+        /* the new descriptor must be listed under its name with our id */
+        VERIFY3U(0, ==, nvlist_alloc(&cos_list, NV_UNIQUE_NAME, 0));
+        VERIFY3U(0, ==, spa_list_cos(spa, cos_list));
+        VERIFY3U(0, ==, nvlist_lookup_uint64(cos_list, cos_name, &val));
+        VERIFY3U(cos_id, ==, val);
+        nvlist_free(cos_list);
+
+        /* after a non-forced free it must no longer be listed */
+        VERIFY3U(0, ==, spa_free_cos(spa, cos_name, B_FALSE));
+        VERIFY3U(0, ==, nvlist_alloc(&cos_list, NV_UNIQUE_NAME, 0));
+        VERIFY3U(0, ==, spa_list_cos(spa, cos_list));
+        VERIFY3U(ENOENT, ==, nvlist_lookup_uint64(cos_list, cos_name, &val));
+        nvlist_free(cos_list);
+
+        /*
+         * force spa_free_cos() test
+         * - allocate cos property, set vdev's cos, then free cos forcefully
+         * - verify everything succeeds
+         * - verify no cos property on vdev
+         * - verify no cos descriptor remains
+         */
+        VERIFY3U(0, ==, spa_alloc_cos(spa, cos_name, cos_id));
+
+        /* Make sure vdevs will stay in place */
+        mutex_enter(&ztest_vdev_lock);
+
+        lvd = ztest_get_random_vdev_leaf(spa);
+
+        VERIFY(0 == nvlist_alloc(&sprops, NV_UNIQUE_NAME, 0));
+
+        pname = vdev_prop_to_name(VDEV_PROP_COS);
+        VERIFY3U(0, ==, nvlist_add_string(sprops, pname, cos_name));
+        VERIFY3U(0, ==, spa_vdev_prop_set(spa, lvd->vdev_guid, sprops));
+        /*
+         * The list stays owned by the caller after spa_vdev_prop_set()
+         * (ztest_cos_free() frees its list the same way); release it so
+         * it is not leaked on every invocation.
+         */
+        nvlist_free(sprops);
+        sprops = NULL;
+
+        VERIFY3U(0, ==, spa_free_cos(spa, cos_name, B_TRUE));
+
+        VERIFY3U(0, ==, spa_vdev_prop_get(spa, lvd->vdev_guid, &gprops));
+
+        mutex_exit(&ztest_vdev_lock);
+
+        /*
+         * verify the vdev cos prop gone
+         *
+         * NOTE(review): the lookup key is the CoS name, not pname (the
+         * name of the vdev cos property).  Presumably gprops is keyed by
+         * property name, in which case this check cannot fail; confirm
+         * whether pname was intended.
+         */
+        VERIFY3U(ENOENT, ==, nvlist_lookup_string(gprops, cos_name, &sval));
+
+        /* verify the cos descriptor gone */
+        VERIFY3U(0, ==, nvlist_alloc(&cos_list, NV_UNIQUE_NAME, 0));
+        VERIFY3U(0, ==, spa_list_cos(spa, cos_list));
+        VERIFY3U(ENOENT, ==, nvlist_lookup_uint64(cos_list, cos_name, &val));
+
+        mutex_exit(&ztest_props_lock);
+
+        /* gprops was handed to us by spa_vdev_prop_get(); do not leak it */
+        nvlist_free(gprops);
+        nvlist_free(cos_list);
+}
+
+/* vdev tests */
+/*
+ * vdev properties with uint64 values.  ztest_vdev_prop_get_set() passes
+ * this table, tagged VDEV_PROP_UINT64, to ztest_props_set() and
+ * ztest_props_test().
+ */
+static const vdev_prop_t vprops_uint64[] = {
+        VDEV_PROP_READ_MINACTIVE,
+        VDEV_PROP_AREAD_MINACTIVE,
+        VDEV_PROP_WRITE_MINACTIVE,
+        VDEV_PROP_AWRITE_MINACTIVE,
+        VDEV_PROP_SCRUB_MINACTIVE,
+        VDEV_PROP_RESILVER_MINACTIVE,
+        VDEV_PROP_READ_MAXACTIVE,
+        VDEV_PROP_AREAD_MAXACTIVE,
+        VDEV_PROP_WRITE_MAXACTIVE,
+        VDEV_PROP_AWRITE_MAXACTIVE,
+        VDEV_PROP_SCRUB_MAXACTIVE,
+        VDEV_PROP_RESILVER_MAXACTIVE,
+        VDEV_PROP_PREFERRED_READ
+};
+/*
+ * vdev properties with string values.  ztest_vdev_prop_get_set() passes
+ * this table, tagged VDEV_PROP_STRING, to ztest_props_set() and
+ * ztest_props_test().
+ */
+static const vdev_prop_t vprops_string[] = {
+        VDEV_PROP_COS,
+        VDEV_PROP_SPAREGROUP
+};
+
+/*
+ * Set the cos property of leaf vdev lvd to the empty string, then
+ * force-free the CoS descriptor called name.  A descriptor that does not
+ * exist (ENOENT) is tolerated; any other failure trips a VERIFY.
+ */
+static void
+ztest_cos_free(spa_t *spa, vdev_t *lvd, const char *name)
+{
+        const char *cos_pname = vdev_prop_to_name(VDEV_PROP_COS);
+        nvlist_t *clear_props = NULL;
+        int rc;
+
+        /* build a one-entry list that sets the vdev's cos property to "" */
+        VERIFY(0 == nvlist_alloc(&clear_props, NV_UNIQUE_NAME, 0));
+        VERIFY(0 == nvlist_add_string(clear_props, cos_pname, ""));
+        VERIFY3U(0, ==, spa_vdev_prop_set(spa, lvd->vdev_guid, clear_props));
+        nvlist_free(clear_props);
+
+        /*
+         * Cleanup paths call this without knowing whether the CoS was ever
+         * allocated, so a missing descriptor is not an error.
+         */
+        rc = spa_free_cos(spa, name, B_TRUE);
+        if (rc != 0)
+                VERIFY3U(rc, ==, ENOENT);
+}
+
+/*
+ * Run the uint64 and then the string vdev property tables through
+ * ztest_props_set(), ztest_props_get() and ztest_props_test() against a
+ * randomly chosen leaf vdev.  ZTEST_COS_NAME is allocated before the
+ * string pass and released through ztest_cos_free() afterwards.
+ *
+ * Neither zd nor id is used.
+ */
+/* ARGSUSED */
+void
+ztest_vdev_prop_get_set(ztest_ds_t *zd, uint64_t id)
+{
+        const size_t nuint64 =
+            sizeof (vprops_uint64) / sizeof (vprops_uint64[0]);
+        const size_t nstring =
+            sizeof (vprops_string) / sizeof (vprops_string[0]);
+        spa_t *spa = ztest_spa;
+        nvlist_t *set_list = NULL, *got_list = NULL;
+        vdev_t *leaf = NULL;
+        int rc = 0;
+
+        /* taken before ztest_vdev_lock, as in ztest_cos_prop_get_set() */
+        mutex_enter(&ztest_props_lock);
+
+        /* Make sure vdevs will stay in place */
+        mutex_enter(&ztest_vdev_lock);
+
+        leaf = ztest_get_random_vdev_leaf(spa);
+
+        /* uint64 pass; ztest_props_test() frees both lists */
+        set_list = ztest_props_set(leaf, NULL, VDEV_PROP_UINT64,
+            (void *)vprops_uint64, nuint64);
+        got_list = ztest_props_get(leaf, NULL);
+        ztest_props_test(VDEV_PROP_UINT64, (void *)vprops_uint64, nuint64,
+            set_list, got_list);
+
+        /*
+         * string pass: the CoS descriptor has to exist for setting the
+         * vdev's cos to succeed; one that already exists is fine
+         */
+        rc = spa_alloc_cos(spa, ZTEST_COS_NAME, 0);
+        if (rc != 0)
+                VERIFY3U(rc, ==, EEXIST);
+
+        set_list = ztest_props_set(leaf, NULL, VDEV_PROP_STRING,
+            (void *)vprops_string, nstring);
+        got_list = ztest_props_get(leaf, NULL);
+        ztest_props_test(VDEV_PROP_STRING, (void *)vprops_string, nstring,
+            set_list, got_list);
+
+        /* release the CoS again so other tests do not collide with it */
+        ztest_cos_free(spa, leaf, ZTEST_COS_NAME);
+
+        mutex_exit(&ztest_vdev_lock);
+
+        mutex_exit(&ztest_props_lock);
+}
+
+/* end vdev and cos property tests */
+
 static int
 user_release_one(const char *snapname, const char *holdname)
 {
         nvlist_t *snaps, *holds;
         int error;

@@ -4955,13 +5338,10 @@
                          * combination of this with injection of random data
                          * corruption below exceeds the pool's fault tolerance.
                          */
                         vdev_file_t *vf = vd0->vdev_tsd;
 
-                        zfs_dbgmsg("injecting fault to vdev %llu; maxfaults=%d",
-                            (long long)vd0->vdev_id, (int)maxfaults);
-
                         if (vf != NULL && ztest_random(3) == 0) {
                                 (void) close(vf->vf_vnode->v_fd);
                                 vf->vf_vnode->v_fd = -1;
                         } else if (ztest_random(2) == 0) {
                                 vd0->vdev_cant_read = B_TRUE;

@@ -5431,11 +5811,12 @@
         ztest_walk_pool_directory("pools before export");
 
         /*
          * Export it.
          */
-        VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE));
+        VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE,
+            B_FALSE));
 
         ztest_walk_pool_directory("pools after export");
 
         /*
          * Try to import it.

@@ -5750,10 +6131,11 @@
 
         /*
          * Initialize parent/child shared state.
          */
         mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
+        mutex_init(&ztest_props_lock, NULL, USYNC_THREAD, NULL);
         rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
 
         zs->zs_thread_start = gethrtime();
         zs->zs_thread_stop =
             zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC;

@@ -6039,17 +6421,55 @@
 make_random_props()
 {
         nvlist_t *props;
 
         VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
-        if (ztest_random(2) == 0)
-                return (props);
+        switch (ztest_random(5)) {
+        case 0:
+                break;
+        case 1:
         VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
+                break;
+        case 2:
+                VERIFY(nvlist_add_uint64(props, "enablespecial", 1) == 0);
+                VERIFY(nvlist_add_uint64(props, "small_data_to_metadev", 1) ==
+                    0);
+                break;
+        case 3:
+                VERIFY(nvlist_add_uint64(props, "enablespecial", 1) == 0);
+                VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
+                break;
+        case 4:
+                VERIFY(nvlist_add_uint64(props, "enablespecial", 1) == 0);
+                VERIFY(nvlist_add_uint64(props, "meta_placement", 1) == 0);
+                VERIFY(nvlist_add_uint64(props, "zfs_meta_to_metadev", 1) == 0);
+                break;
+        }
 
         return (props);
 }
 
+/*
+ * Set ZFS_PROP_ZPL_META_TO_METADEV on dataset dsname to one of
+ * META_PLACEMENT_OFF, META_PLACEMENT_ON or META_PLACEMENT_DUAL, selected
+ * by ztest_random(3).  The set must succeed.
+ */
+static void
+set_random_ds_props(char *dsname)
+{
+        uint64_t pick = ztest_random(3);
+        uint64_t placement;
+
+        if (pick == 1)
+                placement = META_PLACEMENT_ON;
+        else if (pick == 2)
+                placement = META_PLACEMENT_DUAL;
+        else
+                placement = META_PLACEMENT_OFF;
+
+        VERIFY(ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_ZPL_META_TO_METADEV,
+            placement, B_TRUE) == 0);
+}
+
 /*
  * Create a storage pool with the given name and initial vdev size.
  * Then test spa_freeze() functionality.
  */
 static void

@@ -6069,11 +6489,17 @@
         (void) spa_destroy(ztest_opts.zo_pool);
         ztest_shared->zs_vdev_next_leaf = 0;
         zs->zs_splits = 0;
         zs->zs_mirrors = ztest_opts.zo_mirrors;
         nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
-            0, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
+            0, ztest_opts.zo_raidz, zs->zs_mirrors, 1, B_FALSE);
+        /*
+         * Add special vdevs
+         */
+        add_special_vdevs(nvroot, ztest_opts.zo_vdev_size, ztest_opts.zo_raidz,
+            zs->zs_mirrors, 1);
+
         props = make_random_props();
         for (int i = 0; i < SPA_FEATURES; i++) {
                 char buf[1024];
                 (void) snprintf(buf, sizeof (buf), "feature@%s",
                     spa_feature_table[i].fi_uname);

@@ -6085,10 +6511,13 @@
 
         VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
         zs->zs_metaslab_sz =
             1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
 
+        /* set props on the root dataset */
+        set_random_ds_props(ztest_opts.zo_pool);
+
         spa_close(spa, FTAG);
 
         kernel_fini();
 
         ztest_run_zdb(ztest_opts.zo_pool);

@@ -6096,10 +6525,11 @@
         ztest_freeze();
 
         ztest_run_zdb(ztest_opts.zo_pool);
 
         rw_destroy(&ztest_name_lock);
+        mutex_destroy(&ztest_props_lock);
         mutex_destroy(&ztest_vdev_lock);
 }
 
 static void
 setup_data_fd(void)