usr/src/uts/common/fs/zfs/dmu.c

@@ -43,16 +43,19 @@
 #include <sys/zfs_ioctl.h>
 #include <sys/zap.h>
 #include <sys/zio_checksum.h>
 #include <sys/zio_compress.h>
 #include <sys/sa.h>
+#include <sys/spa_impl.h>
 #include <sys/zfeature.h>
 #include <sys/abd.h>
 #ifdef _KERNEL
 #include <sys/vmsystm.h>
 #include <sys/zfs_znode.h>
+#include <sys/zfs_vfsops.h>
 #endif
+#include <sys/special.h>
 
 /*
  * Enable/disable nopwrite feature.
  */
 int zfs_nopwrite_enabled = 1;

@@ -63,72 +66,65 @@
  * wait until the next TXG.
  * A value of zero will disable this throttle.
  */
 uint32_t zfs_per_txg_dirty_frees_percent = 30;
 
-/*
- * This can be used for testing, to ensure that certain actions happen
- * while in the middle of a remap (which might otherwise complete too
- * quickly).
- */
-int zfs_object_remap_one_indirect_delay_ticks = 0;
-
 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
-        {       DMU_BSWAP_UINT8,        TRUE,   "unallocated"           },
-        {       DMU_BSWAP_ZAP,          TRUE,   "object directory"      },
-        {       DMU_BSWAP_UINT64,       TRUE,   "object array"          },
-        {       DMU_BSWAP_UINT8,        TRUE,   "packed nvlist"         },
-        {       DMU_BSWAP_UINT64,       TRUE,   "packed nvlist size"    },
-        {       DMU_BSWAP_UINT64,       TRUE,   "bpobj"                 },
-        {       DMU_BSWAP_UINT64,       TRUE,   "bpobj header"          },
-        {       DMU_BSWAP_UINT64,       TRUE,   "SPA space map header"  },
-        {       DMU_BSWAP_UINT64,       TRUE,   "SPA space map"         },
-        {       DMU_BSWAP_UINT64,       TRUE,   "ZIL intent log"        },
-        {       DMU_BSWAP_DNODE,        TRUE,   "DMU dnode"             },
-        {       DMU_BSWAP_OBJSET,       TRUE,   "DMU objset"            },
-        {       DMU_BSWAP_UINT64,       TRUE,   "DSL directory"         },
-        {       DMU_BSWAP_ZAP,          TRUE,   "DSL directory child map"},
-        {       DMU_BSWAP_ZAP,          TRUE,   "DSL dataset snap map"  },
-        {       DMU_BSWAP_ZAP,          TRUE,   "DSL props"             },
-        {       DMU_BSWAP_UINT64,       TRUE,   "DSL dataset"           },
-        {       DMU_BSWAP_ZNODE,        TRUE,   "ZFS znode"             },
-        {       DMU_BSWAP_OLDACL,       TRUE,   "ZFS V0 ACL"            },
-        {       DMU_BSWAP_UINT8,        FALSE,  "ZFS plain file"        },
-        {       DMU_BSWAP_ZAP,          TRUE,   "ZFS directory"         },
-        {       DMU_BSWAP_ZAP,          TRUE,   "ZFS master node"       },
-        {       DMU_BSWAP_ZAP,          TRUE,   "ZFS delete queue"      },
-        {       DMU_BSWAP_UINT8,        FALSE,  "zvol object"           },
-        {       DMU_BSWAP_ZAP,          TRUE,   "zvol prop"             },
-        {       DMU_BSWAP_UINT8,        FALSE,  "other uint8[]"         },
-        {       DMU_BSWAP_UINT64,       FALSE,  "other uint64[]"        },
-        {       DMU_BSWAP_ZAP,          TRUE,   "other ZAP"             },
-        {       DMU_BSWAP_ZAP,          TRUE,   "persistent error log"  },
-        {       DMU_BSWAP_UINT8,        TRUE,   "SPA history"           },
-        {       DMU_BSWAP_UINT64,       TRUE,   "SPA history offsets"   },
-        {       DMU_BSWAP_ZAP,          TRUE,   "Pool properties"       },
-        {       DMU_BSWAP_ZAP,          TRUE,   "DSL permissions"       },
-        {       DMU_BSWAP_ACL,          TRUE,   "ZFS ACL"               },
-        {       DMU_BSWAP_UINT8,        TRUE,   "ZFS SYSACL"            },
-        {       DMU_BSWAP_UINT8,        TRUE,   "FUID table"            },
-        {       DMU_BSWAP_UINT64,       TRUE,   "FUID table size"       },
-        {       DMU_BSWAP_ZAP,          TRUE,   "DSL dataset next clones"},
-        {       DMU_BSWAP_ZAP,          TRUE,   "scan work queue"       },
-        {       DMU_BSWAP_ZAP,          TRUE,   "ZFS user/group used"   },
-        {       DMU_BSWAP_ZAP,          TRUE,   "ZFS user/group quota"  },
-        {       DMU_BSWAP_ZAP,          TRUE,   "snapshot refcount tags"},
-        {       DMU_BSWAP_ZAP,          TRUE,   "DDT ZAP algorithm"     },
-        {       DMU_BSWAP_ZAP,          TRUE,   "DDT statistics"        },
-        {       DMU_BSWAP_UINT8,        TRUE,   "System attributes"     },
-        {       DMU_BSWAP_ZAP,          TRUE,   "SA master node"        },
-        {       DMU_BSWAP_ZAP,          TRUE,   "SA attr registration"  },
-        {       DMU_BSWAP_ZAP,          TRUE,   "SA attr layouts"       },
-        {       DMU_BSWAP_ZAP,          TRUE,   "scan translations"     },
-        {       DMU_BSWAP_UINT8,        FALSE,  "deduplicated block"    },
-        {       DMU_BSWAP_ZAP,          TRUE,   "DSL deadlist map"      },
-        {       DMU_BSWAP_UINT64,       TRUE,   "DSL deadlist map hdr"  },
-        {       DMU_BSWAP_ZAP,          TRUE,   "DSL dir clones"        },
-        {       DMU_BSWAP_UINT64,       TRUE,   "bpobj subobj"          }
+        { DMU_BSWAP_UINT8,  TRUE,  FALSE,  "unallocated"                },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "object directory"           },
+        { DMU_BSWAP_UINT64, TRUE,  TRUE,   "object array"               },
+        { DMU_BSWAP_UINT8,  TRUE,  FALSE,  "packed nvlist"              },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "packed nvlist size"         },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "bpobj"                      },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "bpobj header"               },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "SPA space map header"       },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "SPA space map"              },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "ZIL intent log"             },
+        { DMU_BSWAP_DNODE,  TRUE,  FALSE,  "DMU dnode"                  },
+        { DMU_BSWAP_OBJSET, TRUE,  TRUE,   "DMU objset"                 },
+        { DMU_BSWAP_UINT64, TRUE,  TRUE,   "DSL directory"              },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL directory child map"    },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL dataset snap map"       },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL props"                  },
+        { DMU_BSWAP_UINT64, TRUE,  TRUE,   "DSL dataset"                },
+        { DMU_BSWAP_ZNODE,  TRUE,  FALSE,  "ZFS znode"                  },
+        { DMU_BSWAP_OLDACL, TRUE,  FALSE,  "ZFS V0 ACL"                 },
+        { DMU_BSWAP_UINT8,  FALSE, FALSE,  "ZFS plain file"             },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS directory"              },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS master node"            },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS delete queue"           },
+        { DMU_BSWAP_UINT8,  FALSE, FALSE,  "zvol object"                },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "zvol prop"                  },
+        { DMU_BSWAP_UINT8,  FALSE, FALSE,  "other uint8[]"              },
+        { DMU_BSWAP_UINT64, FALSE, FALSE,  "other uint64[]"             },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "other ZAP"                  },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "persistent error log"       },
+        { DMU_BSWAP_UINT8,  TRUE,  FALSE,  "SPA history"                },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "SPA history offsets"        },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "Pool properties"            },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL permissions"            },
+        { DMU_BSWAP_ACL,    TRUE,  FALSE,  "ZFS ACL"                    },
+        { DMU_BSWAP_UINT8,  TRUE,  FALSE,  "ZFS SYSACL"                 },
+        { DMU_BSWAP_UINT8,  TRUE,  FALSE,  "FUID table"                 },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "FUID table size"            },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL dataset next clones"    },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "scan work queue"            },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS user/group used"        },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS user/group quota"       },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "snapshot refcount tags"     },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "DDT ZAP algorithm"          },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "DDT statistics"             },
+        { DMU_BSWAP_UINT8,  TRUE,  FALSE,  "System attributes"          },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "SA master node"             },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "SA attr registration"       },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "SA attr layouts"            },
+        { DMU_BSWAP_ZAP,    TRUE,  FALSE,  "scan translations"          },
+        { DMU_BSWAP_UINT8,  FALSE, FALSE,  "deduplicated block"         },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL deadlist map"           },
+        { DMU_BSWAP_UINT64, TRUE,  TRUE,   "DSL deadlist map hdr"       },
+        { DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL dir clones"             },
+        { DMU_BSWAP_UINT64, TRUE,  FALSE,  "bpobj subobj"               }
 };
 
 const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
         {       byteswap_uint8_array,   "uint8"         },
         {       byteswap_uint16_array,  "uint16"        },

@@ -710,23 +706,37 @@
                 *start = minimum;
         return (0);
 }
 
 /*
- * If this objset is of type OST_ZFS return true if vfs's unmounted flag is set,
- * otherwise return false.
- * Used below in dmu_free_long_range_impl() to enable abort when unmounting
+ * If this dnode is in the ZFS object set
+ * return true if vfs's unmounted flag is set or the
+ * zfsvfs is currently suspended, otherwise return false.
  */
 /*ARGSUSED*/
 static boolean_t
-dmu_objset_zfs_unmounting(objset_t *os)
+dmu_dnode_fs_unmounting_or_suspended(dnode_t *freeing_dn)
 {
 #ifdef _KERNEL
-        if (dmu_objset_type(os) == DMU_OST_ZFS)
-                return (zfs_get_vfs_flag_unmounted(os));
-#endif
+        boolean_t busy = B_FALSE;
+        objset_t *os = freeing_dn->dn_objset;
+        zfsvfs_t *zfsvfs;
+
+        if (dmu_objset_type(os) == DMU_OST_ZFS) {
+                mutex_enter(&os->os_user_ptr_lock);
+                zfsvfs = dmu_objset_get_user(os);
+                if (zfsvfs != NULL && zfsvfs->z_vfs != NULL &&
+                    ((zfsvfs->z_vfs->vfs_flag & VFS_UNMOUNTED) ||
+                     zfsvfs->z_busy))
+                        busy = B_TRUE;
+                mutex_exit(&os->os_user_ptr_lock);
+        }
+
+        return (busy);
+#else
         return (B_FALSE);
+#endif
 }
 
 static int
 dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
     uint64_t length)

@@ -743,20 +753,32 @@
                 dirty_frees_threshold =
                     zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
         else
                 dirty_frees_threshold = zfs_dirty_data_max / 4;
 
+        if (length == DMU_OBJECT_END && offset == 0)
+                dnode_evict_dbufs(dn, 0);
+
         if (length == DMU_OBJECT_END || offset + length > object_size)
                 length = object_size - offset;
 
+        mutex_enter(&dp->dp_lock);
+        dp->dp_long_freeing_total += length;
+        mutex_exit(&dp->dp_lock);
+
         while (length != 0) {
                 uint64_t chunk_end, chunk_begin, chunk_len;
                 uint64_t long_free_dirty_all_txgs = 0;
                 dmu_tx_t *tx;
 
-                if (dmu_objset_zfs_unmounting(dn->dn_objset))
+                if (dmu_dnode_fs_unmounting_or_suspended(dn)) {
+                        mutex_enter(&dp->dp_lock);
+                        dp->dp_long_freeing_total -= length;
+                        mutex_exit(&dp->dp_lock);
+
                         return (SET_ERROR(EINTR));
+                }
 
                 chunk_end = chunk_begin = offset + length;
 
                 /* move chunk_begin backwards to the beginning of this chunk */
                 err = get_next_chunk(dn, &chunk_begin, offset);

@@ -794,10 +816,13 @@
                  */
                 dmu_tx_mark_netfree(tx);
                 err = dmu_tx_assign(tx, TXG_WAIT);
                 if (err) {
                         dmu_tx_abort(tx);
+                        mutex_enter(&dp->dp_lock);
+                        dp->dp_long_freeing_total -= length - chunk_len;
+                        mutex_exit(&dp->dp_lock);
                         return (err);
                 }
 
                 mutex_enter(&dp->dp_lock);
                 dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=

@@ -1019,127 +1044,10 @@
             FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
         dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
         dmu_buf_rele_array(dbp, numbufs, FTAG);
 }
 
-static int
-dmu_object_remap_one_indirect(objset_t *os, dnode_t *dn,
-    uint64_t last_removal_txg, uint64_t offset)
-{
-        uint64_t l1blkid = dbuf_whichblock(dn, 1, offset);
-        int err = 0;
-
-        rw_enter(&dn->dn_struct_rwlock, RW_READER);
-        dmu_buf_impl_t *dbuf = dbuf_hold_level(dn, 1, l1blkid, FTAG);
-        ASSERT3P(dbuf, !=, NULL);
-
-        /*
-         * If the block hasn't been written yet, this default will ensure
-         * we don't try to remap it.
-         */
-        uint64_t birth = UINT64_MAX;
-        ASSERT3U(last_removal_txg, !=, UINT64_MAX);
-        if (dbuf->db_blkptr != NULL)
-                birth = dbuf->db_blkptr->blk_birth;
-        rw_exit(&dn->dn_struct_rwlock);
-
-        /*
-         * If this L1 was already written after the last removal, then we've
-         * already tried to remap it.
-         */
-        if (birth <= last_removal_txg &&
-            dbuf_read(dbuf, NULL, DB_RF_MUST_SUCCEED) == 0 &&
-            dbuf_can_remap(dbuf)) {
-                dmu_tx_t *tx = dmu_tx_create(os);
-                dmu_tx_hold_remap_l1indirect(tx, dn->dn_object);
-                err = dmu_tx_assign(tx, TXG_WAIT);
-                if (err == 0) {
-                        (void) dbuf_dirty(dbuf, tx);
-                        dmu_tx_commit(tx);
-                } else {
-                        dmu_tx_abort(tx);
-                }
-        }
-
-        dbuf_rele(dbuf, FTAG);
-
-        delay(zfs_object_remap_one_indirect_delay_ticks);
-
-        return (err);
-}
-
-/*
- * Remap all blockpointers in the object, if possible, so that they reference
- * only concrete vdevs.
- *
- * To do this, iterate over the L0 blockpointers and remap any that reference
- * an indirect vdev. Note that we only examine L0 blockpointers; since we
- * cannot guarantee that we can remap all blockpointer anyways (due to split
- * blocks), we do not want to make the code unnecessarily complicated to
- * catch the unlikely case that there is an L1 block on an indirect vdev that
- * contains no indirect blockpointers.
- */
-int
-dmu_object_remap_indirects(objset_t *os, uint64_t object,
-    uint64_t last_removal_txg)
-{
-        uint64_t offset, l1span;
-        int err;
-        dnode_t *dn;
-
-        err = dnode_hold(os, object, FTAG, &dn);
-        if (err != 0) {
-                return (err);
-        }
-
-        if (dn->dn_nlevels <= 1) {
-                if (issig(JUSTLOOKING) && issig(FORREAL)) {
-                        err = SET_ERROR(EINTR);
-                }
-
-                /*
-                 * If the dnode has no indirect blocks, we cannot dirty them.
-                 * We still want to remap the blkptr(s) in the dnode if
-                 * appropriate, so mark it as dirty.
-                 */
-                if (err == 0 && dnode_needs_remap(dn)) {
-                        dmu_tx_t *tx = dmu_tx_create(os);
-                        dmu_tx_hold_bonus(tx, dn->dn_object);
-                        if ((err = dmu_tx_assign(tx, TXG_WAIT)) == 0) {
-                                dnode_setdirty(dn, tx);
-                                dmu_tx_commit(tx);
-                        } else {
-                                dmu_tx_abort(tx);
-                        }
-                }
-
-                dnode_rele(dn, FTAG);
-                return (err);
-        }
-
-        offset = 0;
-        l1span = 1ULL << (dn->dn_indblkshift - SPA_BLKPTRSHIFT +
-            dn->dn_datablkshift);
-        /*
-         * Find the next L1 indirect that is not a hole.
-         */
-        while (dnode_next_offset(dn, 0, &offset, 2, 1, 0) == 0) {
-                if (issig(JUSTLOOKING) && issig(FORREAL)) {
-                        err = SET_ERROR(EINTR);
-                        break;
-                }
-                if ((err = dmu_object_remap_one_indirect(os, dn,
-                    last_removal_txg, offset)) != 0) {
-                        break;
-                }
-                offset += l1span;
-        }
-
-        dnode_rele(dn, FTAG);
-        return (err);
-}
-
 void
 dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
     dmu_tx_t *tx)
 {
         dmu_buf_t **dbp;

@@ -1685,11 +1593,20 @@
 dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
 {
         dmu_sync_arg_t *dsa = varg;
         dbuf_dirty_record_t *dr = dsa->dsa_dr;
         dmu_buf_impl_t *db = dr->dr_dbuf;
+        zgd_t *zgd = dsa->dsa_zgd;
 
+        /*
+         * Record the vdev(s) backing this blkptr so they can be flushed after
+         * the writes for the lwb have completed.
+         */
+        if (zio->io_error == 0) {
+                zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
+        }
+
         mutex_enter(&db->db_mtx);
         ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC);
         if (zio->io_error == 0) {
                 dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE);
                 if (dr->dt.dl.dr_nopwrite) {

@@ -1735,18 +1652,27 @@
 dmu_sync_late_arrival_done(zio_t *zio)
 {
         blkptr_t *bp = zio->io_bp;
         dmu_sync_arg_t *dsa = zio->io_private;
         blkptr_t *bp_orig = &zio->io_bp_orig;
+        zgd_t *zgd = dsa->dsa_zgd;
 
-        if (zio->io_error == 0 && !BP_IS_HOLE(bp)) {
+        if (zio->io_error == 0) {
+                /*
+                 * Record the vdev(s) backing this blkptr so they can be
+                 * flushed after the writes for the lwb have completed.
+                 */
+                zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
+
+                if (!BP_IS_HOLE(bp)) {
-                ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
-                ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
-                ASSERT(zio->io_bp->blk_birth == zio->io_txg);
-                ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
-                zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
-        }
+                        ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
+                        ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
+                        ASSERT(zio->io_bp->blk_birth == zio->io_txg);
+                        ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
+                        zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
+                }
+        }
 
         dmu_tx_commit(dsa->dsa_tx);
 
         dsa->dsa_done(dsa->dsa_zgd, zio->io_error);
 

@@ -1754,11 +1680,11 @@
         kmem_free(dsa, sizeof (*dsa));
 }
 
 static int
 dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
-    zio_prop_t *zp, zbookmark_phys_t *zb)
+    zio_prop_t *zp, zbookmark_phys_t *zb, const zio_smartcomp_info_t *sc)
 {
         dmu_sync_arg_t *dsa;
         dmu_tx_t *tx;
 
         tx = dmu_tx_create(os);

@@ -1807,11 +1733,11 @@
 
         zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
             abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
             zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
             dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done,
-            dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
+            dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb, sc));
 
         return (0);
 }
 
 /*

@@ -1837,10 +1763,10 @@
  *              The caller should log this blkptr in the done callback.
  *              It is possible that the I/O will fail, in which case
  *              the error will be reported to the done callback and
  *              propagated to pio from zio_done().
  */
 int
 dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
 {
         dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db;
         objset_t *os = db->db_objset;

@@ -1848,27 +1775,35 @@
         dbuf_dirty_record_t *dr;
         dmu_sync_arg_t *dsa;
         zbookmark_phys_t zb;
         zio_prop_t zp;
         dnode_t *dn;
+        int flags = 0;
+        zio_smartcomp_info_t sc;
 
         ASSERT(pio != NULL);
         ASSERT(txg != 0);
 
         SET_BOOKMARK(&zb, ds->ds_object,
             db->db.db_object, db->db_level, db->db_blkid);
 
+        /* write to special only if proper conditions hold */
+        if (spa_write_data_to_special(os->os_spa, os))
+                WP_SET_SPECIALCLASS(flags, B_TRUE);
+
         DB_DNODE_ENTER(db);
         dn = DB_DNODE(db);
-        dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp);
+        dmu_write_policy(os, dn, db->db_level, flags | WP_DMU_SYNC, &zp);
+        dnode_setup_zio_smartcomp(db, &sc);
         DB_DNODE_EXIT(db);
 
         /*
          * If we're frozen (running ziltest), we always need to generate a bp.
          */
         if (txg > spa_freeze_txg(os->os_spa))
-                return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
+                return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb,
+                    &sc));
 
         /*
          * Grabbing db_mtx now provides a barrier between dbuf_sync_leaf()
          * and us.  If we determine that this txg is not yet syncing,
          * but it begins to sync a moment later, that's OK because the

@@ -1888,11 +1823,12 @@
                 /*
                  * This txg is currently syncing, so we can't mess with
                  * the dirty record anymore; just write a new log block.
                  */
                 mutex_exit(&db->db_mtx);
-                return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
+                return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb,
+                    &sc));
         }
 
         dr = db->db_last_dirty;
         while (dr && dr->dr_txg != txg)
                 dr = dr->dr_next;

@@ -1974,11 +1910,11 @@
         dsa->dsa_tx = NULL;
 
         zio_nowait(arc_write(pio, os->os_spa, txg,
             zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
             &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
-            ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
+            ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb, &sc));
 
         return (0);
 }
 
 int

@@ -2140,20 +2076,36 @@
                 nopwrite = (!dedup && (zio_checksum_table[checksum].ci_flags &
                     ZCHECKSUM_FLAG_NOPWRITE) &&
                     compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
         }
 
+        zp->zp_usesc = WP_GET_SPECIALCLASS(wp);
         zp->zp_checksum = checksum;
         zp->zp_compress = compress;
         ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
 
         zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
         zp->zp_level = level;
         zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
         zp->zp_dedup = dedup;
         zp->zp_dedup_verify = dedup && dedup_verify;
+        zp->zp_metadata = ismd;
         zp->zp_nopwrite = nopwrite;
+        zp->zp_zpl_meta_to_special = os->os_zpl_meta_to_special;
+        zp->zp_usewbc = (zp->zp_usesc &&
+            os->os_wbc_mode == ZFS_WBC_MODE_ON && !ismd);
+
+        /* explicitly control the number for copies for DDT */
+        if (DMU_OT_IS_DDT_META(type) &&
+            os->os_spa->spa_ddt_meta_copies > 0) {
+                zp->zp_copies =
+                    MIN(os->os_spa->spa_ddt_meta_copies,
+                    spa_max_replication(os->os_spa));
+        }
+
+        DTRACE_PROBE2(dmu_wp, boolean_t, zp->zp_metadata,
+            boolean_t, zp->zp_usesc);
 }
 
 int
 dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
 {