usr/src/uts/common/fs/zfs/dmu.c
*** 43,58 ****
--- 43,61 ----
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/sa.h>
+ #include <sys/spa_impl.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#ifdef _KERNEL
#include <sys/vmsystm.h>
#include <sys/zfs_znode.h>
+ #include <sys/zfs_vfsops.h>
#endif
+ #include <sys/special.h>
/*
* Enable/disable nopwrite feature.
*/
int zfs_nopwrite_enabled = 1;
*** 63,134 ****
* wait until the next TXG.
* A value of zero will disable this throttle.
*/
uint32_t zfs_per_txg_dirty_frees_percent = 30;
- /*
- * This can be used for testing, to ensure that certain actions happen
- * while in the middle of a remap (which might otherwise complete too
- * quickly).
- */
- int zfs_object_remap_one_indirect_delay_ticks = 0;
-
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
! { DMU_BSWAP_UINT8, TRUE, "unallocated" },
! { DMU_BSWAP_ZAP, TRUE, "object directory" },
! { DMU_BSWAP_UINT64, TRUE, "object array" },
! { DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
! { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
! { DMU_BSWAP_UINT64, TRUE, "bpobj" },
! { DMU_BSWAP_UINT64, TRUE, "bpobj header" },
! { DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
! { DMU_BSWAP_UINT64, TRUE, "SPA space map" },
! { DMU_BSWAP_UINT64, TRUE, "ZIL intent log" },
! { DMU_BSWAP_DNODE, TRUE, "DMU dnode" },
! { DMU_BSWAP_OBJSET, TRUE, "DMU objset" },
! { DMU_BSWAP_UINT64, TRUE, "DSL directory" },
! { DMU_BSWAP_ZAP, TRUE, "DSL directory child map"},
! { DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" },
! { DMU_BSWAP_ZAP, TRUE, "DSL props" },
! { DMU_BSWAP_UINT64, TRUE, "DSL dataset" },
! { DMU_BSWAP_ZNODE, TRUE, "ZFS znode" },
! { DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" },
! { DMU_BSWAP_UINT8, FALSE, "ZFS plain file" },
! { DMU_BSWAP_ZAP, TRUE, "ZFS directory" },
! { DMU_BSWAP_ZAP, TRUE, "ZFS master node" },
! { DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" },
! { DMU_BSWAP_UINT8, FALSE, "zvol object" },
! { DMU_BSWAP_ZAP, TRUE, "zvol prop" },
! { DMU_BSWAP_UINT8, FALSE, "other uint8[]" },
! { DMU_BSWAP_UINT64, FALSE, "other uint64[]" },
! { DMU_BSWAP_ZAP, TRUE, "other ZAP" },
! { DMU_BSWAP_ZAP, TRUE, "persistent error log" },
! { DMU_BSWAP_UINT8, TRUE, "SPA history" },
! { DMU_BSWAP_UINT64, TRUE, "SPA history offsets" },
! { DMU_BSWAP_ZAP, TRUE, "Pool properties" },
! { DMU_BSWAP_ZAP, TRUE, "DSL permissions" },
! { DMU_BSWAP_ACL, TRUE, "ZFS ACL" },
! { DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" },
! { DMU_BSWAP_UINT8, TRUE, "FUID table" },
! { DMU_BSWAP_UINT64, TRUE, "FUID table size" },
! { DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"},
! { DMU_BSWAP_ZAP, TRUE, "scan work queue" },
! { DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" },
! { DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" },
! { DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"},
! { DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" },
! { DMU_BSWAP_ZAP, TRUE, "DDT statistics" },
! { DMU_BSWAP_UINT8, TRUE, "System attributes" },
! { DMU_BSWAP_ZAP, TRUE, "SA master node" },
! { DMU_BSWAP_ZAP, TRUE, "SA attr registration" },
! { DMU_BSWAP_ZAP, TRUE, "SA attr layouts" },
! { DMU_BSWAP_ZAP, TRUE, "scan translations" },
! { DMU_BSWAP_UINT8, FALSE, "deduplicated block" },
! { DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" },
! { DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" },
! { DMU_BSWAP_ZAP, TRUE, "DSL dir clones" },
! { DMU_BSWAP_UINT64, TRUE, "bpobj subobj" }
};
const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
{ byteswap_uint8_array, "uint8" },
{ byteswap_uint16_array, "uint16" },
--- 66,130 ----
* wait until the next TXG.
* A value of zero will disable this throttle.
*/
uint32_t zfs_per_txg_dirty_frees_percent = 30;
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
! { DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "object directory" },
! { DMU_BSWAP_UINT64, TRUE, TRUE, "object array" },
! { DMU_BSWAP_UINT8, TRUE, FALSE, "packed nvlist" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "packed nvlist size" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj header" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map header" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "ZIL intent log" },
! { DMU_BSWAP_DNODE, TRUE, FALSE, "DMU dnode" },
! { DMU_BSWAP_OBJSET, TRUE, TRUE, "DMU objset" },
! { DMU_BSWAP_UINT64, TRUE, TRUE, "DSL directory" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL directory child map" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dataset snap map" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL props" },
! { DMU_BSWAP_UINT64, TRUE, TRUE, "DSL dataset" },
! { DMU_BSWAP_ZNODE, TRUE, FALSE, "ZFS znode" },
! { DMU_BSWAP_OLDACL, TRUE, FALSE, "ZFS V0 ACL" },
! { DMU_BSWAP_UINT8, FALSE, FALSE, "ZFS plain file" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS directory" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS delete queue" },
! { DMU_BSWAP_UINT8, FALSE, FALSE, "zvol object" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "zvol prop" },
! { DMU_BSWAP_UINT8, FALSE, FALSE, "other uint8[]" },
! { DMU_BSWAP_UINT64, FALSE, FALSE, "other uint64[]" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" },
! { DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "Pool properties" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL permissions" },
! { DMU_BSWAP_ACL, TRUE, FALSE, "ZFS ACL" },
! { DMU_BSWAP_UINT8, TRUE, FALSE, "ZFS SYSACL" },
! { DMU_BSWAP_UINT8, TRUE, FALSE, "FUID table" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dataset next clones" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS user/group used" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS user/group quota" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "snapshot refcount tags" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" },
! { DMU_BSWAP_UINT8, TRUE, FALSE, "System attributes" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "SA master node" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "SA attr registration" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "SA attr layouts" },
! { DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" },
! { DMU_BSWAP_UINT8, FALSE, FALSE, "deduplicated block" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL deadlist map" },
! { DMU_BSWAP_UINT64, TRUE, TRUE, "DSL deadlist map hdr" },
! { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dir clones" },
! { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" }
};
const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
{ byteswap_uint8_array, "uint8" },
{ byteswap_uint16_array, "uint16" },
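The reworked dmu_ot[] table adds a third boolean column to each dmu_object_type_info_t entry alongside the existing byteswap and metadata fields. Judging from the later dmu_write_policy() changes (zp_metadata, zp_zpl_meta_to_special), the new column appears to single out metadata types eligible for placement on the special vdev class; the authoritative field name is in the matching sys/dmu.h change, which is not shown here. A minimal sketch of how such a per-type flag could be consulted, under that assumption:

/*
 * Sketch only: the struct layout and the field name ot_special_meta
 * are assumptions; the real definition lives in sys/dmu.h.
 */
typedef struct dmu_object_type_info_sketch {
	int		ot_byteswap;	/* DMU_BSWAP_* function index */
	int		ot_metadata;	/* TRUE if this type is metadata */
	int		ot_special_meta; /* TRUE if special-class eligible (assumed) */
	const char	*ot_name;	/* human-readable type name */
} dmu_object_type_info_sketch_t;

/* Per-type lookup, mirroring how dmu_ot[type] is consulted elsewhere. */
static int
dmu_type_goes_to_special(const dmu_object_type_info_sketch_t *tbl, int type)
{
	return (tbl[type].ot_metadata && tbl[type].ot_special_meta);
}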
*** 710,732 ****
*start = minimum;
return (0);
}
/*
! * If this objset is of type OST_ZFS return true if vfs's unmounted flag is set,
! * otherwise return false.
! * Used below in dmu_free_long_range_impl() to enable abort when unmounting
*/
/*ARGSUSED*/
static boolean_t
! dmu_objset_zfs_unmounting(objset_t *os)
{
#ifdef _KERNEL
! if (dmu_objset_type(os) == DMU_OST_ZFS)
! return (zfs_get_vfs_flag_unmounted(os));
! #endif
return (B_FALSE);
}
static int
dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
uint64_t length)
--- 706,742 ----
*start = minimum;
return (0);
}
/*
! * If this dnode belongs to a ZFS objset, return true if the
! * vfs's unmounted flag is set or the zfsvfs is currently
! * suspended; otherwise return false.
*/
/*ARGSUSED*/
static boolean_t
! dmu_dnode_fs_unmounting_or_suspended(dnode_t *freeing_dn)
{
#ifdef _KERNEL
! boolean_t busy = B_FALSE;
! objset_t *os = freeing_dn->dn_objset;
! zfsvfs_t *zfsvfs;
!
! if (dmu_objset_type(os) == DMU_OST_ZFS) {
! mutex_enter(&os->os_user_ptr_lock);
! zfsvfs = dmu_objset_get_user(os);
! if (zfsvfs != NULL && zfsvfs->z_vfs != NULL &&
! ((zfsvfs->z_vfs->vfs_flag & VFS_UNMOUNTED) ||
! zfsvfs->z_busy))
! busy = B_TRUE;
! mutex_exit(&os->os_user_ptr_lock);
! }
!
! return (busy);
! #else
return (B_FALSE);
+ #endif
}
static int
dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
uint64_t length)
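Relative to the old dmu_objset_zfs_unmounting(), the replacement also aborts while the zfsvfs is suspended (z_busy), and it reaches the zfsvfs through the objset's user pointer, which is only stable while os_user_ptr_lock is held. The lookup discipline in isolation, as a sketch (the helper name is illustrative, not part of the change):

/*
 * Sketch: read an objset's zfsvfs user pointer safely. The pointer can
 * be torn down concurrently, so it is fetched and dereferenced entirely
 * under os_user_ptr_lock, exactly as the new helper above does.
 */
static boolean_t
zfsvfs_is_busy_sketch(objset_t *os)
{
	boolean_t busy = B_FALSE;
	zfsvfs_t *zfsvfs;

	mutex_enter(&os->os_user_ptr_lock);
	zfsvfs = dmu_objset_get_user(os);
	if (zfsvfs != NULL && zfsvfs->z_vfs != NULL)
		busy = ((zfsvfs->z_vfs->vfs_flag & VFS_UNMOUNTED) != 0 ||
		    zfsvfs->z_busy);
	mutex_exit(&os->os_user_ptr_lock);
	return (busy);
}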
*** 743,762 ****
dirty_frees_threshold =
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
else
dirty_frees_threshold = zfs_dirty_data_max / 4;
if (length == DMU_OBJECT_END || offset + length > object_size)
length = object_size - offset;
while (length != 0) {
uint64_t chunk_end, chunk_begin, chunk_len;
uint64_t long_free_dirty_all_txgs = 0;
dmu_tx_t *tx;
! if (dmu_objset_zfs_unmounting(dn->dn_objset))
return (SET_ERROR(EINTR));
chunk_end = chunk_begin = offset + length;
/* move chunk_begin backwards to the beginning of this chunk */
err = get_next_chunk(dn, &chunk_begin, offset);
--- 753,784 ----
dirty_frees_threshold =
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
else
dirty_frees_threshold = zfs_dirty_data_max / 4;
+ if (length == DMU_OBJECT_END && offset == 0)
+ dnode_evict_dbufs(dn, 0);
+
if (length == DMU_OBJECT_END || offset + length > object_size)
length = object_size - offset;
+ mutex_enter(&dp->dp_lock);
+ dp->dp_long_freeing_total += length;
+ mutex_exit(&dp->dp_lock);
+
while (length != 0) {
uint64_t chunk_end, chunk_begin, chunk_len;
uint64_t long_free_dirty_all_txgs = 0;
dmu_tx_t *tx;
! if (dmu_dnode_fs_unmounting_or_suspended(dn)) {
! mutex_enter(&dp->dp_lock);
! dp->dp_long_freeing_total -= length;
! mutex_exit(&dp->dp_lock);
!
return (SET_ERROR(EINTR));
+ }
chunk_end = chunk_begin = offset + length;
/* move chunk_begin backwards to the beginning of this chunk */
err = get_next_chunk(dn, &chunk_begin, offset);
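The new dp_long_freeing_total counter tracks bytes of long frees still pending against the pool: it is credited with the full remaining length before the chunk loop starts, and every early return must debit it so the counter balances out (the unmount/suspend abort here, and the failed dmu_tx_assign() in the next hunk). A sketch of the locked adjust pattern (field names are from the diff; the helper itself is illustrative):

/* Illustrative wrapper for the locked credit/debit pattern above. */
static void
long_freeing_adjust(dsl_pool_t *dp, int64_t delta)
{
	mutex_enter(&dp->dp_lock);
	dp->dp_long_freeing_total += delta;	/* negative delta = debit */
	mutex_exit(&dp->dp_lock);
}

Under that sketch, the abort path above amounts to long_freeing_adjust(dp, -(int64_t)length).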
*** 794,803 ****
--- 816,828 ----
*/
dmu_tx_mark_netfree(tx);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err) {
dmu_tx_abort(tx);
+ mutex_enter(&dp->dp_lock);
+ dp->dp_long_freeing_total -= length - chunk_len;
+ mutex_exit(&dp->dp_lock);
return (err);
}
mutex_enter(&dp->dp_lock);
dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
*** 1019,1145 ****
FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
dmu_buf_rele_array(dbp, numbufs, FTAG);
}
- static int
- dmu_object_remap_one_indirect(objset_t *os, dnode_t *dn,
- uint64_t last_removal_txg, uint64_t offset)
- {
- uint64_t l1blkid = dbuf_whichblock(dn, 1, offset);
- int err = 0;
-
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- dmu_buf_impl_t *dbuf = dbuf_hold_level(dn, 1, l1blkid, FTAG);
- ASSERT3P(dbuf, !=, NULL);
-
- /*
- * If the block hasn't been written yet, this default will ensure
- * we don't try to remap it.
- */
- uint64_t birth = UINT64_MAX;
- ASSERT3U(last_removal_txg, !=, UINT64_MAX);
- if (dbuf->db_blkptr != NULL)
- birth = dbuf->db_blkptr->blk_birth;
- rw_exit(&dn->dn_struct_rwlock);
-
- /*
- * If this L1 was already written after the last removal, then we've
- * already tried to remap it.
- */
- if (birth <= last_removal_txg &&
- dbuf_read(dbuf, NULL, DB_RF_MUST_SUCCEED) == 0 &&
- dbuf_can_remap(dbuf)) {
- dmu_tx_t *tx = dmu_tx_create(os);
- dmu_tx_hold_remap_l1indirect(tx, dn->dn_object);
- err = dmu_tx_assign(tx, TXG_WAIT);
- if (err == 0) {
- (void) dbuf_dirty(dbuf, tx);
- dmu_tx_commit(tx);
- } else {
- dmu_tx_abort(tx);
- }
- }
-
- dbuf_rele(dbuf, FTAG);
-
- delay(zfs_object_remap_one_indirect_delay_ticks);
-
- return (err);
- }
-
- /*
- * Remap all blockpointers in the object, if possible, so that they reference
- * only concrete vdevs.
- *
- * To do this, iterate over the L0 blockpointers and remap any that reference
- * an indirect vdev. Note that we only examine L0 blockpointers; since we
- * cannot guarantee that we can remap all blockpointer anyways (due to split
- * blocks), we do not want to make the code unnecessarily complicated to
- * catch the unlikely case that there is an L1 block on an indirect vdev that
- * contains no indirect blockpointers.
- */
- int
- dmu_object_remap_indirects(objset_t *os, uint64_t object,
- uint64_t last_removal_txg)
- {
- uint64_t offset, l1span;
- int err;
- dnode_t *dn;
-
- err = dnode_hold(os, object, FTAG, &dn);
- if (err != 0) {
- return (err);
- }
-
- if (dn->dn_nlevels <= 1) {
- if (issig(JUSTLOOKING) && issig(FORREAL)) {
- err = SET_ERROR(EINTR);
- }
-
- /*
- * If the dnode has no indirect blocks, we cannot dirty them.
- * We still want to remap the blkptr(s) in the dnode if
- * appropriate, so mark it as dirty.
- */
- if (err == 0 && dnode_needs_remap(dn)) {
- dmu_tx_t *tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, dn->dn_object);
- if ((err = dmu_tx_assign(tx, TXG_WAIT)) == 0) {
- dnode_setdirty(dn, tx);
- dmu_tx_commit(tx);
- } else {
- dmu_tx_abort(tx);
- }
- }
-
- dnode_rele(dn, FTAG);
- return (err);
- }
-
- offset = 0;
- l1span = 1ULL << (dn->dn_indblkshift - SPA_BLKPTRSHIFT +
- dn->dn_datablkshift);
- /*
- * Find the next L1 indirect that is not a hole.
- */
- while (dnode_next_offset(dn, 0, &offset, 2, 1, 0) == 0) {
- if (issig(JUSTLOOKING) && issig(FORREAL)) {
- err = SET_ERROR(EINTR);
- break;
- }
- if ((err = dmu_object_remap_one_indirect(os, dn,
- last_removal_txg, offset)) != 0) {
- break;
- }
- offset += l1span;
- }
-
- dnode_rele(dn, FTAG);
- return (err);
- }
-
void
dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_tx_t *tx)
{
dmu_buf_t **dbp;
--- 1044,1053 ----
*** 1685,1695 ****
--- 1593,1612 ----
dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
{
dmu_sync_arg_t *dsa = varg;
dbuf_dirty_record_t *dr = dsa->dsa_dr;
dmu_buf_impl_t *db = dr->dr_dbuf;
+ zgd_t *zgd = dsa->dsa_zgd;
+ /*
+ * Record the vdev(s) backing this blkptr so they can be flushed after
+ * the writes for the lwb have completed.
+ */
+ if (zio->io_error == 0) {
+ zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
+ }
+
mutex_enter(&db->db_mtx);
ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC);
if (zio->io_error == 0) {
dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE);
if (dr->dt.dl.dr_nopwrite) {
*** 1735,1752 ****
dmu_sync_late_arrival_done(zio_t *zio)
{
blkptr_t *bp = zio->io_bp;
dmu_sync_arg_t *dsa = zio->io_private;
blkptr_t *bp_orig = &zio->io_bp_orig;
! if (zio->io_error == 0 && !BP_IS_HOLE(bp)) {
ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
ASSERT(zio->io_bp->blk_birth == zio->io_txg);
ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
}
dmu_tx_commit(dsa->dsa_tx);
dsa->dsa_done(dsa->dsa_zgd, zio->io_error);
--- 1652,1678 ----
dmu_sync_late_arrival_done(zio_t *zio)
{
blkptr_t *bp = zio->io_bp;
dmu_sync_arg_t *dsa = zio->io_private;
blkptr_t *bp_orig = &zio->io_bp_orig;
+ zgd_t *zgd = dsa->dsa_zgd;
! if (zio->io_error == 0) {
! /*
! * Record the vdev(s) backing this blkptr so they can be
! * flushed after the writes for the lwb have completed.
! */
! zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
!
! if (!BP_IS_HOLE(bp)) {
ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
ASSERT(zio->io_bp->blk_birth == zio->io_txg);
ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
}
+ }
dmu_tx_commit(dsa->dsa_tx);
dsa->dsa_done(dsa->dsa_zgd, zio->io_error);
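Both done callbacks now record the block pointer with the lwb via zil_lwb_add_block(), and only on success, since a failed write leaves no block worth flushing; the recorded vdevs let the ZIL flush the backing write caches once the lwb's writes complete. The shared shape, pulled out as a sketch:

/* Sketch of the guard both callbacks above now apply. */
static void
record_lwb_block(zio_t *zio, zgd_t *zgd)
{
	/* Only a successful write leaves a bp whose vdevs need flushing. */
	if (zio->io_error == 0)
		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
}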
*** 1754,1764 ****
kmem_free(dsa, sizeof (*dsa));
}
static int
dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
! zio_prop_t *zp, zbookmark_phys_t *zb)
{
dmu_sync_arg_t *dsa;
dmu_tx_t *tx;
tx = dmu_tx_create(os);
--- 1680,1690 ----
kmem_free(dsa, sizeof (*dsa));
}
static int
dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
! zio_prop_t *zp, zbookmark_phys_t *zb, const zio_smartcomp_info_t *sc)
{
dmu_sync_arg_t *dsa;
dmu_tx_t *tx;
tx = dmu_tx_create(os);
*** 1807,1817 ****
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done,
! dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
return (0);
}
/*
--- 1733,1743 ----
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done,
! dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb, sc));
return (0);
}
/*
*** 1837,1846 ****
--- 1763,1773 ----
* The caller should log this blkptr in the done callback.
* It is possible that the I/O will fail, in which case
* the error will be reported to the done callback and
* propagated to pio from zio_done().
*/
+
int
dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db;
objset_t *os = db->db_objset;
*** 1848,1874 ****
dbuf_dirty_record_t *dr;
dmu_sync_arg_t *dsa;
zbookmark_phys_t zb;
zio_prop_t zp;
dnode_t *dn;
ASSERT(pio != NULL);
ASSERT(txg != 0);
SET_BOOKMARK(&zb, ds->ds_object,
db->db.db_object, db->db_level, db->db_blkid);
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
! dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp);
DB_DNODE_EXIT(db);
/*
* If we're frozen (running ziltest), we always need to generate a bp.
*/
if (txg > spa_freeze_txg(os->os_spa))
! return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
/*
* Grabbing db_mtx now provides a barrier between dbuf_sync_leaf()
* and us. If we determine that this txg is not yet syncing,
* but it begins to sync a moment later, that's OK because the
--- 1775,1809 ----
dbuf_dirty_record_t *dr;
dmu_sync_arg_t *dsa;
zbookmark_phys_t zb;
zio_prop_t zp;
dnode_t *dn;
+ int flags = 0;
+ zio_smartcomp_info_t sc;
ASSERT(pio != NULL);
ASSERT(txg != 0);
SET_BOOKMARK(&zb, ds->ds_object,
db->db.db_object, db->db_level, db->db_blkid);
+ /* write to the special class only when the pool/objset configuration allows it */
+ if (spa_write_data_to_special(os->os_spa, os))
+ WP_SET_SPECIALCLASS(flags, B_TRUE);
+
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
! dmu_write_policy(os, dn, db->db_level, flags | WP_DMU_SYNC, &zp);
! dnode_setup_zio_smartcomp(db, &sc);
DB_DNODE_EXIT(db);
/*
* If we're frozen (running ziltest), we always need to generate a bp.
*/
if (txg > spa_freeze_txg(os->os_spa))
! return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb,
! &sc));
/*
* Grabbing db_mtx now provides a barrier between dbuf_sync_leaf()
* and us. If we determine that this txg is not yet syncing,
* but it begins to sync a moment later, that's OK because the
*** 1888,1898 ****
/*
* This txg is currently syncing, so we can't mess with
* the dirty record anymore; just write a new log block.
*/
mutex_exit(&db->db_mtx);
! return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
}
dr = db->db_last_dirty;
while (dr && dr->dr_txg != txg)
dr = dr->dr_next;
--- 1823,1834 ----
/*
* This txg is currently syncing, so we can't mess with
* the dirty record anymore; just write a new log block.
*/
mutex_exit(&db->db_mtx);
! return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb,
! &sc));
}
dr = db->db_last_dirty;
while (dr && dr->dr_txg != txg)
dr = dr->dr_next;
*** 1974,1984 ****
dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg,
zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
! ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
return (0);
}
int
--- 1910,1920 ----
dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg,
zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
! ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb, &sc));
return (0);
}
int
*** 2140,2159 ****
--- 2076,2111 ----
nopwrite = (!dedup && (zio_checksum_table[checksum].ci_flags &
ZCHECKSUM_FLAG_NOPWRITE) &&
compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
}
+ zp->zp_usesc = WP_GET_SPECIALCLASS(wp);
zp->zp_checksum = checksum;
zp->zp_compress = compress;
ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
zp->zp_level = level;
zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
zp->zp_dedup = dedup;
zp->zp_dedup_verify = dedup && dedup_verify;
+ zp->zp_metadata = ismd;
zp->zp_nopwrite = nopwrite;
+ zp->zp_zpl_meta_to_special = os->os_zpl_meta_to_special;
+ zp->zp_usewbc = (zp->zp_usesc &&
+ os->os_wbc_mode == ZFS_WBC_MODE_ON && !ismd);
+
+ /* explicitly control the number of copies for DDT metadata */
+ if (DMU_OT_IS_DDT_META(type) &&
+ os->os_spa->spa_ddt_meta_copies > 0) {
+ zp->zp_copies =
+ MIN(os->os_spa->spa_ddt_meta_copies,
+ spa_max_replication(os->os_spa));
+ }
+
+ DTRACE_PROBE2(dmu_wp, boolean_t, zp->zp_metadata,
+ boolean_t, zp->zp_usesc);
}
int
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
{
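Finally, the DDT override at the end of dmu_write_policy() replaces the normal copies choice for DDT metadata with the spa_ddt_meta_copies tunable, clamped to the pool's replication ceiling. A standalone check of that arithmetic (the values are hypothetical):

#include <stdio.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	unsigned spa_ddt_meta_copies = 3;	/* hypothetical tunable value */
	unsigned spa_max_replication = 2;	/* hypothetical pool-wide cap */

	/* DDT metadata copies can never exceed the replication cap. */
	printf("zp_copies = %u\n",
	    MIN(spa_ddt_meta_copies, spa_max_replication));	/* prints 2 */
	return (0);
}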