usr/src/uts/common/fs/zfs/dmu.c

*** 43,58 ****
--- 43,61 ----
  #include <sys/zfs_ioctl.h>
  #include <sys/zap.h>
  #include <sys/zio_checksum.h>
  #include <sys/zio_compress.h>
  #include <sys/sa.h>
+ #include <sys/spa_impl.h>
  #include <sys/zfeature.h>
  #include <sys/abd.h>
  #ifdef _KERNEL
  #include <sys/vmsystm.h>
  #include <sys/zfs_znode.h>
+ #include <sys/zfs_vfsops.h>
  #endif
+ #include <sys/special.h>
  
  /*
   * Enable/disable nopwrite feature.
   */
  int zfs_nopwrite_enabled = 1;
*** 63,134 ****
   * wait until the next TXG.
   * A value of zero will disable this throttle.
   */
  uint32_t zfs_per_txg_dirty_frees_percent = 30;
  
- /*
-  * This can be used for testing, to ensure that certain actions happen
-  * while in the middle of a remap (which might otherwise complete too
-  * quickly).
-  */
- int zfs_object_remap_one_indirect_delay_ticks = 0;
- 
  const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
! 	{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
! 	{ DMU_BSWAP_ZAP, TRUE, "object directory" },
! 	{ DMU_BSWAP_UINT64, TRUE, "object array" },
! 	{ DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
! 	{ DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
! 	{ DMU_BSWAP_UINT64, TRUE, "bpobj" },
! 	{ DMU_BSWAP_UINT64, TRUE, "bpobj header" },
! 	{ DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
! 	{ DMU_BSWAP_UINT64, TRUE, "SPA space map" },
! 	{ DMU_BSWAP_UINT64, TRUE, "ZIL intent log" },
! 	{ DMU_BSWAP_DNODE, TRUE, "DMU dnode" },
! 	{ DMU_BSWAP_OBJSET, TRUE, "DMU objset" },
! 	{ DMU_BSWAP_UINT64, TRUE, "DSL directory" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DSL directory child map" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DSL props" },
! 	{ DMU_BSWAP_UINT64, TRUE, "DSL dataset" },
! 	{ DMU_BSWAP_ZNODE, TRUE, "ZFS znode" },
! 	{ DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" },
! 	{ DMU_BSWAP_UINT8, FALSE, "ZFS plain file" },
! 	{ DMU_BSWAP_ZAP, TRUE, "ZFS directory" },
! 	{ DMU_BSWAP_ZAP, TRUE, "ZFS master node" },
! 	{ DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" },
! 	{ DMU_BSWAP_UINT8, FALSE, "zvol object" },
! 	{ DMU_BSWAP_ZAP, TRUE, "zvol prop" },
! 	{ DMU_BSWAP_UINT8, FALSE, "other uint8[]" },
! 	{ DMU_BSWAP_UINT64, FALSE, "other uint64[]" },
! 	{ DMU_BSWAP_ZAP, TRUE, "other ZAP" },
! 	{ DMU_BSWAP_ZAP, TRUE, "persistent error log" },
! 	{ DMU_BSWAP_UINT8, TRUE, "SPA history" },
! 	{ DMU_BSWAP_UINT64, TRUE, "SPA history offsets" },
! 	{ DMU_BSWAP_ZAP, TRUE, "Pool properties" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DSL permissions" },
! 	{ DMU_BSWAP_ACL, TRUE, "ZFS ACL" },
! 	{ DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" },
! 	{ DMU_BSWAP_UINT8, TRUE, "FUID table" },
! 	{ DMU_BSWAP_UINT64, TRUE, "FUID table size" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones" },
! 	{ DMU_BSWAP_ZAP, TRUE, "scan work queue" },
! 	{ DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" },
! 	{ DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" },
! 	{ DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DDT statistics" },
! 	{ DMU_BSWAP_UINT8, TRUE, "System attributes" },
! 	{ DMU_BSWAP_ZAP, TRUE, "SA master node" },
! 	{ DMU_BSWAP_ZAP, TRUE, "SA attr registration" },
! 	{ DMU_BSWAP_ZAP, TRUE, "SA attr layouts" },
! 	{ DMU_BSWAP_ZAP, TRUE, "scan translations" },
! 	{ DMU_BSWAP_UINT8, FALSE, "deduplicated block" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" },
! 	{ DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" },
! 	{ DMU_BSWAP_ZAP, TRUE, "DSL dir clones" },
! 	{ DMU_BSWAP_UINT64, TRUE, "bpobj subobj" }
  };
  
  const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
  	{ byteswap_uint8_array, "uint8" },
  	{ byteswap_uint16_array, "uint16" },
--- 66,130 ----
   * wait until the next TXG.
   * A value of zero will disable this throttle.
   */
  uint32_t zfs_per_txg_dirty_frees_percent = 30;
  
  const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
! 	{ DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "object directory" },
! 	{ DMU_BSWAP_UINT64, TRUE, TRUE, "object array" },
! 	{ DMU_BSWAP_UINT8, TRUE, FALSE, "packed nvlist" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "packed nvlist size" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj header" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map header" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "ZIL intent log" },
! 	{ DMU_BSWAP_DNODE, TRUE, FALSE, "DMU dnode" },
! 	{ DMU_BSWAP_OBJSET, TRUE, TRUE, "DMU objset" },
! 	{ DMU_BSWAP_UINT64, TRUE, TRUE, "DSL directory" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "DSL directory child map" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dataset snap map" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "DSL props" },
! 	{ DMU_BSWAP_UINT64, TRUE, TRUE, "DSL dataset" },
! 	{ DMU_BSWAP_ZNODE, TRUE, FALSE, "ZFS znode" },
! 	{ DMU_BSWAP_OLDACL, TRUE, FALSE, "ZFS V0 ACL" },
! 	{ DMU_BSWAP_UINT8, FALSE, FALSE, "ZFS plain file" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS directory" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS delete queue" },
! 	{ DMU_BSWAP_UINT8, FALSE, FALSE, "zvol object" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "zvol prop" },
! 	{ DMU_BSWAP_UINT8, FALSE, FALSE, "other uint8[]" },
! 	{ DMU_BSWAP_UINT64, FALSE, FALSE, "other uint64[]" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" },
! 	{ DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "Pool properties" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "DSL permissions" },
! 	{ DMU_BSWAP_ACL, TRUE, FALSE, "ZFS ACL" },
! 	{ DMU_BSWAP_UINT8, TRUE, FALSE, "ZFS SYSACL" },
! 	{ DMU_BSWAP_UINT8, TRUE, FALSE, "FUID table" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dataset next clones" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS user/group used" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS user/group quota" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "snapshot refcount tags" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" },
! 	{ DMU_BSWAP_UINT8, TRUE, FALSE, "System attributes" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "SA master node" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "SA attr registration" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "SA attr layouts" },
! 	{ DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" },
! 	{ DMU_BSWAP_UINT8, FALSE, FALSE, "deduplicated block" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "DSL deadlist map" },
! 	{ DMU_BSWAP_UINT64, TRUE, TRUE, "DSL deadlist map hdr" },
! 	{ DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dir clones" },
! 	{ DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" }
  };
  
  const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
  	{ byteswap_uint8_array, "uint8" },
  	{ byteswap_uint16_array, "uint16" },
*** 710,732 ****
  	*start = minimum;
  	return (0);
  }
  
  /*
!  * If this objset is of type OST_ZFS return true if vfs's unmounted flag is set,
!  * otherwise return false.
!  * Used below in dmu_free_long_range_impl() to enable abort when unmounting
   */
  /*ARGSUSED*/
  static boolean_t
! dmu_objset_zfs_unmounting(objset_t *os)
  {
  #ifdef _KERNEL
! 	if (dmu_objset_type(os) == DMU_OST_ZFS)
! 		return (zfs_get_vfs_flag_unmounted(os));
! #endif
  	return (B_FALSE);
  }
  
  static int
  dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
      uint64_t length)
--- 706,742 ----
  	*start = minimum;
  	return (0);
  }
  
  /*
!  * If this dnode is in the ZFS object set, return true if the vfs's
!  * unmounted flag is set or the zfsvfs is currently suspended; otherwise
!  * return false.
   */
  /*ARGSUSED*/
  static boolean_t
! dmu_dnode_fs_unmounting_or_suspended(dnode_t *freeing_dn)
  {
  #ifdef _KERNEL
! 	boolean_t busy = B_FALSE;
! 	objset_t *os = freeing_dn->dn_objset;
! 	zfsvfs_t *zfsvfs;
! 
! 	if (dmu_objset_type(os) == DMU_OST_ZFS) {
! 		mutex_enter(&os->os_user_ptr_lock);
! 		zfsvfs = dmu_objset_get_user(os);
! 		if (zfsvfs != NULL && zfsvfs->z_vfs != NULL &&
! 		    ((zfsvfs->z_vfs->vfs_flag & VFS_UNMOUNTED) ||
! 		    zfsvfs->z_busy))
! 			busy = B_TRUE;
! 		mutex_exit(&os->os_user_ptr_lock);
! 	}
! 
! 	return (busy);
! #else
  	return (B_FALSE);
+ #endif
  }
  
  static int
  dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
      uint64_t length)
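
The rewritten predicate above only dereferences the zfsvfs while holding os_user_ptr_lock, because the mounted filesystem can detach (and free the zfsvfs) concurrently with a long free. The following minimal user-space sketch shows that guarded user-pointer pattern; the struct names and fields are hypothetical stand-ins, not the real objset_t/zfsvfs_t:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct objset_user {
		bool unmounted;		/* VFS_UNMOUNTED analogue */
		bool busy;		/* z_busy (suspended) analogue */
	};

	struct objset {
		pthread_mutex_t user_lock;	/* os_user_ptr_lock analogue */
		struct objset_user *user;	/* NULL once the fs detaches */
	};

	static bool
	objset_user_busy(struct objset *os)
	{
		bool busy = false;

		/* The user pointer is only stable while the lock is held. */
		pthread_mutex_lock(&os->user_lock);
		if (os->user != NULL && (os->user->unmounted || os->user->busy))
			busy = true;
		pthread_mutex_unlock(&os->user_lock);

		return (busy);
	}
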
*** 743,762 ****
  		dirty_frees_threshold =
  		    zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
  	else
  		dirty_frees_threshold = zfs_dirty_data_max / 4;
  
  	if (length == DMU_OBJECT_END || offset + length > object_size)
  		length = object_size - offset;
  
  	while (length != 0) {
  		uint64_t chunk_end, chunk_begin, chunk_len;
  		uint64_t long_free_dirty_all_txgs = 0;
  		dmu_tx_t *tx;
  
! 		if (dmu_objset_zfs_unmounting(dn->dn_objset))
  			return (SET_ERROR(EINTR));
  
  		chunk_end = chunk_begin = offset + length;
  
  		/* move chunk_begin backwards to the beginning of this chunk */
  		err = get_next_chunk(dn, &chunk_begin, offset);
--- 753,784 ----
  		dirty_frees_threshold =
  		    zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
  	else
  		dirty_frees_threshold = zfs_dirty_data_max / 4;
  
+ 	if (length == DMU_OBJECT_END && offset == 0)
+ 		dnode_evict_dbufs(dn, 0);
+ 
  	if (length == DMU_OBJECT_END || offset + length > object_size)
  		length = object_size - offset;
  
+ 	mutex_enter(&dp->dp_lock);
+ 	dp->dp_long_freeing_total += length;
+ 	mutex_exit(&dp->dp_lock);
+ 
  	while (length != 0) {
  		uint64_t chunk_end, chunk_begin, chunk_len;
  		uint64_t long_free_dirty_all_txgs = 0;
  		dmu_tx_t *tx;
  
! 		if (dmu_dnode_fs_unmounting_or_suspended(dn)) {
! 			mutex_enter(&dp->dp_lock);
! 			dp->dp_long_freeing_total -= length;
! 			mutex_exit(&dp->dp_lock);
! 			return (SET_ERROR(EINTR));
+ 		}
  
  		chunk_end = chunk_begin = offset + length;
  
  		/* move chunk_begin backwards to the beginning of this chunk */
  		err = get_next_chunk(dn, &chunk_begin, offset);
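
For scale, the dirty-frees throttle computed at the top of this hunk allows roughly 1.2 GiB of frees to dirty a single txg at the default 30 percent when zfs_dirty_data_max is 4 GiB. A standalone sketch of the arithmetic; the 4 GiB cap is an assumed example value, not something fixed by this patch:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		/* Assumed example cap; the real zfs_dirty_data_max is tunable. */
		uint64_t zfs_dirty_data_max = 4ULL << 30;	/* 4 GiB */
		uint32_t zfs_per_txg_dirty_frees_percent = 30;	/* default above */
		uint64_t dirty_frees_threshold;

		if (zfs_per_txg_dirty_frees_percent <= 100)
			dirty_frees_threshold = zfs_per_txg_dirty_frees_percent *
			    zfs_dirty_data_max / 100;
		else
			dirty_frees_threshold = zfs_dirty_data_max / 4;

		/* 30% of 4 GiB: 1288490188 bytes of frees may dirty one txg. */
		(void) printf("%llu\n", (unsigned long long)dirty_frees_threshold);
		return (0);
	}
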
*** 794,803 ****
--- 816,828 ----
  		 */
  		dmu_tx_mark_netfree(tx);
  		err = dmu_tx_assign(tx, TXG_WAIT);
  		if (err) {
  			dmu_tx_abort(tx);
+ 			mutex_enter(&dp->dp_lock);
+ 			dp->dp_long_freeing_total -= length - chunk_len;
+ 			mutex_exit(&dp->dp_lock);
  			return (err);
  		}
  		mutex_enter(&dp->dp_lock);
  		dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
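
This credit mirrors the charge added two hunks up: dp_long_freeing_total is charged with the entire remaining length before the chunk loop, so every early return has to give back the portion that will no longer be freed (the full remaining length on the EINTR path, length - chunk_len on this tx-assign failure). A simplified user-space sketch of that charge/credit shape, using hypothetical stand-in types for dsl_pool_t and the chunk logic:

	#include <pthread.h>
	#include <stdint.h>

	typedef struct pool {
		pthread_mutex_t lock;		/* dp_lock analogue */
		uint64_t long_freeing_total;	/* dp_long_freeing_total analogue */
	} pool_t;

	/* Returns 0 on success, nonzero on failure; *lenp gets the chunk size. */
	typedef int (*chunk_fn_t)(uint64_t *lenp);

	static int
	free_range(pool_t *p, uint64_t length, chunk_fn_t free_chunk)
	{
		pthread_mutex_lock(&p->lock);
		p->long_freeing_total += length;	/* charge the whole range */
		pthread_mutex_unlock(&p->lock);

		while (length != 0) {
			uint64_t chunk_len = 0;

			if (free_chunk(&chunk_len) != 0) {
				/* credit back what will no longer be freed */
				pthread_mutex_lock(&p->lock);
				p->long_freeing_total -= length;
				pthread_mutex_unlock(&p->lock);
				return (-1);
			}
			/* advance past the chunk freed in this iteration */
			length -= chunk_len;
		}
		return (0);
	}
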
*** 1019,1145 ****
  	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
  	dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
  	dmu_buf_rele_array(dbp, numbufs, FTAG);
  }
  
- static int
- dmu_object_remap_one_indirect(objset_t *os, dnode_t *dn,
-     uint64_t last_removal_txg, uint64_t offset)
- {
- 	uint64_t l1blkid = dbuf_whichblock(dn, 1, offset);
- 	int err = 0;
- 
- 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
- 	dmu_buf_impl_t *dbuf = dbuf_hold_level(dn, 1, l1blkid, FTAG);
- 	ASSERT3P(dbuf, !=, NULL);
- 
- 	/*
- 	 * If the block hasn't been written yet, this default will ensure
- 	 * we don't try to remap it.
- 	 */
- 	uint64_t birth = UINT64_MAX;
- 	ASSERT3U(last_removal_txg, !=, UINT64_MAX);
- 	if (dbuf->db_blkptr != NULL)
- 		birth = dbuf->db_blkptr->blk_birth;
- 	rw_exit(&dn->dn_struct_rwlock);
- 
- 	/*
- 	 * If this L1 was already written after the last removal, then we've
- 	 * already tried to remap it.
- 	 */
- 	if (birth <= last_removal_txg &&
- 	    dbuf_read(dbuf, NULL, DB_RF_MUST_SUCCEED) == 0 &&
- 	    dbuf_can_remap(dbuf)) {
- 		dmu_tx_t *tx = dmu_tx_create(os);
- 		dmu_tx_hold_remap_l1indirect(tx, dn->dn_object);
- 		err = dmu_tx_assign(tx, TXG_WAIT);
- 		if (err == 0) {
- 			(void) dbuf_dirty(dbuf, tx);
- 			dmu_tx_commit(tx);
- 		} else {
- 			dmu_tx_abort(tx);
- 		}
- 	}
- 
- 	dbuf_rele(dbuf, FTAG);
- 
- 	delay(zfs_object_remap_one_indirect_delay_ticks);
- 
- 	return (err);
- }
- 
- /*
-  * Remap all blockpointers in the object, if possible, so that they reference
-  * only concrete vdevs.
-  *
-  * To do this, iterate over the L0 blockpointers and remap any that reference
-  * an indirect vdev.  Note that we only examine L0 blockpointers; since we
-  * cannot guarantee that we can remap all blockpointer anyways (due to split
-  * blocks), we do not want to make the code unnecessarily complicated to
-  * catch the unlikely case that there is an L1 block on an indirect vdev that
-  * contains no indirect blockpointers.
-  */
- int
- dmu_object_remap_indirects(objset_t *os, uint64_t object,
-     uint64_t last_removal_txg)
- {
- 	uint64_t offset, l1span;
- 	int err;
- 	dnode_t *dn;
- 
- 	err = dnode_hold(os, object, FTAG, &dn);
- 	if (err != 0) {
- 		return (err);
- 	}
- 
- 	if (dn->dn_nlevels <= 1) {
- 		if (issig(JUSTLOOKING) && issig(FORREAL)) {
- 			err = SET_ERROR(EINTR);
- 		}
- 
- 		/*
- 		 * If the dnode has no indirect blocks, we cannot dirty them.
- 		 * We still want to remap the blkptr(s) in the dnode if
- 		 * appropriate, so mark it as dirty.
- 		 */
- 		if (err == 0 && dnode_needs_remap(dn)) {
- 			dmu_tx_t *tx = dmu_tx_create(os);
- 			dmu_tx_hold_bonus(tx, dn->dn_object);
- 			if ((err = dmu_tx_assign(tx, TXG_WAIT)) == 0) {
- 				dnode_setdirty(dn, tx);
- 				dmu_tx_commit(tx);
- 			} else {
- 				dmu_tx_abort(tx);
- 			}
- 		}
- 
- 		dnode_rele(dn, FTAG);
- 		return (err);
- 	}
- 
- 	offset = 0;
- 	l1span = 1ULL << (dn->dn_indblkshift - SPA_BLKPTRSHIFT +
- 	    dn->dn_datablkshift);
- 	/*
- 	 * Find the next L1 indirect that is not a hole.
- 	 */
- 	while (dnode_next_offset(dn, 0, &offset, 2, 1, 0) == 0) {
- 		if (issig(JUSTLOOKING) && issig(FORREAL)) {
- 			err = SET_ERROR(EINTR);
- 			break;
- 		}
- 		if ((err = dmu_object_remap_one_indirect(os, dn,
- 		    last_removal_txg, offset)) != 0) {
- 			break;
- 		}
- 		offset += l1span;
- 	}
- 
- 	dnode_rele(dn, FTAG);
- 	return (err);
- }
- 
  void
  dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
      dmu_tx_t *tx)
  {
  	dmu_buf_t **dbp;
--- 1044,1053 ----
*** 1685,1695 ****
--- 1593,1612 ----
  dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
  {
  	dmu_sync_arg_t *dsa = varg;
  	dbuf_dirty_record_t *dr = dsa->dsa_dr;
  	dmu_buf_impl_t *db = dr->dr_dbuf;
+ 	zgd_t *zgd = dsa->dsa_zgd;
  
+ 	/*
+ 	 * Record the vdev(s) backing this blkptr so they can be flushed after
+ 	 * the writes for the lwb have completed.
+ 	 */
+ 	if (zio->io_error == 0) {
+ 		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
+ 	}
+ 
  	mutex_enter(&db->db_mtx);
  	ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC);
  	if (zio->io_error == 0) {
  		dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE);
  		if (dr->dt.dl.dr_nopwrite) {
*** 1735,1752 ****
  dmu_sync_late_arrival_done(zio_t *zio)
  {
  	blkptr_t *bp = zio->io_bp;
  	dmu_sync_arg_t *dsa = zio->io_private;
  	blkptr_t *bp_orig = &zio->io_bp_orig;
  
! 	if (zio->io_error == 0 && !BP_IS_HOLE(bp)) {
  		ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
  		ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
  		ASSERT(zio->io_bp->blk_birth == zio->io_txg);
  		ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
  		zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
  	}
  
  	dmu_tx_commit(dsa->dsa_tx);
  	dsa->dsa_done(dsa->dsa_zgd, zio->io_error);
--- 1652,1678 ----
  dmu_sync_late_arrival_done(zio_t *zio)
  {
  	blkptr_t *bp = zio->io_bp;
  	dmu_sync_arg_t *dsa = zio->io_private;
  	blkptr_t *bp_orig = &zio->io_bp_orig;
+ 	zgd_t *zgd = dsa->dsa_zgd;
  
! 	if (zio->io_error == 0) {
! 		/*
! 		 * Record the vdev(s) backing this blkptr so they can be
! 		 * flushed after the writes for the lwb have completed.
! 		 */
! 		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
! 
! 		if (!BP_IS_HOLE(bp)) {
  		ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
  		ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
  		ASSERT(zio->io_bp->blk_birth == zio->io_txg);
  		ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
  		zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
  	}
+ 	}
  
  	dmu_tx_commit(dsa->dsa_tx);
  	dsa->dsa_done(dsa->dsa_zgd, zio->io_error);
*** 1754,1764 ****
  	kmem_free(dsa, sizeof (*dsa));
  }
  
  static int
  dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
!     zio_prop_t *zp, zbookmark_phys_t *zb)
  {
  	dmu_sync_arg_t *dsa;
  	dmu_tx_t *tx;
  
  	tx = dmu_tx_create(os);
--- 1680,1690 ----
  	kmem_free(dsa, sizeof (*dsa));
  }
  
  static int
  dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
!     zio_prop_t *zp, zbookmark_phys_t *zb, const zio_smartcomp_info_t *sc)
  {
  	dmu_sync_arg_t *dsa;
  	dmu_tx_t *tx;
  
  	tx = dmu_tx_create(os);
*** 1807,1817 ****
  	zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
  	    abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
  	    zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
  	    dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done,
!     dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
  
  	return (0);
  }
  
  /*
--- 1733,1743 ----
  	zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
  	    abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
  	    zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
  	    dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done,
!     dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb, sc));
  
  	return (0);
  }
  
  /*
*** 1837,1846 ****
--- 1763,1773 ----
   * The caller should log this blkptr in the done callback.
   * It is possible that the I/O will fail, in which case
   * the error will be reported to the done callback and
   * propagated to pio from zio_done().
   */
+ 
  int
  dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
  {
  	dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db;
  	objset_t *os = db->db_objset;
*** 1848,1874 ****
  	dbuf_dirty_record_t *dr;
  	dmu_sync_arg_t *dsa;
  	zbookmark_phys_t zb;
  	zio_prop_t zp;
  	dnode_t *dn;
  
  	ASSERT(pio != NULL);
  	ASSERT(txg != 0);
  
  	SET_BOOKMARK(&zb, ds->ds_object,
  	    db->db.db_object, db->db_level, db->db_blkid);
  
  	DB_DNODE_ENTER(db);
  	dn = DB_DNODE(db);
! 	dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp);
  	DB_DNODE_EXIT(db);
  
  	/*
  	 * If we're frozen (running ziltest), we always need to generate a bp.
  	 */
  	if (txg > spa_freeze_txg(os->os_spa))
! 		return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
  
  	/*
  	 * Grabbing db_mtx now provides a barrier between dbuf_sync_leaf()
  	 * and us.  If we determine that this txg is not yet syncing,
  	 * but it begins to sync a moment later, that's OK because the
--- 1775,1809 ----
  	dbuf_dirty_record_t *dr;
  	dmu_sync_arg_t *dsa;
  	zbookmark_phys_t zb;
  	zio_prop_t zp;
  	dnode_t *dn;
+ 	int flags = 0;
+ 	zio_smartcomp_info_t sc;
  
  	ASSERT(pio != NULL);
  	ASSERT(txg != 0);
  
  	SET_BOOKMARK(&zb, ds->ds_object,
  	    db->db.db_object, db->db_level, db->db_blkid);
  
+ 	/* write to special only if proper conditions hold */
+ 	if (spa_write_data_to_special(os->os_spa, os))
+ 		WP_SET_SPECIALCLASS(flags, B_TRUE);
+ 
  	DB_DNODE_ENTER(db);
  	dn = DB_DNODE(db);
! 	dmu_write_policy(os, dn, db->db_level, flags | WP_DMU_SYNC, &zp);
! 	dnode_setup_zio_smartcomp(db, &sc);
  	DB_DNODE_EXIT(db);
  
  	/*
  	 * If we're frozen (running ziltest), we always need to generate a bp.
  	 */
  	if (txg > spa_freeze_txg(os->os_spa))
! 		return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb,
! 		    &sc));
  
  	/*
  	 * Grabbing db_mtx now provides a barrier between dbuf_sync_leaf()
  	 * and us.  If we determine that this txg is not yet syncing,
  	 * but it begins to sync a moment later, that's OK because the
*** 1888,1898 ****
  		/*
  		 * This txg is currently syncing, so we can't mess with
  		 * the dirty record anymore; just write a new log block.
  		 */
  		mutex_exit(&db->db_mtx);
! 		return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
  	}
  
  	dr = db->db_last_dirty;
  	while (dr && dr->dr_txg != txg)
  		dr = dr->dr_next;
--- 1823,1834 ----
  		/*
  		 * This txg is currently syncing, so we can't mess with
  		 * the dirty record anymore; just write a new log block.
  		 */
  		mutex_exit(&db->db_mtx);
! 		return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb,
! 		    &sc));
  	}
  
  	dr = db->db_last_dirty;
  	while (dr && dr->dr_txg != txg)
  		dr = dr->dr_next;
*** 1974,1984 ****
  	dsa->dsa_tx = NULL;
  
  	zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp,
  	    dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), &zp,
  	    dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
! 	    ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
  
  	return (0);
  }
  
  int
--- 1910,1920 ----
  	dsa->dsa_tx = NULL;
  
  	zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp,
  	    dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), &zp,
  	    dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
! 	    ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb, &sc));
  
  	return (0);
  }
  
  int
*** 2140,2159 ****
--- 2076,2111 ----
  		nopwrite = (!dedup && (zio_checksum_table[checksum].ci_flags &
  		    ZCHECKSUM_FLAG_NOPWRITE) &&
  		    compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
  	}
  
+ 	zp->zp_usesc = WP_GET_SPECIALCLASS(wp);
  	zp->zp_checksum = checksum;
  	zp->zp_compress = compress;
  	ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
  	zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
  	zp->zp_level = level;
  	zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
  	zp->zp_dedup = dedup;
  	zp->zp_dedup_verify = dedup && dedup_verify;
+ 	zp->zp_metadata = ismd;
  	zp->zp_nopwrite = nopwrite;
+ 	zp->zp_zpl_meta_to_special = os->os_zpl_meta_to_special;
+ 	zp->zp_usewbc = (zp->zp_usesc &&
+ 	    os->os_wbc_mode == ZFS_WBC_MODE_ON && !ismd);
+ 
+ 	/* explicitly control the number of copies for DDT */
+ 	if (DMU_OT_IS_DDT_META(type) &&
+ 	    os->os_spa->spa_ddt_meta_copies > 0) {
+ 		zp->zp_copies =
+ 		    MIN(os->os_spa->spa_ddt_meta_copies,
+ 		    spa_max_replication(os->os_spa));
+ 	}
+ 
+ 	DTRACE_PROBE2(dmu_wp, boolean_t, zp->zp_metadata,
+ 	    boolean_t, zp->zp_usesc);
  }
  
  int
  dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
  {
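
The DDT override in the hunk above takes effect only when a nonzero spa_ddt_meta_copies has been configured, and it is still clamped by spa_max_replication() just like the generic zp_copies computation. A standalone illustration of the clamping; all values are hypothetical (stock pools carry at most 3 DVAs per block pointer):

	#include <stdint.h>
	#include <stdio.h>

	#define	MIN(a, b)	((a) < (b) ? (a) : (b))

	int
	main(void)
	{
		uint64_t copies = 1;			/* generic zp_copies result */
		uint64_t spa_ddt_meta_copies = 5;	/* admin asked for 5 */
		uint64_t spa_max_replication = 3;	/* pool-wide ceiling */

		/* Mirrors the override above: a nonzero setting wins, but clamped. */
		if (spa_ddt_meta_copies > 0)
			copies = MIN(spa_ddt_meta_copies, spa_max_replication);

		(void) printf("DDT metadata copies: %llu\n",
		    (unsigned long long)copies);	/* prints 3 */
		return (0);
	}
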