Print this page
6288 dmu_buf_will_dirty could be faster
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Justin Gibbs <gibbs@scsiguy.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Robert Mustacchi <rm@joyent.com>
6267 dn_bonus evicted too early
Reviewed by: Richard Yao <ryao@gentoo.org>
Reviewed by: Xin LI <delphij@freebsd.org>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
*** 270,280 ****
* dmu_bonus_hold() for an example), so we can only
* test the generic invariant that holds >= dirtycnt.
*/
ASSERT3U(holds, >=, db->db_dirtycnt);
} else {
! if (db->db_immediate_evict == TRUE)
ASSERT3U(holds, >=, db->db_dirtycnt);
else
ASSERT3U(holds, >, 0);
}
#endif
--- 270,280 ----
* dmu_bonus_hold() for an example), so we can only
* test the generic invariant that holds >= dirtycnt.
*/
ASSERT3U(holds, >=, db->db_dirtycnt);
} else {
! if (db->db_user_immediate_evict == TRUE)
ASSERT3U(holds, >=, db->db_dirtycnt);
else
ASSERT3U(holds, >, 0);
}
#endif
*** 1095,1104 ****
--- 1095,1130 ----
ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));
(void) arc_release(db->db_buf, db);
}
+ /*
+ * We already have a dirty record for this TXG, and we are being
+ * dirtied again.
+ */
+ static void
+ dbuf_redirty(dbuf_dirty_record_t *dr)
+ {
+ dmu_buf_impl_t *db = dr->dr_dbuf;
+
+ ASSERT(MUTEX_HELD(&db->db_mtx));
+
+ if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
+ /*
+ * If this buffer has already been written out,
+ * we now need to reset its state.
+ */
+ dbuf_unoverride(dr);
+ if (db->db.db_object != DMU_META_DNODE_OBJECT &&
+ db->db_state != DB_NOFILL) {
+ /* Already released on initial dirty, so just thaw. */
+ ASSERT(arc_released(db->db_buf));
+ arc_buf_thaw(db->db_buf);
+ }
+ }
+ }
+
dbuf_dirty_record_t *
dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
dnode_t *dn;
objset_t *os;
*** 1167,1186 ****
while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg)
drp = &dr->dr_next;
if (dr && dr->dr_txg == tx->tx_txg) {
DB_DNODE_EXIT(db);
! if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
! /*
! * If this buffer has already been written out,
! * we now need to reset its state.
! */
! dbuf_unoverride(dr);
! if (db->db.db_object != DMU_META_DNODE_OBJECT &&
! db->db_state != DB_NOFILL)
! arc_buf_thaw(db->db_buf);
! }
mutex_exit(&db->db_mtx);
return (dr);
}
/*
--- 1193,1203 ----
while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg)
drp = &dr->dr_next;
if (dr && dr->dr_txg == tx->tx_txg) {
DB_DNODE_EXIT(db);
! dbuf_redirty(dr);
mutex_exit(&db->db_mtx);
return (dr);
}
/*
*** 1486,1495 ****
--- 1503,1536 ----
int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
ASSERT(tx->tx_txg != 0);
ASSERT(!refcount_is_zero(&db->db_holds));
+ /*
+ * Quick check for dirtyness. For already dirty blocks, this
+ * reduces runtime of this function by >90%, and overall performance
+ * by 50% for some workloads (e.g. file deletion with indirect blocks
+ * cached).
+ */
+ mutex_enter(&db->db_mtx);
+ dbuf_dirty_record_t *dr;
+ for (dr = db->db_last_dirty;
+ dr != NULL && dr->dr_txg >= tx->tx_txg; dr = dr->dr_next) {
+ /*
+ * It's possible that it is already dirty but not cached,
+ * because there are some calls to dbuf_dirty() that don't
+ * go through dmu_buf_will_dirty().
+ */
+ if (dr->dr_txg == tx->tx_txg && db->db_state == DB_CACHED) {
+ /* This dbuf is already dirty and cached. */
+ dbuf_redirty(dr);
+ mutex_exit(&db->db_mtx);
+ return;
+ }
+ }
+ mutex_exit(&db->db_mtx);
+
DB_DNODE_ENTER(db);
if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
rf |= DB_RF_HAVESTRUCT;
DB_DNODE_EXIT(db);
(void) dbuf_read(db, NULL, rf);
*** 1809,1820 ****
db->db_dnode_handle = dn->dn_handle;
db->db_parent = parent;
db->db_blkptr = blkptr;
db->db_user = NULL;
! db->db_immediate_evict = 0;
! db->db_freed_in_flight = 0;
if (blkid == DMU_BONUS_BLKID) {
ASSERT3P(parent, ==, dn->dn_dbuf);
db->db.db_size = DN_MAX_BONUSLEN -
(dn->dn_nblkptr-1) * sizeof (blkptr_t);
--- 1850,1862 ----
db->db_dnode_handle = dn->dn_handle;
db->db_parent = parent;
db->db_blkptr = blkptr;
db->db_user = NULL;
! db->db_user_immediate_evict = FALSE;
! db->db_freed_in_flight = FALSE;
! db->db_pending_evict = FALSE;
if (blkid == DMU_BONUS_BLKID) {
ASSERT3P(parent, ==, dn->dn_dbuf);
db->db.db_size = DN_MAX_BONUSLEN -
(dn->dn_nblkptr-1) * sizeof (blkptr_t);
*** 2202,2217 ****
*/
if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0))
arc_buf_freeze(db->db_buf);
if (holds == db->db_dirtycnt &&
! db->db_level == 0 && db->db_immediate_evict)
dbuf_evict_user(db);
if (holds == 0) {
if (db->db_blkid == DMU_BONUS_BLKID) {
dnode_t *dn;
/*
* If the dnode moves here, we cannot cross this
* barrier until the move completes.
*/
--- 2244,2260 ----
*/
if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0))
arc_buf_freeze(db->db_buf);
if (holds == db->db_dirtycnt &&
! db->db_level == 0 && db->db_user_immediate_evict)
dbuf_evict_user(db);
if (holds == 0) {
if (db->db_blkid == DMU_BONUS_BLKID) {
dnode_t *dn;
+ boolean_t evict_dbuf = db->db_pending_evict;
/*
* If the dnode moves here, we cannot cross this
* barrier until the move completes.
*/
*** 2222,2270 ****
/*
* Decrementing the dbuf count means that the bonus
* buffer's dnode hold is no longer discounted in
* dnode_move(). The dnode cannot move until after
! * the dnode_rele_and_unlock() below.
*/
DB_DNODE_EXIT(db);
/*
* Do not reference db after its lock is dropped.
* Another thread may evict it.
*/
mutex_exit(&db->db_mtx);
! /*
! * If the dnode has been freed, evict the bonus
! * buffer immediately. The data in the bonus
! * buffer is no longer relevant and this prevents
! * a stale bonus buffer from being associated
! * with this dnode_t should the dnode_t be reused
! * prior to being destroyed.
! */
! mutex_enter(&dn->dn_mtx);
! if (dn->dn_type == DMU_OT_NONE ||
! dn->dn_free_txg != 0) {
! /*
! * Drop dn_mtx. It is a leaf lock and
! * cannot be held when dnode_evict_bonus()
! * acquires other locks in order to
! * perform the eviction.
! *
! * Freed dnodes cannot be reused until the
! * last hold is released. Since this bonus
! * buffer has a hold, the dnode will remain
! * in the free state, even without dn_mtx
! * held, until the dnode_rele_and_unlock()
! * below.
! */
! mutex_exit(&dn->dn_mtx);
dnode_evict_bonus(dn);
! mutex_enter(&dn->dn_mtx);
! }
! dnode_rele_and_unlock(dn, db);
} else if (db->db_buf == NULL) {
/*
* This is a special case: we never associated this
* dbuf with any data allocated from the ARC.
*/
--- 2265,2288 ----
/*
* Decrementing the dbuf count means that the bonus
* buffer's dnode hold is no longer discounted in
* dnode_move(). The dnode cannot move until after
! * the dnode_rele() below.
*/
DB_DNODE_EXIT(db);
/*
* Do not reference db after its lock is dropped.
* Another thread may evict it.
*/
mutex_exit(&db->db_mtx);
! if (evict_dbuf)
dnode_evict_bonus(dn);
!
! dnode_rele(dn, db);
} else if (db->db_buf == NULL) {
/*
* This is a special case: we never associated this
* dbuf with any data allocated from the ARC.
*/
*** 2307,2317 ****
dbuf_clear(db);
arc_freed(spa, &bp);
} else {
dbuf_clear(db);
}
! } else if (db->db_objset->os_evicting ||
arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
} else {
mutex_exit(&db->db_mtx);
}
--- 2325,2335 ----
dbuf_clear(db);
arc_freed(spa, &bp);
} else {
dbuf_clear(db);
}
! } else if (db->db_pending_evict ||
arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
} else {
mutex_exit(&db->db_mtx);
}
*** 2355,2365 ****
void *
dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
! db->db_immediate_evict = TRUE;
return (dmu_buf_set_user(db_fake, user));
}
void *
dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
--- 2373,2383 ----
void *
dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
! db->db_user_immediate_evict = TRUE;
return (dmu_buf_set_user(db_fake, user));
}
void *
dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)