Print this page
6288 dmu_buf_will_dirty could be faster
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Justin Gibbs <gibbs@scsiguy.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Robert Mustacchi <rm@joyent.com>
6267 dn_bonus evicted too early
Reviewed by: Richard Yao <ryao@gentoo.org>
Reviewed by: Xin LI <delphij@freebsd.org>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>

*** 270,280 **** * dmu_bonus_hold() for an example), so we can only * test the generic invariant that holds >= dirtycnt. */ ASSERT3U(holds, >=, db->db_dirtycnt); } else { ! if (db->db_immediate_evict == TRUE) ASSERT3U(holds, >=, db->db_dirtycnt); else ASSERT3U(holds, >, 0); } #endif --- 270,280 ---- * dmu_bonus_hold() for an example), so we can only * test the generic invariant that holds >= dirtycnt. */ ASSERT3U(holds, >=, db->db_dirtycnt); } else { ! if (db->db_user_immediate_evict == TRUE) ASSERT3U(holds, >=, db->db_dirtycnt); else ASSERT3U(holds, >, 0); } #endif
*** 1095,1104 **** --- 1095,1130 ---- ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf)); (void) arc_release(db->db_buf, db); } + /* + * We already have a dirty record for this TXG, and we are being + * dirtied again. + */ + static void + dbuf_redirty(dbuf_dirty_record_t *dr) + { + dmu_buf_impl_t *db = dr->dr_dbuf; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) { + /* + * If this buffer has already been written out, + * we now need to reset its state. + */ + dbuf_unoverride(dr); + if (db->db.db_object != DMU_META_DNODE_OBJECT && + db->db_state != DB_NOFILL) { + /* Already released on initial dirty, so just thaw. */ + ASSERT(arc_released(db->db_buf)); + arc_buf_thaw(db->db_buf); + } + } + } + dbuf_dirty_record_t * dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) { dnode_t *dn; objset_t *os;
*** 1167,1186 **** while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg) drp = &dr->dr_next; if (dr && dr->dr_txg == tx->tx_txg) { DB_DNODE_EXIT(db); ! if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) { ! /* ! * If this buffer has already been written out, ! * we now need to reset its state. ! */ ! dbuf_unoverride(dr); ! if (db->db.db_object != DMU_META_DNODE_OBJECT && ! db->db_state != DB_NOFILL) ! arc_buf_thaw(db->db_buf); ! } mutex_exit(&db->db_mtx); return (dr); } /* --- 1193,1203 ---- while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg) drp = &dr->dr_next; if (dr && dr->dr_txg == tx->tx_txg) { DB_DNODE_EXIT(db); ! dbuf_redirty(dr); mutex_exit(&db->db_mtx); return (dr); } /*
*** 1486,1495 **** --- 1503,1536 ---- int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH; ASSERT(tx->tx_txg != 0); ASSERT(!refcount_is_zero(&db->db_holds)); + /* + * Quick check for dirtyness. For already dirty blocks, this + * reduces runtime of this function by >90%, and overall performance + * by 50% for some workloads (e.g. file deletion with indirect blocks + * cached). + */ + mutex_enter(&db->db_mtx); + dbuf_dirty_record_t *dr; + for (dr = db->db_last_dirty; + dr != NULL && dr->dr_txg >= tx->tx_txg; dr = dr->dr_next) { + /* + * It's possible that it is already dirty but not cached, + * because there are some calls to dbuf_dirty() that don't + * go through dmu_buf_will_dirty(). + */ + if (dr->dr_txg == tx->tx_txg && db->db_state == DB_CACHED) { + /* This dbuf is already dirty and cached. */ + dbuf_redirty(dr); + mutex_exit(&db->db_mtx); + return; + } + } + mutex_exit(&db->db_mtx); + DB_DNODE_ENTER(db); if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) rf |= DB_RF_HAVESTRUCT; DB_DNODE_EXIT(db); (void) dbuf_read(db, NULL, rf);
*** 1809,1820 **** db->db_dnode_handle = dn->dn_handle; db->db_parent = parent; db->db_blkptr = blkptr; db->db_user = NULL; ! db->db_immediate_evict = 0; ! db->db_freed_in_flight = 0; if (blkid == DMU_BONUS_BLKID) { ASSERT3P(parent, ==, dn->dn_dbuf); db->db.db_size = DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t); --- 1850,1862 ---- db->db_dnode_handle = dn->dn_handle; db->db_parent = parent; db->db_blkptr = blkptr; db->db_user = NULL; ! db->db_user_immediate_evict = FALSE; ! db->db_freed_in_flight = FALSE; ! db->db_pending_evict = FALSE; if (blkid == DMU_BONUS_BLKID) { ASSERT3P(parent, ==, dn->dn_dbuf); db->db.db_size = DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t);
*** 2202,2217 **** */ if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0)) arc_buf_freeze(db->db_buf); if (holds == db->db_dirtycnt && ! db->db_level == 0 && db->db_immediate_evict) dbuf_evict_user(db); if (holds == 0) { if (db->db_blkid == DMU_BONUS_BLKID) { dnode_t *dn; /* * If the dnode moves here, we cannot cross this * barrier until the move completes. */ --- 2244,2260 ---- */ if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0)) arc_buf_freeze(db->db_buf); if (holds == db->db_dirtycnt && ! db->db_level == 0 && db->db_user_immediate_evict) dbuf_evict_user(db); if (holds == 0) { if (db->db_blkid == DMU_BONUS_BLKID) { dnode_t *dn; + boolean_t evict_dbuf = db->db_pending_evict; /* * If the dnode moves here, we cannot cross this * barrier until the move completes. */
*** 2222,2270 **** /* * Decrementing the dbuf count means that the bonus * buffer's dnode hold is no longer discounted in * dnode_move(). The dnode cannot move until after ! * the dnode_rele_and_unlock() below. */ DB_DNODE_EXIT(db); /* * Do not reference db after its lock is dropped. * Another thread may evict it. */ mutex_exit(&db->db_mtx); ! /* ! * If the dnode has been freed, evict the bonus ! * buffer immediately. The data in the bonus ! * buffer is no longer relevant and this prevents ! * a stale bonus buffer from being associated ! * with this dnode_t should the dnode_t be reused ! * prior to being destroyed. ! */ ! mutex_enter(&dn->dn_mtx); ! if (dn->dn_type == DMU_OT_NONE || ! dn->dn_free_txg != 0) { ! /* ! * Drop dn_mtx. It is a leaf lock and ! * cannot be held when dnode_evict_bonus() ! * acquires other locks in order to ! * perform the eviction. ! * ! * Freed dnodes cannot be reused until the ! * last hold is released. Since this bonus ! * buffer has a hold, the dnode will remain ! * in the free state, even without dn_mtx ! * held, until the dnode_rele_and_unlock() ! * below. ! */ ! mutex_exit(&dn->dn_mtx); dnode_evict_bonus(dn); ! mutex_enter(&dn->dn_mtx); ! } ! dnode_rele_and_unlock(dn, db); } else if (db->db_buf == NULL) { /* * This is a special case: we never associated this * dbuf with any data allocated from the ARC. */ --- 2265,2288 ---- /* * Decrementing the dbuf count means that the bonus * buffer's dnode hold is no longer discounted in * dnode_move(). The dnode cannot move until after ! * the dnode_rele() below. */ DB_DNODE_EXIT(db); /* * Do not reference db after its lock is dropped. * Another thread may evict it. */ mutex_exit(&db->db_mtx); ! if (evict_dbuf) dnode_evict_bonus(dn); ! ! dnode_rele(dn, db); } else if (db->db_buf == NULL) { /* * This is a special case: we never associated this * dbuf with any data allocated from the ARC. */
*** 2307,2317 **** dbuf_clear(db); arc_freed(spa, &bp); } else { dbuf_clear(db); } ! } else if (db->db_objset->os_evicting || arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); } else { mutex_exit(&db->db_mtx); } --- 2325,2335 ---- dbuf_clear(db); arc_freed(spa, &bp); } else { dbuf_clear(db); } ! } else if (db->db_pending_evict || arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); } else { mutex_exit(&db->db_mtx); }
*** 2355,2365 **** void * dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ! db->db_immediate_evict = TRUE; return (dmu_buf_set_user(db_fake, user)); } void * dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user) --- 2373,2383 ---- void * dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ! db->db_user_immediate_evict = TRUE; return (dmu_buf_set_user(db_fake, user)); } void * dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)