Print this page
6288 dmu_buf_will_dirty could be faster
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Justin Gibbs <gibbs@scsiguy.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Robert Mustacchi <rm@joyent.com>
6267 dn_bonus evicted too early
Reviewed by: Richard Yao <ryao@gentoo.org>
Reviewed by: Xin LI <delphij@freebsd.org>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>

@@ -270,11 +270,11 @@
                  * dmu_bonus_hold() for an example), so we can only
                  * test the generic invariant that holds >= dirtycnt.
                  */
                 ASSERT3U(holds, >=, db->db_dirtycnt);
         } else {
-                if (db->db_immediate_evict == TRUE)
+                if (db->db_user_immediate_evict == TRUE)
                         ASSERT3U(holds, >=, db->db_dirtycnt);
                 else
                         ASSERT3U(holds, >, 0);
         }
 #endif

@@ -1095,10 +1095,36 @@
         ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));
 
         (void) arc_release(db->db_buf, db);
 }
 
+/*
+ * We already have a dirty record for this TXG, and we are being
+ * dirtied again.
+ */
+static void
+dbuf_redirty(dbuf_dirty_record_t *dr)
+{
+        dmu_buf_impl_t *db = dr->dr_dbuf;
+
+        ASSERT(MUTEX_HELD(&db->db_mtx));
+
+        if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
+                /*
+                 * If this buffer has already been written out,
+                 * we now need to reset its state.
+                 */
+                dbuf_unoverride(dr);
+                if (db->db.db_object != DMU_META_DNODE_OBJECT &&
+                    db->db_state != DB_NOFILL) {
+                        /* Already released on initial dirty, so just thaw. */
+                        ASSERT(arc_released(db->db_buf));
+                        arc_buf_thaw(db->db_buf);
+                }
+        }
+}
+
 dbuf_dirty_record_t *
 dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 {
         dnode_t *dn;
         objset_t *os;

@@ -1167,20 +1193,11 @@
         while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg)
                 drp = &dr->dr_next;
         if (dr && dr->dr_txg == tx->tx_txg) {
                 DB_DNODE_EXIT(db);
 
-                if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
-                        /*
-                         * If this buffer has already been written out,
-                         * we now need to reset its state.
-                         */
-                        dbuf_unoverride(dr);
-                        if (db->db.db_object != DMU_META_DNODE_OBJECT &&
-                            db->db_state != DB_NOFILL)
-                                arc_buf_thaw(db->db_buf);
-                }
+                dbuf_redirty(dr);
                 mutex_exit(&db->db_mtx);
                 return (dr);
         }
 
         /*

@@ -1486,10 +1503,34 @@
         int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
 
         ASSERT(tx->tx_txg != 0);
         ASSERT(!refcount_is_zero(&db->db_holds));
 
+        /*
+         * Quick check for dirtyness.  For already dirty blocks, this
+         * reduces runtime of this function by >90%, and overall performance
+         * by 50% for some workloads (e.g. file deletion with indirect blocks
+         * cached).
+         */
+        mutex_enter(&db->db_mtx);
+        dbuf_dirty_record_t *dr;
+        for (dr = db->db_last_dirty;
+            dr != NULL && dr->dr_txg >= tx->tx_txg; dr = dr->dr_next) {
+                /*
+                 * It's possible that it is already dirty but not cached,
+                 * because there are some calls to dbuf_dirty() that don't
+                 * go through dmu_buf_will_dirty().
+                 */
+                if (dr->dr_txg == tx->tx_txg && db->db_state == DB_CACHED) {
+                        /* This dbuf is already dirty and cached. */
+                        dbuf_redirty(dr);
+                        mutex_exit(&db->db_mtx);
+                        return;
+                }
+        }
+        mutex_exit(&db->db_mtx);
+
         DB_DNODE_ENTER(db);
         if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
                 rf |= DB_RF_HAVESTRUCT;
         DB_DNODE_EXIT(db);
         (void) dbuf_read(db, NULL, rf);

@@ -1809,12 +1850,13 @@
         db->db_dnode_handle = dn->dn_handle;
         db->db_parent = parent;
         db->db_blkptr = blkptr;
 
         db->db_user = NULL;
-        db->db_immediate_evict = 0;
-        db->db_freed_in_flight = 0;
+        db->db_user_immediate_evict = FALSE;
+        db->db_freed_in_flight = FALSE;
+        db->db_pending_evict = FALSE;
 
         if (blkid == DMU_BONUS_BLKID) {
                 ASSERT3P(parent, ==, dn->dn_dbuf);
                 db->db.db_size = DN_MAX_BONUSLEN -
                     (dn->dn_nblkptr-1) * sizeof (blkptr_t);

@@ -2202,16 +2244,17 @@
          */
         if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0))
                 arc_buf_freeze(db->db_buf);
 
         if (holds == db->db_dirtycnt &&
-            db->db_level == 0 && db->db_immediate_evict)
+            db->db_level == 0 && db->db_user_immediate_evict)
                 dbuf_evict_user(db);
 
         if (holds == 0) {
                 if (db->db_blkid == DMU_BONUS_BLKID) {
                         dnode_t *dn;
+                        boolean_t evict_dbuf = db->db_pending_evict;
 
                         /*
                          * If the dnode moves here, we cannot cross this
                          * barrier until the move completes.
                          */

@@ -2222,49 +2265,24 @@
 
                         /*
                          * Decrementing the dbuf count means that the bonus
                          * buffer's dnode hold is no longer discounted in
                          * dnode_move(). The dnode cannot move until after
-                         * the dnode_rele_and_unlock() below.
+                         * the dnode_rele() below.
                          */
                         DB_DNODE_EXIT(db);
 
                         /*
                          * Do not reference db after its lock is dropped.
                          * Another thread may evict it.
                          */
                         mutex_exit(&db->db_mtx);
 
-                        /*
-                         * If the dnode has been freed, evict the bonus
-                         * buffer immediately.  The data in the bonus
-                         * buffer is no longer relevant and this prevents
-                         * a stale bonus buffer from being associated
-                         * with this dnode_t should the dnode_t be reused
-                         * prior to being destroyed.
-                         */
-                        mutex_enter(&dn->dn_mtx);
-                        if (dn->dn_type == DMU_OT_NONE ||
-                            dn->dn_free_txg != 0) {
-                                /*
-                                 * Drop dn_mtx.  It is a leaf lock and
-                                 * cannot be held when dnode_evict_bonus()
-                                 * acquires other locks in order to
-                                 * perform the eviction.
-                                 *
-                                 * Freed dnodes cannot be reused until the
-                                 * last hold is released.  Since this bonus
-                                 * buffer has a hold, the dnode will remain
-                                 * in the free state, even without dn_mtx
-                                 * held, until the dnode_rele_and_unlock()
-                                 * below.
-                                 */
-                                mutex_exit(&dn->dn_mtx);
+                        if (evict_dbuf)
                                 dnode_evict_bonus(dn);
-                                mutex_enter(&dn->dn_mtx);
-                        }
-                        dnode_rele_and_unlock(dn, db);
+
+                        dnode_rele(dn, db);
                 } else if (db->db_buf == NULL) {
                         /*
                          * This is a special case: we never associated this
                          * dbuf with any data allocated from the ARC.
                          */

@@ -2307,11 +2325,11 @@
                                         dbuf_clear(db);
                                         arc_freed(spa, &bp);
                                 } else {
                                         dbuf_clear(db);
                                 }
-                        } else if (db->db_objset->os_evicting ||
+                        } else if (db->db_pending_evict ||
                             arc_buf_eviction_needed(db->db_buf)) {
                                 dbuf_clear(db);
                         } else {
                                 mutex_exit(&db->db_mtx);
                         }

@@ -2355,11 +2373,11 @@
 void *
 dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
 {
         dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 
-        db->db_immediate_evict = TRUE;
+        db->db_user_immediate_evict = TRUE;
         return (dmu_buf_set_user(db_fake, user));
 }
 
 void *
 dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)