Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
        
@@ -20,10 +20,11 @@
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/dbuf.h>
 #include <sys/dnode.h>
@@ -394,53 +395,41 @@
  * Try to kick all the dnode's dbufs out of the cache...
  */
 void
 dnode_evict_dbufs(dnode_t *dn)
 {
-        int progress;
-        int pass = 0;
-
-        do {
+        dmu_buf_impl_t db_marker;
                 dmu_buf_impl_t *db, *db_next;
-                int evicting = FALSE;
 
-                progress = FALSE;
                 mutex_enter(&dn->dn_dbufs_mtx);
                 for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
-                        db_next = AVL_NEXT(&dn->dn_dbufs, db);
+
 #ifdef  DEBUG
                         DB_DNODE_ENTER(db);
                         ASSERT3P(DB_DNODE(db), ==, dn);
                         DB_DNODE_EXIT(db);
 #endif  /* DEBUG */
 
                         mutex_enter(&db->db_mtx);
-                        if (db->db_state == DB_EVICTING) {
-                                progress = TRUE;
-                                evicting = TRUE;
-                                mutex_exit(&db->db_mtx);
-                        } else if (refcount_is_zero(&db->db_holds)) {
-                                progress = TRUE;
-                                dbuf_clear(db); /* exits db_mtx for us */
+                if (db->db_state != DB_EVICTING &&
+                    refcount_is_zero(&db->db_holds)) {
+                        db_marker.db_level = db->db_level;
+                        db_marker.db_blkid = db->db_blkid;
+                        db_marker.db_state = DB_SEARCH;
+                        avl_insert_here(&dn->dn_dbufs, &db_marker, db,
+                            AVL_BEFORE);
+
+                        dbuf_clear(db);
+
+                        db_next = AVL_NEXT(&dn->dn_dbufs, &db_marker);
+                        avl_remove(&dn->dn_dbufs, &db_marker);
                         } else {
                                 mutex_exit(&db->db_mtx);
+                        db_next = AVL_NEXT(&dn->dn_dbufs, db);
                         }
-
                 }
-                /*
-                 * NB: we need to drop dn_dbufs_mtx between passes so
-                 * that any DB_EVICTING dbufs can make progress.
-                 * Ideally, we would have some cv we could wait on, but
-                 * since we don't, just wait a bit to give the other
-                 * thread a chance to run.
-                 */
                 mutex_exit(&dn->dn_dbufs_mtx);
-                if (evicting)
-                        delay(1);
-                pass++;
-                ASSERT(pass < 100); /* sanity check */
-        } while (progress);
 
         rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
         if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
                 mutex_enter(&dn->dn_bonus->db_mtx);
                 dbuf_evict(dn->dn_bonus);
@@ -495,11 +484,10 @@
         ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr));
 
         dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
         dnode_evict_dbufs(dn);
         ASSERT(avl_is_empty(&dn->dn_dbufs));
-        ASSERT3P(dn->dn_bonus, ==, NULL);
 
         /*
          * XXX - It would be nice to assert this, but we may still
          * have residual holds from async evictions from the arc...
          *