Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
*** 22,31 ****
--- 22,32 ----
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/dmu.h>
#include <sys/dmu_send.h>
*** 52,65 ****
--- 53,72 ----
static void dbuf_destroy(dmu_buf_impl_t *db);
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
+ #ifndef __lint
+ extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
+ dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp);
+ #endif /* ! __lint */
+
/*
* Global data structures and functions for the dbuf cache.
*/
static kmem_cache_t *dbuf_cache;
+ static taskq_t *dbu_evict_taskq;
/* ARGSUSED */
static int
dbuf_cons(void *vdb, void *unused, int kmflag)
{
*** 213,233 ****
atomic_dec_64(&dbuf_hash_count);
}
static arc_evict_func_t dbuf_do_evict;
static void
dbuf_evict_user(dmu_buf_impl_t *db)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
! if (db->db_level != 0 || db->db_evict_func == NULL)
return;
! db->db_evict_func(&db->db, db->db_user_ptr);
! db->db_user_ptr = NULL;
! db->db_evict_func = NULL;
}
boolean_t
dbuf_is_metadata(dmu_buf_impl_t *db)
{
--- 220,295 ----
atomic_dec_64(&dbuf_hash_count);
}
static arc_evict_func_t dbuf_do_evict;
/*
 * Selects which hold-count invariant dbuf_verify_user() asserts.
 * DBVU_EVICTING is passed by dbuf_evict_user(); DBVU_NOT_EVICTING is
 * passed by dmu_buf_replace_user() and dmu_buf_get_user().
 */
+ typedef enum {
+ DBVU_EVICTING,
+ DBVU_NOT_EVICTING
+ } dbvu_verify_type_t;
+
/*
 * Debug-only consistency check of the user data attached to a dbuf.
 *
 * The whole body is compiled only when ZFS_DEBUG is defined; otherwise
 * the function is empty.  It returns immediately when no user is
 * attached (db_user == NULL).  Otherwise it asserts that the dbuf is
 * level 0, has non-NULL data, is in DB_CACHED state, and that its hold
 * count satisfies the invariant selected by verify_type.
 */
+ static void
+ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
+ {
+ #ifdef ZFS_DEBUG
+ int64_t holds;
+
+ if (db->db_user == NULL)
+ return;
+
+ /* Only data blocks support the attachment of user data. */
+ ASSERT(db->db_level == 0);
+
+ /* Clients must resolve a dbuf before attaching user data. */
+ ASSERT(db->db.db_data != NULL);
+ ASSERT3U(db->db_state, ==, DB_CACHED);
+
+ holds = refcount_count(&db->db_holds);
+ if (verify_type == DBVU_EVICTING) {
+ /*
+ * Immediate eviction occurs when holds == dirtycnt.
+ * For normal eviction buffers, holds is zero on
+ * eviction, except when dbuf_fix_old_data() calls
+ * dbuf_clear_data(). However, the hold count can grow
+ * during eviction even though db_mtx is held (see
+ * dmu_bonus_hold() for an example), so we can only
+ * test the generic invariant that holds >= dirtycnt.
+ */
+ ASSERT3U(holds, >=, db->db_dirtycnt);
+ } else {
/*
 * Not evicting: a dbuf flagged db_immediate_evict only needs
 * holds >= dirtycnt; any other dbuf with a user attached must
 * have at least one hold.
 */
+ if (db->db_immediate_evict == TRUE)
+ ASSERT3U(holds, >=, db->db_dirtycnt);
+ else
+ ASSERT3U(holds, >, 0);
+ }
+ #endif
+ }
+
/*
 * Detach the user data attached to db, if any, and queue its eviction
 * callback on dbu_evict_taskq.
 *
 * The caller must hold db->db_mtx (asserted).  Nothing happens when no
 * user is attached.  The callback (dbu_evict_func) is not called here;
 * it is dispatched to the taskq with the user structure as its argument.
 */
+ static void
dbuf_evict_user(dmu_buf_impl_t *db)
{
+ dmu_buf_user_t *dbu = db->db_user;
+
ASSERT(MUTEX_HELD(&db->db_mtx));
! if (dbu == NULL)
return;
/* Check the eviction-time invariants, then detach the user from the dbuf. */
! dbuf_verify_user(db, DBVU_EVICTING);
! db->db_user = NULL;
!
/*
 * Debug builds only: if the user recorded a dbuf pointer location in
 * dbu_clear_on_evict_dbufp, store NULL through it.
 */
! #ifdef ZFS_DEBUG
! if (dbu->dbu_clear_on_evict_dbufp != NULL)
! *dbu->dbu_clear_on_evict_dbufp = NULL;
! #endif
!
! /*
! * Invoke the callback from a taskq to avoid lock order reversals
! * and limit stack depth.
! */
/* NOTE(review): dbu_tqent is presumably a taskq entry embedded in the user; confirm. */
! taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0,
! &dbu->dbu_tqent);
}
boolean_t
dbuf_is_metadata(dmu_buf_impl_t *db)
{
*** 284,293 ****
--- 346,361 ----
sizeof (dmu_buf_impl_t),
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
+ * configuration is not required.
+ */
+ dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
}
void
dbuf_fini(void)
{
*** 296,305 ****
--- 364,374 ----
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
kmem_cache_destroy(dbuf_cache);
+ taskq_destroy(dbu_evict_taskq);
}
/*
* Other stuff.
*/
*** 413,437 ****
DB_DNODE_EXIT(db);
}
#endif
static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
db->db_buf = buf;
- if (buf != NULL) {
ASSERT(buf->b_data != NULL);
db->db.db_data = buf->b_data;
if (!arc_released(buf))
arc_set_callback(buf, dbuf_do_evict, db);
- } else {
- dbuf_evict_user(db);
- db->db.db_data = NULL;
- if (db->db_state != DB_NOFILL)
- db->db_state = DB_UNCACHED;
- }
}
/*
* Loan out an arc_buf for read. Return the loaned arc_buf.
*/
--- 482,512 ----
DB_DNODE_EXIT(db);
}
#endif
/*
 * Disassociate db from its data buffer.
 *
 * Evicts any attached user data via dbuf_evict_user(), clears db_buf
 * and db.db_data, and sets the state to DB_UNCACHED unless the dbuf is
 * in DB_NOFILL state.  The caller must hold db->db_mtx (asserted).
 * This function makes no ARC calls of its own; callers that need the
 * arc buf afterwards save db_buf before calling.
 */
static void
+ dbuf_clear_data(dmu_buf_impl_t *db)
+ {
+ ASSERT(MUTEX_HELD(&db->db_mtx));
+ dbuf_evict_user(db);
+ db->db_buf = NULL;
+ db->db.db_data = NULL;
+ if (db->db_state != DB_NOFILL)
+ db->db_state = DB_UNCACHED;
+ }
+
/*
 * Associate the arc buf `buf' with db.
 *
 * Records buf in db_buf and points db.db_data at buf->b_data.  Unless
 * arc_released(buf) is true, dbuf_do_evict is registered as the buf's
 * callback with db as its argument.  The caller must hold db->db_mtx,
 * and buf and buf->b_data must be non-NULL (all asserted).  Clearing
 * the data is done by dbuf_clear_data(), not by passing NULL here.
 */
+ static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
+ ASSERT(buf != NULL);
+
db->db_buf = buf;
ASSERT(buf->b_data != NULL);
db->db.db_data = buf->b_data;
if (!arc_released(buf))
arc_set_callback(buf, dbuf_do_evict, db);
}
/*
* Loan out an arc_buf for read. Return the loaned arc_buf.
*/
*** 449,459 ****
abuf = arc_loan_buf(spa, blksz);
bcopy(db->db.db_data, abuf->b_data, blksz);
} else {
abuf = db->db_buf;
arc_loan_inuse_buf(abuf, db);
! dbuf_set_data(db, NULL);
mutex_exit(&db->db_mtx);
}
return (abuf);
}
--- 524,534 ----
abuf = arc_loan_buf(spa, blksz);
bcopy(db->db.db_data, abuf->b_data, blksz);
} else {
abuf = db->db_buf;
arc_loan_inuse_buf(abuf, db);
! dbuf_clear_data(db);
mutex_exit(&db->db_mtx);
}
return (abuf);
}
*** 685,695 ****
ASSERT(db->db_buf == NULL);
ASSERT(db->db.db_data == NULL);
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
! dbuf_set_data(db, NULL);
} else {
ASSERT3U(db->db_state, ==, DB_CACHED);
}
mutex_exit(&db->db_mtx);
}
--- 760,770 ----
ASSERT(db->db_buf == NULL);
ASSERT(db->db.db_data == NULL);
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
! dbuf_clear_data(db);
} else {
ASSERT3U(db->db_state, ==, DB_CACHED);
}
mutex_exit(&db->db_mtx);
}
*** 741,751 ****
spa_t *spa = db->db_objset->os_spa;
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
! dbuf_set_data(db, NULL);
}
}
void
dbuf_unoverride(dbuf_dirty_record_t *dr)
--- 816,826 ----
spa_t *spa = db->db_objset->os_spa;
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
! dbuf_clear_data(db);
}
}
void
dbuf_unoverride(dbuf_dirty_record_t *dr)
*** 792,802 ****
*/
void
dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
dmu_tx_t *tx)
{
! dmu_buf_impl_t *db, *db_next, db_search;
uint64_t txg = tx->tx_txg;
avl_index_t where;
if (end_blkid > dn->dn_maxblkid && (end_blkid != DMU_SPILL_BLKID))
end_blkid = dn->dn_maxblkid;
--- 867,878 ----
*/
void
dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
dmu_tx_t *tx)
{
! dmu_buf_impl_t db_search;
! dmu_buf_impl_t *db, *db_next;
uint64_t txg = tx->tx_txg;
avl_index_t where;
if (end_blkid > dn->dn_maxblkid && (end_blkid != DMU_SPILL_BLKID))
end_blkid = dn->dn_maxblkid;
*** 1368,1378 ****
if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) {
arc_buf_t *buf = db->db_buf;
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
! dbuf_set_data(db, NULL);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
return (B_TRUE);
}
--- 1444,1454 ----
if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) {
arc_buf_t *buf = db->db_buf;
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
! dbuf_clear_data(db);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
return (B_TRUE);
}
*** 1708,1719 ****
db->db_dirtycnt = 0;
db->db_dnode_handle = dn->dn_handle;
db->db_parent = parent;
db->db_blkptr = blkptr;
! db->db_user_ptr = NULL;
! db->db_evict_func = NULL;
db->db_immediate_evict = 0;
db->db_freed_in_flight = 0;
if (blkid == DMU_BONUS_BLKID) {
ASSERT3P(parent, ==, dn->dn_dbuf);
--- 1784,1794 ----
db->db_dirtycnt = 0;
db->db_dnode_handle = dn->dn_handle;
db->db_parent = parent;
db->db_blkptr = blkptr;
! db->db_user = NULL;
db->db_immediate_evict = 0;
db->db_freed_in_flight = 0;
if (blkid == DMU_BONUS_BLKID) {
ASSERT3P(parent, ==, dn->dn_dbuf);
*** 2112,2122 ****
} else if (arc_released(db->db_buf)) {
arc_buf_t *buf = db->db_buf;
/*
* This dbuf has anonymous data associated with it.
*/
! dbuf_set_data(db, NULL);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
} else {
VERIFY(!arc_buf_remove_ref(db->db_buf, db));
--- 2187,2197 ----
} else if (arc_released(db->db_buf)) {
arc_buf_t *buf = db->db_buf;
/*
* This dbuf has anonymous data associated with it.
*/
! dbuf_clear_data(db);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
} else {
VERIFY(!arc_buf_remove_ref(db->db_buf, db));
*** 2145,2155 ****
dbuf_clear(db);
arc_freed(spa, &bp);
} else {
dbuf_clear(db);
}
! } else if (arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
} else {
mutex_exit(&db->db_mtx);
}
}
--- 2220,2231 ----
dbuf_clear(db);
arc_freed(spa, &bp);
} else {
dbuf_clear(db);
}
! } else if (db->db_objset->os_evicting ||
! arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
} else {
mutex_exit(&db->db_mtx);
}
}
*** 2164,2220 ****
{
return (refcount_count(&db->db_holds));
}
void *
! dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr,
! dmu_buf_evict_func_t *evict_func)
{
! return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func));
}
void *
! dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr,
! dmu_buf_evict_func_t *evict_func)
{
! dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
!
! db->db_immediate_evict = TRUE;
! return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func));
}
void *
! dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr,
! dmu_buf_evict_func_t *evict_func)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- ASSERT(db->db_level == 0);
! ASSERT((user_ptr == NULL) == (evict_func == NULL));
! mutex_enter(&db->db_mtx);
!
! if (db->db_user_ptr == old_user_ptr) {
! db->db_user_ptr = user_ptr;
! db->db_evict_func = evict_func;
! } else {
! old_user_ptr = db->db_user_ptr;
! }
!
! mutex_exit(&db->db_mtx);
! return (old_user_ptr);
}
void *
dmu_buf_get_user(dmu_buf_t *db_fake)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- ASSERT(!refcount_is_zero(&db->db_holds));
! return (db->db_user_ptr);
}
boolean_t
dmu_buf_freeable(dmu_buf_t *dbuf)
{
boolean_t res = B_FALSE;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
--- 2240,2302 ----
{
return (refcount_count(&db->db_holds));
}
/*
 * Conditionally replace the user attached to a dbuf, under db_mtx.
 *
 * If the currently attached user equals old_user, new_user is
 * installed and old_user is returned.  Otherwise nothing is changed
 * and the currently attached user is returned.  A caller can therefore
 * detect success by comparing the return value with the old_user it
 * passed in.  dbuf_verify_user() is run before and after the update.
 */
void *
! dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
! dmu_buf_user_t *new_user)
{
! dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
!
! mutex_enter(&db->db_mtx);
! dbuf_verify_user(db, DBVU_NOT_EVICTING);
! if (db->db_user == old_user)
! db->db_user = new_user;
! else
! old_user = db->db_user;
! dbuf_verify_user(db, DBVU_NOT_EVICTING);
! mutex_exit(&db->db_mtx);
!
! return (old_user);
}
/*
 * Attach `user' to the dbuf only if no user is currently attached.
 *
 * Returns NULL when the user was attached; otherwise returns the user
 * that is already attached and leaves it in place.
 */
void *
! dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
! return (dmu_buf_replace_user(db_fake, NULL, user));
}
/*
 * Like dmu_buf_set_user(), but first flags the dbuf with
 * db_immediate_evict = TRUE.  The return value is that of
 * dmu_buf_set_user().
 *
 * NOTE(review): db_immediate_evict is written before, and outside of,
 * the db_mtx section taken inside dmu_buf_replace_user().  It also
 * stays set when the attach fails because a user was already present.
 * Confirm both are intended.
 */
void *
! dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
! db->db_immediate_evict = TRUE;
! return (dmu_buf_set_user(db_fake, user));
! }
/*
 * Detach `user' from the dbuf if it is the currently attached user.
 *
 * Returns `user' when it was detached; otherwise returns whatever is
 * currently attached (possibly NULL) and changes nothing.  This path
 * does not dispatch the user's eviction callback.
 */
! void *
! dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
! {
! return (dmu_buf_replace_user(db_fake, user, NULL));
}
/*
 * Return the user currently attached to the dbuf, or NULL if none.
 *
 * Runs the debug-only dbuf_verify_user() check first.  db_user is read
 * without taking db_mtx in this function.
 */
void *
dmu_buf_get_user(dmu_buf_t *db_fake)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
! dbuf_verify_user(db, DBVU_NOT_EVICTING);
! return (db->db_user);
}
/*
 * Wait on dbu_evict_taskq, the taskq to which dbuf_evict_user()
 * dispatches user eviction callbacks.  Presumably this returns once
 * the callbacks already queued have run; confirm against taskq_wait().
 *
 * NOTE(review): the empty parameter list `()' is not a prototype
 * before C23; `(void)' would state that no arguments are taken.
 */
+ void
+ dmu_buf_user_evict_wait()
+ {
+ taskq_wait(dbu_evict_taskq);
+ }
+
boolean_t
dmu_buf_freeable(dmu_buf_t *dbuf)
{
boolean_t res = B_FALSE;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;