Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

*** 22,31 **** --- 22,32 ---- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ #include <sys/zfs_context.h> #include <sys/dmu.h> #include <sys/dmu_send.h>
*** 52,65 **** --- 53,72 ---- static void dbuf_destroy(dmu_buf_impl_t *db); static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); + #ifndef __lint + extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu, + dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp); + #endif /* ! __lint */ + /* * Global data structures and functions for the dbuf cache. */ static kmem_cache_t *dbuf_cache; + static taskq_t *dbu_evict_taskq; /* ARGSUSED */ static int dbuf_cons(void *vdb, void *unused, int kmflag) {
*** 213,233 **** atomic_dec_64(&dbuf_hash_count); } static arc_evict_func_t dbuf_do_evict; static void dbuf_evict_user(dmu_buf_impl_t *db) { ASSERT(MUTEX_HELD(&db->db_mtx)); ! if (db->db_level != 0 || db->db_evict_func == NULL) return; ! db->db_evict_func(&db->db, db->db_user_ptr); ! db->db_user_ptr = NULL; ! db->db_evict_func = NULL; } boolean_t dbuf_is_metadata(dmu_buf_impl_t *db) { --- 220,295 ---- atomic_dec_64(&dbuf_hash_count); } static arc_evict_func_t dbuf_do_evict; + typedef enum { + DBVU_EVICTING, + DBVU_NOT_EVICTING + } dbvu_verify_type_t; + static void + dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type) + { + #ifdef ZFS_DEBUG + int64_t holds; + + if (db->db_user == NULL) + return; + + /* Only data blocks support the attachment of user data. */ + ASSERT(db->db_level == 0); + + /* Clients must resolve a dbuf before attaching user data. */ + ASSERT(db->db.db_data != NULL); + ASSERT3U(db->db_state, ==, DB_CACHED); + + holds = refcount_count(&db->db_holds); + if (verify_type == DBVU_EVICTING) { + /* + * Immediate eviction occurs when holds == dirtycnt. + * For normal eviction buffers, holds is zero on + * eviction, except when dbuf_fix_old_data() calls + * dbuf_clear_data(). However, the hold count can grow + * during eviction even though db_mtx is held (see + * dmu_bonus_hold() for an example), so we can only + * test the generic invariant that holds >= dirtycnt. + */ + ASSERT3U(holds, >=, db->db_dirtycnt); + } else { + if (db->db_immediate_evict == TRUE) + ASSERT3U(holds, >=, db->db_dirtycnt); + else + ASSERT3U(holds, >, 0); + } + #endif + } + + static void dbuf_evict_user(dmu_buf_impl_t *db) { + dmu_buf_user_t *dbu = db->db_user; + ASSERT(MUTEX_HELD(&db->db_mtx)); ! if (dbu == NULL) return; ! dbuf_verify_user(db, DBVU_EVICTING); ! db->db_user = NULL; ! ! #ifdef ZFS_DEBUG ! if (dbu->dbu_clear_on_evict_dbufp != NULL) ! *dbu->dbu_clear_on_evict_dbufp = NULL; ! #endif ! ! /* ! * Invoke the callback from a taskq to avoid lock order reversals ! * and limit stack depth. ! */ ! taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0, ! &dbu->dbu_tqent); } boolean_t dbuf_is_metadata(dmu_buf_impl_t *db) {
*** 284,293 **** --- 346,361 ---- sizeof (dmu_buf_impl_t), 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); for (i = 0; i < DBUF_MUTEXES; i++) mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); + + /* + * All entries are queued via taskq_dispatch_ent(), so min/maxalloc + * configuration is not required. + */ + dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0); } void dbuf_fini(void) {
*** 296,305 **** --- 364,374 ---- for (i = 0; i < DBUF_MUTEXES; i++) mutex_destroy(&h->hash_mutexes[i]); kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); kmem_cache_destroy(dbuf_cache); + taskq_destroy(dbu_evict_taskq); } /* * Other stuff. */
*** 413,437 **** DB_DNODE_EXIT(db); } #endif static void dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) { ASSERT(MUTEX_HELD(&db->db_mtx)); db->db_buf = buf; - if (buf != NULL) { ASSERT(buf->b_data != NULL); db->db.db_data = buf->b_data; if (!arc_released(buf)) arc_set_callback(buf, dbuf_do_evict, db); - } else { - dbuf_evict_user(db); - db->db.db_data = NULL; - if (db->db_state != DB_NOFILL) - db->db_state = DB_UNCACHED; - } } /* * Loan out an arc_buf for read. Return the loaned arc_buf. */ --- 482,512 ---- DB_DNODE_EXIT(db); } #endif static void + dbuf_clear_data(dmu_buf_impl_t *db) + { + ASSERT(MUTEX_HELD(&db->db_mtx)); + dbuf_evict_user(db); + db->db_buf = NULL; + db->db.db_data = NULL; + if (db->db_state != DB_NOFILL) + db->db_state = DB_UNCACHED; + } + + static void dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) { ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT(buf != NULL); + db->db_buf = buf; ASSERT(buf->b_data != NULL); db->db.db_data = buf->b_data; if (!arc_released(buf)) arc_set_callback(buf, dbuf_do_evict, db); } /* * Loan out an arc_buf for read. Return the loaned arc_buf. */
*** 449,459 **** abuf = arc_loan_buf(spa, blksz); bcopy(db->db.db_data, abuf->b_data, blksz); } else { abuf = db->db_buf; arc_loan_inuse_buf(abuf, db); ! dbuf_set_data(db, NULL); mutex_exit(&db->db_mtx); } return (abuf); } --- 524,534 ---- abuf = arc_loan_buf(spa, blksz); bcopy(db->db.db_data, abuf->b_data, blksz); } else { abuf = db->db_buf; arc_loan_inuse_buf(abuf, db); ! dbuf_clear_data(db); mutex_exit(&db->db_mtx); } return (abuf); }
*** 685,695 **** ASSERT(db->db_buf == NULL); ASSERT(db->db.db_data == NULL); dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type)); db->db_state = DB_FILL; } else if (db->db_state == DB_NOFILL) { ! dbuf_set_data(db, NULL); } else { ASSERT3U(db->db_state, ==, DB_CACHED); } mutex_exit(&db->db_mtx); } --- 760,770 ---- ASSERT(db->db_buf == NULL); ASSERT(db->db.db_data == NULL); dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type)); db->db_state = DB_FILL; } else if (db->db_state == DB_NOFILL) { ! dbuf_clear_data(db); } else { ASSERT3U(db->db_state, ==, DB_CACHED); } mutex_exit(&db->db_mtx); }
*** 741,751 **** spa_t *spa = db->db_objset->os_spa; dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type); bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); } else { ! dbuf_set_data(db, NULL); } } void dbuf_unoverride(dbuf_dirty_record_t *dr) --- 816,826 ---- spa_t *spa = db->db_objset->os_spa; dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type); bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); } else { ! dbuf_clear_data(db); } } void dbuf_unoverride(dbuf_dirty_record_t *dr)
*** 792,802 **** */ void dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, dmu_tx_t *tx) { ! dmu_buf_impl_t *db, *db_next, db_search; uint64_t txg = tx->tx_txg; avl_index_t where; if (end_blkid > dn->dn_maxblkid && (end_blkid != DMU_SPILL_BLKID)) end_blkid = dn->dn_maxblkid; --- 867,878 ---- */ void dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, dmu_tx_t *tx) { ! dmu_buf_impl_t db_search; ! dmu_buf_impl_t *db, *db_next; uint64_t txg = tx->tx_txg; avl_index_t where; if (end_blkid > dn->dn_maxblkid && (end_blkid != DMU_SPILL_BLKID)) end_blkid = dn->dn_maxblkid;
*** 1368,1378 **** if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { arc_buf_t *buf = db->db_buf; ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); ! dbuf_set_data(db, NULL); VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); return (B_TRUE); } --- 1444,1454 ---- if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { arc_buf_t *buf = db->db_buf; ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); ! dbuf_clear_data(db); VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); return (B_TRUE); }
*** 1708,1719 **** db->db_dirtycnt = 0; db->db_dnode_handle = dn->dn_handle; db->db_parent = parent; db->db_blkptr = blkptr; ! db->db_user_ptr = NULL; ! db->db_evict_func = NULL; db->db_immediate_evict = 0; db->db_freed_in_flight = 0; if (blkid == DMU_BONUS_BLKID) { ASSERT3P(parent, ==, dn->dn_dbuf); --- 1784,1794 ---- db->db_dirtycnt = 0; db->db_dnode_handle = dn->dn_handle; db->db_parent = parent; db->db_blkptr = blkptr; ! db->db_user = NULL; db->db_immediate_evict = 0; db->db_freed_in_flight = 0; if (blkid == DMU_BONUS_BLKID) { ASSERT3P(parent, ==, dn->dn_dbuf);
*** 2112,2122 **** } else if (arc_released(db->db_buf)) { arc_buf_t *buf = db->db_buf; /* * This dbuf has anonymous data associated with it. */ ! dbuf_set_data(db, NULL); VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); } else { VERIFY(!arc_buf_remove_ref(db->db_buf, db)); --- 2187,2197 ---- } else if (arc_released(db->db_buf)) { arc_buf_t *buf = db->db_buf; /* * This dbuf has anonymous data associated with it. */ ! dbuf_clear_data(db); VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); } else { VERIFY(!arc_buf_remove_ref(db->db_buf, db));
*** 2145,2155 **** dbuf_clear(db); arc_freed(spa, &bp); } else { dbuf_clear(db); } ! } else if (arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); } else { mutex_exit(&db->db_mtx); } } --- 2220,2231 ---- dbuf_clear(db); arc_freed(spa, &bp); } else { dbuf_clear(db); } ! } else if (db->db_objset->os_evicting || ! arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); } else { mutex_exit(&db->db_mtx); } }
*** 2164,2220 **** { return (refcount_count(&db->db_holds)); } void * ! dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, ! dmu_buf_evict_func_t *evict_func) { ! return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func)); } void * ! dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, ! dmu_buf_evict_func_t *evict_func) { ! dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ! ! db->db_immediate_evict = TRUE; ! return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func)); } void * ! dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, ! dmu_buf_evict_func_t *evict_func) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - ASSERT(db->db_level == 0); ! ASSERT((user_ptr == NULL) == (evict_func == NULL)); ! mutex_enter(&db->db_mtx); ! ! if (db->db_user_ptr == old_user_ptr) { ! db->db_user_ptr = user_ptr; ! db->db_evict_func = evict_func; ! } else { ! old_user_ptr = db->db_user_ptr; ! } ! ! mutex_exit(&db->db_mtx); ! return (old_user_ptr); } void * dmu_buf_get_user(dmu_buf_t *db_fake) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - ASSERT(!refcount_is_zero(&db->db_holds)); ! return (db->db_user_ptr); } boolean_t dmu_buf_freeable(dmu_buf_t *dbuf) { boolean_t res = B_FALSE; dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; --- 2240,2302 ---- { return (refcount_count(&db->db_holds)); } void * ! dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user, ! dmu_buf_user_t *new_user) { ! dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ! ! mutex_enter(&db->db_mtx); ! dbuf_verify_user(db, DBVU_NOT_EVICTING); ! if (db->db_user == old_user) ! db->db_user = new_user; ! else ! old_user = db->db_user; ! dbuf_verify_user(db, DBVU_NOT_EVICTING); ! mutex_exit(&db->db_mtx); ! ! return (old_user); } void * ! dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user) { ! return (dmu_buf_replace_user(db_fake, NULL, user)); } void * ! dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ! db->db_immediate_evict = TRUE; ! return (dmu_buf_set_user(db_fake, user)); ! } ! void * ! dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user) ! { ! return (dmu_buf_replace_user(db_fake, user, NULL)); } void * dmu_buf_get_user(dmu_buf_t *db_fake) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ! dbuf_verify_user(db, DBVU_NOT_EVICTING); ! return (db->db_user); } + void + dmu_buf_user_evict_wait() + { + taskq_wait(dbu_evict_taskq); + } + boolean_t dmu_buf_freeable(dmu_buf_t *dbuf) { boolean_t res = B_FALSE; dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;