Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

*** 21,30 **** --- 21,31 ---- /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2014 RackTop Systems. + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ #include <sys/dmu_objset.h> #include <sys/dsl_dataset.h> #include <sys/dsl_dir.h>
*** 69,79 **** } #define DS_REF_MAX (1ULL << 62) extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); - extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds); /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. If there's a refreservation, that space has already been * partially accounted for in our ancestors. --- 70,79 ----
*** 153,163 **** -used, -compressed, -uncompressed); return (used); } ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); ! ASSERT(!dsl_dataset_is_snapshot(ds)); dmu_buf_will_dirty(ds->ds_dbuf, tx); if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { int64_t delta; --- 153,163 ---- -used, -compressed, -uncompressed); return (used); } ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); ! ASSERT(!ds->ds_is_snapshot); dmu_buf_will_dirty(ds->ds_dbuf, tx); if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { int64_t delta;
*** 251,268 **** ddt_prefetch(dsl_dataset_get_spa(ds), bp); return (B_TRUE); } - /* ARGSUSED */ static void ! dsl_dataset_evict(dmu_buf_t *db, void *dsv) { ! dsl_dataset_t *ds = dsv; ASSERT(ds->ds_owner == NULL); unique_remove(ds->ds_fsid_guid); if (ds->ds_objset != NULL) dmu_objset_evict(ds->ds_objset); --- 251,269 ---- ddt_prefetch(dsl_dataset_get_spa(ds), bp); return (B_TRUE); } static void ! dsl_dataset_evict(void *dbu) { ! dsl_dataset_t *ds = dbu; ASSERT(ds->ds_owner == NULL); + ds->ds_dbuf = NULL; + unique_remove(ds->ds_fsid_guid); if (ds->ds_objset != NULL) dmu_objset_evict(ds->ds_objset);
*** 270,283 **** dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = NULL; } bplist_destroy(&ds->ds_pending_deadlist); ! if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0) dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_dir) ! dsl_dir_rele(ds->ds_dir, ds); ASSERT(!list_link_active(&ds->ds_synced_link)); mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_opening_lock); --- 271,284 ---- dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = NULL; } bplist_destroy(&ds->ds_pending_deadlist); ! if (ds->ds_deadlist.dl_os != NULL) dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_dir) ! dsl_dir_async_rele(ds->ds_dir, ds); ASSERT(!list_link_active(&ds->ds_synced_link)); mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_opening_lock);
*** 387,396 **** --- 388,398 ---- dsl_dataset_t *winner = NULL; ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); ds->ds_dbuf = dbuf; ds->ds_object = dsobj; + ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0; mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); refcount_create(&ds->ds_longholds);
*** 425,435 **** kmem_free(ds, sizeof (dsl_dataset_t)); dmu_buf_rele(dbuf, tag); return (err); } ! if (!dsl_dataset_is_snapshot(ds)) { ds->ds_snapname[0] = '\0'; if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev); --- 427,437 ---- kmem_free(ds, sizeof (dsl_dataset_t)); dmu_buf_rele(dbuf, tag); return (err); } ! if (!ds->ds_is_snapshot) { ds->ds_snapname[0] = '\0'; if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev);
*** 452,462 **** dsl_dataset_phys(ds)->ds_userrefs_obj, &ds->ds_userrefs); } } ! if (err == 0 && !dsl_dataset_is_snapshot(ds)) { err = dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &ds->ds_reserved); if (err == 0) { err = dsl_prop_get_int_ds(ds, --- 454,464 ---- dsl_dataset_phys(ds)->ds_userrefs_obj, &ds->ds_userrefs); } } ! if (err == 0 && !ds->ds_is_snapshot) { err = dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &ds->ds_reserved); if (err == 0) { err = dsl_prop_get_int_ds(ds,
*** 465,476 **** } } else { ds->ds_reserved = ds->ds_quota = 0; } ! if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, ! dsl_dataset_evict)) != NULL) { bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_prev) dsl_dataset_rele(ds->ds_prev, ds); dsl_dir_rele(ds->ds_dir, ds); --- 467,481 ---- } } else { ds->ds_reserved = ds->ds_quota = 0; } ! dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf); ! if (err == 0) ! winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu); ! ! if (err != 0 || winner != NULL) { bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_prev) dsl_dataset_rele(ds->ds_prev, ds); dsl_dir_rele(ds->ds_dir, ds);
*** 846,856 **** dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) { uint64_t mrs_used; uint64_t dlused, dlcomp, dluncomp; ! ASSERT(!dsl_dataset_is_snapshot(ds)); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; else mrs_used = 0; --- 851,861 ---- dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) { uint64_t mrs_used; uint64_t dlused, dlcomp, dluncomp; ! ASSERT(!ds->ds_is_snapshot); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; else mrs_used = 0;
*** 1589,1599 **** dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, dsl_dataset_phys(ds)->ds_uncompressed_bytes); ! if (dsl_dataset_is_snapshot(ds)) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dsl_dataset_phys(ds)->ds_unique_bytes); get_clones_stat(ds, nv); } else { --- 1594,1604 ---- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, dsl_dataset_phys(ds)->ds_uncompressed_bytes); ! if (ds->ds_is_snapshot) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dsl_dataset_phys(ds)->ds_unique_bytes); get_clones_stat(ds, nv); } else {
*** 1657,1667 **** stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg; stat->dds_inconsistent = dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = dsl_dataset_phys(ds)->ds_guid; stat->dds_origin[0] = '\0'; ! if (dsl_dataset_is_snapshot(ds)) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = dsl_dataset_phys(ds)->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; --- 1662,1672 ---- stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg; stat->dds_inconsistent = dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = dsl_dataset_phys(ds)->ds_guid; stat->dds_origin[0] = '\0'; ! if (ds->ds_is_snapshot) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = dsl_dataset_phys(ds)->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE;
*** 1917,1927 **** error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); if (error != 0) return (error); /* must not be a snapshot */ ! if (dsl_dataset_is_snapshot(ds)) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } /* must have a most recent snapshot */ --- 1922,1932 ---- error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); if (error != 0) return (error); /* must not be a snapshot */ ! if (ds->ds_is_snapshot) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } /* must have a most recent snapshot */
*** 2489,2499 **** &ddpa->ddpa_clone); if (error != 0) return (error); dd = ddpa->ddpa_clone->ds_dir; ! if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || !dsl_dir_is_clone(dd)) { dsl_dataset_rele(ddpa->ddpa_clone, tag); return (SET_ERROR(EINVAL)); } --- 2494,2504 ---- &ddpa->ddpa_clone); if (error != 0) return (error); dd = ddpa->ddpa_clone->ds_dir; ! if (ddpa->ddpa_clone->ds_is_snapshot || !dsl_dir_is_clone(dd)) { dsl_dataset_rele(ddpa->ddpa_clone, tag); return (SET_ERROR(EINVAL)); }
*** 2581,2592 **** dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) { int64_t unused_refres_delta; /* they should both be heads */ ! if (dsl_dataset_is_snapshot(clone) || ! dsl_dataset_is_snapshot(origin_head)) return (SET_ERROR(EINVAL)); /* if we are not forcing, the branch point should be just before them */ if (!force && clone->ds_prev != origin_head->ds_prev) return (SET_ERROR(EINVAL)); --- 2586,2597 ---- dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) { int64_t unused_refres_delta; /* they should both be heads */ ! if (clone->ds_is_snapshot || ! origin_head->ds_is_snapshot) return (SET_ERROR(EINVAL)); /* if we are not forcing, the branch point should be just before them */ if (!force && clone->ds_prev != origin_head->ds_prev) return (SET_ERROR(EINVAL));
*** 2861,2871 **** error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); ! if (dsl_dataset_is_snapshot(ds)) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_prop_predict(ds->ds_dir, --- 2866,2876 ---- error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); ! if (ds->ds_is_snapshot) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_prop_predict(ds->ds_dir,
*** 2944,2954 **** error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); ! if (dsl_dataset_is_snapshot(ds)) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_prop_predict(ds->ds_dir, --- 2949,2959 ---- error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); ! if (ds->ds_is_snapshot) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_prop_predict(ds->ds_dir,
*** 3158,3169 **** { int err = 0; uint64_t snapobj; dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; ! ASSERT(dsl_dataset_is_snapshot(firstsnap)); ! ASSERT(dsl_dataset_is_snapshot(lastsnap)); /* * Check that the snapshots are in the same dsl_dir, and firstsnap * is before lastsnap. */ --- 3163,3174 ---- { int err = 0; uint64_t snapobj; dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; ! ASSERT(firstsnap->ds_is_snapshot); ! ASSERT(lastsnap->ds_is_snapshot); /* * Check that the snapshots are in the same dsl_dir, and firstsnap * is before lastsnap. */
*** 3284,3299 **** dsl_pool_t *dp = later->ds_dir->dd_pool; int error; boolean_t ret; ASSERT(dsl_pool_config_held(dp)); ! ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); if (earlier_txg == 0) earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; ! if (dsl_dataset_is_snapshot(later) && earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) return (B_FALSE); if (later->ds_dir == earlier->ds_dir) return (B_TRUE); --- 3289,3304 ---- dsl_pool_t *dp = later->ds_dir->dd_pool; int error; boolean_t ret; ASSERT(dsl_pool_config_held(dp)); ! ASSERT(earlier->ds_is_snapshot || earlier_txg != 0); if (earlier_txg == 0) earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; ! if (later->ds_is_snapshot && earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) return (B_FALSE); if (later->ds_dir == earlier->ds_dir) return (B_TRUE);