Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
        
@@ -21,10 +21,11 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 RackTop Systems.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_dir.h>
@@ -69,11 +70,10 @@
         }
 
 #define DS_REF_MAX      (1ULL << 62)
 
 extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
-extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds);
 
 /*
  * Figure out how much of this delta should be propagated to the dsl_dir
  * layer.  If there's a refreservation, that space has already been
  * partially accounted for in our ancestors.
@@ -153,11 +153,11 @@
                     -used, -compressed, -uncompressed);
                 return (used);
         }
         ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
 
-        ASSERT(!dsl_dataset_is_snapshot(ds));
+        ASSERT(!ds->ds_is_snapshot);
         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 
         if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
                 int64_t delta;
 
@@ -251,18 +251,19 @@
         ddt_prefetch(dsl_dataset_get_spa(ds), bp);
 
         return (B_TRUE);
 }
 
-/* ARGSUSED */
 static void
-dsl_dataset_evict(dmu_buf_t *db, void *dsv)
+dsl_dataset_evict(void *dbu)
 {
-        dsl_dataset_t *ds = dsv;
+        dsl_dataset_t *ds = dbu;
 
         ASSERT(ds->ds_owner == NULL);
 
+        ds->ds_dbuf = NULL;
+
         unique_remove(ds->ds_fsid_guid);
 
         if (ds->ds_objset != NULL)
                 dmu_objset_evict(ds->ds_objset);
 
@@ -270,14 +271,14 @@
                 dsl_dataset_rele(ds->ds_prev, ds);
                 ds->ds_prev = NULL;
         }
 
         bplist_destroy(&ds->ds_pending_deadlist);
-        if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0)
+        if (ds->ds_deadlist.dl_os != NULL)
                 dsl_deadlist_close(&ds->ds_deadlist);
         if (ds->ds_dir)
-                dsl_dir_rele(ds->ds_dir, ds);
+                dsl_dir_async_rele(ds->ds_dir, ds);
 
         ASSERT(!list_link_active(&ds->ds_synced_link));
 
         mutex_destroy(&ds->ds_lock);
         mutex_destroy(&ds->ds_opening_lock);
@@ -387,10 +388,11 @@
                 dsl_dataset_t *winner = NULL;
 
                 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
                 ds->ds_dbuf = dbuf;
                 ds->ds_object = dsobj;
+                ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
 
                 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
                 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
                 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
                 refcount_create(&ds->ds_longholds);
@@ -425,11 +427,11 @@
                         kmem_free(ds, sizeof (dsl_dataset_t));
                         dmu_buf_rele(dbuf, tag);
                         return (err);
                 }
 
-                if (!dsl_dataset_is_snapshot(ds)) {
+                if (!ds->ds_is_snapshot) {
                         ds->ds_snapname[0] = '\0';
                         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
                                 err = dsl_dataset_hold_obj(dp,
                                     dsl_dataset_phys(ds)->ds_prev_snap_obj,
                                     ds, &ds->ds_prev);
@@ -452,11 +454,11 @@
                                     dsl_dataset_phys(ds)->ds_userrefs_obj,
                                     &ds->ds_userrefs);
                         }
                 }
 
-                if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
+                if (err == 0 && !ds->ds_is_snapshot) {
                         err = dsl_prop_get_int_ds(ds,
                             zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
                             &ds->ds_reserved);
                         if (err == 0) {
                                 err = dsl_prop_get_int_ds(ds,
@@ -465,12 +467,15 @@
                         }
                 } else {
                         ds->ds_reserved = ds->ds_quota = 0;
                 }
 
-                if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
-                    dsl_dataset_evict)) != NULL) {
+                dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
+                if (err == 0)
+                        winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
+
+                if (err != 0 || winner != NULL) {
                         bplist_destroy(&ds->ds_pending_deadlist);
                         dsl_deadlist_close(&ds->ds_deadlist);
                         if (ds->ds_prev)
                                 dsl_dataset_rele(ds->ds_prev, ds);
                         dsl_dir_rele(ds->ds_dir, ds);
@@ -846,11 +851,11 @@
 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
 {
         uint64_t mrs_used;
         uint64_t dlused, dlcomp, dluncomp;
 
-        ASSERT(!dsl_dataset_is_snapshot(ds));
+        ASSERT(!ds->ds_is_snapshot);
 
         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
                 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
         else
                 mrs_used = 0;
@@ -1589,11 +1594,11 @@
 
         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
             dsl_dataset_phys(ds)->ds_uncompressed_bytes);
 
-        if (dsl_dataset_is_snapshot(ds)) {
+        if (ds->ds_is_snapshot) {
                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
                     dsl_dataset_phys(ds)->ds_unique_bytes);
                 get_clones_stat(ds, nv);
         } else {
@@ -1657,11 +1662,11 @@
         stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
         stat->dds_inconsistent =
             dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
         stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
         stat->dds_origin[0] = '\0';
-        if (dsl_dataset_is_snapshot(ds)) {
+        if (ds->ds_is_snapshot) {
                 stat->dds_is_snapshot = B_TRUE;
                 stat->dds_num_clones =
                     dsl_dataset_phys(ds)->ds_num_children - 1;
         } else {
                 stat->dds_is_snapshot = B_FALSE;
@@ -1917,11 +1922,11 @@
         error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
         if (error != 0)
                 return (error);
 
         /* must not be a snapshot */
-        if (dsl_dataset_is_snapshot(ds)) {
+        if (ds->ds_is_snapshot) {
                 dsl_dataset_rele(ds, FTAG);
                 return (SET_ERROR(EINVAL));
         }
 
         /* must have a most recent snapshot */
@@ -2489,11 +2494,11 @@
             &ddpa->ddpa_clone);
         if (error != 0)
                 return (error);
         dd = ddpa->ddpa_clone->ds_dir;
 
-        if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) ||
+        if (ddpa->ddpa_clone->ds_is_snapshot ||
             !dsl_dir_is_clone(dd)) {
                 dsl_dataset_rele(ddpa->ddpa_clone, tag);
                 return (SET_ERROR(EINVAL));
         }
 
@@ -2581,12 +2586,12 @@
     dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
 {
         int64_t unused_refres_delta;
 
         /* they should both be heads */
-        if (dsl_dataset_is_snapshot(clone) ||
-            dsl_dataset_is_snapshot(origin_head))
+        if (clone->ds_is_snapshot ||
+            origin_head->ds_is_snapshot)
                 return (SET_ERROR(EINVAL));
 
         /* if we are not forcing, the branch point should be just before them */
         if (!force && clone->ds_prev != origin_head->ds_prev)
                 return (SET_ERROR(EINVAL));
@@ -2861,11 +2866,11 @@
 
         error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
         if (error != 0)
                 return (error);
 
-        if (dsl_dataset_is_snapshot(ds)) {
+        if (ds->ds_is_snapshot) {
                 dsl_dataset_rele(ds, FTAG);
                 return (SET_ERROR(EINVAL));
         }
 
         error = dsl_prop_predict(ds->ds_dir,
@@ -2944,11 +2949,11 @@
 
         error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
         if (error != 0)
                 return (error);
 
-        if (dsl_dataset_is_snapshot(ds)) {
+        if (ds->ds_is_snapshot) {
                 dsl_dataset_rele(ds, FTAG);
                 return (SET_ERROR(EINVAL));
         }
 
         error = dsl_prop_predict(ds->ds_dir,
@@ -3158,12 +3163,12 @@
 {
         int err = 0;
         uint64_t snapobj;
         dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
 
-        ASSERT(dsl_dataset_is_snapshot(firstsnap));
-        ASSERT(dsl_dataset_is_snapshot(lastsnap));
+        ASSERT(firstsnap->ds_is_snapshot);
+        ASSERT(lastsnap->ds_is_snapshot);
 
         /*
          * Check that the snapshots are in the same dsl_dir, and firstsnap
          * is before lastsnap.
          */
@@ -3284,16 +3289,16 @@
         dsl_pool_t *dp = later->ds_dir->dd_pool;
         int error;
         boolean_t ret;
 
         ASSERT(dsl_pool_config_held(dp));
-        ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0);
+        ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);
 
         if (earlier_txg == 0)
                 earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
 
-        if (dsl_dataset_is_snapshot(later) &&
+        if (later->ds_is_snapshot &&
             earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
                 return (B_FALSE);
 
         if (later->ds_dir == earlier->ds_dir)
                 return (B_TRUE);