Print this page
5056 ZFS deadlock on db_mtx and dn_holds
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
        
*** 21,30 ****
--- 21,31 ----
  /*
   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
   * Copyright (c) 2014, Joyent, Inc. All rights reserved.
   * Copyright (c) 2014 RackTop Systems.
+  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
   */
  
  #include <sys/dmu_objset.h>
  #include <sys/dsl_dataset.h>
  #include <sys/dsl_dir.h>
*** 69,79 ****
          }
  
  #define DS_REF_MAX      (1ULL << 62)
  
  extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
- extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds);
  
  /*
   * Figure out how much of this delta should be propagated to the dsl_dir
   * layer.  If there's a refreservation, that space has already been
   * partially accounted for in our ancestors.
--- 70,79 ----
*** 153,163 ****
                      -used, -compressed, -uncompressed);
                  return (used);
          }
          ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
  
!         ASSERT(!dsl_dataset_is_snapshot(ds));
          dmu_buf_will_dirty(ds->ds_dbuf, tx);
  
          if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
                  int64_t delta;
  
--- 153,163 ----
                      -used, -compressed, -uncompressed);
                  return (used);
          }
          ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
  
!         ASSERT(!ds->ds_is_snapshot);
          dmu_buf_will_dirty(ds->ds_dbuf, tx);
  
          if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
                  int64_t delta;
  
*** 251,268 ****
          ddt_prefetch(dsl_dataset_get_spa(ds), bp);
  
          return (B_TRUE);
  }
  
- /* ARGSUSED */
  static void
! dsl_dataset_evict(dmu_buf_t *db, void *dsv)
  {
!         dsl_dataset_t *ds = dsv;
  
          ASSERT(ds->ds_owner == NULL);
  
          unique_remove(ds->ds_fsid_guid);
  
          if (ds->ds_objset != NULL)
                  dmu_objset_evict(ds->ds_objset);
  
--- 251,269 ----
          ddt_prefetch(dsl_dataset_get_spa(ds), bp);
  
          return (B_TRUE);
  }
  
  static void
! dsl_dataset_evict(void *dbu)
  {
!         dsl_dataset_t *ds = dbu;
  
          ASSERT(ds->ds_owner == NULL);
  
+         ds->ds_dbuf = NULL;
+ 
          unique_remove(ds->ds_fsid_guid);
  
          if (ds->ds_objset != NULL)
                  dmu_objset_evict(ds->ds_objset);
  
*** 270,283 ****
                  dsl_dataset_rele(ds->ds_prev, ds);
                  ds->ds_prev = NULL;
          }
  
          bplist_destroy(&ds->ds_pending_deadlist);
!         if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0)
                  dsl_deadlist_close(&ds->ds_deadlist);
          if (ds->ds_dir)
!                 dsl_dir_rele(ds->ds_dir, ds);
  
          ASSERT(!list_link_active(&ds->ds_synced_link));
  
          mutex_destroy(&ds->ds_lock);
          mutex_destroy(&ds->ds_opening_lock);
--- 271,284 ----
                  dsl_dataset_rele(ds->ds_prev, ds);
                  ds->ds_prev = NULL;
          }
  
          bplist_destroy(&ds->ds_pending_deadlist);
!         if (ds->ds_deadlist.dl_os != NULL)
                  dsl_deadlist_close(&ds->ds_deadlist);
          if (ds->ds_dir)
!                 dsl_dir_async_rele(ds->ds_dir, ds);
  
          ASSERT(!list_link_active(&ds->ds_synced_link));
  
          mutex_destroy(&ds->ds_lock);
          mutex_destroy(&ds->ds_opening_lock);
*** 387,396 ****
--- 388,398 ----
                  dsl_dataset_t *winner = NULL;
  
                  ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
                  ds->ds_dbuf = dbuf;
                  ds->ds_object = dsobj;
+                 ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
  
                  mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
                  mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
                  mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
                  refcount_create(&ds->ds_longholds);
*** 425,435 ****
                          kmem_free(ds, sizeof (dsl_dataset_t));
                          dmu_buf_rele(dbuf, tag);
                          return (err);
                  }
  
!                 if (!dsl_dataset_is_snapshot(ds)) {
                          ds->ds_snapname[0] = '\0';
                          if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
                                  err = dsl_dataset_hold_obj(dp,
                                      dsl_dataset_phys(ds)->ds_prev_snap_obj,
                                      ds, &ds->ds_prev);
--- 427,437 ----
                          kmem_free(ds, sizeof (dsl_dataset_t));
                          dmu_buf_rele(dbuf, tag);
                          return (err);
                  }
  
!                 if (!ds->ds_is_snapshot) {
                          ds->ds_snapname[0] = '\0';
                          if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
                                  err = dsl_dataset_hold_obj(dp,
                                      dsl_dataset_phys(ds)->ds_prev_snap_obj,
                                      ds, &ds->ds_prev);
*** 452,462 ****
                                      dsl_dataset_phys(ds)->ds_userrefs_obj,
                                      &ds->ds_userrefs);
                          }
                  }
  
!                 if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
                          err = dsl_prop_get_int_ds(ds,
                              zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
                              &ds->ds_reserved);
                          if (err == 0) {
                                  err = dsl_prop_get_int_ds(ds,
--- 454,464 ----
                                      dsl_dataset_phys(ds)->ds_userrefs_obj,
                                      &ds->ds_userrefs);
                          }
                  }
  
!                 if (err == 0 && !ds->ds_is_snapshot) {
                          err = dsl_prop_get_int_ds(ds,
                              zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
                              &ds->ds_reserved);
                          if (err == 0) {
                                  err = dsl_prop_get_int_ds(ds,
*** 465,476 ****
                          }
                  } else {
                          ds->ds_reserved = ds->ds_quota = 0;
                  }
  
!                 if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
!                     dsl_dataset_evict)) != NULL) {
                          bplist_destroy(&ds->ds_pending_deadlist);
                          dsl_deadlist_close(&ds->ds_deadlist);
                          if (ds->ds_prev)
                                  dsl_dataset_rele(ds->ds_prev, ds);
                          dsl_dir_rele(ds->ds_dir, ds);
--- 467,481 ----
                          }
                  } else {
                          ds->ds_reserved = ds->ds_quota = 0;
                  }
  
!                 dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
!                 if (err == 0)
!                         winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
! 
!                 if (err != 0 || winner != NULL) {
                          bplist_destroy(&ds->ds_pending_deadlist);
                          dsl_deadlist_close(&ds->ds_deadlist);
                          if (ds->ds_prev)
                                  dsl_dataset_rele(ds->ds_prev, ds);
                          dsl_dir_rele(ds->ds_dir, ds);
*** 846,856 ****
  dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
  {
          uint64_t mrs_used;
          uint64_t dlused, dlcomp, dluncomp;
  
!         ASSERT(!dsl_dataset_is_snapshot(ds));
  
          if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
                  mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
          else
                  mrs_used = 0;
--- 851,861 ----
  dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
  {
          uint64_t mrs_used;
          uint64_t dlused, dlcomp, dluncomp;
  
!         ASSERT(!ds->ds_is_snapshot);
  
          if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
                  mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
          else
                  mrs_used = 0;
*** 1589,1599 ****
  
          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
              dsl_dataset_phys(ds)->ds_uncompressed_bytes);
  
!         if (dsl_dataset_is_snapshot(ds)) {
                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
                      dsl_dataset_phys(ds)->ds_unique_bytes);
                  get_clones_stat(ds, nv);
          } else {
--- 1594,1604 ----
  
          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
              dsl_dataset_phys(ds)->ds_uncompressed_bytes);
  
!         if (ds->ds_is_snapshot) {
                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
                      dsl_dataset_phys(ds)->ds_unique_bytes);
                  get_clones_stat(ds, nv);
          } else {
*** 1657,1667 ****
          stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
          stat->dds_inconsistent =
              dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
          stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
          stat->dds_origin[0] = '\0';
!         if (dsl_dataset_is_snapshot(ds)) {
                  stat->dds_is_snapshot = B_TRUE;
                  stat->dds_num_clones =
                      dsl_dataset_phys(ds)->ds_num_children - 1;
          } else {
                  stat->dds_is_snapshot = B_FALSE;
--- 1662,1672 ----
          stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
          stat->dds_inconsistent =
              dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
          stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
          stat->dds_origin[0] = '\0';
!         if (ds->ds_is_snapshot) {
                  stat->dds_is_snapshot = B_TRUE;
                  stat->dds_num_clones =
                      dsl_dataset_phys(ds)->ds_num_children - 1;
          } else {
                  stat->dds_is_snapshot = B_FALSE;
*** 1917,1927 ****
          error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
          if (error != 0)
                  return (error);
  
          /* must not be a snapshot */
!         if (dsl_dataset_is_snapshot(ds)) {
                  dsl_dataset_rele(ds, FTAG);
                  return (SET_ERROR(EINVAL));
          }
  
          /* must have a most recent snapshot */
--- 1922,1932 ----
          error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
          if (error != 0)
                  return (error);
  
          /* must not be a snapshot */
!         if (ds->ds_is_snapshot) {
                  dsl_dataset_rele(ds, FTAG);
                  return (SET_ERROR(EINVAL));
          }
  
          /* must have a most recent snapshot */
*** 2489,2499 ****
              &ddpa->ddpa_clone);
          if (error != 0)
                  return (error);
          dd = ddpa->ddpa_clone->ds_dir;
  
!         if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) ||
              !dsl_dir_is_clone(dd)) {
                  dsl_dataset_rele(ddpa->ddpa_clone, tag);
                  return (SET_ERROR(EINVAL));
          }
  
--- 2494,2504 ----
              &ddpa->ddpa_clone);
          if (error != 0)
                  return (error);
          dd = ddpa->ddpa_clone->ds_dir;
  
!         if (ddpa->ddpa_clone->ds_is_snapshot ||
              !dsl_dir_is_clone(dd)) {
                  dsl_dataset_rele(ddpa->ddpa_clone, tag);
                  return (SET_ERROR(EINVAL));
          }
  
*** 2581,2592 ****
      dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
  {
          int64_t unused_refres_delta;
  
          /* they should both be heads */
!         if (dsl_dataset_is_snapshot(clone) ||
!             dsl_dataset_is_snapshot(origin_head))
                  return (SET_ERROR(EINVAL));
  
          /* if we are not forcing, the branch point should be just before them */
          if (!force && clone->ds_prev != origin_head->ds_prev)
                  return (SET_ERROR(EINVAL));
--- 2586,2597 ----
      dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
  {
          int64_t unused_refres_delta;
  
          /* they should both be heads */
!         if (clone->ds_is_snapshot ||
!             origin_head->ds_is_snapshot)
                  return (SET_ERROR(EINVAL));
  
          /* if we are not forcing, the branch point should be just before them */
          if (!force && clone->ds_prev != origin_head->ds_prev)
                  return (SET_ERROR(EINVAL));
*** 2861,2871 ****
  
          error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
          if (error != 0)
                  return (error);
  
!         if (dsl_dataset_is_snapshot(ds)) {
                  dsl_dataset_rele(ds, FTAG);
                  return (SET_ERROR(EINVAL));
          }
  
          error = dsl_prop_predict(ds->ds_dir,
--- 2866,2876 ----
  
          error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
          if (error != 0)
                  return (error);
  
!         if (ds->ds_is_snapshot) {
                  dsl_dataset_rele(ds, FTAG);
                  return (SET_ERROR(EINVAL));
          }
  
          error = dsl_prop_predict(ds->ds_dir,
*** 2944,2954 ****
  
          error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
          if (error != 0)
                  return (error);
  
!         if (dsl_dataset_is_snapshot(ds)) {
                  dsl_dataset_rele(ds, FTAG);
                  return (SET_ERROR(EINVAL));
          }
  
          error = dsl_prop_predict(ds->ds_dir,
--- 2949,2959 ----
  
          error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
          if (error != 0)
                  return (error);
  
!         if (ds->ds_is_snapshot) {
                  dsl_dataset_rele(ds, FTAG);
                  return (SET_ERROR(EINVAL));
          }
  
          error = dsl_prop_predict(ds->ds_dir,
*** 3158,3169 ****
  {
          int err = 0;
          uint64_t snapobj;
          dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
  
!         ASSERT(dsl_dataset_is_snapshot(firstsnap));
!         ASSERT(dsl_dataset_is_snapshot(lastsnap));
  
          /*
           * Check that the snapshots are in the same dsl_dir, and firstsnap
           * is before lastsnap.
           */
--- 3163,3174 ----
  {
          int err = 0;
          uint64_t snapobj;
          dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
  
!         ASSERT(firstsnap->ds_is_snapshot);
!         ASSERT(lastsnap->ds_is_snapshot);
  
          /*
           * Check that the snapshots are in the same dsl_dir, and firstsnap
           * is before lastsnap.
           */
*** 3284,3299 ****
          dsl_pool_t *dp = later->ds_dir->dd_pool;
          int error;
          boolean_t ret;
  
          ASSERT(dsl_pool_config_held(dp));
!         ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0);
  
          if (earlier_txg == 0)
                  earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
  
!         if (dsl_dataset_is_snapshot(later) &&
              earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
                  return (B_FALSE);
  
          if (later->ds_dir == earlier->ds_dir)
                  return (B_TRUE);
--- 3289,3304 ----
          dsl_pool_t *dp = later->ds_dir->dd_pool;
          int error;
          boolean_t ret;
  
          ASSERT(dsl_pool_config_held(dp));
!         ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);
  
          if (earlier_txg == 0)
                  earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
  
!         if (later->ds_is_snapshot &&
              earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
                  return (B_FALSE);
  
          if (later->ds_dir == earlier->ds_dir)
                  return (B_TRUE);