Print this page
2619 asynchronous destruction of ZFS file systems
2747 SPA versioning with zfs feature flags
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <gwilson@delphix.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>
Approved by: Dan McDonald <danmcd@nexenta.com>
        
*** 18,28 ****
   *
   * CDDL HEADER END
   */
  /*
   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
!  * Copyright (c) 2011 by Delphix. All rights reserved.
   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
   */
  
  #include <sys/dmu_objset.h>
  #include <sys/dsl_dataset.h>
--- 18,28 ----
   *
   * CDDL HEADER END
   */
  /*
   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
!  * Copyright (c) 2012 by Delphix. All rights reserved.
   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
   */
  
  #include <sys/dmu_objset.h>
  #include <sys/dsl_dataset.h>
*** 33,42 ****
--- 33,43 ----
  #include <sys/dmu_impl.h>
  #include <sys/dmu_tx.h>
  #include <sys/arc.h>
  #include <sys/zio.h>
  #include <sys/zap.h>
+ #include <sys/zfeature.h>
  #include <sys/unique.h>
  #include <sys/zfs_context.h>
  #include <sys/zfs_ioctl.h>
  #include <sys/spa.h>
  #include <sys/zfs_znode.h>
*** 98,108 ****
          ASSERT(dmu_tx_is_syncing(tx));
          /* It could have been compressed away to nothing */
          if (BP_IS_HOLE(bp))
                  return;
          ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
!         ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
          if (ds == NULL) {
                  /*
                   * Account for the meta-objset space in its placeholder
                   * dsl_dir.
                   */
--- 99,109 ----
          ASSERT(dmu_tx_is_syncing(tx));
          /* It could have been compressed away to nothing */
          if (BP_IS_HOLE(bp))
                  return;
          ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
!         ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
          if (ds == NULL) {
                  /*
                   * Account for the meta-objset space in its placeholder
                   * dsl_dir.
                   */
*** 115,125 ****
          dmu_buf_will_dirty(ds->ds_dbuf, tx);
  
          mutex_enter(&ds->ds_dir->dd_lock);
          mutex_enter(&ds->ds_lock);
          delta = parent_delta(ds, used);
!         ds->ds_phys->ds_used_bytes += used;
          ds->ds_phys->ds_compressed_bytes += compressed;
          ds->ds_phys->ds_uncompressed_bytes += uncompressed;
          ds->ds_phys->ds_unique_bytes += used;
          mutex_exit(&ds->ds_lock);
          dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
--- 116,126 ----
          dmu_buf_will_dirty(ds->ds_dbuf, tx);
  
          mutex_enter(&ds->ds_dir->dd_lock);
          mutex_enter(&ds->ds_lock);
          delta = parent_delta(ds, used);
!         ds->ds_phys->ds_referenced_bytes += used;
          ds->ds_phys->ds_compressed_bytes += compressed;
          ds->ds_phys->ds_uncompressed_bytes += uncompressed;
          ds->ds_phys->ds_unique_bytes += used;
          mutex_exit(&ds->ds_lock);
          dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
*** 209,220 ****
                          dsl_dir_transfer_space(ds->ds_dir, used,
                              DD_USED_HEAD, DD_USED_SNAP, tx);
                  }
          }
          mutex_enter(&ds->ds_lock);
!         ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
!         ds->ds_phys->ds_used_bytes -= used;
          ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
          ds->ds_phys->ds_compressed_bytes -= compressed;
          ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
          ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
          mutex_exit(&ds->ds_lock);
--- 210,221 ----
                          dsl_dir_transfer_space(ds->ds_dir, used,
                              DD_USED_HEAD, DD_USED_SNAP, tx);
                  }
          }
          mutex_enter(&ds->ds_lock);
!         ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
!         ds->ds_phys->ds_referenced_bytes -= used;
          ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
          ds->ds_phys->ds_compressed_bytes -= compressed;
          ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
          ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
          mutex_exit(&ds->ds_lock);
*** 816,827 ****
                  dsl_dataset_t *ohds;
  
                  dsphys->ds_prev_snap_obj = origin->ds_object;
                  dsphys->ds_prev_snap_txg =
                      origin->ds_phys->ds_creation_txg;
!                 dsphys->ds_used_bytes =
!                     origin->ds_phys->ds_used_bytes;
                  dsphys->ds_compressed_bytes =
                      origin->ds_phys->ds_compressed_bytes;
                  dsphys->ds_uncompressed_bytes =
                      origin->ds_phys->ds_uncompressed_bytes;
                  dsphys->ds_bp = origin->ds_phys->ds_bp;
--- 817,828 ----
                  dsl_dataset_t *ohds;
  
                  dsphys->ds_prev_snap_obj = origin->ds_object;
                  dsphys->ds_prev_snap_txg =
                      origin->ds_phys->ds_creation_txg;
!                 dsphys->ds_referenced_bytes =
!                     origin->ds_phys->ds_referenced_bytes;
                  dsphys->ds_compressed_bytes =
                      origin->ds_phys->ds_compressed_bytes;
                  dsphys->ds_uncompressed_bytes =
                      origin->ds_phys->ds_uncompressed_bytes;
                  dsphys->ds_bp = origin->ds_phys->ds_bp;
*** 931,941 ****
          dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
  
          for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
              pair = nvlist_next_nvpair(snaps, pair)) {
                  dsl_dataset_t *ds;
-                 int err;
  
                  err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
                  if (err == 0) {
                          struct dsl_ds_destroyarg *dsda;
  
--- 932,941 ----
*** 1080,1092 ****
          err = dmu_objset_from_ds(ds, &os);
          if (err)
                  goto out;
  
          /*
!          * remove the objects in open context, so that we won't
!          * have too much to do in syncing context.
           */
          for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
              ds->ds_phys->ds_prev_snap_txg)) {
                  /*
                   * Ignore errors, if there is not enough disk space
                   * we will deal with it in dsl_dataset_destroy_sync().
--- 1080,1095 ----
          err = dmu_objset_from_ds(ds, &os);
          if (err)
                  goto out;
  
          /*
!          * If async destruction is not enabled, try to remove all objects
!          * while in the open context so that there is less work to do in
!          * the syncing context.
           */
+         if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
+             &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
                  for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
                      ds->ds_phys->ds_prev_snap_txg)) {
                          /*
                           * Ignore errors, if there is not enough disk space
                           * we will deal with it in dsl_dataset_destroy_sync().
*** 1093,1102 ****
--- 1096,1106 ----
                           */
                          (void) dmu_free_object(os, obj);
                  }
                  if (err != ESRCH)
                          goto out;
+         }
  
          /*
           * Only the ZIL knows how to free log blocks.
           */
          zil_destroy(dmu_objset_zil(os), B_FALSE);
*** 1238,1256 ****
          uint64_t dlused, dlcomp, dluncomp;
  
          ASSERT(!dsl_dataset_is_snapshot(ds));
  
          if (ds->ds_phys->ds_prev_snap_obj != 0)
!                 mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
          else
                  mrs_used = 0;
  
          dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
  
          ASSERT3U(dlused, <=, mrs_used);
          ds->ds_phys->ds_unique_bytes =
!             ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
  
          if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
              SPA_VERSION_UNIQUE_ACCURATE)
                  ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
  }
--- 1242,1260 ----
          uint64_t dlused, dlcomp, dluncomp;
  
          ASSERT(!dsl_dataset_is_snapshot(ds));
  
          if (ds->ds_phys->ds_prev_snap_obj != 0)
!                 mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
          else
                  mrs_used = 0;
  
          dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
  
          ASSERT3U(dlused, <=, mrs_used);
          ds->ds_phys->ds_unique_bytes =
!             ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
  
          if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
              SPA_VERSION_UNIQUE_ACCURATE)
                  ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
  }
*** 1604,1613 ****
--- 1608,1641 ----
          dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
          dsl_deadlist_open(&ds_next->ds_deadlist, mos,
              ds_next->ds_phys->ds_deadlist_obj);
  }
  
+ static int
+ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
+ {
+         int err;
+         struct killarg ka;
+ 
+         /*
+          * Free everything that we point to (that's born after
+          * the previous snapshot, if we are a clone)
+          *
+          * NB: this should be very quick, because we already
+          * freed all the objects in open context.
+          */
+         ka.ds = ds;
+         ka.tx = tx;
+         err = traverse_dataset(ds,
+             ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
+             kill_blkptr, &ka);
+         ASSERT3U(err, ==, 0);
+         ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
+ 
+         return (err);
+ }
+ 
  void
  dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
  {
          struct dsl_ds_destroyarg *dsda = arg1;
          dsl_dataset_t *ds = dsda->ds;
*** 1750,1760 ****
                          dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
                              &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
                              tx);
                          dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
                              DD_USED_HEAD, used, comp, uncomp, tx);
-                         dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx);
  
                          /* Merge our deadlist into next's and free it. */
                          dsl_deadlist_merge(&ds_next->ds_deadlist,
                              ds->ds_phys->ds_deadlist_obj, tx);
                  }
--- 1778,1787 ----
*** 1826,1862 ****
                                      DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
                          }
                  }
                  dsl_dataset_rele(ds_next, FTAG);
          } else {
                  /*
                   * There's no next snapshot, so this is a head dataset.
                   * Destroy the deadlist.  Unless it's a clone, the
                   * deadlist should be empty.  (If it's a clone, it's
                   * safe to ignore the deadlist contents.)
                   */
-                 struct killarg ka;
- 
                  dsl_deadlist_close(&ds->ds_deadlist);
                  dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
                  ds->ds_phys->ds_deadlist_obj = 0;
  
                  /*
!                  * Free everything that we point to (that's born after
!                  * the previous snapshot, if we are a clone)
!                  *
!                  * NB: this should be very quick, because we already
!                  * freed all the objects in open context.
                   */
!                 ka.ds = ds;
!                 ka.tx = tx;
!                 err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
!                     TRAVERSE_POST, kill_blkptr, &ka);
!                 ASSERT3U(err, ==, 0);
                  ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
!                     ds->ds_phys->ds_unique_bytes == 0);
  
                  if (ds->ds_prev != NULL) {
                          if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
                                  VERIFY3U(0, ==, zap_remove_int(mos,
                                      ds->ds_prev->ds_dir->dd_phys->dd_clones,
                                      ds->ds_object, tx));
--- 1853,1911 ----
                                      DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
                          }
                  }
                  dsl_dataset_rele(ds_next, FTAG);
          } else {
+                 zfeature_info_t *async_destroy =
+                     &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
+ 
                  /*
                   * There's no next snapshot, so this is a head dataset.
                   * Destroy the deadlist.  Unless it's a clone, the
                   * deadlist should be empty.  (If it's a clone, it's
                   * safe to ignore the deadlist contents.)
                   */
                  dsl_deadlist_close(&ds->ds_deadlist);
                  dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
                  ds->ds_phys->ds_deadlist_obj = 0;
  
+                 if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
+                         err = old_synchronous_dataset_destroy(ds, tx);
+                 } else {
                          /*
!                          * Move the bptree into the pool's list of trees to
!                          * clean up and update space accounting information.
                           */
!                         uint64_t used, comp, uncomp;
! 
!                         ASSERT(err == 0 || err == EBUSY);
!                         if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
!                                 spa_feature_incr(dp->dp_spa, async_destroy, tx);
!                                 dp->dp_bptree_obj = bptree_alloc(
!                                     dp->dp_meta_objset, tx);
!                                 VERIFY(zap_add(dp->dp_meta_objset,
!                                     DMU_POOL_DIRECTORY_OBJECT,
!                                     DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
!                                     &dp->dp_bptree_obj, tx) == 0);
!                         }
! 
!                         used = ds->ds_dir->dd_phys->dd_used_bytes;
!                         comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
!                         uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
! 
                          ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
!                             ds->ds_phys->ds_unique_bytes == used);
  
+                         bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
+                             &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
+                             used, comp, uncomp, tx);
+                         dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
+                             -used, -comp, -uncomp, tx);
+                         dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
+                             used, comp, uncomp, tx);
+                 }
+ 
                  if (ds->ds_prev != NULL) {
                          if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
                                  VERIFY3U(0, ==, zap_remove_int(mos,
                                      ds->ds_prev->ds_dir->dd_phys->dd_clones,
                                      ds->ds_object, tx));
*** 2042,2052 ****
          dsphys->ds_next_snap_obj = ds->ds_object;
          dsphys->ds_num_children = 1;
          dsphys->ds_creation_time = gethrestime_sec();
          dsphys->ds_creation_txg = crtxg;
          dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
!         dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
          dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
          dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
          dsphys->ds_flags = ds->ds_phys->ds_flags;
          dsphys->ds_bp = ds->ds_phys->ds_bp;
          dmu_buf_rele(dbuf, FTAG);
--- 2091,2101 ----
          dsphys->ds_next_snap_obj = ds->ds_object;
          dsphys->ds_num_children = 1;
          dsphys->ds_creation_time = gethrestime_sec();
          dsphys->ds_creation_txg = crtxg;
          dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
!         dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
          dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
          dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
          dsphys->ds_flags = ds->ds_phys->ds_flags;
          dsphys->ds_bp = ds->ds_phys->ds_bp;
          dmu_buf_rele(dbuf, FTAG);
*** 2166,2179 ****
          for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
              zap_cursor_retrieve(&zc, &za) == 0;
              zap_cursor_advance(&zc)) {
                  dsl_dataset_t *clone;
                  char buf[ZFS_MAXNAMELEN];
                  if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
!                     za.za_first_integer, FTAG, &clone) != 0) {
!                         goto fail;
!                 }
                  dsl_dir_name(clone->ds_dir, buf);
                  VERIFY(nvlist_add_boolean(val, buf) == 0);
                  dsl_dataset_rele(clone, FTAG);
          }
          zap_cursor_fini(&zc);
--- 2215,2240 ----
          for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
              zap_cursor_retrieve(&zc, &za) == 0;
              zap_cursor_advance(&zc)) {
                  dsl_dataset_t *clone;
                  char buf[ZFS_MAXNAMELEN];
+                 /*
+                  * Even though we hold the dp_config_rwlock, the dataset
+                  * may fail to open, returning ENOENT.  If there is a
+                  * thread concurrently attempting to destroy this
+                  * dataset, it will have the ds_rwlock held for
+                  * RW_WRITER.  Our call to dsl_dataset_hold_obj() ->
+                  * dsl_dataset_hold_ref() will fail its
+                  * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
+                  * dp_config_rwlock, and wait for the destroy to progress
+                  * and signal ds_exclusive_cv.  If the destroy was
+                  * successful, we will see that
+                  * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
+                  */
                  if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
!                     za.za_first_integer, FTAG, &clone) != 0)
!                         continue;
                  dsl_dir_name(clone->ds_dir, buf);
                  VERIFY(nvlist_add_boolean(val, buf) == 0);
                  dsl_dataset_rele(clone, FTAG);
          }
          zap_cursor_fini(&zc);
*** 2292,2302 ****
  void
  dsl_dataset_space(dsl_dataset_t *ds,
      uint64_t *refdbytesp, uint64_t *availbytesp,
      uint64_t *usedobjsp, uint64_t *availobjsp)
  {
!         *refdbytesp = ds->ds_phys->ds_used_bytes;
          *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
          if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
                  *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
          if (ds->ds_quota != 0) {
                  /*
--- 2353,2363 ----
  void
  dsl_dataset_space(dsl_dataset_t *ds,
      uint64_t *refdbytesp, uint64_t *availbytesp,
      uint64_t *usedobjsp, uint64_t *availobjsp)
  {
!         *refdbytesp = ds->ds_phys->ds_referenced_bytes;
          *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
          if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
                  *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
          if (ds->ds_quota != 0) {
                  /*
*** 2629,2639 ****
           * Which simplifies to:
           * uN + kN + kN-1 + ... + k1 + k0
           * Note however, if we stop before we reach the ORIGIN we get:
           * uN + kN + kN-1 + ... + kM - uM-1
           */
!         pa->used = origin_ds->ds_phys->ds_used_bytes;
          pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
          pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
          for (snap = list_head(&pa->shared_snaps); snap;
              snap = list_next(&pa->shared_snaps, snap)) {
                  uint64_t val, dlused, dlcomp, dluncomp;
--- 2690,2700 ----
           * Which simplifies to:
           * uN + kN + kN-1 + ... + k1 + k0
           * Note however, if we stop before we reach the ORIGIN we get:
           * uN + kN + kN-1 + ... + kM - uM-1
           */
!         pa->used = origin_ds->ds_phys->ds_referenced_bytes;
          pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
          pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
          for (snap = list_head(&pa->shared_snaps); snap;
              snap = list_next(&pa->shared_snaps, snap)) {
                  uint64_t val, dlused, dlcomp, dluncomp;
*** 2663,2673 ****
          /*
           * If we are a clone of a clone then we never reached ORIGIN,
           * so we need to subtract out the clone origin's used space.
           */
          if (pa->origin_origin) {
!                 pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
          }
  
          /* Check that there is enough space here */
--- 2724,2734 ----
          /*
           * If we are a clone of a clone then we never reached ORIGIN,
           * so we need to subtract out the clone origin's used space.
           */
          if (pa->origin_origin) {
!                 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
          }
  
          /* Check that there is enough space here */
*** 3179,3190 ****
                  dsl_deadlist_space(&csa->cds->ds_deadlist,
                      &cdl_used, &cdl_comp, &cdl_uncomp);
                  dsl_deadlist_space(&csa->ohds->ds_deadlist,
                      &odl_used, &odl_comp, &odl_uncomp);
  
!                 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
!                     (csa->ohds->ds_phys->ds_used_bytes + odl_used);
                  dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
                      (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
                  duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
                      cdl_uncomp -
                      (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
--- 3240,3251 ----
                  dsl_deadlist_space(&csa->cds->ds_deadlist,
                      &cdl_used, &cdl_comp, &cdl_uncomp);
                  dsl_deadlist_space(&csa->ohds->ds_deadlist,
                      &odl_used, &odl_comp, &odl_uncomp);
  
!                 dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
!                     (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
                  dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
                      (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
                  duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
                      cdl_uncomp -
                      (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
*** 3209,3220 ****
                  dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
                      DD_USED_HEAD, DD_USED_SNAP, tx);
          }
  
          /* swap ds_*_bytes */
!         SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
!             csa->cds->ds_phys->ds_used_bytes);
          SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
              csa->cds->ds_phys->ds_compressed_bytes);
          SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
              csa->cds->ds_phys->ds_uncompressed_bytes);
          SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
--- 3270,3281 ----
                  dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
                      DD_USED_HEAD, DD_USED_SNAP, tx);
          }
  
          /* swap ds_*_bytes */
!         SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
!             csa->cds->ds_phys->ds_referenced_bytes);
          SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
              csa->cds->ds_phys->ds_compressed_bytes);
          SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
              csa->cds->ds_phys->ds_uncompressed_bytes);
          SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
*** 3339,3350 ****
           * If they are requesting more space, and our current estimate
           * is over quota, they get to try again unless the actual
           * on-disk is over quota and there are no pending changes (which
           * may free up space for us).
           */
!         if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
!                 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
                          error = ERESTART;
                  else
                          error = EDQUOT;
          }
          mutex_exit(&ds->ds_lock);
--- 3400,3412 ----
           * If they are requesting more space, and our current estimate
           * is over quota, they get to try again unless the actual
           * on-disk is over quota and there are no pending changes (which
           * may free up space for us).
           */
!         if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
!                 if (inflight > 0 ||
!                     ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
                          error = ERESTART;
                  else
                          error = EDQUOT;
          }
          mutex_exit(&ds->ds_lock);
*** 3367,3377 ****
                  return (err);
  
          if (psa->psa_effective_value == 0)
                  return (0);
  
!         if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes ||
              psa->psa_effective_value < ds->ds_reserved)
                  return (ENOSPC);
  
          return (0);
  }
--- 3429,3439 ----
                  return (err);
  
          if (psa->psa_effective_value == 0)
                  return (0);
  
!         if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
              psa->psa_effective_value < ds->ds_reserved)
                  return (ENOSPC);
  
          return (0);
  }
*** 4121,4132 ****
          int err = 0;
          uint64_t snapobj;
          dsl_pool_t *dp = new->ds_dir->dd_pool;
  
          *usedp = 0;
!         *usedp += new->ds_phys->ds_used_bytes;
!         *usedp -= oldsnap->ds_phys->ds_used_bytes;
  
          *compp = 0;
          *compp += new->ds_phys->ds_compressed_bytes;
          *compp -= oldsnap->ds_phys->ds_compressed_bytes;
  
--- 4183,4194 ----
          int err = 0;
          uint64_t snapobj;
          dsl_pool_t *dp = new->ds_dir->dd_pool;
  
          *usedp = 0;
!         *usedp += new->ds_phys->ds_referenced_bytes;
!         *usedp -= oldsnap->ds_phys->ds_referenced_bytes;
  
          *compp = 0;
          *compp += new->ds_phys->ds_compressed_bytes;
          *compp -= oldsnap->ds_phys->ds_compressed_bytes;
  
*** 4138,4150 ****
--- 4200,4216 ----
          snapobj = new->ds_object;
          while (snapobj != oldsnap->ds_object) {
                  dsl_dataset_t *snap;
                  uint64_t used, comp, uncomp;
  
+                 if (snapobj == new->ds_object) {
+                         snap = new;
+                 } else {
                          err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
                          if (err != 0)
                                  break;
+                 }
  
                  if (snap->ds_phys->ds_prev_snap_txg ==
                      oldsnap->ds_phys->ds_creation_txg) {
                          /*
                           * The blocks in the deadlist can not be born after
*** 4169,4178 ****
--- 4235,4245 ----
                   * If we get to the beginning of the chain of snapshots
                   * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
                   * was not a snapshot of/before new.
                   */
                  snapobj = snap->ds_phys->ds_prev_snap_obj;
+                 if (snap != new)
                          dsl_dataset_rele(snap, FTAG);
                  if (snapobj == 0) {
                          err = EINVAL;
                          break;
                  }