2619 asynchronous destruction of ZFS file systems
2747 SPA versioning with zfs feature flags
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <gwilson@delphix.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>
Approved by: Dan McDonald <danmcd@nexenta.com>

          --- old/usr/src/uts/common/fs/zfs/dsl_dataset.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_dataset.c
[... 12 lines elided ...]
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright (c) 2011 by Delphix. All rights reserved.
       23 + * Copyright (c) 2012 by Delphix. All rights reserved.
  24   24   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/dmu_objset.h>
  28   28  #include <sys/dsl_dataset.h>
  29   29  #include <sys/dsl_dir.h>
  30   30  #include <sys/dsl_prop.h>
  31   31  #include <sys/dsl_synctask.h>
  32   32  #include <sys/dmu_traverse.h>
  33   33  #include <sys/dmu_impl.h>
  34   34  #include <sys/dmu_tx.h>
  35   35  #include <sys/arc.h>
  36   36  #include <sys/zio.h>
  37   37  #include <sys/zap.h>
       38 +#include <sys/zfeature.h>
  38   39  #include <sys/unique.h>
  39   40  #include <sys/zfs_context.h>
  40   41  #include <sys/zfs_ioctl.h>
  41   42  #include <sys/spa.h>
  42   43  #include <sys/zfs_znode.h>
  43   44  #include <sys/zfs_onexit.h>
  44   45  #include <sys/zvol.h>
  45   46  #include <sys/dsl_scan.h>
  46   47  #include <sys/dsl_deadlist.h>
  47   48  
[... 45 lines elided ...]
  93   94          int uncompressed = BP_GET_UCSIZE(bp);
  94   95          int64_t delta;
  95   96  
  96   97          dprintf_bp(bp, "ds=%p", ds);
  97   98  
  98   99          ASSERT(dmu_tx_is_syncing(tx));
  99  100          /* It could have been compressed away to nothing */
 100  101          if (BP_IS_HOLE(bp))
 101  102                  return;
 102  103          ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
 103      -        ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
      104 +        ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
 104  105          if (ds == NULL) {
 105  106                  /*
 106  107                   * Account for the meta-objset space in its placeholder
 107  108                   * dsl_dir.
 108  109                   */
 109  110                  ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
 110  111                  dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
 111  112                      used, compressed, uncompressed, tx);
 112  113                  dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
 113  114                  return;
 114  115          }
 115  116          dmu_buf_will_dirty(ds->ds_dbuf, tx);
 116  117  
 117  118          mutex_enter(&ds->ds_dir->dd_lock);
 118  119          mutex_enter(&ds->ds_lock);
 119  120          delta = parent_delta(ds, used);
 120      -        ds->ds_phys->ds_used_bytes += used;
      121 +        ds->ds_phys->ds_referenced_bytes += used;
 121  122          ds->ds_phys->ds_compressed_bytes += compressed;
 122  123          ds->ds_phys->ds_uncompressed_bytes += uncompressed;
 123  124          ds->ds_phys->ds_unique_bytes += used;
 124  125          mutex_exit(&ds->ds_lock);
 125  126          dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
 126  127              compressed, uncompressed, tx);
 127  128          dsl_dir_transfer_space(ds->ds_dir, used - delta,
 128  129              DD_USED_REFRSRV, DD_USED_HEAD, tx);
 129  130          mutex_exit(&ds->ds_dir->dd_lock);
 130  131  }
[... 73 lines elided ...]
 204  205                          mutex_enter(&ds->ds_prev->ds_lock);
 205  206                          ds->ds_prev->ds_phys->ds_unique_bytes += used;
 206  207                          mutex_exit(&ds->ds_prev->ds_lock);
 207  208                  }
 208  209                  if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
 209  210                          dsl_dir_transfer_space(ds->ds_dir, used,
 210  211                              DD_USED_HEAD, DD_USED_SNAP, tx);
 211  212                  }
 212  213          }
 213  214          mutex_enter(&ds->ds_lock);
 214      -        ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
 215      -        ds->ds_phys->ds_used_bytes -= used;
      215 +        ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
      216 +        ds->ds_phys->ds_referenced_bytes -= used;
 216  217          ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
 217  218          ds->ds_phys->ds_compressed_bytes -= compressed;
 218  219          ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
 219  220          ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
 220  221          mutex_exit(&ds->ds_lock);
 221  222  
 222  223          return (used);
 223  224  }
 224  225  
 225  226  uint64_t
[... 585 lines elided ...]
 811  812          dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
 812  813  
 813  814          if (origin == NULL) {
 814  815                  dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
 815  816          } else {
 816  817                  dsl_dataset_t *ohds;
 817  818  
 818  819                  dsphys->ds_prev_snap_obj = origin->ds_object;
 819  820                  dsphys->ds_prev_snap_txg =
 820  821                      origin->ds_phys->ds_creation_txg;
 821      -                dsphys->ds_used_bytes =
 822      -                    origin->ds_phys->ds_used_bytes;
      822 +                dsphys->ds_referenced_bytes =
      823 +                    origin->ds_phys->ds_referenced_bytes;
 823  824                  dsphys->ds_compressed_bytes =
 824  825                      origin->ds_phys->ds_compressed_bytes;
 825  826                  dsphys->ds_uncompressed_bytes =
 826  827                      origin->ds_phys->ds_uncompressed_bytes;
 827  828                  dsphys->ds_bp = origin->ds_phys->ds_bp;
 828  829                  dsphys->ds_flags |= origin->ds_phys->ds_flags;
 829  830  
 830  831                  dmu_buf_will_dirty(origin->ds_dbuf, tx);
 831  832                  origin->ds_phys->ds_num_children++;
 832  833  
[... 93 lines elided ...]
 926  927                  return (0);
 927  928  
 928  929          err = spa_open(nvpair_name(pair), &spa, FTAG);
 929  930          if (err)
 930  931                  return (err);
 931  932          dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
 932  933  
 933  934          for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 934  935              pair = nvlist_next_nvpair(snaps, pair)) {
 935  936                  dsl_dataset_t *ds;
 936      -                int err;
 937  937  
 938  938                  err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
 939  939                  if (err == 0) {
 940  940                          struct dsl_ds_destroyarg *dsda;
 941  941  
 942  942                          dsl_dataset_make_exclusive(ds, dstg);
 943  943                          dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
 944  944                              KM_SLEEP);
 945  945                          dsda->ds = ds;
 946  946                          dsda->defer = defer;
[... 128 lines elided ...]
1075 1075          err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
1076 1076              dsl_dataset_destroy_begin_sync, ds, NULL, 0);
1077 1077          if (err)
1078 1078                  goto out;
1079 1079  
1080 1080          err = dmu_objset_from_ds(ds, &os);
1081 1081          if (err)
1082 1082                  goto out;
1083 1083  
1084 1084          /*
1085      -         * remove the objects in open context, so that we won't
1086      -         * have too much to do in syncing context.
      1085 +         * If async destruction is not enabled, try to remove all objects
     1086 +         * while in the open context so that there is less work to do in
     1087 +         * the syncing context.
1087 1088           */
1088      -        for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
1089      -            ds->ds_phys->ds_prev_snap_txg)) {
1090      -                /*
1091      -                 * Ignore errors, if there is not enough disk space
1092      -                 * we will deal with it in dsl_dataset_destroy_sync().
1093      -                 */
1094      -                (void) dmu_free_object(os, obj);
     1089 +        if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
     1090 +            &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
     1091 +                for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
     1092 +                    ds->ds_phys->ds_prev_snap_txg)) {
     1093 +                        /*
      1094 +                         * Ignore errors; if there is not enough disk space
     1095 +                         * we will deal with it in dsl_dataset_destroy_sync().
     1096 +                         */
     1097 +                        (void) dmu_free_object(os, obj);
     1098 +                }
     1099 +                if (err != ESRCH)
     1100 +                        goto out;
1095 1101          }
1096      -        if (err != ESRCH)
1097      -                goto out;
1098 1102  
1099 1103          /*
1100 1104           * Only the ZIL knows how to free log blocks.
1101 1105           */
1102 1106          zil_destroy(dmu_objset_zil(os), B_FALSE);
1103 1107  
1104 1108          /*
1105 1109           * Sync out all in-flight IO.
1106 1110           */
1107 1111          txg_wait_synced(dd->dd_pool, 0);
[... 125 lines elided ...]
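
Aside: the gate in the hunk above is the feature-flags check from issue 2747. Below is a minimal sketch of the idiom, using only the calls visible in this hunk (spa_feature_is_enabled() against a spa_feature_table[] entry); the wrapper name is hypothetical and is not part of the change.

/*
 * Hypothetical wrapper (illustration only): B_TRUE means the free work
 * may be deferred to syncing context, so the old object-by-object loop
 * in open context can be skipped.
 */
static boolean_t
dsl_dataset_can_destroy_async(dsl_dataset_t *ds)
{
        return (spa_feature_is_enabled(dsl_dataset_get_spa(ds),
            &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
}
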
1233 1237   */
1234 1238  static void
1235 1239  dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
1236 1240  {
1237 1241          uint64_t mrs_used;
1238 1242          uint64_t dlused, dlcomp, dluncomp;
1239 1243  
1240 1244          ASSERT(!dsl_dataset_is_snapshot(ds));
1241 1245  
1242 1246          if (ds->ds_phys->ds_prev_snap_obj != 0)
1243      -                mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
     1247 +                mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
1244 1248          else
1245 1249                  mrs_used = 0;
1246 1250  
1247 1251          dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
1248 1252  
1249 1253          ASSERT3U(dlused, <=, mrs_used);
1250 1254          ds->ds_phys->ds_unique_bytes =
1251      -            ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
     1255 +            ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
1252 1256  
1253 1257          if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
1254 1258              SPA_VERSION_UNIQUE_ACCURATE)
1255 1259                  ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1256 1260  }
1257 1261  
1258 1262  struct killarg {
1259 1263          dsl_dataset_t *ds;
1260 1264          dmu_tx_t *tx;
1261 1265  };
[... 337 lines elided ...]
1599 1603          /* swap next's deadlist to our deadlist */
1600 1604          dsl_deadlist_close(&ds->ds_deadlist);
1601 1605          dsl_deadlist_close(&ds_next->ds_deadlist);
1602 1606          SWITCH64(ds_next->ds_phys->ds_deadlist_obj,
1603 1607              ds->ds_phys->ds_deadlist_obj);
1604 1608          dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
1605 1609          dsl_deadlist_open(&ds_next->ds_deadlist, mos,
1606 1610              ds_next->ds_phys->ds_deadlist_obj);
1607 1611  }
1608 1612  
     1613 +static int
     1614 +old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
     1615 +{
     1616 +        int err;
     1617 +        struct killarg ka;
     1618 +
     1619 +        /*
     1620 +         * Free everything that we point to (that's born after
     1621 +         * the previous snapshot, if we are a clone)
     1622 +         *
     1623 +         * NB: this should be very quick, because we already
     1624 +         * freed all the objects in open context.
     1625 +         */
     1626 +        ka.ds = ds;
     1627 +        ka.tx = tx;
     1628 +        err = traverse_dataset(ds,
     1629 +            ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
     1630 +            kill_blkptr, &ka);
     1631 +        ASSERT3U(err, ==, 0);
     1632 +        ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
     1633 +
     1634 +        return (err);
     1635 +}
     1636 +
1609 1637  void
1610 1638  dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
1611 1639  {
1612 1640          struct dsl_ds_destroyarg *dsda = arg1;
1613 1641          dsl_dataset_t *ds = dsda->ds;
1614 1642          int err;
1615 1643          int after_branch_point = FALSE;
1616 1644          dsl_pool_t *dp = ds->ds_dir->dd_pool;
1617 1645          objset_t *mos = dp->dp_meta_objset;
1618 1646          dsl_dataset_t *ds_prev = NULL;
[... 126 lines elided ...]
1745 1773                              &used, &comp, &uncomp);
1746 1774                          dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
1747 1775                              -used, -comp, -uncomp, tx);
1748 1776  
1749 1777                          /* Move blocks to be freed to pool's free list. */
1750 1778                          dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
1751 1779                              &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
1752 1780                              tx);
1753 1781                          dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
1754 1782                              DD_USED_HEAD, used, comp, uncomp, tx);
1755      -                        dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx);
1756 1783  
1757 1784                          /* Merge our deadlist into next's and free it. */
1758 1785                          dsl_deadlist_merge(&ds_next->ds_deadlist,
1759 1786                              ds->ds_phys->ds_deadlist_obj, tx);
1760 1787                  }
1761 1788                  dsl_deadlist_close(&ds->ds_deadlist);
1762 1789                  dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
1763 1790  
1764 1791                  /* Collapse range in clone heads */
1765 1792                  dsl_dataset_remove_clones_key(ds,
[... 55 lines elided ...]
1821 1848  
1822 1849                                  ASSERT(old_unique <= new_unique);
1823 1850                                  mrsdelta = MIN(new_unique - old_unique,
1824 1851                                      ds_next->ds_reserved - old_unique);
1825 1852                                  dsl_dir_diduse_space(ds->ds_dir,
1826 1853                                      DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
1827 1854                          }
1828 1855                  }
1829 1856                  dsl_dataset_rele(ds_next, FTAG);
1830 1857          } else {
     1858 +                zfeature_info_t *async_destroy =
     1859 +                    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
     1860 +
1831 1861                  /*
1832 1862                   * There's no next snapshot, so this is a head dataset.
1833 1863                   * Destroy the deadlist.  Unless it's a clone, the
1834 1864                   * deadlist should be empty.  (If it's a clone, it's
1835 1865                   * safe to ignore the deadlist contents.)
1836 1866                   */
1837      -                struct killarg ka;
1838      -
1839 1867                  dsl_deadlist_close(&ds->ds_deadlist);
1840 1868                  dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
1841 1869                  ds->ds_phys->ds_deadlist_obj = 0;
1842 1870  
1843      -                /*
1844      -                 * Free everything that we point to (that's born after
1845      -                 * the previous snapshot, if we are a clone)
1846      -                 *
1847      -                 * NB: this should be very quick, because we already
1848      -                 * freed all the objects in open context.
1849      -                 */
1850      -                ka.ds = ds;
1851      -                ka.tx = tx;
1852      -                err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1853      -                    TRAVERSE_POST, kill_blkptr, &ka);
1854      -                ASSERT3U(err, ==, 0);
1855      -                ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
1856      -                    ds->ds_phys->ds_unique_bytes == 0);
     1871 +                if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
     1872 +                        err = old_synchronous_dataset_destroy(ds, tx);
     1873 +                } else {
     1874 +                        /*
     1875 +                         * Move the bptree into the pool's list of trees to
     1876 +                         * clean up and update space accounting information.
     1877 +                         */
     1878 +                        uint64_t used, comp, uncomp;
1857 1879  
     1880 +                        ASSERT(err == 0 || err == EBUSY);
     1881 +                        if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
     1882 +                                spa_feature_incr(dp->dp_spa, async_destroy, tx);
     1883 +                                dp->dp_bptree_obj = bptree_alloc(
     1884 +                                    dp->dp_meta_objset, tx);
     1885 +                                VERIFY(zap_add(dp->dp_meta_objset,
     1886 +                                    DMU_POOL_DIRECTORY_OBJECT,
     1887 +                                    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
     1888 +                                    &dp->dp_bptree_obj, tx) == 0);
     1889 +                        }
     1890 +
     1891 +                        used = ds->ds_dir->dd_phys->dd_used_bytes;
     1892 +                        comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
     1893 +                        uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
     1894 +
     1895 +                        ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
     1896 +                            ds->ds_phys->ds_unique_bytes == used);
     1897 +
     1898 +                        bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
     1899 +                            &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
     1900 +                            used, comp, uncomp, tx);
     1901 +                        dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
     1902 +                            -used, -comp, -uncomp, tx);
     1903 +                        dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
     1904 +                            used, comp, uncomp, tx);
     1905 +                }
     1906 +
1858 1907                  if (ds->ds_prev != NULL) {
1859 1908                          if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
1860 1909                                  VERIFY3U(0, ==, zap_remove_int(mos,
1861 1910                                      ds->ds_prev->ds_dir->dd_phys->dd_clones,
1862 1911                                      ds->ds_object, tx));
1863 1912                          }
1864 1913                          dsl_dataset_rele(ds->ds_prev, ds);
1865 1914                          ds->ds_prev = ds_prev = NULL;
1866 1915                  }
1867 1916          }
[... 169 lines elided ...]
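
For orientation, here is a condensed sketch of the async-destroy handoff performed in the else branch above. The helper name is hypothetical, assertions and error paths are elided, and only calls that appear in the hunk are used; the queued tree is drained later by the pool's syncing code, which is not part of this file. Treat it as a reading aid, not the actual change.

/*
 * Sketch only (hypothetical helper): hand a head dataset off to the
 * pool-wide bptree so its blocks can be freed asynchronously.
 */
static void
async_destroy_enqueue(dsl_pool_t *dp, dsl_dataset_t *ds, dmu_tx_t *tx)
{
        zfeature_info_t *async_destroy =
            &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
        uint64_t used = ds->ds_dir->dd_phys->dd_used_bytes;
        uint64_t comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
        uint64_t uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;

        /*
         * First destroy on this pool: bump the feature refcount from
         * enabled to active, create the bptree, and record it in the
         * MOS pool directory so it can be found again at pool open.
         */
        if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
                spa_feature_incr(dp->dp_spa, async_destroy, tx);
                dp->dp_bptree_obj = bptree_alloc(dp->dp_meta_objset, tx);
                VERIFY(zap_add(dp->dp_meta_objset,
                    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ,
                    sizeof (uint64_t), 1, &dp->dp_bptree_obj, tx) == 0);
        }

        /* Queue the dataset's root bp along with its space totals. */
        bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
            &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
            used, comp, uncomp, tx);

        /* The queued space is charged to the pool's $FREE dir. */
        dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
            -used, -comp, -uncomp, tx);
        dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
            used, comp, uncomp, tx);
}
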
2037 2086          dsphys->ds_fsid_guid = unique_create();
2038 2087          (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2039 2088              sizeof (dsphys->ds_guid));
2040 2089          dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2041 2090          dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
2042 2091          dsphys->ds_next_snap_obj = ds->ds_object;
2043 2092          dsphys->ds_num_children = 1;
2044 2093          dsphys->ds_creation_time = gethrestime_sec();
2045 2094          dsphys->ds_creation_txg = crtxg;
2046 2095          dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
2047      -        dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
     2096 +        dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
2048 2097          dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
2049 2098          dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
2050 2099          dsphys->ds_flags = ds->ds_phys->ds_flags;
2051 2100          dsphys->ds_bp = ds->ds_phys->ds_bp;
2052 2101          dmu_buf_rele(dbuf, FTAG);
2053 2102  
2054 2103          ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
2055 2104          if (ds->ds_prev) {
2056 2105                  uint64_t next_clones_obj =
2057 2106                      ds->ds_prev->ds_phys->ds_next_clones_obj;
[... 103 lines elided ...]
2161 2210                      &count));
2162 2211          }
2163 2212          if (count != ds->ds_phys->ds_num_children - 1) {
2164 2213                  goto fail;
2165 2214          }
2166 2215          for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
2167 2216              zap_cursor_retrieve(&zc, &za) == 0;
2168 2217              zap_cursor_advance(&zc)) {
2169 2218                  dsl_dataset_t *clone;
2170 2219                  char buf[ZFS_MAXNAMELEN];
     2220 +                /*
     2221 +                 * Even though we hold the dp_config_rwlock, the dataset
     2222 +                 * may fail to open, returning ENOENT.  If there is a
     2223 +                 * thread concurrently attempting to destroy this
     2224 +                 * dataset, it will have the ds_rwlock held for
     2225 +                 * RW_WRITER.  Our call to dsl_dataset_hold_obj() ->
     2226 +                 * dsl_dataset_hold_ref() will fail its
     2227 +                 * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
      2228 +                 * dp_config_rwlock, and wait for the destroy to progress
     2229 +                 * and signal ds_exclusive_cv.  If the destroy was
     2230 +                 * successful, we will see that
     2231 +                 * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
     2232 +                 */
2171 2233                  if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
2172      -                    za.za_first_integer, FTAG, &clone) != 0) {
2173      -                        goto fail;
2174      -                }
     2234 +                    za.za_first_integer, FTAG, &clone) != 0)
     2235 +                        continue;
2175 2236                  dsl_dir_name(clone->ds_dir, buf);
2176 2237                  VERIFY(nvlist_add_boolean(val, buf) == 0);
2177 2238                  dsl_dataset_rele(clone, FTAG);
2178 2239          }
2179 2240          zap_cursor_fini(&zc);
2180 2241          VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
2181 2242          VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
2182 2243              propval) == 0);
2183 2244  fail:
2184 2245          nvlist_free(val);
[... 102 lines elided ...]
2287 2348  dsl_dataset_fsid_guid(dsl_dataset_t *ds)
2288 2349  {
2289 2350          return (ds->ds_fsid_guid);
2290 2351  }
2291 2352  
2292 2353  void
2293 2354  dsl_dataset_space(dsl_dataset_t *ds,
2294 2355      uint64_t *refdbytesp, uint64_t *availbytesp,
2295 2356      uint64_t *usedobjsp, uint64_t *availobjsp)
2296 2357  {
2297      -        *refdbytesp = ds->ds_phys->ds_used_bytes;
     2358 +        *refdbytesp = ds->ds_phys->ds_referenced_bytes;
2298 2359          *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
2299 2360          if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
2300 2361                  *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
2301 2362          if (ds->ds_quota != 0) {
2302 2363                  /*
2303 2364                   * Adjust available bytes according to refquota
2304 2365                   */
2305 2366                  if (*refdbytesp < ds->ds_quota)
2306 2367                          *availbytesp = MIN(*availbytesp,
2307 2368                              ds->ds_quota - *refdbytesp);
[... 316 lines elided ...]
2624 2685           * (my used) = (prev's used) + (blocks born) - (blocks killed)
2625 2686           * So each snapshot gave birth to:
2626 2687           * (blocks born) = (my used) - (prev's used) + (blocks killed)
2627 2688           * So a sequence would look like:
2628 2689           * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
2629 2690           * Which simplifies to:
2630 2691           * uN + kN + kN-1 + ... + k1 + k0
2631 2692           * Note however, if we stop before we reach the ORIGIN we get:
2632 2693           * uN + kN + kN-1 + ... + kM - uM-1
2633 2694           */
2634      -        pa->used = origin_ds->ds_phys->ds_used_bytes;
     2695 +        pa->used = origin_ds->ds_phys->ds_referenced_bytes;
2635 2696          pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
2636 2697          pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
2637 2698          for (snap = list_head(&pa->shared_snaps); snap;
2638 2699              snap = list_next(&pa->shared_snaps, snap)) {
2639 2700                  uint64_t val, dlused, dlcomp, dluncomp;
2640 2701                  dsl_dataset_t *ds = snap->ds;
2641 2702  
2642 2703                  /* Check that the snapshot name does not conflict */
2643 2704                  VERIFY(0 == dsl_dataset_get_snapname(ds));
2644 2705                  err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
[... 13 lines elided ...]
2658 2719                  pa->used += dlused;
2659 2720                  pa->comp += dlcomp;
2660 2721                  pa->uncomp += dluncomp;
2661 2722          }
2662 2723  
2663 2724          /*
2664 2725           * If we are a clone of a clone then we never reached ORIGIN,
2665 2726           * so we need to subtract out the clone origin's used space.
2666 2727           */
2667 2728          if (pa->origin_origin) {
2668      -                pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
     2729 +                pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2669 2730                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2670 2731                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2671 2732          }
2672 2733  
2673 2734          /* Check that there is enough space here */
2674 2735          err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2675 2736              pa->used);
2676 2737          if (err)
2677 2738                  return (err);
2678 2739  
[... 495 lines elided ...]
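
As a sanity check of the accounting described in the comment at the top of the hunk above, a worked example with made-up numbers:

/*
 * Hypothetical figures: three snapshots with referenced bytes
 * u0 = 100, u1 = 150, u2 = 180 and deadlist (killed) bytes
 * k0 = 0, k1 = 20, k2 = 30.  Summing (uN - u(N-1) + kN) all the
 * way down to the ORIGIN:
 *
 *      (180 - 150 + 30) + (150 - 100 + 20) + (100 - 0 + 0) = 230
 *
 * which matches the simplified form uN + kN + ... + k1 + k0
 * = 180 + 30 + 20 + 0 = 230.
 */
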
3174 3235                  uint64_t odl_used, odl_comp, odl_uncomp;
3175 3236  
3176 3237                  ASSERT3U(csa->cds->ds_dir->dd_phys->
3177 3238                      dd_used_breakdown[DD_USED_SNAP], ==, 0);
3178 3239  
3179 3240                  dsl_deadlist_space(&csa->cds->ds_deadlist,
3180 3241                      &cdl_used, &cdl_comp, &cdl_uncomp);
3181 3242                  dsl_deadlist_space(&csa->ohds->ds_deadlist,
3182 3243                      &odl_used, &odl_comp, &odl_uncomp);
3183 3244  
3184      -                dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
3185      -                    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
     3245 +                dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
     3246 +                    (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
3186 3247                  dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
3187 3248                      (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
3188 3249                  duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
3189 3250                      cdl_uncomp -
3190 3251                      (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
3191 3252  
3192 3253                  dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
3193 3254                      dused, dcomp, duncomp, tx);
3194 3255                  dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
3195 3256                      -dused, -dcomp, -duncomp, tx);
[... 8 lines elided ...]
3204 3265                      csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
3205 3266                      &cdl_used, &cdl_comp, &cdl_uncomp);
3206 3267                  dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
3207 3268                      csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
3208 3269                      &odl_used, &odl_comp, &odl_uncomp);
3209 3270                  dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
3210 3271                      DD_USED_HEAD, DD_USED_SNAP, tx);
3211 3272          }
3212 3273  
3213 3274          /* swap ds_*_bytes */
3214      -        SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
3215      -            csa->cds->ds_phys->ds_used_bytes);
     3275 +        SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
     3276 +            csa->cds->ds_phys->ds_referenced_bytes);
3216 3277          SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
3217 3278              csa->cds->ds_phys->ds_compressed_bytes);
3218 3279          SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
3219 3280              csa->cds->ds_phys->ds_uncompressed_bytes);
3220 3281          SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
3221 3282              csa->cds->ds_phys->ds_unique_bytes);
3222 3283  
3223 3284          /* apply any parent delta for change in unconsumed refreservation */
3224 3285          dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
3225 3286              csa->unused_refres_delta, 0, 0, tx);
[... 108 lines elided ...]
3334 3395          if (!check_quota || ds->ds_quota == 0) {
3335 3396                  mutex_exit(&ds->ds_lock);
3336 3397                  return (0);
3337 3398          }
3338 3399          /*
3339 3400           * If they are requesting more space, and our current estimate
3340 3401           * is over quota, they get to try again unless the actual
3341 3402           * on-disk is over quota and there are no pending changes (which
3342 3403           * may free up space for us).
3343 3404           */
3344      -        if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
3345      -                if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
     3405 +        if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
     3406 +                if (inflight > 0 ||
     3407 +                    ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
3346 3408                          error = ERESTART;
3347 3409                  else
3348 3410                          error = EDQUOT;
3349 3411          }
3350 3412          mutex_exit(&ds->ds_lock);
3351 3413  
3352 3414          return (error);
3353 3415  }
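
/*
 * Illustration only (not part of the change), with ds_quota = 100:
 *   on-disk referenced = 90,  inflight = 20 -> estimate 110 >= 100
 *     and inflight > 0, so ERESTART: the caller should wait for the
 *     pending changes to sync and try again.
 *   on-disk referenced = 105, inflight = 0  -> over quota with
 *     nothing in flight, so EDQUOT is final.
 */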
3354 3416  
3355 3417  /* ARGSUSED */
[... 6 lines elided ...]
3362 3424  
3363 3425          if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
3364 3426                  return (ENOTSUP);
3365 3427  
3366 3428          if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
3367 3429                  return (err);
3368 3430  
3369 3431          if (psa->psa_effective_value == 0)
3370 3432                  return (0);
3371 3433  
3372      -        if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes ||
     3434 +        if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
3373 3435              psa->psa_effective_value < ds->ds_reserved)
3374 3436                  return (ENOSPC);
3375 3437  
3376 3438          return (0);
3377 3439  }
3378 3440  
3379 3441  extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *);
3380 3442  
3381 3443  void
3382 3444  dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
[... 733 lines elided ...]
4116 4178   */
4117 4179  int
4118 4180  dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
4119 4181      uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
4120 4182  {
4121 4183          int err = 0;
4122 4184          uint64_t snapobj;
4123 4185          dsl_pool_t *dp = new->ds_dir->dd_pool;
4124 4186  
4125 4187          *usedp = 0;
4126      -        *usedp += new->ds_phys->ds_used_bytes;
4127      -        *usedp -= oldsnap->ds_phys->ds_used_bytes;
     4188 +        *usedp += new->ds_phys->ds_referenced_bytes;
     4189 +        *usedp -= oldsnap->ds_phys->ds_referenced_bytes;
4128 4190  
4129 4191          *compp = 0;
4130 4192          *compp += new->ds_phys->ds_compressed_bytes;
4131 4193          *compp -= oldsnap->ds_phys->ds_compressed_bytes;
4132 4194  
4133 4195          *uncompp = 0;
4134 4196          *uncompp += new->ds_phys->ds_uncompressed_bytes;
4135 4197          *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
4136 4198  
4137 4199          rw_enter(&dp->dp_config_rwlock, RW_READER);
4138 4200          snapobj = new->ds_object;
4139 4201          while (snapobj != oldsnap->ds_object) {
4140 4202                  dsl_dataset_t *snap;
4141 4203                  uint64_t used, comp, uncomp;
4142 4204  
4143      -                err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
4144      -                if (err != 0)
4145      -                        break;
     4205 +                if (snapobj == new->ds_object) {
     4206 +                        snap = new;
     4207 +                } else {
     4208 +                        err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
     4209 +                        if (err != 0)
     4210 +                                break;
     4211 +                }
4146 4212  
4147 4213                  if (snap->ds_phys->ds_prev_snap_txg ==
4148 4214                      oldsnap->ds_phys->ds_creation_txg) {
4149 4215                          /*
4150 4216                           * The blocks in the deadlist can not be born after
4151 4217                           * ds_prev_snap_txg, so get the whole deadlist space,
4152 4218                           * which is more efficient (especially for old-format
4153 4219                           * deadlists).  Unfortunately the deadlist code
4154 4220                           * doesn't have enough information to make this
4155 4221                           * optimization itself.
[... 8 lines elided ...]
4164 4230                  *usedp += used;
4165 4231                  *compp += comp;
4166 4232                  *uncompp += uncomp;
4167 4233  
4168 4234                  /*
4169 4235                   * If we get to the beginning of the chain of snapshots
4170 4236                   * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
4171 4237                   * was not a snapshot of/before new.
4172 4238                   */
4173 4239                  snapobj = snap->ds_phys->ds_prev_snap_obj;
4174      -                dsl_dataset_rele(snap, FTAG);
     4240 +                if (snap != new)
     4241 +                        dsl_dataset_rele(snap, FTAG);
4175 4242                  if (snapobj == 0) {
4176 4243                          err = EINVAL;
4177 4244                          break;
4178 4245                  }
4179 4246  
4180 4247          }
4181 4248          rw_exit(&dp->dp_config_rwlock);
4182 4249          return (err);
4183 4250  }
4184 4251  
[... 62 lines elided ...]
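
For reference, a worked example (hypothetical numbers) of the arithmetic in dsl_dataset_space_written() above:

/*
 * referenced(oldsnap) = 100, referenced(new) = 160, and the deadlist
 * contributions gathered while walking the snapshot chain from new
 * back to oldsnap total 35.  Then
 *
 *      *usedp = 160 - 100 + 35 = 95
 *
 * Roughly, the deadlist terms add back space that oldsnap referenced
 * but new no longer does, so the total reflects only data written
 * since oldsnap.
 */
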