 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_deadlist.h>

static char *dsl_reaper = "the grim reaper";

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_syncfunc_t dsl_dataset_set_reservation_sync;

#define	SWITCH64(x, y) \
{ \
	uint64_t __tmp = (x); \
	(x) = (y); \

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}

void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "ds=%p", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_referenced_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}
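
/*
 * Illustrative example of the accounting above (hypothetical sizes, added
 * for clarity): for a block with dsize 12K, psize 8K, and lsize 16K,
 * dsl_dataset_block_born() adds 12K to ds_referenced_bytes and
 * ds_unique_bytes, 8K to ds_compressed_bytes, and 16K to
 * ds_uncompressed_bytes; parent_delta() determines how much of the 12K is
 * charged to the dsl_dir versus absorbed by the dataset's refreservation.
 */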

int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    boolean_t async)
{
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(bp->blk_birth <= tx->tx_txg);
			dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
		}
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
	ds->ds_phys->ds_referenced_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;

	if (origin == NULL) {
		dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
	} else {
		dsl_dataset_t *ohds;

		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_referenced_bytes =
		    origin->ds_phys->ds_referenced_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
		    origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
		dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
		    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
		dsl_dataset_rele(ohds, FTAG);

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
{
	int err;
	dsl_sync_task_t *dst;
	spa_t *spa;
	nvpair_t *pair;
	dsl_sync_task_group_t *dstg;

	pair = nvlist_next_nvpair(snaps, NULL);
	if (pair == NULL)
		return (0);

	err = spa_open(nvpair_name(pair), &spa, FTAG);
	if (err)
		return (err);
	dstg = dsl_sync_task_group_create(spa_get_dsl(spa));

	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
	    pair = nvlist_next_nvpair(snaps, pair)) {
		dsl_dataset_t *ds;

		err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
		if (err == 0) {
			struct dsl_ds_destroyarg *dsda;

			dsl_dataset_make_exclusive(ds, dstg);
			dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
			    KM_SLEEP);
			dsda->ds = ds;
			dsda->defer = defer;
			dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
			    dsl_dataset_destroy_sync, dsda, dstg, 0);
		} else if (err == ENOENT) {
			err = 0;
		} else {
			(void) strcpy(failed, nvpair_name(pair));
			break;
		}
	}

	}

	dd = ds->ds_dir;
	dummy_ds.ds_dir = dd;
	dummy_ds.ds_object = ds->ds_object;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err)
		goto out;

	err = dmu_objset_from_ds(ds, &os);
	if (err)
		goto out;

	/*
	 * If async destruction is not enabled try to remove all objects
	 * while in the open context so that there is less work to do in
	 * the syncing context.
	 */
	if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
		for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
		    ds->ds_phys->ds_prev_snap_txg)) {
			/*
			 * Ignore errors, if there is not enough disk space
			 * we will deal with it in dsl_dataset_destroy_sync().
			 */
			(void) dmu_free_object(os, obj);
		}
		if (err != ESRCH)
			goto out;
	}

	/*
	 * Only the ZIL knows how to free log blocks.
	 */
	zil_destroy(dmu_objset_zil(os), B_FALSE);

	/*
	 * Sync out all in-flight IO.
	 */
	txg_wait_synced(dd->dd_pool, 0);

	/*
	 * If we managed to free all the objects in open
	 * context, the user space accounting should be zero.
	 */
	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
	    dmu_objset_userused_enabled(os)) {
		uint64_t count;

		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
	}
}

/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot, that is still being used
 * in this file system, from the space currently in use.  To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
 */
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(!dsl_dataset_is_snapshot(ds));

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
	else
		mrs_used = 0;

	dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}
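
/*
 * Worked example of the calculation above (hypothetical numbers, added
 * for illustration): if the head references 100M, the most recent
 * snapshot referenced 80M, and 30M of the snapshot's data is now on the
 * head's deadlist, then the snapshot still shares 80M - 30M = 50M with
 * the head, so the head's unique space is 100M - 50M = 50M.
 */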

struct killarg {
	dsl_dataset_t *ds;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;
	dmu_tx_t *tx = ka->tx;

	if (bp == NULL)
	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
	    process_old_cb, &poa, tx));
	VERIFY3U(zio_wait(poa.pio), ==, 0);
	ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);

	/* change snapused */
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
	    -poa.used, -poa.comp, -poa.uncomp, tx);

	/* swap next's deadlist to our deadlist */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_close(&ds_next->ds_deadlist);
	SWITCH64(ds_next->ds_phys->ds_deadlist_obj,
	    ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
	    ds_next->ds_phys->ds_deadlist_obj);
}

static int
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	int err;
	struct killarg ka;

	/*
	 * Free everything that we point to (that's born after
	 * the previous snapshot, if we are a clone)
	 *
	 * NB: this should be very quick, because we already
	 * freed all the objects in open context.
	 */
	ka.ds = ds;
	ka.tx = tx;
	err = traverse_dataset(ds,
	    ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
	    kill_blkptr, &ka);
	ASSERT3U(err, ==, 0);
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);

	return (err);
}

void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
	struct dsl_ds_destroyarg *dsda = arg1;
	dsl_dataset_t *ds = dsda->ds;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	boolean_t wont_destroy;
	uint64_t obj;

	wont_destroy = (dsda->defer &&
	    (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));

	ASSERT(ds->ds_owner || wont_destroy);
	ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
			dsl_deadlist_space_range(&ds_next->ds_deadlist,
			    ds_prev->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_prev_snap_txg,
			    &used, &comp, &uncomp);
			ds_prev->ds_phys->ds_unique_bytes += used;
		}

		/* Adjust snapused. */
		dsl_deadlist_space_range(&ds_next->ds_deadlist,
		    ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
		    &used, &comp, &uncomp);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -comp, -uncomp, tx);

		/* Move blocks to be freed to pool's free list. */
		dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
		    &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
		    tx);
		dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
		    DD_USED_HEAD, used, comp, uncomp, tx);

		/* Merge our deadlist into next's and free it. */
		dsl_deadlist_merge(&ds_next->ds_deadlist,
		    ds->ds_phys->ds_deadlist_obj, tx);
	}
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);

	/* Collapse range in clone heads */
	dsl_dataset_remove_clones_key(ds,
	    ds->ds_phys->ds_creation_txg, tx);

	if (dsl_dataset_is_snapshot(ds_next)) {
		dsl_dataset_t *ds_nextnext;

		/*
		 * Update next's unique to include blocks which
		 * were previously shared by only this snapshot
		 * and it.  Those blocks will be born after the
		 * prev snap and before this snap, and will have

		/*
		 * Reduce the amount of our unconsumed refreservation
		 * being charged to our parent by the amount of
		 * new unique data we have gained.
		 */
		if (old_unique < ds_next->ds_reserved) {
			int64_t mrsdelta;
			uint64_t new_unique =
			    ds_next->ds_phys->ds_unique_bytes;

			ASSERT(old_unique <= new_unique);
			mrsdelta = MIN(new_unique - old_unique,
			    ds_next->ds_reserved - old_unique);
			dsl_dir_diduse_space(ds->ds_dir,
			    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
		}
	}
	dsl_dataset_rele(ds_next, FTAG);
	} else {
		zfeature_info_t *async_destroy =
		    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];

		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		dsl_deadlist_close(&ds->ds_deadlist);
		dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
			err = old_synchronous_dataset_destroy(ds, tx);
		} else {
			/*
			 * Move the bptree into the pool's list of trees to
			 * clean up and update space accounting information.
			 */
			uint64_t used, comp, uncomp;

			ASSERT(err == 0 || err == EBUSY);
			if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
				spa_feature_incr(dp->dp_spa, async_destroy, tx);
				dp->dp_bptree_obj = bptree_alloc(
				    dp->dp_meta_objset, tx);
				VERIFY(zap_add(dp->dp_meta_objset,
				    DMU_POOL_DIRECTORY_OBJECT,
				    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
				    &dp->dp_bptree_obj, tx) == 0);
			}

			used = ds->ds_dir->dd_phys->dd_used_bytes;
			comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
			uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;

			ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
			    ds->ds_phys->ds_unique_bytes == used);

			bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
			    &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
			    used, comp, uncomp, tx);
			dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
			    -used, -comp, -uncomp, tx);
			dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
			    used, comp, uncomp, tx);
		}

		if (ds->ds_prev != NULL) {
			if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
				VERIFY3U(0, ==, zap_remove_int(mos,
				    ds->ds_prev->ds_dir->dd_phys->dd_clones,
				    ds->ds_object, tx));
			}
			dsl_dataset_rele(ds->ds_prev, ds);
			ds->ds_prev = ds_prev = NULL;
		}
	}

	/*
	 * This must be done after the dsl_traverse(), because it will
	 * re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			remove_from_next_clones(ds->ds_prev,
	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/*
	 * There may be missing entries in ds_next_clones_obj
	 * due to a bug in a previous version of the code.
	 * Only trust it if it has the right number of entries.
	 */
	if (ds->ds_phys->ds_next_clones_obj != 0) {
		ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
		    &count));
	}
	if (count != ds->ds_phys->ds_num_children - 1) {
		goto fail;
	}
	for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;
		char buf[ZFS_MAXNAMELEN];
		/*
		 * Even though we hold the dp_config_rwlock, the dataset
		 * may fail to open, returning ENOENT.  If there is a
		 * thread concurrently attempting to destroy this
		 * dataset, it will have the ds_rwlock held for
		 * RW_WRITER.  Our call to dsl_dataset_hold_obj() ->
		 * dsl_dataset_hold_ref() will fail its
		 * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
		 * dp_config_rwlock, and wait for the destroy progress
		 * and signal ds_exclusive_cv.  If the destroy was
		 * successful, we will see that
		 * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
		 */
		if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za.za_first_integer, FTAG, &clone) != 0)
			continue;
		dsl_dir_name(clone->ds_dir, buf);
		VERIFY(nvlist_add_boolean(val, buf) == 0);
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
	VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
	VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
	    propval) == 0);
fail:
	nvlist_free(val);
	nvlist_free(propval);
	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}

void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	uint64_t refd, avail, uobjs, aobjs, ratio;

	dsl_dir_stats(ds->ds_dir, nv);
		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
		dsl_dataset_name(ods, stat->dds_origin);
		dsl_dataset_drop_ref(ods, FTAG);
	} else {
		stat->dds_origin[0] = '\0';
	}
	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}

uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_referenced_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}
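
/*
 * Example of the refquota clamp above (hypothetical numbers, added for
 * illustration): with a 10G refquota, 8G referenced, and 5G of pool space
 * available, the reported available space is MIN(5G, 10G - 8G) = 2G; once
 * the referenced space reaches the quota, available space drops to 0.
 */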

boolean_t
dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
	dsl_deadlist_space_range(&snap->ds->ds_deadlist,
	    origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
	    &pa->unique, &unused, &unused);

	/*
	 * Walk the snapshots that we are moving
	 *
	 * Compute space to transfer.  Consider the incremental changes
	 * to used for each snapshot:
	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
	 * So each snapshot gave birth to:
	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
	 * So a sequence would look like:
	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
	 * Which simplifies to:
	 * uN + kN + kN-1 + ... + k1 + k0
	 * Note however, if we stop before we reach the ORIGIN we get:
	 * uN + kN + kN-1 + ... + kM - uM-1
	 */
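	/*
	 * Worked instance of the sums above (hypothetical numbers, added
	 * for illustration): with u0=10, k0=2, u1=12, k1=3, u2=15, k2=1,
	 * the full sequence is (15-12+1) + (12-10+3) + (10-0+2) = 21,
	 * which equals u2 + k2 + k1 + k0 = 15 + 1 + 3 + 2 = 21.
	 */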
	pa->used = origin_ds->ds_phys->ds_referenced_bytes;
	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
	for (snap = list_head(&pa->shared_snaps); snap;
	    snap = list_next(&pa->shared_snaps, snap)) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *ds = snap->ds;

		/* Check that the snapshot name does not conflict */
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
		if (err == 0) {
			err = EEXIST;
			goto out;
		}
		if (err != ENOENT)
			goto out;

		/* The very first snapshot does not have a deadlist */
		if (ds->ds_phys->ds_prev_snap_obj == 0)
			continue;

		dsl_deadlist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp);
		pa->used += dlused;
		pa->comp += dlcomp;
		pa->uncomp += dluncomp;
	}

	/*
	 * If we are a clone of a clone then we never reached ORIGIN,
	 * so we need to subtract out the clone origin's used space.
	 */
	if (pa->origin_origin) {
		pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
		pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
		pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
	}

	/* Check that there is enough space here */
	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
	    pa->used);
	if (err)
		return (err);

	/*
	 * Compute the amounts of space that will be used by snapshots
	 * after the promotion (for both origin and clone).  For each,
	 * it is the amount of space that will be on all of their
	 * deadlists (that was not born before their new origin).
	 */
	if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		uint64_t space;

		/*
		blkptr_t tmp;
		tmp = csa->ohds->ds_phys->ds_bp;
		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
		csa->cds->ds_phys->ds_bp = tmp;
	}

	/* set dd_*_bytes */
	{
		int64_t dused, dcomp, duncomp;
		uint64_t cdl_used, cdl_comp, cdl_uncomp;
		uint64_t odl_used, odl_comp, odl_uncomp;

		ASSERT3U(csa->cds->ds_dir->dd_phys->
		    dd_used_breakdown[DD_USED_SNAP], ==, 0);

		dsl_deadlist_space(&csa->cds->ds_deadlist,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space(&csa->ohds->ds_deadlist,
		    &odl_used, &odl_comp, &odl_uncomp);

		dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
		    (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
		    cdl_uncomp -
		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);

		dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
		    dused, dcomp, duncomp, tx);
		dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
		    -dused, -dcomp, -duncomp, tx);

		/*
		 * The difference in the space used by snapshots is the
		 * difference in snapshot space due to the head's
		 * deadlist (since that's the only thing that's
		 * changing that affects the snapused).
		 */
		dsl_deadlist_space_range(&csa->cds->ds_deadlist,
		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
		    &cdl_used, &cdl_comp, &cdl_uncomp);
		dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
		    &odl_used, &odl_comp, &odl_uncomp);
		dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
		    DD_USED_HEAD, DD_USED_SNAP, tx);
	}

	/* swap ds_*_bytes */
	SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
	    csa->cds->ds_phys->ds_referenced_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
	    csa->cds->ds_phys->ds_compressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
	    csa->cds->ds_phys->ds_uncompressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
	    csa->cds->ds_phys->ds_unique_bytes);

	/* apply any parent delta for change in unconsumed refreservation */
	dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
	    csa->unused_refres_delta, 0, 0, tx);

	/*
	 * Swap deadlists.
	 */
	dsl_deadlist_close(&csa->cds->ds_deadlist);
	dsl_deadlist_close(&csa->ohds->ds_deadlist);
	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
	    csa->cds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
	    csa->cds->ds_phys->ds_deadlist_obj);
	 * Make a space adjustment for reserved bytes.
	 */
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
		ASSERT3U(*used, >=,
		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
		*ref_rsrv =
		    asize - MIN(asize, parent_delta(ds, asize + inflight));
	}

	if (!check_quota || ds->ds_quota == 0) {
		mutex_exit(&ds->ds_lock);
		return (0);
	}
	/*
	 * If they are requesting more space, and our current estimate
	 * is over quota, they get to try again unless the actual
	 * on-disk is over quota and there are no pending changes (which
	 * may free up space for us).
	 */
	if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
		if (inflight > 0 ||
		    ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
			error = ERESTART;
		else
			error = EDQUOT;
	}
	mutex_exit(&ds->ds_lock);

	return (error);
}

/* ARGSUSED */
static int
dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_prop_setarg_t *psa = arg2;
	int err;

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
		return (ENOTSUP);

	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
		return (err);

	if (psa->psa_effective_value == 0)
		return (0);

	if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
	    psa->psa_effective_value < ds->ds_reserved)
		return (ENOSPC);

	return (0);
}

extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *);

void
dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_prop_setarg_t *psa = arg2;
	uint64_t effective_value = psa->psa_effective_value;

	dsl_prop_set_sync(ds, psa, tx);
	DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);

	if (ds->ds_quota != effective_value) {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
 * The written space is calculated by considering two components:  First, we
 * ignore any freed space, and calculate the written as new's used space
 * minus old's used space.  Next, we add in the amount of space that was freed
 * between the two snapshots, thus reducing new's used space relative to old's.
 * Specifically, this is the space that was born before old->ds_creation_txg,
 * and freed before new (ie. on new's deadlist or a previous deadlist).
 *
 *    space freed                         [---------------------]
 *    snapshots                       ---O-------O--------O-------O------
 *                                          oldsnap            new
 */
int
dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
	int err = 0;
	uint64_t snapobj;
	dsl_pool_t *dp = new->ds_dir->dd_pool;

	*usedp = 0;
	*usedp += new->ds_phys->ds_referenced_bytes;
	*usedp -= oldsnap->ds_phys->ds_referenced_bytes;

	*compp = 0;
	*compp += new->ds_phys->ds_compressed_bytes;
	*compp -= oldsnap->ds_phys->ds_compressed_bytes;

	*uncompp = 0;
	*uncompp += new->ds_phys->ds_uncompressed_bytes;
	*uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	snapobj = new->ds_object;
	while (snapobj != oldsnap->ds_object) {
		dsl_dataset_t *snap;
		uint64_t used, comp, uncomp;

		err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
		if (err != 0)
			break;

		if (snap->ds_phys->ds_prev_snap_txg ==
		    oldsnap->ds_phys->ds_creation_txg) {
			/*
			 * The blocks in the deadlist can not be born after
			 * ds_prev_snap_txg, so get the whole deadlist space,
			 * which is more efficient (especially for old-format
			 * deadlists).  Unfortunately the deadlist code
			 * doesn't have enough information to make this
			 * optimization itself.
			 */
			dsl_deadlist_space(&snap->ds_deadlist,
			    &used, &comp, &uncomp);
		} else {
			dsl_deadlist_space_range(&snap->ds_deadlist,
			    0, oldsnap->ds_phys->ds_creation_txg,
			    &used, &comp, &uncomp);
		}
		*usedp += used;
		*compp += comp;
		*uncompp += uncomp;

		/*
		 * If we get to the beginning of the chain of snapshots
		 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
		 * was not a snapshot of/before new.
		 */
		snapobj = snap->ds_phys->ds_prev_snap_obj;
		dsl_dataset_rele(snap, FTAG);
		if (snapobj == 0) {
			err = EINVAL;
			break;
		}

	}
	rw_exit(&dp->dp_config_rwlock);
	return (err);
}
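
/*
 * Worked example (hypothetical numbers, added for illustration): if @new
 * references 120M, @old referenced 100M, and 15M that was born before @old
 * was freed between the two snapshots (so it sits on the deadlists walked
 * above), then the space written between them is (120M - 100M) + 15M = 35M.
 */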

/*
 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
 * lastsnap, and all snapshots in between are deleted.
 *
 *    blocks that would be freed         [---------------------------]
 *    snapshots                       ---O-------O--------O-------O--------O
 *                                         firstsnap        lastsnap
 *
 * This is the set of blocks that were born after the snap before firstsnap,
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
25 */
26
27 #include <sys/dmu_objset.h>
28 #include <sys/dsl_dataset.h>
29 #include <sys/dsl_dir.h>
30 #include <sys/dsl_prop.h>
31 #include <sys/dsl_synctask.h>
32 #include <sys/dmu_traverse.h>
33 #include <sys/dmu_impl.h>
34 #include <sys/dmu_tx.h>
35 #include <sys/arc.h>
36 #include <sys/zio.h>
37 #include <sys/zap.h>
38 #include <sys/zfeature.h>
39 #include <sys/unique.h>
40 #include <sys/zfs_context.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/spa.h>
43 #include <sys/zfs_znode.h>
44 #include <sys/zfs_onexit.h>
45 #include <sys/zvol.h>
46 #include <sys/dsl_scan.h>
47 #include <sys/dsl_deadlist.h>
48
49 static char *dsl_reaper = "the grim reaper";
50
51 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
52 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
53 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
54
55 #define SWITCH64(x, y) \
56 { \
57 uint64_t __tmp = (x); \
58 (x) = (y); \
84
85 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
86 return (new_bytes - old_bytes);
87 }
88
89 void
90 dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
91 {
92 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
93 int compressed = BP_GET_PSIZE(bp);
94 int uncompressed = BP_GET_UCSIZE(bp);
95 int64_t delta;
96
97 dprintf_bp(bp, "ds=%p", ds);
98
99 ASSERT(dmu_tx_is_syncing(tx));
100 /* It could have been compressed away to nothing */
101 if (BP_IS_HOLE(bp))
102 return;
103 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
104 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
105 if (ds == NULL) {
106 /*
107 * Account for the meta-objset space in its placeholder
108 * dsl_dir.
109 */
110 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
111 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
112 used, compressed, uncompressed, tx);
113 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
114 return;
115 }
116 dmu_buf_will_dirty(ds->ds_dbuf, tx);
117
118 mutex_enter(&ds->ds_dir->dd_lock);
119 mutex_enter(&ds->ds_lock);
120 delta = parent_delta(ds, used);
121 ds->ds_phys->ds_referenced_bytes += used;
122 ds->ds_phys->ds_compressed_bytes += compressed;
123 ds->ds_phys->ds_uncompressed_bytes += uncompressed;
124 ds->ds_phys->ds_unique_bytes += used;
125 mutex_exit(&ds->ds_lock);
126 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
127 compressed, uncompressed, tx);
128 dsl_dir_transfer_space(ds->ds_dir, used - delta,
129 DD_USED_REFRSRV, DD_USED_HEAD, tx);
130 mutex_exit(&ds->ds_dir->dd_lock);
131 }
132
133 int
134 dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
135 boolean_t async)
136 {
137 if (BP_IS_HOLE(bp))
138 return (0);
139
140 ASSERT(dmu_tx_is_syncing(tx));
141 ASSERT(bp->blk_birth <= tx->tx_txg);
195 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
196 }
197 ASSERT3U(ds->ds_prev->ds_object, ==,
198 ds->ds_phys->ds_prev_snap_obj);
199 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
200 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
201 if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
202 ds->ds_object && bp->blk_birth >
203 ds->ds_prev->ds_phys->ds_prev_snap_txg) {
204 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
205 mutex_enter(&ds->ds_prev->ds_lock);
206 ds->ds_prev->ds_phys->ds_unique_bytes += used;
207 mutex_exit(&ds->ds_prev->ds_lock);
208 }
209 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
210 dsl_dir_transfer_space(ds->ds_dir, used,
211 DD_USED_HEAD, DD_USED_SNAP, tx);
212 }
213 }
214 mutex_enter(&ds->ds_lock);
215 ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
216 ds->ds_phys->ds_referenced_bytes -= used;
217 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
218 ds->ds_phys->ds_compressed_bytes -= compressed;
219 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
220 ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
221 mutex_exit(&ds->ds_lock);
222
223 return (used);
224 }
225
226 uint64_t
227 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
228 {
229 uint64_t trysnap = 0;
230
231 if (ds == NULL)
232 return (0);
233 /*
234 * The snapshot creation could fail, but that would cause an
235 * incorrect FALSE return, which would only result in an
236 * overestimation of the amount of space that an operation would
802 bzero(dsphys, sizeof (dsl_dataset_phys_t));
803 dsphys->ds_dir_obj = dd->dd_object;
804 dsphys->ds_flags = flags;
805 dsphys->ds_fsid_guid = unique_create();
806 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
807 sizeof (dsphys->ds_guid));
808 dsphys->ds_snapnames_zapobj =
809 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
810 DMU_OT_NONE, 0, tx);
811 dsphys->ds_creation_time = gethrestime_sec();
812 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
813
814 if (origin == NULL) {
815 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
816 } else {
817 dsl_dataset_t *ohds;
818
819 dsphys->ds_prev_snap_obj = origin->ds_object;
820 dsphys->ds_prev_snap_txg =
821 origin->ds_phys->ds_creation_txg;
822 dsphys->ds_referenced_bytes =
823 origin->ds_phys->ds_referenced_bytes;
824 dsphys->ds_compressed_bytes =
825 origin->ds_phys->ds_compressed_bytes;
826 dsphys->ds_uncompressed_bytes =
827 origin->ds_phys->ds_uncompressed_bytes;
828 dsphys->ds_bp = origin->ds_phys->ds_bp;
829 dsphys->ds_flags |= origin->ds_phys->ds_flags;
830
831 dmu_buf_will_dirty(origin->ds_dbuf, tx);
832 origin->ds_phys->ds_num_children++;
833
834 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
835 origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
836 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
837 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
838 dsl_dataset_rele(ohds, FTAG);
839
840 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
841 if (origin->ds_phys->ds_next_clones_obj == 0) {
842 origin->ds_phys->ds_next_clones_obj =
843 zap_create(mos,
917 dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
918 {
919 int err;
920 dsl_sync_task_t *dst;
921 spa_t *spa;
922 nvpair_t *pair;
923 dsl_sync_task_group_t *dstg;
924
925 pair = nvlist_next_nvpair(snaps, NULL);
926 if (pair == NULL)
927 return (0);
928
929 err = spa_open(nvpair_name(pair), &spa, FTAG);
930 if (err)
931 return (err);
932 dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
933
934 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
935 pair = nvlist_next_nvpair(snaps, pair)) {
936 dsl_dataset_t *ds;
937
938 err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
939 if (err == 0) {
940 struct dsl_ds_destroyarg *dsda;
941
942 dsl_dataset_make_exclusive(ds, dstg);
943 dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
944 KM_SLEEP);
945 dsda->ds = ds;
946 dsda->defer = defer;
947 dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
948 dsl_dataset_destroy_sync, dsda, dstg, 0);
949 } else if (err == ENOENT) {
950 err = 0;
951 } else {
952 (void) strcpy(failed, nvpair_name(pair));
953 break;
954 }
955 }
956
1065 }
1066
1067 dd = ds->ds_dir;
1068 dummy_ds.ds_dir = dd;
1069 dummy_ds.ds_object = ds->ds_object;
1070
1071 /*
1072 * Check for errors and mark this ds as inconsistent, in
1073 * case we crash while freeing the objects.
1074 */
1075 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
1076 dsl_dataset_destroy_begin_sync, ds, NULL, 0);
1077 if (err)
1078 goto out;
1079
1080 err = dmu_objset_from_ds(ds, &os);
1081 if (err)
1082 goto out;
1083
1084 /*
1085 * If async destruction is not enabled try to remove all objects
1086 * while in the open context so that there is less work to do in
1087 * the syncing context.
1088 */
1089 if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
1090 &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
1091 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
1092 ds->ds_phys->ds_prev_snap_txg)) {
1093 /*
1094 * Ignore errors, if there is not enough disk space
1095 * we will deal with it in dsl_dataset_destroy_sync().
1096 */
1097 (void) dmu_free_object(os, obj);
1098 }
1099 if (err != ESRCH)
1100 goto out;
1101 }
1102
1103 /*
1104 * Only the ZIL knows how to free log blocks.
1105 */
1106 zil_destroy(dmu_objset_zil(os), B_FALSE);
1107
1108 /*
1109 * Sync out all in-flight IO.
1110 */
1111 txg_wait_synced(dd->dd_pool, 0);
1112
1113 /*
1114 * If we managed to free all the objects in open
1115 * context, the user space accounting should be zero.
1116 */
1117 if (ds->ds_phys->ds_bp.blk_fill == 0 &&
1118 dmu_objset_userused_enabled(os)) {
1119 uint64_t count;
1120
1121 ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
1227 }
1228 }
1229
1230 /*
1231 * The unique space in the head dataset can be calculated by subtracting
1232 * the space used in the most recent snapshot, that is still being used
1233 * in this file system, from the space currently in use. To figure out
1234 * the space in the most recent snapshot still in use, we need to take
1235 * the total space used in the snapshot and subtract out the space that
1236 * has been freed up since the snapshot was taken.
1237 */
1238 static void
1239 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
1240 {
1241 uint64_t mrs_used;
1242 uint64_t dlused, dlcomp, dluncomp;
1243
1244 ASSERT(!dsl_dataset_is_snapshot(ds));
1245
1246 if (ds->ds_phys->ds_prev_snap_obj != 0)
1247 mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
1248 else
1249 mrs_used = 0;
1250
1251 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
1252
1253 ASSERT3U(dlused, <=, mrs_used);
1254 ds->ds_phys->ds_unique_bytes =
1255 ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
1256
1257 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
1258 SPA_VERSION_UNIQUE_ACCURATE)
1259 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1260 }
1261
1262 struct killarg {
1263 dsl_dataset_t *ds;
1264 dmu_tx_t *tx;
1265 };
1266
1267 /* ARGSUSED */
1268 static int
1269 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
1270 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1271 {
1272 struct killarg *ka = arg;
1273 dmu_tx_t *tx = ka->tx;
1274
1275 if (bp == NULL)
1593 poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1594 VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
1595 process_old_cb, &poa, tx));
1596 VERIFY3U(zio_wait(poa.pio), ==, 0);
1597 ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
1598
1599 /* change snapused */
1600 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
1601 -poa.used, -poa.comp, -poa.uncomp, tx);
1602
1603 /* swap next's deadlist to our deadlist */
1604 dsl_deadlist_close(&ds->ds_deadlist);
1605 dsl_deadlist_close(&ds_next->ds_deadlist);
1606 SWITCH64(ds_next->ds_phys->ds_deadlist_obj,
1607 ds->ds_phys->ds_deadlist_obj);
1608 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
1609 dsl_deadlist_open(&ds_next->ds_deadlist, mos,
1610 ds_next->ds_phys->ds_deadlist_obj);
1611 }
1612
1613 static int
1614 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
1615 {
1616 int err;
1617 struct killarg ka;
1618
1619 /*
1620 * Free everything that we point to (that's born after
1621 * the previous snapshot, if we are a clone)
1622 *
1623 * NB: this should be very quick, because we already
1624 * freed all the objects in open context.
1625 */
1626 ka.ds = ds;
1627 ka.tx = tx;
1628 err = traverse_dataset(ds,
1629 ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
1630 kill_blkptr, &ka);
1631 ASSERT3U(err, ==, 0);
1632 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
1633
1634 return (err);
1635 }
1636
1637 void
1638 dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
1639 {
1640 struct dsl_ds_destroyarg *dsda = arg1;
1641 dsl_dataset_t *ds = dsda->ds;
1642 int err;
1643 int after_branch_point = FALSE;
1644 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1645 objset_t *mos = dp->dp_meta_objset;
1646 dsl_dataset_t *ds_prev = NULL;
1647 boolean_t wont_destroy;
1648 uint64_t obj;
1649
1650 wont_destroy = (dsda->defer &&
1651 (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));
1652
1653 ASSERT(ds->ds_owner || wont_destroy);
1654 ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
1655 ASSERT(ds->ds_prev == NULL ||
1656 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
1763 dsl_deadlist_space_range(&ds_next->ds_deadlist,
1764 ds_prev->ds_phys->ds_prev_snap_txg,
1765 ds->ds_phys->ds_prev_snap_txg,
1766 &used, &comp, &uncomp);
1767 ds_prev->ds_phys->ds_unique_bytes += used;
1768 }
1769
1770 /* Adjust snapused. */
1771 dsl_deadlist_space_range(&ds_next->ds_deadlist,
1772 ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
1773 &used, &comp, &uncomp);
1774 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
1775 -used, -comp, -uncomp, tx);
1776
1777 /* Move blocks to be freed to pool's free list. */
1778 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
1779 &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
1780 tx);
1781 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
1782 DD_USED_HEAD, used, comp, uncomp, tx);
1783
1784 /* Merge our deadlist into next's and free it. */
1785 dsl_deadlist_merge(&ds_next->ds_deadlist,
1786 ds->ds_phys->ds_deadlist_obj, tx);
1787 }
1788 dsl_deadlist_close(&ds->ds_deadlist);
1789 dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
1790
1791 /* Collapse range in clone heads */
1792 dsl_dataset_remove_clones_key(ds,
1793 ds->ds_phys->ds_creation_txg, tx);
1794
1795 if (dsl_dataset_is_snapshot(ds_next)) {
1796 dsl_dataset_t *ds_nextnext;
1797
1798 /*
1799 * Update next's unique to include blocks which
1800 * were previously shared by only this snapshot
1801 * and it. Those blocks will be born after the
1802 * prev snap and before this snap, and will have
1838
1839 /*
1840 * Reduce the amount of our unconsmed refreservation
1841 * being charged to our parent by the amount of
1842 * new unique data we have gained.
1843 */
1844 if (old_unique < ds_next->ds_reserved) {
1845 int64_t mrsdelta;
1846 uint64_t new_unique =
1847 ds_next->ds_phys->ds_unique_bytes;
1848
1849 ASSERT(old_unique <= new_unique);
1850 mrsdelta = MIN(new_unique - old_unique,
1851 ds_next->ds_reserved - old_unique);
1852 dsl_dir_diduse_space(ds->ds_dir,
1853 DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
1854 }
1855 }
1856 dsl_dataset_rele(ds_next, FTAG);
1857 } else {
1858 zfeature_info_t *async_destroy =
1859 &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
1860
1861 /*
1862 * There's no next snapshot, so this is a head dataset.
1863 * Destroy the deadlist. Unless it's a clone, the
1864 * deadlist should be empty. (If it's a clone, it's
1865 * safe to ignore the deadlist contents.)
1866 */
1867 dsl_deadlist_close(&ds->ds_deadlist);
1868 dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
1869 ds->ds_phys->ds_deadlist_obj = 0;
1870
1871 if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
1872 err = old_synchronous_dataset_destroy(ds, tx);
1873 } else {
1874 /*
1875 * Move the bptree into the pool's list of trees to
1876 * clean up and update space accounting information.
1877 */
1878 uint64_t used, comp, uncomp;
1879
1880 ASSERT(err == 0 || err == EBUSY);
1881 if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
1882 spa_feature_incr(dp->dp_spa, async_destroy, tx);
1883 dp->dp_bptree_obj = bptree_alloc(
1884 dp->dp_meta_objset, tx);
1885 VERIFY(zap_add(dp->dp_meta_objset,
1886 DMU_POOL_DIRECTORY_OBJECT,
1887 DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
1888 &dp->dp_bptree_obj, tx) == 0);
1889 }
1890
1891 used = ds->ds_dir->dd_phys->dd_used_bytes;
1892 comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
1893 uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
1894
1895 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
1896 ds->ds_phys->ds_unique_bytes == used);
1897
1898 bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
1899 &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
1900 used, comp, uncomp, tx);
1901 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
1902 -used, -comp, -uncomp, tx);
1903 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
1904 used, comp, uncomp, tx);
1905 }
1906
1907 if (ds->ds_prev != NULL) {
1908 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
1909 VERIFY3U(0, ==, zap_remove_int(mos,
1910 ds->ds_prev->ds_dir->dd_phys->dd_clones,
1911 ds->ds_object, tx));
1912 }
1913 dsl_dataset_rele(ds->ds_prev, ds);
1914 ds->ds_prev = ds_prev = NULL;
1915 }
1916 }
1917
1918 /*
1919 * This must be done after the dsl_traverse(), because it will
1920 * re-open the objset.
1921 */
1922 if (ds->ds_objset) {
1923 dmu_objset_evict(ds->ds_objset);
1924 ds->ds_objset = NULL;
1925 }
1926
2076 else
2077 crtxg = tx->tx_txg;
2078
2079 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2080 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
2081 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2082 dmu_buf_will_dirty(dbuf, tx);
2083 dsphys = dbuf->db_data;
2084 bzero(dsphys, sizeof (dsl_dataset_phys_t));
2085 dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2086 dsphys->ds_fsid_guid = unique_create();
2087 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2088 sizeof (dsphys->ds_guid));
2089 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2090 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
2091 dsphys->ds_next_snap_obj = ds->ds_object;
2092 dsphys->ds_num_children = 1;
2093 dsphys->ds_creation_time = gethrestime_sec();
2094 dsphys->ds_creation_txg = crtxg;
2095 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
2096 dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
2097 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
2098 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
2099 dsphys->ds_flags = ds->ds_phys->ds_flags;
2100 dsphys->ds_bp = ds->ds_phys->ds_bp;
2101 dmu_buf_rele(dbuf, FTAG);
2102
2103 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
2104 if (ds->ds_prev) {
2105 uint64_t next_clones_obj =
2106 ds->ds_prev->ds_phys->ds_next_clones_obj;
2107 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
2108 ds->ds_object ||
2109 ds->ds_prev->ds_phys->ds_num_children > 1);
2110 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
2111 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
2112 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
2113 ds->ds_prev->ds_phys->ds_creation_txg);
2114 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
2115 } else if (next_clones_obj != 0) {
2116 remove_from_next_clones(ds->ds_prev,
2200 VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2201 VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2202
2203 /*
2204 * There may me missing entries in ds_next_clones_obj
2205 * due to a bug in a previous version of the code.
2206 * Only trust it if it has the right number of entries.
2207 */
2208 if (ds->ds_phys->ds_next_clones_obj != 0) {
2209 ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
2210 &count));
2211 }
2212 if (count != ds->ds_phys->ds_num_children - 1) {
2213 goto fail;
2214 }
2215 for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
2216 zap_cursor_retrieve(&zc, &za) == 0;
2217 zap_cursor_advance(&zc)) {
2218 dsl_dataset_t *clone;
2219 char buf[ZFS_MAXNAMELEN];
2220 /*
2221 * Even though we hold the dp_config_rwlock, the dataset
2222 * may fail to open, returning ENOENT. If there is a
2223 * thread concurrently attempting to destroy this
2224 * dataset, it will have the ds_rwlock held for
2225 * RW_WRITER. Our call to dsl_dataset_hold_obj() ->
2226 * dsl_dataset_hold_ref() will fail its
2227 * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
2228 * dp_config_rwlock, and wait for the destroy progress
2229 * and signal ds_exclusive_cv. If the destroy was
2230 * successful, we will see that
2231 * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
2232 */
2233 if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
2234 za.za_first_integer, FTAG, &clone) != 0)
2235 continue;
2236 dsl_dir_name(clone->ds_dir, buf);
2237 VERIFY(nvlist_add_boolean(val, buf) == 0);
2238 dsl_dataset_rele(clone, FTAG);
2239 }
2240 zap_cursor_fini(&zc);
2241 VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
2242 VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
2243 propval) == 0);
2244 fail:
2245 nvlist_free(val);
2246 nvlist_free(propval);
2247 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
2248 }
2249
2250 void
2251 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
2252 {
2253 uint64_t refd, avail, uobjs, aobjs, ratio;
2254
2255 dsl_dir_stats(ds->ds_dir, nv);
2338 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
2339 dsl_dataset_name(ods, stat->dds_origin);
2340 dsl_dataset_drop_ref(ods, FTAG);
2341 } else {
2342 stat->dds_origin[0] = '\0';
2343 }
2344 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
2345 }
2346
2347 uint64_t
2348 dsl_dataset_fsid_guid(dsl_dataset_t *ds)
2349 {
2350 return (ds->ds_fsid_guid);
2351 }
2352
2353 void
2354 dsl_dataset_space(dsl_dataset_t *ds,
2355 uint64_t *refdbytesp, uint64_t *availbytesp,
2356 uint64_t *usedobjsp, uint64_t *availobjsp)
2357 {
2358 *refdbytesp = ds->ds_phys->ds_referenced_bytes;
2359 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
2360 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
2361 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
2362 if (ds->ds_quota != 0) {
2363 /*
2364 * Adjust available bytes according to refquota
2365 */
2366 if (*refdbytesp < ds->ds_quota)
2367 *availbytesp = MIN(*availbytesp,
2368 ds->ds_quota - *refdbytesp);
2369 else
2370 *availbytesp = 0;
2371 }
2372 *usedobjsp = ds->ds_phys->ds_bp.blk_fill;
2373 *availobjsp = DN_MAX_OBJECT - *usedobjsp;
2374 }
2375
2376 boolean_t
2377 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
2378 {
2675 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
2676 dsl_deadlist_space_range(&snap->ds->ds_deadlist,
2677 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
2678 &pa->unique, &unused, &unused);
2679
2680 /*
2681 * Walk the snapshots that we are moving
2682 *
2683 * Compute space to transfer. Consider the incremental changes
2684 * to used for each snapshot:
2685 * (my used) = (prev's used) + (blocks born) - (blocks killed)
2686 * So each snapshot gave birth to:
2687 * (blocks born) = (my used) - (prev's used) + (blocks killed)
2688 * So a sequence would look like:
2689 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
2690 * Which simplifies to:
2691 * uN + kN + kN-1 + ... + k1 + k0
2692 * Note however, if we stop before we reach the ORIGIN we get:
2693 * uN + kN + kN-1 + ... + kM - uM-1
2694 */
2695 pa->used = origin_ds->ds_phys->ds_referenced_bytes;
2696 pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
2697 pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
2698 for (snap = list_head(&pa->shared_snaps); snap;
2699 snap = list_next(&pa->shared_snaps, snap)) {
2700 uint64_t val, dlused, dlcomp, dluncomp;
2701 dsl_dataset_t *ds = snap->ds;
2702
2703 /* Check that the snapshot name does not conflict */
2704 VERIFY(0 == dsl_dataset_get_snapname(ds));
2705 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
2706 if (err == 0) {
2707 err = EEXIST;
2708 goto out;
2709 }
2710 if (err != ENOENT)
2711 goto out;
2712
2713 /* The very first snapshot does not have a deadlist */
2714 if (ds->ds_phys->ds_prev_snap_obj == 0)
2715 continue;
2716
2717 dsl_deadlist_space(&ds->ds_deadlist,
2718 &dlused, &dlcomp, &dluncomp);
2719 pa->used += dlused;
2720 pa->comp += dlcomp;
2721 pa->uncomp += dluncomp;
2722 }
2723
2724 /*
2725 * If we are a clone of a clone then we never reached ORIGIN,
2726 * so we need to subtract out the clone origin's used space.
2727 */
2728 if (pa->origin_origin) {
2729 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2730 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2731 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2732 }
2733
2734 /* Check that there is enough space here */
2735 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2736 pa->used);
2737 if (err)
2738 return (err);
2739
2740 /*
2741 * Compute the amounts of space that will be used by snapshots
2742 * after the promotion (for both origin and clone). For each,
2743 * it is the amount of space that will be on all of their
2744 * deadlists (that was not born before their new origin).
2745 */
2746 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2747 uint64_t space;
2748
2749 /*
3225 blkptr_t tmp;
3226 tmp = csa->ohds->ds_phys->ds_bp;
3227 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
3228 csa->cds->ds_phys->ds_bp = tmp;
3229 }

        /* set dd_*_bytes */
        {
                int64_t dused, dcomp, duncomp;
                uint64_t cdl_used, cdl_comp, cdl_uncomp;
                uint64_t odl_used, odl_comp, odl_uncomp;

                ASSERT3U(csa->cds->ds_dir->dd_phys->
                    dd_used_breakdown[DD_USED_SNAP], ==, 0);

                dsl_deadlist_space(&csa->cds->ds_deadlist,
                    &cdl_used, &cdl_comp, &cdl_uncomp);
                dsl_deadlist_space(&csa->ohds->ds_deadlist,
                    &odl_used, &odl_comp, &odl_uncomp);

                dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
                    (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
                dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
                    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
                duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
                    cdl_uncomp -
                    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);

                dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
                    dused, dcomp, duncomp, tx);
                dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
                    -dused, -dcomp, -duncomp, tx);

                /*
                 * The difference in the space used by snapshots is the
                 * difference in snapshot space due to the head's
                 * deadlist (since that's the only thing that's
                 * changing that affects the snapused).
                 */
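                /*
                 * Hypothetical illustration: if the clone's deadlist
                 * holds 5G born since dd_origin_txg while the old head's
                 * holds 2G, the call below moves 5G - 2G = 3G from
                 * DD_USED_HEAD to DD_USED_SNAP in ohds's dsl_dir.
                 */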
                dsl_deadlist_space_range(&csa->cds->ds_deadlist,
                    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
                    &cdl_used, &cdl_comp, &cdl_uncomp);
                dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
                    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
                    &odl_used, &odl_comp, &odl_uncomp);
                dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
                    DD_USED_HEAD, DD_USED_SNAP, tx);
        }

        /* swap ds_*_bytes */
        SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
            csa->cds->ds_phys->ds_referenced_bytes);
        SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
            csa->cds->ds_phys->ds_compressed_bytes);
        SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
            csa->cds->ds_phys->ds_uncompressed_bytes);
        SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
            csa->cds->ds_phys->ds_unique_bytes);

        /* apply any parent delta for change in unconsumed refreservation */
        dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
            csa->unused_refres_delta, 0, 0, tx);

        /*
         * Swap deadlists.
         */
        dsl_deadlist_close(&csa->cds->ds_deadlist);
        dsl_deadlist_close(&csa->ohds->ds_deadlist);
        SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
            csa->cds->ds_phys->ds_deadlist_obj);
        dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
            csa->cds->ds_phys->ds_deadlist_obj);
         * Make a space adjustment for reserved bytes.
         */
        if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
                ASSERT3U(*used, >=,
                    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
                *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
                *ref_rsrv =
                    asize - MIN(asize, parent_delta(ds, asize + inflight));
        }
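
        /*
         * Hypothetical illustration: with ds_reserved = 10G and
         * ds_unique_bytes = 4G, 6G of the refreservation is not yet
         * consumed, so it is backed out of *used above; ref_rsrv then
         * reflects how much of the proposed write would be absorbed by
         * the reservation rather than newly charged to the parent.
         */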

        if (!check_quota || ds->ds_quota == 0) {
                mutex_exit(&ds->ds_lock);
                return (0);
        }
        /*
         * If they are requesting more space, and our current estimate
         * is over quota, they get to try again unless the actual
         * on-disk is over quota and there are no pending changes (which
         * may free up space for us).
         */
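        /*
         * For instance (hypothetical numbers): with a 10G refquota,
         * 9.9G referenced on disk, and 200M of dirty data in flight,
         * the estimate (10.1G) is over quota but the on-disk value is
         * not, so the caller gets ERESTART and can retry after pending
         * changes sync out.  Only when the on-disk value itself is at
         * or over quota with nothing in flight do we return EDQUOT.
         */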
        if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
                if (inflight > 0 ||
                    ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
                        error = ERESTART;
                else
                        error = EDQUOT;
        }
        mutex_exit(&ds->ds_lock);

        return (error);
}

/* ARGSUSED */
static int
dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
        dsl_dataset_t *ds = arg1;
        dsl_prop_setarg_t *psa = arg2;
        int err;

        if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
                return (ENOTSUP);

        if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
                return (err);

        if (psa->psa_effective_value == 0)
                return (0);

        if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
            psa->psa_effective_value < ds->ds_reserved)
                return (ENOSPC);
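
        /*
         * Illustration (hypothetical values): a dataset referencing 1G
         * with a 2G refreservation can raise its refquota to 3G, but
         * attempts to set it to 512M (below referenced) or 1.5G (below
         * the reservation) fail the check above with ENOSPC.
         */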

        return (0);
}

extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *);

void
dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
        dsl_dataset_t *ds = arg1;
        dsl_prop_setarg_t *psa = arg2;
        uint64_t effective_value = psa->psa_effective_value;

        dsl_prop_set_sync(ds, psa, tx);
        DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);

        if (ds->ds_quota != effective_value) {
                dmu_buf_will_dirty(ds->ds_dbuf, tx);
 * The written space is calculated by considering two components:  First, we
 * ignore any freed space, and calculate the written as new's used space
 * minus old's used space.  Next, we add in the amount of space that was freed
 * between the two snapshots, thus reducing new's used space relative to old's.
 * Specifically, this is the space that was born before old->ds_creation_txg,
 * and freed before new (i.e., on new's deadlist or a previous deadlist).
 *
 * space freed                         [---------------------]
 * snapshots                       ---O-------O--------O-------O------
 *                                        oldsnap            new
 */
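/*
 * Worked example (hypothetical sizes): if new references 20G and oldsnap
 * referenced 15G, the first component is 5G.  If a further 3G was born
 * before oldsnap's creation txg but freed before new (so it sits on a
 * deadlist in between), the loop below adds it back in, for a total
 * written value of 8G.
 */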
int
dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
        int err = 0;
        uint64_t snapobj;
        dsl_pool_t *dp = new->ds_dir->dd_pool;

        *usedp = 0;
        *usedp += new->ds_phys->ds_referenced_bytes;
        *usedp -= oldsnap->ds_phys->ds_referenced_bytes;

        *compp = 0;
        *compp += new->ds_phys->ds_compressed_bytes;
        *compp -= oldsnap->ds_phys->ds_compressed_bytes;

        *uncompp = 0;
        *uncompp += new->ds_phys->ds_uncompressed_bytes;
        *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;

        rw_enter(&dp->dp_config_rwlock, RW_READER);
        snapobj = new->ds_object;
        while (snapobj != oldsnap->ds_object) {
                dsl_dataset_t *snap;
                uint64_t used, comp, uncomp;

                if (snapobj == new->ds_object) {
                        snap = new;
                } else {
                        err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
                        if (err != 0)
                                break;
                }

                if (snap->ds_phys->ds_prev_snap_txg ==
                    oldsnap->ds_phys->ds_creation_txg) {
                        /*
                         * The blocks in the deadlist cannot be born after
                         * ds_prev_snap_txg, so get the whole deadlist space,
                         * which is more efficient (especially for old-format
                         * deadlists).  Unfortunately the deadlist code
                         * doesn't have enough information to make this
                         * optimization itself.
                         */
                        dsl_deadlist_space(&snap->ds_deadlist,
                            &used, &comp, &uncomp);
                } else {
                        dsl_deadlist_space_range(&snap->ds_deadlist,
                            0, oldsnap->ds_phys->ds_creation_txg,
                            &used, &comp, &uncomp);
                }
                *usedp += used;
                *compp += comp;
                *uncompp += uncomp;

                /*
                 * If we get to the beginning of the chain of snapshots
                 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
                 * was not a snapshot of/before new.
                 */
                snapobj = snap->ds_phys->ds_prev_snap_obj;
                if (snap != new)
                        dsl_dataset_rele(snap, FTAG);
                if (snapobj == 0) {
                        err = EINVAL;
                        break;
                }
        }
        rw_exit(&dp->dp_config_rwlock);
        return (err);
}
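
/*
 * Usage sketch (hypothetical caller, error handling elided): this is the
 * sort of computation that backs "written"-style space accounting.
 *
 *	uint64_t written, comp, uncomp;
 *	if (dsl_dataset_space_written(oldsnap, new,
 *	    &written, &comp, &uncomp) == 0)
 *		(void) printf("%llu bytes written since oldsnap\n",
 *		    (u_longlong_t)written);
 */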

/*
 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
 * lastsnap, and all snapshots in between are deleted.
 *
 * blocks that would be freed            [---------------------------]
 * snapshots                       ---O-------O--------O-------O--------O
 *                                      firstsnap        lastsnap
 *
 * This is the set of blocks that were born after the snap before firstsnap,