6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
24 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
25 * Copyright (c) 2014 RackTop Systems.
26 */
27
28 #include <sys/dmu_objset.h>
29 #include <sys/dsl_dataset.h>
30 #include <sys/dsl_dir.h>
31 #include <sys/dsl_prop.h>
32 #include <sys/dsl_synctask.h>
33 #include <sys/dmu_traverse.h>
34 #include <sys/dmu_impl.h>
35 #include <sys/dmu_tx.h>
36 #include <sys/arc.h>
37 #include <sys/zio.h>
38 #include <sys/zap.h>
39 #include <sys/zfeature.h>
40 #include <sys/unique.h>
41 #include <sys/zfs_context.h>
42 #include <sys/zfs_ioctl.h>
43 #include <sys/spa.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/zfs_onexit.h>
54 * The SPA supports block sizes up to 16MB. However, very large blocks
55 * can have an impact on i/o latency (e.g. tying up a spinning disk for
56 * ~300ms), and also potentially on the memory allocator. Therefore,
57 * we do not allow the recordsize to be set larger than zfs_max_recordsize
58 * (default 1MB). Larger blocks can be created by changing this tunable,
59 * and pools with larger blocks can always be imported and used, regardless
60 * of this setting.
61 */
62 int zfs_max_recordsize = 1 * 1024 * 1024;
63
/*
 * Exchange the values of the two lvalues x and y through a uint64_t
 * temporary.  Each argument is expanded twice, so neither should have
 * side effects.
 *
 * NOTE(review): the expansion is a bare brace block, not a
 * do { } while (0) statement, so "if (c) SWITCH64(a, b); else ..."
 * would not parse; callers need their own braces in that position.
 */
64 #define SWITCH64(x, y) \
65 { \
66 uint64_t __tmp = (x); \
67 (x) = (y); \
68 (y) = __tmp; \
69 }
70
71 #define DS_REF_MAX (1ULL << 62)
72
73 extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
74 extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds);
75
76 /*
77 * Figure out how much of this delta should be propagated to the dsl_dir
78 * layer. If there's a refreservation, that space has already been
79 * partially accounted for in our ancestors.
80 */
81 static int64_t
82 parent_delta(dsl_dataset_t *ds, int64_t delta)
83 {
84 dsl_dataset_phys_t *ds_phys;
85 uint64_t old_bytes, new_bytes;
86
87 if (ds->ds_reserved == 0)
88 return (delta);
89
90 ds_phys = dsl_dataset_phys(ds);
91 old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved);
92 new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
93
94 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
138 boolean_t async)
139 {
140 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
141 int compressed = BP_GET_PSIZE(bp);
142 int uncompressed = BP_GET_UCSIZE(bp);
143
144 if (BP_IS_HOLE(bp))
145 return (0);
146
147 ASSERT(dmu_tx_is_syncing(tx));
148 ASSERT(bp->blk_birth <= tx->tx_txg);
149
150 if (ds == NULL) {
151 dsl_free(tx->tx_pool, tx->tx_txg, bp);
152 dsl_pool_mos_diduse_space(tx->tx_pool,
153 -used, -compressed, -uncompressed);
154 return (used);
155 }
156 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
157
158 ASSERT(!dsl_dataset_is_snapshot(ds));
159 dmu_buf_will_dirty(ds->ds_dbuf, tx);
160
161 if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
162 int64_t delta;
163
164 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
165 dsl_free(tx->tx_pool, tx->tx_txg, bp);
166
167 mutex_enter(&ds->ds_lock);
168 ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used ||
169 !DS_UNIQUE_IS_ACCURATE(ds));
170 delta = parent_delta(ds, -used);
171 dsl_dataset_phys(ds)->ds_unique_bytes -= used;
172 mutex_exit(&ds->ds_lock);
173 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
174 delta, -compressed, -uncompressed, tx);
175 dsl_dir_transfer_space(ds->ds_dir, -used - delta,
176 DD_USED_REFRSRV, DD_USED_HEAD, tx);
177 } else {
178 dprintf_bp(bp, "putting on dead list: %s", "");
236 */
237 if (ds->ds_trysnap_txg >
238 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
239 trysnap = ds->ds_trysnap_txg;
240 return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap));
241 }
242
/*
 * Report whether a block born in txg blk_birth could be freed from ds.
 *
 * Returns B_FALSE when blk_birth is not later than
 * dsl_dataset_prev_snap_txg(ds), or when bp is non-NULL and is a hole.
 * Otherwise a DDT prefetch is issued for bp and B_TRUE is returned.
 */
243 boolean_t
244 dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
245 uint64_t blk_birth)
246 {
247 if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
248 (bp != NULL && BP_IS_HOLE(bp)))
249 return (B_FALSE);
250
/*
 * NOTE(review): bp may still be NULL at this point (only the hole test
 * above is NULL-guarded); presumably ddt_prefetch() tolerates a NULL
 * bp -- confirm against its implementation.
 */
251 ddt_prefetch(dsl_dataset_get_spa(ds), bp);
252
253 return (B_TRUE);
254 }
255
/*
 * Eviction callback registered on the dataset's bonus dbuf by
 * dmu_buf_set_user_ie() in this file.  Tears down and frees the in-core
 * dsl_dataset_t passed as dsv; the db argument is not used.
 */
256 /* ARGSUSED */
257 static void
258 dsl_dataset_evict(dmu_buf_t *db, void *dsv)
259 {
260 dsl_dataset_t *ds = dsv;
261
/* An owned dataset must never reach eviction. */
262 ASSERT(ds->ds_owner == NULL);
263
/* Give back the value that was stored from unique_insert(). */
264 unique_remove(ds->ds_fsid_guid);
265
266 if (ds->ds_objset != NULL)
267 dmu_objset_evict(ds->ds_objset);
268
/* The hold on the previous snapshot was taken with ds itself as tag. */
269 if (ds->ds_prev) {
270 dsl_dataset_rele(ds->ds_prev, ds);
271 ds->ds_prev = NULL;
272 }
273
274 bplist_destroy(&ds->ds_pending_deadlist);
/*
 * The deadlist is closed only when the on-disk dataset records a
 * deadlist object; this reads the phys structure during eviction.
 */
275 if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0)
276 dsl_deadlist_close(&ds->ds_deadlist);
/* The dsl_dir hold was likewise taken with ds as tag. */
277 if (ds->ds_dir)
278 dsl_dir_rele(ds->ds_dir, ds);
279
280 ASSERT(!list_link_active(&ds->ds_synced_link));
281
/* Destroy the locks and refcount created when ds was instantiated. */
282 mutex_destroy(&ds->ds_lock);
283 mutex_destroy(&ds->ds_opening_lock);
284 mutex_destroy(&ds->ds_sendstream_lock);
285 refcount_destroy(&ds->ds_longholds);
286
287 kmem_free(ds, sizeof (dsl_dataset_t));
288 }
289
290 int
291 dsl_dataset_get_snapname(dsl_dataset_t *ds)
292 {
293 dsl_dataset_phys_t *headphys;
294 int err;
295 dmu_buf_t *headdbuf;
296 dsl_pool_t *dp = ds->ds_dir->dd_pool;
297 objset_t *mos = dp->dp_meta_objset;
298
372 ASSERT(dsl_pool_config_held(dp));
373
374 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
375 if (err != 0)
376 return (err);
377
378 /* Make sure dsobj has the correct object type. */
379 dmu_object_info_from_db(dbuf, &doi);
380 if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
381 dmu_buf_rele(dbuf, tag);
382 return (SET_ERROR(EINVAL));
383 }
384
385 ds = dmu_buf_get_user(dbuf);
386 if (ds == NULL) {
387 dsl_dataset_t *winner = NULL;
388
389 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
390 ds->ds_dbuf = dbuf;
391 ds->ds_object = dsobj;
392
393 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
394 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
395 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
396 refcount_create(&ds->ds_longholds);
397
398 bplist_create(&ds->ds_pending_deadlist);
399 dsl_deadlist_open(&ds->ds_deadlist,
400 mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
401
402 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
403 offsetof(dmu_sendarg_t, dsa_link));
404
405 if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
406 err = zap_contains(mos, dsobj, DS_FIELD_LARGE_BLOCKS);
407 if (err == 0)
408 ds->ds_large_blocks = B_TRUE;
409 else
410 ASSERT3U(err, ==, ENOENT);
411 }
412
413 if (err == 0) {
414 err = dsl_dir_hold_obj(dp,
415 dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds,
416 &ds->ds_dir);
417 }
418 if (err != 0) {
419 mutex_destroy(&ds->ds_lock);
420 mutex_destroy(&ds->ds_opening_lock);
421 mutex_destroy(&ds->ds_sendstream_lock);
422 refcount_destroy(&ds->ds_longholds);
423 bplist_destroy(&ds->ds_pending_deadlist);
424 dsl_deadlist_close(&ds->ds_deadlist);
425 kmem_free(ds, sizeof (dsl_dataset_t));
426 dmu_buf_rele(dbuf, tag);
427 return (err);
428 }
429
430 if (!dsl_dataset_is_snapshot(ds)) {
431 ds->ds_snapname[0] = '\0';
432 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
433 err = dsl_dataset_hold_obj(dp,
434 dsl_dataset_phys(ds)->ds_prev_snap_obj,
435 ds, &ds->ds_prev);
436 }
437 if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
438 int zaperr = zap_lookup(mos, ds->ds_object,
439 DS_FIELD_BOOKMARK_NAMES,
440 sizeof (ds->ds_bookmarks), 1,
441 &ds->ds_bookmarks);
442 if (zaperr != ENOENT)
443 VERIFY0(zaperr);
444 }
445 } else {
446 if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
447 err = dsl_dataset_get_snapname(ds);
448 if (err == 0 &&
449 dsl_dataset_phys(ds)->ds_userrefs_obj != 0) {
450 err = zap_count(
451 ds->ds_dir->dd_pool->dp_meta_objset,
452 dsl_dataset_phys(ds)->ds_userrefs_obj,
453 &ds->ds_userrefs);
454 }
455 }
456
457 if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
458 err = dsl_prop_get_int_ds(ds,
459 zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
460 &ds->ds_reserved);
461 if (err == 0) {
462 err = dsl_prop_get_int_ds(ds,
463 zfs_prop_to_name(ZFS_PROP_REFQUOTA),
464 &ds->ds_quota);
465 }
466 } else {
467 ds->ds_reserved = ds->ds_quota = 0;
468 }
469
470 if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
471 dsl_dataset_evict)) != NULL) {
472 bplist_destroy(&ds->ds_pending_deadlist);
473 dsl_deadlist_close(&ds->ds_deadlist);
474 if (ds->ds_prev)
475 dsl_dataset_rele(ds->ds_prev, ds);
476 dsl_dir_rele(ds->ds_dir, ds);
477 mutex_destroy(&ds->ds_lock);
478 mutex_destroy(&ds->ds_opening_lock);
479 mutex_destroy(&ds->ds_sendstream_lock);
480 refcount_destroy(&ds->ds_longholds);
481 kmem_free(ds, sizeof (dsl_dataset_t));
482 if (err != 0) {
483 dmu_buf_rele(dbuf, tag);
484 return (err);
485 }
486 ds = winner;
487 } else {
488 ds->ds_fsid_guid =
489 unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
490 }
491 }
831 dsl_dataset_rele(ds, FTAG);
832 }
833
834 return (dsobj);
835 }
836
837 /*
838 * The unique space in the head dataset can be calculated by subtracting
839 * the space used in the most recent snapshot, that is still being used
840 * in this file system, from the space currently in use. To figure out
841 * the space in the most recent snapshot still in use, we need to take
842 * the total space used in the snapshot and subtract out the space that
843 * has been freed up since the snapshot was taken.
844 */
845 void
846 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
847 {
848 uint64_t mrs_used;
849 uint64_t dlused, dlcomp, dluncomp;
850
/* Only meaningful for a head dataset. */
851 ASSERT(!dsl_dataset_is_snapshot(ds));
852
/*
 * mrs_used: bytes referenced by the most recent snapshot, or 0 when
 * there is none.  Relies on ds->ds_prev being set whenever
 * ds_prev_snap_obj is non-zero.
 */
853 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
854 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
855 else
856 mrs_used = 0;
857
/* dlused: space on this dataset's deadlist; dlcomp/dluncomp are unused. */
858 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
859
/* unique = referenced - (snapshot space still in use). */
860 ASSERT3U(dlused, <=, mrs_used);
861 dsl_dataset_phys(ds)->ds_unique_bytes =
862 dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused);
863
/* Flag the value as accurate only on pools at or past this SPA version. */
864 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
865 SPA_VERSION_UNIQUE_ACCURATE)
866 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
867 }
868
869 void
870 dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
871 dmu_tx_t *tx)
1574 nvlist_free(val);
1575 nvlist_free(propval);
1576 }
1577
1578 void
1579 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1580 {
1581 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1582 uint64_t refd, avail, uobjs, aobjs, ratio;
1583
1584 ASSERT(dsl_pool_config_held(dp));
1585
1586 ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 :
1587 (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /
1588 dsl_dataset_phys(ds)->ds_compressed_bytes);
1589
1590 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
1591 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
1592 dsl_dataset_phys(ds)->ds_uncompressed_bytes);
1593
1594 if (dsl_dataset_is_snapshot(ds)) {
1595 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
1596 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1597 dsl_dataset_phys(ds)->ds_unique_bytes);
1598 get_clones_stat(ds, nv);
1599 } else {
1600 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
1601 char buf[MAXNAMELEN];
1602 dsl_dataset_name(ds->ds_prev, buf);
1603 dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
1604 }
1605
1606 dsl_dir_stats(ds->ds_dir, nv);
1607 }
1608
1609 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1610 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1611 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1612
1613 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1614 dsl_dataset_phys(ds)->ds_creation_time);
1642 dsl_dataset_rele(prev, FTAG);
1643 if (err == 0) {
1644 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
1645 written);
1646 }
1647 }
1648 }
1649 }
1650
/*
 * Fill *stat with basic facts about ds: creation txg, inconsistent flag,
 * guid, whether it is a snapshot, its clone count, and (for a head
 * dataset whose dsl_dir is a clone) the name of its origin.
 * The pool config lock must be held (asserted below).
 */
1651 void
1652 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1653 {
1654 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1655 ASSERT(dsl_pool_config_held(dp));
1656
1657 stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
1658 stat->dds_inconsistent =
1659 dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
1660 stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
/* Origin name defaults to empty; only the clone case below fills it in. */
1661 stat->dds_origin[0] = '\0';
1662 if (dsl_dataset_is_snapshot(ds)) {
1663 stat->dds_is_snapshot = B_TRUE;
/*
 * NOTE(review): presumably ds_num_children counts one entry besides
 * the clones, hence the "- 1" -- confirm against the on-disk format.
 */
1664 stat->dds_num_clones =
1665 dsl_dataset_phys(ds)->ds_num_children - 1;
1666 } else {
1667 stat->dds_is_snapshot = B_FALSE;
1668 stat->dds_num_clones = 0;
1669
1670 if (dsl_dir_is_clone(ds->ds_dir)) {
1671 dsl_dataset_t *ods;
1672
/*
 * Temporarily hold the origin dataset just long enough to copy its
 * name; a failed hold trips VERIFY0 rather than returning an error.
 */
1673 VERIFY0(dsl_dataset_hold_obj(dp,
1674 dsl_dir_phys(ds->ds_dir)->dd_origin_obj,
1675 FTAG, &ods));
1676 dsl_dataset_name(ods, stat->dds_origin);
1677 dsl_dataset_rele(ods, FTAG);
1678 }
1679 }
1680 }
1681
1682 uint64_t
/*
 * Argument bundle received as "arg" by dsl_dataset_rollback_check().
 *   ddra_fsname - name passed to dsl_dataset_hold() to find the dataset.
 *   ddra_owner  - NOTE(review): presumably the expected owner tag of the
 *                 dataset; its use is not visible here -- confirm.
 *   ddra_result - NOTE(review): presumably an nvlist for reporting the
 *                 outcome to the caller; its use is not visible here.
 */
1902 typedef struct dsl_dataset_rollback_arg {
1903 const char *ddra_fsname;
1904 void *ddra_owner;
1905 nvlist_t *ddra_result;
1906 } dsl_dataset_rollback_arg_t;
1907
1908 static int
1909 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
1910 {
1911 dsl_dataset_rollback_arg_t *ddra = arg;
1912 dsl_pool_t *dp = dmu_tx_pool(tx);
1913 dsl_dataset_t *ds;
1914 int64_t unused_refres_delta;
1915 int error;
1916
1917 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
1918 if (error != 0)
1919 return (error);
1920
1921 /* must not be a snapshot */
1922 if (dsl_dataset_is_snapshot(ds)) {
1923 dsl_dataset_rele(ds, FTAG);
1924 return (SET_ERROR(EINVAL));
1925 }
1926
1927 /* must have a most recent snapshot */
1928 if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) {
1929 dsl_dataset_rele(ds, FTAG);
1930 return (SET_ERROR(EINVAL));
1931 }
1932
1933 /* must not have any bookmarks after the most recent snapshot */
1934 nvlist_t *proprequest = fnvlist_alloc();
1935 fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
1936 nvlist_t *bookmarks = fnvlist_alloc();
1937 error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
1938 fnvlist_free(proprequest);
1939 if (error != 0)
1940 return (error);
1941 for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
1942 pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
2474 list_remove(l, snap);
2475 dsl_dataset_rele(snap->ds, tag);
2476 kmem_free(snap, sizeof (*snap));
2477 }
2478 list_destroy(l);
2479 }
2480
2481 static int
2482 promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
2483 {
2484 int error;
2485 dsl_dir_t *dd;
2486 struct promotenode *snap;
2487
2488 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
2489 &ddpa->ddpa_clone);
2490 if (error != 0)
2491 return (error);
2492 dd = ddpa->ddpa_clone->ds_dir;
2493
2494 if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) ||
2495 !dsl_dir_is_clone(dd)) {
2496 dsl_dataset_rele(ddpa->ddpa_clone, tag);
2497 return (SET_ERROR(EINVAL));
2498 }
2499
2500 error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj,
2501 &ddpa->shared_snaps, tag);
2502 if (error != 0)
2503 goto out;
2504
2505 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
2506 &ddpa->clone_snaps, tag);
2507 if (error != 0)
2508 goto out;
2509
2510 snap = list_head(&ddpa->shared_snaps);
2511 ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj);
2512 error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj,
2513 dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj,
2514 &ddpa->origin_snaps, tag);
2566 dmu_objset_rele(os, FTAG);
2567 if (error != 0)
2568 return (error);
2569
2570 ddpa.ddpa_clonename = name;
2571 ddpa.err_ds = conflsnap;
2572 ddpa.cr = CRED();
2573
2574 return (dsl_sync_task(name, dsl_dataset_promote_check,
2575 dsl_dataset_promote_sync, &ddpa,
2576 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED));
2577 }
2578
2579 int
2580 dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
2581 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
2582 {
2583 int64_t unused_refres_delta;
2584
2585 /* they should both be heads */
2586 if (dsl_dataset_is_snapshot(clone) ||
2587 dsl_dataset_is_snapshot(origin_head))
2588 return (SET_ERROR(EINVAL));
2589
2590 /* if we are not forcing, the branch point should be just before them */
2591 if (!force && clone->ds_prev != origin_head->ds_prev)
2592 return (SET_ERROR(EINVAL));
2593
2594 /* clone should be the clone (unless they are unrelated) */
2595 if (clone->ds_prev != NULL &&
2596 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
2597 origin_head->ds_dir != clone->ds_prev->ds_dir)
2598 return (SET_ERROR(EINVAL));
2599
2600 /* the clone should be a child of the origin */
2601 if (clone->ds_dir->dd_parent != origin_head->ds_dir)
2602 return (SET_ERROR(EINVAL));
2603
2604 /* origin_head shouldn't be modified unless 'force' */
2605 if (!force &&
2606 dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))
2607 return (SET_ERROR(ETXTBSY));
2846 } dsl_dataset_set_qr_arg_t;
2847
2848
2849 /* ARGSUSED */
2850 static int
2851 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
2852 {
2853 dsl_dataset_set_qr_arg_t *ddsqra = arg;
2854 dsl_pool_t *dp = dmu_tx_pool(tx);
2855 dsl_dataset_t *ds;
2856 int error;
2857 uint64_t newval;
2858
2859 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
2860 return (SET_ERROR(ENOTSUP));
2861
2862 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2863 if (error != 0)
2864 return (error);
2865
2866 if (dsl_dataset_is_snapshot(ds)) {
2867 dsl_dataset_rele(ds, FTAG);
2868 return (SET_ERROR(EINVAL));
2869 }
2870
2871 error = dsl_prop_predict(ds->ds_dir,
2872 zfs_prop_to_name(ZFS_PROP_REFQUOTA),
2873 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2874 if (error != 0) {
2875 dsl_dataset_rele(ds, FTAG);
2876 return (error);
2877 }
2878
2879 if (newval == 0) {
2880 dsl_dataset_rele(ds, FTAG);
2881 return (0);
2882 }
2883
2884 if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes ||
2885 newval < ds->ds_reserved) {
2886 dsl_dataset_rele(ds, FTAG);
2929 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
2930 dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
2931 }
2932
2933 static int
2934 dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
2935 {
2936 dsl_dataset_set_qr_arg_t *ddsqra = arg;
2937 dsl_pool_t *dp = dmu_tx_pool(tx);
2938 dsl_dataset_t *ds;
2939 int error;
2940 uint64_t newval, unique;
2941
2942 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
2943 return (SET_ERROR(ENOTSUP));
2944
2945 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2946 if (error != 0)
2947 return (error);
2948
2949 if (dsl_dataset_is_snapshot(ds)) {
2950 dsl_dataset_rele(ds, FTAG);
2951 return (SET_ERROR(EINVAL));
2952 }
2953
2954 error = dsl_prop_predict(ds->ds_dir,
2955 zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
2956 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2957 if (error != 0) {
2958 dsl_dataset_rele(ds, FTAG);
2959 return (error);
2960 }
2961
2962 /*
2963 * If we are doing the preliminary check in open context, the
2964 * space estimates may be inaccurate.
2965 */
2966 if (!dmu_tx_is_syncing(tx)) {
2967 dsl_dataset_rele(ds, FTAG);
2968 return (0);
2969 }
3143 * blocks that would be freed [---------------------------]
3144 * snapshots ---O-------O--------O-------O--------O
3145 * firstsnap lastsnap
3146 *
3147 * This is the set of blocks that were born after the snap before firstsnap,
3148 * (birth > firstsnap->prev_snap_txg) and died before the snap after the
3149 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
3150 * We calculate this by iterating over the relevant deadlists (from the snap
3151 * after lastsnap, backward to the snap after firstsnap), summing up the
3152 * space on the deadlist that was born after the snap before firstsnap.
3153 */
3154 int
3155 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
3156 dsl_dataset_t *lastsnap,
3157 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
3158 {
3159 int err = 0;
3160 uint64_t snapobj;
3161 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
3162
3163 ASSERT(dsl_dataset_is_snapshot(firstsnap));
3164 ASSERT(dsl_dataset_is_snapshot(lastsnap));
3165
3166 /*
3167 * Check that the snapshots are in the same dsl_dir, and firstsnap
3168 * is before lastsnap.
3169 */
3170 if (firstsnap->ds_dir != lastsnap->ds_dir ||
3171 dsl_dataset_phys(firstsnap)->ds_creation_txg >
3172 dsl_dataset_phys(lastsnap)->ds_creation_txg)
3173 return (SET_ERROR(EINVAL));
3174
3175 *usedp = *compp = *uncompp = 0;
3176
3177 snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj;
3178 while (snapobj != firstsnap->ds_object) {
3179 dsl_dataset_t *ds;
3180 uint64_t used, comp, uncomp;
3181
3182 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
3183 if (err != 0)
3184 break;
3269 }
3270
3271 /*
3272 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
3273 * For example, they could both be snapshots of the same filesystem, and
3274 * 'earlier' is before 'later'. Or 'earlier' could be the origin of
3275 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
3276 * filesystem. Or 'earlier' could be the origin's origin.
3277 *
3278 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg.
3279 */
3280 boolean_t
3281 dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
3282 uint64_t earlier_txg)
3283 {
3284 dsl_pool_t *dp = later->ds_dir->dd_pool;
3285 int error;
3286 boolean_t ret;
3287
3288 ASSERT(dsl_pool_config_held(dp));
/* A non-snapshot 'earlier' is only accepted with an explicit txg. */
3289 ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0);
3290
3291 if (earlier_txg == 0)
3292 earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
3293
/* A snapshot cannot come after something created at or past its own txg. */
3294 if (dsl_dataset_is_snapshot(later) &&
3295 earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
3296 return (B_FALSE);
3297
/* Same dsl_dir: the txg comparison above already decided the order. */
3298 if (later->ds_dir == earlier->ds_dir)
3299 return (B_TRUE);
/* Different dsl_dir and no origin to follow: unrelated. */
3300 if (!dsl_dir_is_clone(later->ds_dir))
3301 return (B_FALSE);
3302
/* 'earlier' is the direct origin of later's dsl_dir. */
3303 if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object)
3304 return (B_TRUE);
/*
 * Otherwise repeat the question one step up the clone chain, with the
 * origin snapshot standing in for 'later'.  A failure to hold the
 * origin is reported as B_FALSE.
 */
3305 dsl_dataset_t *origin;
3306 error = dsl_dataset_hold_obj(dp,
3307 dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);
3308 if (error != 0)
3309 return (B_FALSE);
3310 ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
3311 dsl_dataset_rele(origin, FTAG);
3312 return (ret);
3313 }
3314
|
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
24 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
25 * Copyright (c) 2014 RackTop Systems.
26 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
27 */
28
29 #include <sys/dmu_objset.h>
30 #include <sys/dsl_dataset.h>
31 #include <sys/dsl_dir.h>
32 #include <sys/dsl_prop.h>
33 #include <sys/dsl_synctask.h>
34 #include <sys/dmu_traverse.h>
35 #include <sys/dmu_impl.h>
36 #include <sys/dmu_tx.h>
37 #include <sys/arc.h>
38 #include <sys/zio.h>
39 #include <sys/zap.h>
40 #include <sys/zfeature.h>
41 #include <sys/unique.h>
42 #include <sys/zfs_context.h>
43 #include <sys/zfs_ioctl.h>
44 #include <sys/spa.h>
45 #include <sys/zfs_znode.h>
46 #include <sys/zfs_onexit.h>
55 * The SPA supports block sizes up to 16MB. However, very large blocks
56 * can have an impact on i/o latency (e.g. tying up a spinning disk for
57 * ~300ms), and also potentially on the memory allocator. Therefore,
58 * we do not allow the recordsize to be set larger than zfs_max_recordsize
59 * (default 1MB). Larger blocks can be created by changing this tunable,
60 * and pools with larger blocks can always be imported and used, regardless
61 * of this setting.
62 */
63 int zfs_max_recordsize = 1 * 1024 * 1024;
64
/*
 * Exchange the values of the two lvalues x and y through a uint64_t
 * temporary.  Each argument is expanded twice, so neither should have
 * side effects.
 *
 * NOTE(review): the expansion is a bare brace block, not a
 * do { } while (0) statement, so "if (c) SWITCH64(a, b); else ..."
 * would not parse; callers need their own braces in that position.
 */
65 #define SWITCH64(x, y) \
66 { \
67 uint64_t __tmp = (x); \
68 (x) = (y); \
69 (y) = __tmp; \
70 }
71
72 #define DS_REF_MAX (1ULL << 62)
73
74 extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
75
76 /*
77 * Figure out how much of this delta should be propagated to the dsl_dir
78 * layer. If there's a refreservation, that space has already been
79 * partially accounted for in our ancestors.
80 */
81 static int64_t
82 parent_delta(dsl_dataset_t *ds, int64_t delta)
83 {
84 dsl_dataset_phys_t *ds_phys;
85 uint64_t old_bytes, new_bytes;
86
87 if (ds->ds_reserved == 0)
88 return (delta);
89
90 ds_phys = dsl_dataset_phys(ds);
91 old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved);
92 new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
93
94 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
138 boolean_t async)
139 {
140 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
141 int compressed = BP_GET_PSIZE(bp);
142 int uncompressed = BP_GET_UCSIZE(bp);
143
144 if (BP_IS_HOLE(bp))
145 return (0);
146
147 ASSERT(dmu_tx_is_syncing(tx));
148 ASSERT(bp->blk_birth <= tx->tx_txg);
149
150 if (ds == NULL) {
151 dsl_free(tx->tx_pool, tx->tx_txg, bp);
152 dsl_pool_mos_diduse_space(tx->tx_pool,
153 -used, -compressed, -uncompressed);
154 return (used);
155 }
156 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
157
158 ASSERT(!ds->ds_is_snapshot);
159 dmu_buf_will_dirty(ds->ds_dbuf, tx);
160
161 if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
162 int64_t delta;
163
164 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
165 dsl_free(tx->tx_pool, tx->tx_txg, bp);
166
167 mutex_enter(&ds->ds_lock);
168 ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used ||
169 !DS_UNIQUE_IS_ACCURATE(ds));
170 delta = parent_delta(ds, -used);
171 dsl_dataset_phys(ds)->ds_unique_bytes -= used;
172 mutex_exit(&ds->ds_lock);
173 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
174 delta, -compressed, -uncompressed, tx);
175 dsl_dir_transfer_space(ds->ds_dir, -used - delta,
176 DD_USED_REFRSRV, DD_USED_HEAD, tx);
177 } else {
178 dprintf_bp(bp, "putting on dead list: %s", "");
236 */
237 if (ds->ds_trysnap_txg >
238 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
239 trysnap = ds->ds_trysnap_txg;
240 return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap));
241 }
242
/*
 * Report whether a block born in txg blk_birth could be freed from ds.
 *
 * Returns B_FALSE when blk_birth is not later than
 * dsl_dataset_prev_snap_txg(ds), or when bp is non-NULL and is a hole.
 * Otherwise a DDT prefetch is issued for bp and B_TRUE is returned.
 */
243 boolean_t
244 dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
245 uint64_t blk_birth)
246 {
247 if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
248 (bp != NULL && BP_IS_HOLE(bp)))
249 return (B_FALSE);
250
/*
 * NOTE(review): bp may still be NULL at this point (only the hole test
 * above is NULL-guarded); presumably ddt_prefetch() tolerates a NULL
 * bp -- confirm against its implementation.
 */
251 ddt_prefetch(dsl_dataset_get_spa(ds), bp);
252
253 return (B_TRUE);
254 }
255
/*
 * Eviction callback installed with dmu_buf_init_user() in this file.
 * Tears down and frees the in-core dsl_dataset_t.
 *
 * NOTE(review): dbu is used directly as the dsl_dataset_t pointer, which
 * presumably requires ds_dbu to be the first member of dsl_dataset_t --
 * confirm against the structure definition.
 */
256 static void
257 dsl_dataset_evict(void *dbu)
258 {
259 dsl_dataset_t *ds = dbu;
260
/* An owned dataset must never reach eviction. */
261 ASSERT(ds->ds_owner == NULL);
262
/*
 * Drop the dbuf pointer up front; nothing in this function calls
 * dsl_dataset_phys() afterwards.
 */
263 ds->ds_dbuf = NULL;
264
/* Give back the value that was stored from unique_insert(). */
265 unique_remove(ds->ds_fsid_guid);
266
267 if (ds->ds_objset != NULL)
268 dmu_objset_evict(ds->ds_objset);
269
/* The hold on the previous snapshot was taken with ds itself as tag. */
270 if (ds->ds_prev) {
271 dsl_dataset_rele(ds->ds_prev, ds);
272 ds->ds_prev = NULL;
273 }
274
275 bplist_destroy(&ds->ds_pending_deadlist);
/*
 * Whether the deadlist is open is judged from in-core state (dl_os)
 * rather than from the on-disk phys structure.
 */
276 if (ds->ds_deadlist.dl_os != NULL)
277 dsl_deadlist_close(&ds->ds_deadlist);
/* The dsl_dir hold was taken with ds as tag; released via the async variant. */
278 if (ds->ds_dir)
279 dsl_dir_async_rele(ds->ds_dir, ds);
280
281 ASSERT(!list_link_active(&ds->ds_synced_link));
282
/* Destroy the locks and refcount created when ds was instantiated. */
283 mutex_destroy(&ds->ds_lock);
284 mutex_destroy(&ds->ds_opening_lock);
285 mutex_destroy(&ds->ds_sendstream_lock);
286 refcount_destroy(&ds->ds_longholds);
287
288 kmem_free(ds, sizeof (dsl_dataset_t));
289 }
290
291 int
292 dsl_dataset_get_snapname(dsl_dataset_t *ds)
293 {
294 dsl_dataset_phys_t *headphys;
295 int err;
296 dmu_buf_t *headdbuf;
297 dsl_pool_t *dp = ds->ds_dir->dd_pool;
298 objset_t *mos = dp->dp_meta_objset;
299
373 ASSERT(dsl_pool_config_held(dp));
374
375 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
376 if (err != 0)
377 return (err);
378
379 /* Make sure dsobj has the correct object type. */
380 dmu_object_info_from_db(dbuf, &doi);
381 if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
382 dmu_buf_rele(dbuf, tag);
383 return (SET_ERROR(EINVAL));
384 }
385
386 ds = dmu_buf_get_user(dbuf);
387 if (ds == NULL) {
388 dsl_dataset_t *winner = NULL;
389
390 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
391 ds->ds_dbuf = dbuf;
392 ds->ds_object = dsobj;
393 ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
394
395 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
396 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
397 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
398 refcount_create(&ds->ds_longholds);
399
400 bplist_create(&ds->ds_pending_deadlist);
401 dsl_deadlist_open(&ds->ds_deadlist,
402 mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
403
404 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
405 offsetof(dmu_sendarg_t, dsa_link));
406
407 if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
408 err = zap_contains(mos, dsobj, DS_FIELD_LARGE_BLOCKS);
409 if (err == 0)
410 ds->ds_large_blocks = B_TRUE;
411 else
412 ASSERT3U(err, ==, ENOENT);
413 }
414
415 if (err == 0) {
416 err = dsl_dir_hold_obj(dp,
417 dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds,
418 &ds->ds_dir);
419 }
420 if (err != 0) {
421 mutex_destroy(&ds->ds_lock);
422 mutex_destroy(&ds->ds_opening_lock);
423 mutex_destroy(&ds->ds_sendstream_lock);
424 refcount_destroy(&ds->ds_longholds);
425 bplist_destroy(&ds->ds_pending_deadlist);
426 dsl_deadlist_close(&ds->ds_deadlist);
427 kmem_free(ds, sizeof (dsl_dataset_t));
428 dmu_buf_rele(dbuf, tag);
429 return (err);
430 }
431
432 if (!ds->ds_is_snapshot) {
433 ds->ds_snapname[0] = '\0';
434 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
435 err = dsl_dataset_hold_obj(dp,
436 dsl_dataset_phys(ds)->ds_prev_snap_obj,
437 ds, &ds->ds_prev);
438 }
439 if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
440 int zaperr = zap_lookup(mos, ds->ds_object,
441 DS_FIELD_BOOKMARK_NAMES,
442 sizeof (ds->ds_bookmarks), 1,
443 &ds->ds_bookmarks);
444 if (zaperr != ENOENT)
445 VERIFY0(zaperr);
446 }
447 } else {
448 if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
449 err = dsl_dataset_get_snapname(ds);
450 if (err == 0 &&
451 dsl_dataset_phys(ds)->ds_userrefs_obj != 0) {
452 err = zap_count(
453 ds->ds_dir->dd_pool->dp_meta_objset,
454 dsl_dataset_phys(ds)->ds_userrefs_obj,
455 &ds->ds_userrefs);
456 }
457 }
458
459 if (err == 0 && !ds->ds_is_snapshot) {
460 err = dsl_prop_get_int_ds(ds,
461 zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
462 &ds->ds_reserved);
463 if (err == 0) {
464 err = dsl_prop_get_int_ds(ds,
465 zfs_prop_to_name(ZFS_PROP_REFQUOTA),
466 &ds->ds_quota);
467 }
468 } else {
469 ds->ds_reserved = ds->ds_quota = 0;
470 }
471
472 dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
473 if (err == 0)
474 winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
475
476 if (err != 0 || winner != NULL) {
477 bplist_destroy(&ds->ds_pending_deadlist);
478 dsl_deadlist_close(&ds->ds_deadlist);
479 if (ds->ds_prev)
480 dsl_dataset_rele(ds->ds_prev, ds);
481 dsl_dir_rele(ds->ds_dir, ds);
482 mutex_destroy(&ds->ds_lock);
483 mutex_destroy(&ds->ds_opening_lock);
484 mutex_destroy(&ds->ds_sendstream_lock);
485 refcount_destroy(&ds->ds_longholds);
486 kmem_free(ds, sizeof (dsl_dataset_t));
487 if (err != 0) {
488 dmu_buf_rele(dbuf, tag);
489 return (err);
490 }
491 ds = winner;
492 } else {
493 ds->ds_fsid_guid =
494 unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
495 }
496 }
836 dsl_dataset_rele(ds, FTAG);
837 }
838
839 return (dsobj);
840 }
841
842 /*
843 * The unique space in the head dataset can be calculated by subtracting
844 * the space used in the most recent snapshot, that is still being used
845 * in this file system, from the space currently in use. To figure out
846 * the space in the most recent snapshot still in use, we need to take
847 * the total space used in the snapshot and subtract out the space that
848 * has been freed up since the snapshot was taken.
849 */
850 void
851 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
852 {
853 uint64_t mrs_used;
854 uint64_t dlused, dlcomp, dluncomp;
855
856 ASSERT(!ds->ds_is_snapshot);
857
858 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
859 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
860 else
861 mrs_used = 0;
862
863 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
864
865 ASSERT3U(dlused, <=, mrs_used);
866 dsl_dataset_phys(ds)->ds_unique_bytes =
867 dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused);
868
869 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
870 SPA_VERSION_UNIQUE_ACCURATE)
871 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
872 }
873
874 void
875 dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
876 dmu_tx_t *tx)
1579 nvlist_free(val);
1580 nvlist_free(propval);
1581 }
1582
1583 void
1584 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1585 {
1586 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1587 uint64_t refd, avail, uobjs, aobjs, ratio;
1588
1589 ASSERT(dsl_pool_config_held(dp));
1590
1591 ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 :
1592 (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /
1593 dsl_dataset_phys(ds)->ds_compressed_bytes);
1594
1595 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
1596 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
1597 dsl_dataset_phys(ds)->ds_uncompressed_bytes);
1598
1599 if (ds->ds_is_snapshot) {
1600 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
1601 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1602 dsl_dataset_phys(ds)->ds_unique_bytes);
1603 get_clones_stat(ds, nv);
1604 } else {
1605 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
1606 char buf[MAXNAMELEN];
1607 dsl_dataset_name(ds->ds_prev, buf);
1608 dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
1609 }
1610
1611 dsl_dir_stats(ds->ds_dir, nv);
1612 }
1613
1614 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1615 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1616 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1617
1618 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1619 dsl_dataset_phys(ds)->ds_creation_time);
1647 dsl_dataset_rele(prev, FTAG);
1648 if (err == 0) {
1649 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
1650 written);
1651 }
1652 }
1653 }
1654 }
1655
1656 void
1657 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1658 {
1659 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1660 ASSERT(dsl_pool_config_held(dp));
1661
1662 stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
1663 stat->dds_inconsistent =
1664 dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
1665 stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
1666 stat->dds_origin[0] = '\0';
1667 if (ds->ds_is_snapshot) {
1668 stat->dds_is_snapshot = B_TRUE;
1669 stat->dds_num_clones =
1670 dsl_dataset_phys(ds)->ds_num_children - 1;
1671 } else {
1672 stat->dds_is_snapshot = B_FALSE;
1673 stat->dds_num_clones = 0;
1674
1675 if (dsl_dir_is_clone(ds->ds_dir)) {
1676 dsl_dataset_t *ods;
1677
1678 VERIFY0(dsl_dataset_hold_obj(dp,
1679 dsl_dir_phys(ds->ds_dir)->dd_origin_obj,
1680 FTAG, &ods));
1681 dsl_dataset_name(ods, stat->dds_origin);
1682 dsl_dataset_rele(ods, FTAG);
1683 }
1684 }
1685 }
1686
1687 uint64_t
1907 typedef struct dsl_dataset_rollback_arg {
1908 const char *ddra_fsname;
1909 void *ddra_owner;
1910 nvlist_t *ddra_result;
1911 } dsl_dataset_rollback_arg_t;
1912
1913 static int
1914 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
1915 {
1916 dsl_dataset_rollback_arg_t *ddra = arg;
1917 dsl_pool_t *dp = dmu_tx_pool(tx);
1918 dsl_dataset_t *ds;
1919 int64_t unused_refres_delta;
1920 int error;
1921
1922 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
1923 if (error != 0)
1924 return (error);
1925
1926 /* must not be a snapshot */
1927 if (ds->ds_is_snapshot) {
1928 dsl_dataset_rele(ds, FTAG);
1929 return (SET_ERROR(EINVAL));
1930 }
1931
1932 /* must have a most recent snapshot */
1933 if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) {
1934 dsl_dataset_rele(ds, FTAG);
1935 return (SET_ERROR(EINVAL));
1936 }
1937
1938 /* must not have any bookmarks after the most recent snapshot */
1939 nvlist_t *proprequest = fnvlist_alloc();
1940 fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
1941 nvlist_t *bookmarks = fnvlist_alloc();
1942 error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
1943 fnvlist_free(proprequest);
1944 if (error != 0)
1945 return (error);
1946 for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
1947 pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
2479 list_remove(l, snap);
2480 dsl_dataset_rele(snap->ds, tag);
2481 kmem_free(snap, sizeof (*snap));
2482 }
2483 list_destroy(l);
2484 }
2485
2486 static int
2487 promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
2488 {
2489 int error;
2490 dsl_dir_t *dd;
2491 struct promotenode *snap;
2492
2493 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
2494 &ddpa->ddpa_clone);
2495 if (error != 0)
2496 return (error);
2497 dd = ddpa->ddpa_clone->ds_dir;
2498
2499 if (ddpa->ddpa_clone->ds_is_snapshot ||
2500 !dsl_dir_is_clone(dd)) {
2501 dsl_dataset_rele(ddpa->ddpa_clone, tag);
2502 return (SET_ERROR(EINVAL));
2503 }
2504
2505 error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj,
2506 &ddpa->shared_snaps, tag);
2507 if (error != 0)
2508 goto out;
2509
2510 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
2511 &ddpa->clone_snaps, tag);
2512 if (error != 0)
2513 goto out;
2514
2515 snap = list_head(&ddpa->shared_snaps);
2516 ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj);
2517 error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj,
2518 dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj,
2519 &ddpa->origin_snaps, tag);
2571 dmu_objset_rele(os, FTAG);
2572 if (error != 0)
2573 return (error);
2574
2575 ddpa.ddpa_clonename = name;
2576 ddpa.err_ds = conflsnap;
2577 ddpa.cr = CRED();
2578
2579 return (dsl_sync_task(name, dsl_dataset_promote_check,
2580 dsl_dataset_promote_sync, &ddpa,
2581 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED));
2582 }
2583
2584 int
2585 dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
2586 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
2587 {
2588 int64_t unused_refres_delta;
2589
2590 /* they should both be heads */
2591 if (clone->ds_is_snapshot ||
2592 origin_head->ds_is_snapshot)
2593 return (SET_ERROR(EINVAL));
2594
2595 /* if we are not forcing, the branch point should be just before them */
2596 if (!force && clone->ds_prev != origin_head->ds_prev)
2597 return (SET_ERROR(EINVAL));
2598
2599 /* clone should be the clone (unless they are unrelated) */
2600 if (clone->ds_prev != NULL &&
2601 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
2602 origin_head->ds_dir != clone->ds_prev->ds_dir)
2603 return (SET_ERROR(EINVAL));
2604
2605 /* the clone should be a child of the origin */
2606 if (clone->ds_dir->dd_parent != origin_head->ds_dir)
2607 return (SET_ERROR(EINVAL));
2608
2609 /* origin_head shouldn't be modified unless 'force' */
2610 if (!force &&
2611 dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))
2612 return (SET_ERROR(ETXTBSY));
2851 } dsl_dataset_set_qr_arg_t;
2852
2853
2854 /* ARGSUSED */
2855 static int
2856 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
2857 {
2858 dsl_dataset_set_qr_arg_t *ddsqra = arg;
2859 dsl_pool_t *dp = dmu_tx_pool(tx);
2860 dsl_dataset_t *ds;
2861 int error;
2862 uint64_t newval;
2863
2864 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
2865 return (SET_ERROR(ENOTSUP));
2866
2867 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2868 if (error != 0)
2869 return (error);
2870
2871 if (ds->ds_is_snapshot) {
2872 dsl_dataset_rele(ds, FTAG);
2873 return (SET_ERROR(EINVAL));
2874 }
2875
2876 error = dsl_prop_predict(ds->ds_dir,
2877 zfs_prop_to_name(ZFS_PROP_REFQUOTA),
2878 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2879 if (error != 0) {
2880 dsl_dataset_rele(ds, FTAG);
2881 return (error);
2882 }
2883
2884 if (newval == 0) {
2885 dsl_dataset_rele(ds, FTAG);
2886 return (0);
2887 }
2888
2889 if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes ||
2890 newval < ds->ds_reserved) {
2891 dsl_dataset_rele(ds, FTAG);
2934 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
2935 dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
2936 }
2937
2938 static int
2939 dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
2940 {
2941 dsl_dataset_set_qr_arg_t *ddsqra = arg;
2942 dsl_pool_t *dp = dmu_tx_pool(tx);
2943 dsl_dataset_t *ds;
2944 int error;
2945 uint64_t newval, unique;
2946
2947 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
2948 return (SET_ERROR(ENOTSUP));
2949
2950 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2951 if (error != 0)
2952 return (error);
2953
2954 if (ds->ds_is_snapshot) {
2955 dsl_dataset_rele(ds, FTAG);
2956 return (SET_ERROR(EINVAL));
2957 }
2958
2959 error = dsl_prop_predict(ds->ds_dir,
2960 zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
2961 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2962 if (error != 0) {
2963 dsl_dataset_rele(ds, FTAG);
2964 return (error);
2965 }
2966
2967 /*
2968 * If we are doing the preliminary check in open context, the
2969 * space estimates may be inaccurate.
2970 */
2971 if (!dmu_tx_is_syncing(tx)) {
2972 dsl_dataset_rele(ds, FTAG);
2973 return (0);
2974 }
3148 * blocks that would be freed [---------------------------]
3149 * snapshots ---O-------O--------O-------O--------O
3150 * firstsnap lastsnap
3151 *
3152 * This is the set of blocks that were born after the snap before firstsnap,
3153 * (birth > firstsnap->prev_snap_txg) and died before the snap after the
3154 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
3155 * We calculate this by iterating over the relevant deadlists (from the snap
3156 * after lastsnap, backward to the snap after firstsnap), summing up the
3157 * space on the deadlist that was born after the snap before firstsnap.
3158 */
3159 int
3160 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
3161 dsl_dataset_t *lastsnap,
3162 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
3163 {
3164 int err = 0;
3165 uint64_t snapobj;
3166 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
3167
3168 ASSERT(firstsnap->ds_is_snapshot);
3169 ASSERT(lastsnap->ds_is_snapshot);
3170
3171 /*
3172 * Check that the snapshots are in the same dsl_dir, and firstsnap
3173 * is before lastsnap.
3174 */
3175 if (firstsnap->ds_dir != lastsnap->ds_dir ||
3176 dsl_dataset_phys(firstsnap)->ds_creation_txg >
3177 dsl_dataset_phys(lastsnap)->ds_creation_txg)
3178 return (SET_ERROR(EINVAL));
3179
3180 *usedp = *compp = *uncompp = 0;
3181
3182 snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj;
3183 while (snapobj != firstsnap->ds_object) {
3184 dsl_dataset_t *ds;
3185 uint64_t used, comp, uncomp;
3186
3187 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
3188 if (err != 0)
3189 break;
3274 }
3275
3276 /*
3277 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
3278 * For example, they could both be snapshots of the same filesystem, and
3279 * 'earlier' is before 'later'. Or 'earlier' could be the origin of
3280 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
3281 * filesystem. Or 'earlier' could be the origin's origin.
3282 *
3283 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg.
3284 */
3285 boolean_t
3286 dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
3287 uint64_t earlier_txg)
3288 {
3289 dsl_pool_t *dp = later->ds_dir->dd_pool;
3290 int error;
3291 boolean_t ret;
3292
3293 ASSERT(dsl_pool_config_held(dp));
3294 ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);
3295
3296 if (earlier_txg == 0)
3297 earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
3298
3299 if (later->ds_is_snapshot &&
3300 earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
3301 return (B_FALSE);
3302
3303 if (later->ds_dir == earlier->ds_dir)
3304 return (B_TRUE);
3305 if (!dsl_dir_is_clone(later->ds_dir))
3306 return (B_FALSE);
3307
3308 if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object)
3309 return (B_TRUE);
3310 dsl_dataset_t *origin;
3311 error = dsl_dataset_hold_obj(dp,
3312 dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);
3313 if (error != 0)
3314 return (B_FALSE);
3315 ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
3316 dsl_dataset_rele(origin, FTAG);
3317 return (ret);
3318 }
3319
|