6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  24  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  25  * Copyright (c) 2014 RackTop Systems.
  26  */
  27 
  28 #include <sys/dmu_objset.h>
  29 #include <sys/dsl_dataset.h>
  30 #include <sys/dsl_dir.h>
  31 #include <sys/dsl_prop.h>
  32 #include <sys/dsl_synctask.h>
  33 #include <sys/dmu_traverse.h>
  34 #include <sys/dmu_impl.h>
  35 #include <sys/dmu_tx.h>
  36 #include <sys/arc.h>
  37 #include <sys/zio.h>
  38 #include <sys/zap.h>
  39 #include <sys/zfeature.h>
  40 #include <sys/unique.h>
  41 #include <sys/zfs_context.h>
  42 #include <sys/zfs_ioctl.h>
  43 #include <sys/spa.h>
  44 #include <sys/zfs_znode.h>
  45 #include <sys/zfs_onexit.h>
 
 
  54  * The SPA supports block sizes up to 16MB.  However, very large blocks
  55  * can have an impact on i/o latency (e.g. tying up a spinning disk for
  56  * ~300ms), and also potentially on the memory allocator.  Therefore,
  57  * we do not allow the recordsize to be set larger than zfs_max_recordsize
  58  * (default 1MB).  Larger blocks can be created by changing this tunable,
  59  * and pools with larger blocks can always be imported and used, regardless
  60  * of this setting.
  61  */
  62 int zfs_max_recordsize = 1 * 1024 * 1024;
  63 
  64 #define SWITCH64(x, y) \
  65         { \
  66                 uint64_t __tmp = (x); \
  67                 (x) = (y); \
  68                 (y) = __tmp; \
  69         }
  70 
  71 #define DS_REF_MAX      (1ULL << 62)
  72 
  73 extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
  74 extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds);
  75 
  76 /*
  77  * Figure out how much of this delta should be propagated to the dsl_dir
  78  * layer.  If there's a refreservation, that space has already been
  79  * partially accounted for in our ancestors.
  80  */
  81 static int64_t
  82 parent_delta(dsl_dataset_t *ds, int64_t delta)
  83 {
  84         dsl_dataset_phys_t *ds_phys;
  85         uint64_t old_bytes, new_bytes;
  86 
  87         if (ds->ds_reserved == 0)
  88                 return (delta);
  89 
  90         ds_phys = dsl_dataset_phys(ds);
  91         old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved);
  92         new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
  93 
  94         ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
 
 138     boolean_t async)
 139 {
 140         int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
 141         int compressed = BP_GET_PSIZE(bp);
 142         int uncompressed = BP_GET_UCSIZE(bp);
 143 
 144         if (BP_IS_HOLE(bp))
 145                 return (0);
 146 
 147         ASSERT(dmu_tx_is_syncing(tx));
 148         ASSERT(bp->blk_birth <= tx->tx_txg);
 149 
 150         if (ds == NULL) {
 151                 dsl_free(tx->tx_pool, tx->tx_txg, bp);
 152                 dsl_pool_mos_diduse_space(tx->tx_pool,
 153                     -used, -compressed, -uncompressed);
 154                 return (used);
 155         }
 156         ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
 157 
 158         ASSERT(!dsl_dataset_is_snapshot(ds));
 159         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 160 
 161         if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
 162                 int64_t delta;
 163 
 164                 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
 165                 dsl_free(tx->tx_pool, tx->tx_txg, bp);
 166 
 167                 mutex_enter(&ds->ds_lock);
 168                 ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used ||
 169                     !DS_UNIQUE_IS_ACCURATE(ds));
 170                 delta = parent_delta(ds, -used);
 171                 dsl_dataset_phys(ds)->ds_unique_bytes -= used;
 172                 mutex_exit(&ds->ds_lock);
 173                 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
 174                     delta, -compressed, -uncompressed, tx);
 175                 dsl_dir_transfer_space(ds->ds_dir, -used - delta,
 176                     DD_USED_REFRSRV, DD_USED_HEAD, tx);
 177         } else {
 178                 dprintf_bp(bp, "putting on dead list: %s", "");
 
 236          */
 237         if (ds->ds_trysnap_txg >
 238             spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
 239                 trysnap = ds->ds_trysnap_txg;
 240         return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap));
 241 }
 242 
 243 boolean_t
 244 dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
 245     uint64_t blk_birth)
 246 {
 247         if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
 248             (bp != NULL && BP_IS_HOLE(bp)))
 249                 return (B_FALSE);
 250 
 251         ddt_prefetch(dsl_dataset_get_spa(ds), bp);
 252 
 253         return (B_TRUE);
 254 }
 255 
 256 /* ARGSUSED */
 257 static void
 258 dsl_dataset_evict(dmu_buf_t *db, void *dsv)
 259 {
 260         dsl_dataset_t *ds = dsv;
 261 
 262         ASSERT(ds->ds_owner == NULL);
 263 
 264         unique_remove(ds->ds_fsid_guid);
 265 
 266         if (ds->ds_objset != NULL)
 267                 dmu_objset_evict(ds->ds_objset);
 268 
 269         if (ds->ds_prev) {
 270                 dsl_dataset_rele(ds->ds_prev, ds);
 271                 ds->ds_prev = NULL;
 272         }
 273 
 274         bplist_destroy(&ds->ds_pending_deadlist);
 275         if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0)
 276                 dsl_deadlist_close(&ds->ds_deadlist);
 277         if (ds->ds_dir)
 278                 dsl_dir_rele(ds->ds_dir, ds);
 279 
 280         ASSERT(!list_link_active(&ds->ds_synced_link));
 281 
 282         mutex_destroy(&ds->ds_lock);
 283         mutex_destroy(&ds->ds_opening_lock);
 284         mutex_destroy(&ds->ds_sendstream_lock);
 285         refcount_destroy(&ds->ds_longholds);
 286 
 287         kmem_free(ds, sizeof (dsl_dataset_t));
 288 }
 289 
 290 int
 291 dsl_dataset_get_snapname(dsl_dataset_t *ds)
 292 {
 293         dsl_dataset_phys_t *headphys;
 294         int err;
 295         dmu_buf_t *headdbuf;
 296         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 297         objset_t *mos = dp->dp_meta_objset;
 298 
 
 372         ASSERT(dsl_pool_config_held(dp));
 373 
 374         err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 375         if (err != 0)
 376                 return (err);
 377 
 378         /* Make sure dsobj has the correct object type. */
 379         dmu_object_info_from_db(dbuf, &doi);
 380         if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
 381                 dmu_buf_rele(dbuf, tag);
 382                 return (SET_ERROR(EINVAL));
 383         }
 384 
 385         ds = dmu_buf_get_user(dbuf);
 386         if (ds == NULL) {
 387                 dsl_dataset_t *winner = NULL;
 388 
 389                 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
 390                 ds->ds_dbuf = dbuf;
 391                 ds->ds_object = dsobj;
 392 
 393                 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
 394                 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
 395                 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
 396                 refcount_create(&ds->ds_longholds);
 397 
 398                 bplist_create(&ds->ds_pending_deadlist);
 399                 dsl_deadlist_open(&ds->ds_deadlist,
 400                     mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
 401 
 402                 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
 403                     offsetof(dmu_sendarg_t, dsa_link));
 404 
 405                 if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
 406                         err = zap_contains(mos, dsobj, DS_FIELD_LARGE_BLOCKS);
 407                         if (err == 0)
 408                                 ds->ds_large_blocks = B_TRUE;
 409                         else
 410                                 ASSERT3U(err, ==, ENOENT);
 411                 }
 412 
 413                 if (err == 0) {
 414                         err = dsl_dir_hold_obj(dp,
 415                             dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds,
 416                             &ds->ds_dir);
 417                 }
 418                 if (err != 0) {
 419                         mutex_destroy(&ds->ds_lock);
 420                         mutex_destroy(&ds->ds_opening_lock);
 421                         mutex_destroy(&ds->ds_sendstream_lock);
 422                         refcount_destroy(&ds->ds_longholds);
 423                         bplist_destroy(&ds->ds_pending_deadlist);
 424                         dsl_deadlist_close(&ds->ds_deadlist);
 425                         kmem_free(ds, sizeof (dsl_dataset_t));
 426                         dmu_buf_rele(dbuf, tag);
 427                         return (err);
 428                 }
 429 
 430                 if (!dsl_dataset_is_snapshot(ds)) {
 431                         ds->ds_snapname[0] = '\0';
 432                         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
 433                                 err = dsl_dataset_hold_obj(dp,
 434                                     dsl_dataset_phys(ds)->ds_prev_snap_obj,
 435                                     ds, &ds->ds_prev);
 436                         }
 437                         if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
 438                                 int zaperr = zap_lookup(mos, ds->ds_object,
 439                                     DS_FIELD_BOOKMARK_NAMES,
 440                                     sizeof (ds->ds_bookmarks), 1,
 441                                     &ds->ds_bookmarks);
 442                                 if (zaperr != ENOENT)
 443                                         VERIFY0(zaperr);
 444                         }
 445                 } else {
 446                         if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
 447                                 err = dsl_dataset_get_snapname(ds);
 448                         if (err == 0 &&
 449                             dsl_dataset_phys(ds)->ds_userrefs_obj != 0) {
 450                                 err = zap_count(
 451                                     ds->ds_dir->dd_pool->dp_meta_objset,
 452                                     dsl_dataset_phys(ds)->ds_userrefs_obj,
 453                                     &ds->ds_userrefs);
 454                         }
 455                 }
 456 
 457                 if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
 458                         err = dsl_prop_get_int_ds(ds,
 459                             zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
 460                             &ds->ds_reserved);
 461                         if (err == 0) {
 462                                 err = dsl_prop_get_int_ds(ds,
 463                                     zfs_prop_to_name(ZFS_PROP_REFQUOTA),
 464                                     &ds->ds_quota);
 465                         }
 466                 } else {
 467                         ds->ds_reserved = ds->ds_quota = 0;
 468                 }
 469 
 470                 if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
 471                     dsl_dataset_evict)) != NULL) {
 472                         bplist_destroy(&ds->ds_pending_deadlist);
 473                         dsl_deadlist_close(&ds->ds_deadlist);
 474                         if (ds->ds_prev)
 475                                 dsl_dataset_rele(ds->ds_prev, ds);
 476                         dsl_dir_rele(ds->ds_dir, ds);
 477                         mutex_destroy(&ds->ds_lock);
 478                         mutex_destroy(&ds->ds_opening_lock);
 479                         mutex_destroy(&ds->ds_sendstream_lock);
 480                         refcount_destroy(&ds->ds_longholds);
 481                         kmem_free(ds, sizeof (dsl_dataset_t));
 482                         if (err != 0) {
 483                                 dmu_buf_rele(dbuf, tag);
 484                                 return (err);
 485                         }
 486                         ds = winner;
 487                 } else {
 488                         ds->ds_fsid_guid =
 489                             unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
 490                 }
 491         }
 
 831                 dsl_dataset_rele(ds, FTAG);
 832         }
 833 
 834         return (dsobj);
 835 }
 836 
 837 /*
 838  * The unique space in the head dataset can be calculated by subtracting
 839  * the space used in the most recent snapshot, that is still being used
 840  * in this file system, from the space currently in use.  To figure out
 841  * the space in the most recent snapshot still in use, we need to take
 842  * the total space used in the snapshot and subtract out the space that
 843  * has been freed up since the snapshot was taken.
 844  */
 845 void
 846 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
 847 {
 848         uint64_t mrs_used;
 849         uint64_t dlused, dlcomp, dluncomp;
 850 
 851         ASSERT(!dsl_dataset_is_snapshot(ds));
 852 
 853         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
 854                 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
 855         else
 856                 mrs_used = 0;
 857 
 858         dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
 859 
 860         ASSERT3U(dlused, <=, mrs_used);
 861         dsl_dataset_phys(ds)->ds_unique_bytes =
 862             dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused);
 863 
 864         if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
 865             SPA_VERSION_UNIQUE_ACCURATE)
 866                 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
 867 }
 868 
 869 void
 870 dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
 871     dmu_tx_t *tx)
 
1574         nvlist_free(val);
1575         nvlist_free(propval);
1576 }
1577 
1578 void
1579 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1580 {
1581         dsl_pool_t *dp = ds->ds_dir->dd_pool;
1582         uint64_t refd, avail, uobjs, aobjs, ratio;
1583 
1584         ASSERT(dsl_pool_config_held(dp));
1585 
1586         ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 :
1587             (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /
1588             dsl_dataset_phys(ds)->ds_compressed_bytes);
1589 
1590         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
1591         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
1592             dsl_dataset_phys(ds)->ds_uncompressed_bytes);
1593 
1594         if (dsl_dataset_is_snapshot(ds)) {
1595                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
1596                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1597                     dsl_dataset_phys(ds)->ds_unique_bytes);
1598                 get_clones_stat(ds, nv);
1599         } else {
1600                 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
1601                         char buf[MAXNAMELEN];
1602                         dsl_dataset_name(ds->ds_prev, buf);
1603                         dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
1604                 }
1605 
1606                 dsl_dir_stats(ds->ds_dir, nv);
1607         }
1608 
1609         dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1610         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1611         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1612 
1613         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1614             dsl_dataset_phys(ds)->ds_creation_time);
 
1642                         dsl_dataset_rele(prev, FTAG);
1643                         if (err == 0) {
1644                                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
1645                                     written);
1646                         }
1647                 }
1648         }
1649 }
1650 
1651 void
1652 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1653 {
1654         dsl_pool_t *dp = ds->ds_dir->dd_pool;
1655         ASSERT(dsl_pool_config_held(dp));
1656 
1657         stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
1658         stat->dds_inconsistent =
1659             dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
1660         stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
1661         stat->dds_origin[0] = '\0';
1662         if (dsl_dataset_is_snapshot(ds)) {
1663                 stat->dds_is_snapshot = B_TRUE;
1664                 stat->dds_num_clones =
1665                     dsl_dataset_phys(ds)->ds_num_children - 1;
1666         } else {
1667                 stat->dds_is_snapshot = B_FALSE;
1668                 stat->dds_num_clones = 0;
1669 
1670                 if (dsl_dir_is_clone(ds->ds_dir)) {
1671                         dsl_dataset_t *ods;
1672 
1673                         VERIFY0(dsl_dataset_hold_obj(dp,
1674                             dsl_dir_phys(ds->ds_dir)->dd_origin_obj,
1675                             FTAG, &ods));
1676                         dsl_dataset_name(ods, stat->dds_origin);
1677                         dsl_dataset_rele(ods, FTAG);
1678                 }
1679         }
1680 }
1681 
1682 uint64_t
 
1902 typedef struct dsl_dataset_rollback_arg {
1903         const char *ddra_fsname;
1904         void *ddra_owner;
1905         nvlist_t *ddra_result;
1906 } dsl_dataset_rollback_arg_t;
1907 
1908 static int
1909 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
1910 {
1911         dsl_dataset_rollback_arg_t *ddra = arg;
1912         dsl_pool_t *dp = dmu_tx_pool(tx);
1913         dsl_dataset_t *ds;
1914         int64_t unused_refres_delta;
1915         int error;
1916 
1917         error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
1918         if (error != 0)
1919                 return (error);
1920 
1921         /* must not be a snapshot */
1922         if (dsl_dataset_is_snapshot(ds)) {
1923                 dsl_dataset_rele(ds, FTAG);
1924                 return (SET_ERROR(EINVAL));
1925         }
1926 
1927         /* must have a most recent snapshot */
1928         if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) {
1929                 dsl_dataset_rele(ds, FTAG);
1930                 return (SET_ERROR(EINVAL));
1931         }
1932 
1933         /* must not have any bookmarks after the most recent snapshot */
1934         nvlist_t *proprequest = fnvlist_alloc();
1935         fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
1936         nvlist_t *bookmarks = fnvlist_alloc();
1937         error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
1938         fnvlist_free(proprequest);
1939         if (error != 0)
1940                 return (error);
1941         for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
1942             pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
 
2474                 list_remove(l, snap);
2475                 dsl_dataset_rele(snap->ds, tag);
2476                 kmem_free(snap, sizeof (*snap));
2477         }
2478         list_destroy(l);
2479 }
2480 
2481 static int
2482 promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
2483 {
2484         int error;
2485         dsl_dir_t *dd;
2486         struct promotenode *snap;
2487 
2488         error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
2489             &ddpa->ddpa_clone);
2490         if (error != 0)
2491                 return (error);
2492         dd = ddpa->ddpa_clone->ds_dir;
2493 
2494         if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) ||
2495             !dsl_dir_is_clone(dd)) {
2496                 dsl_dataset_rele(ddpa->ddpa_clone, tag);
2497                 return (SET_ERROR(EINVAL));
2498         }
2499 
2500         error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj,
2501             &ddpa->shared_snaps, tag);
2502         if (error != 0)
2503                 goto out;
2504 
2505         error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
2506             &ddpa->clone_snaps, tag);
2507         if (error != 0)
2508                 goto out;
2509 
2510         snap = list_head(&ddpa->shared_snaps);
2511         ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj);
2512         error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj,
2513             dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj,
2514             &ddpa->origin_snaps, tag);
 
2566         dmu_objset_rele(os, FTAG);
2567         if (error != 0)
2568                 return (error);
2569 
2570         ddpa.ddpa_clonename = name;
2571         ddpa.err_ds = conflsnap;
2572         ddpa.cr = CRED();
2573 
2574         return (dsl_sync_task(name, dsl_dataset_promote_check,
2575             dsl_dataset_promote_sync, &ddpa,
2576             2 + numsnaps, ZFS_SPACE_CHECK_RESERVED));
2577 }
2578 
2579 int
2580 dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
2581     dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
2582 {
2583         int64_t unused_refres_delta;
2584 
2585         /* they should both be heads */
2586         if (dsl_dataset_is_snapshot(clone) ||
2587             dsl_dataset_is_snapshot(origin_head))
2588                 return (SET_ERROR(EINVAL));
2589 
2590         /* if we are not forcing, the branch point should be just before them */
2591         if (!force && clone->ds_prev != origin_head->ds_prev)
2592                 return (SET_ERROR(EINVAL));
2593 
2594         /* clone should be the clone (unless they are unrelated) */
2595         if (clone->ds_prev != NULL &&
2596             clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
2597             origin_head->ds_dir != clone->ds_prev->ds_dir)
2598                 return (SET_ERROR(EINVAL));
2599 
2600         /* the clone should be a child of the origin */
2601         if (clone->ds_dir->dd_parent != origin_head->ds_dir)
2602                 return (SET_ERROR(EINVAL));
2603 
2604         /* origin_head shouldn't be modified unless 'force' */
2605         if (!force &&
2606             dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))
2607                 return (SET_ERROR(ETXTBSY));
 
2846 } dsl_dataset_set_qr_arg_t;
2847 
2848 
2849 /* ARGSUSED */
2850 static int
2851 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
2852 {
2853         dsl_dataset_set_qr_arg_t *ddsqra = arg;
2854         dsl_pool_t *dp = dmu_tx_pool(tx);
2855         dsl_dataset_t *ds;
2856         int error;
2857         uint64_t newval;
2858 
2859         if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
2860                 return (SET_ERROR(ENOTSUP));
2861 
2862         error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2863         if (error != 0)
2864                 return (error);
2865 
2866         if (dsl_dataset_is_snapshot(ds)) {
2867                 dsl_dataset_rele(ds, FTAG);
2868                 return (SET_ERROR(EINVAL));
2869         }
2870 
2871         error = dsl_prop_predict(ds->ds_dir,
2872             zfs_prop_to_name(ZFS_PROP_REFQUOTA),
2873             ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2874         if (error != 0) {
2875                 dsl_dataset_rele(ds, FTAG);
2876                 return (error);
2877         }
2878 
2879         if (newval == 0) {
2880                 dsl_dataset_rele(ds, FTAG);
2881                 return (0);
2882         }
2883 
2884         if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes ||
2885             newval < ds->ds_reserved) {
2886                 dsl_dataset_rele(ds, FTAG);
 
2929         return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
2930             dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
2931 }
2932 
2933 static int
2934 dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
2935 {
2936         dsl_dataset_set_qr_arg_t *ddsqra = arg;
2937         dsl_pool_t *dp = dmu_tx_pool(tx);
2938         dsl_dataset_t *ds;
2939         int error;
2940         uint64_t newval, unique;
2941 
2942         if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
2943                 return (SET_ERROR(ENOTSUP));
2944 
2945         error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2946         if (error != 0)
2947                 return (error);
2948 
2949         if (dsl_dataset_is_snapshot(ds)) {
2950                 dsl_dataset_rele(ds, FTAG);
2951                 return (SET_ERROR(EINVAL));
2952         }
2953 
2954         error = dsl_prop_predict(ds->ds_dir,
2955             zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
2956             ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2957         if (error != 0) {
2958                 dsl_dataset_rele(ds, FTAG);
2959                 return (error);
2960         }
2961 
2962         /*
2963          * If we are doing the preliminary check in open context, the
2964          * space estimates may be inaccurate.
2965          */
2966         if (!dmu_tx_is_syncing(tx)) {
2967                 dsl_dataset_rele(ds, FTAG);
2968                 return (0);
2969         }
 
3143  * blocks that would be freed            [---------------------------]
3144  * snapshots                       ---O-------O--------O-------O--------O
3145  *                                        firstsnap        lastsnap
3146  *
3147  * This is the set of blocks that were born after the snap before firstsnap,
3148  * (birth > firstsnap->prev_snap_txg) and died before the snap after the
3149  * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
3150  * We calculate this by iterating over the relevant deadlists (from the snap
3151  * after lastsnap, backward to the snap after firstsnap), summing up the
3152  * space on the deadlist that was born after the snap before firstsnap.
3153  */
3154 int
3155 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
3156     dsl_dataset_t *lastsnap,
3157     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
3158 {
3159         int err = 0;
3160         uint64_t snapobj;
3161         dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
3162 
3163         ASSERT(dsl_dataset_is_snapshot(firstsnap));
3164         ASSERT(dsl_dataset_is_snapshot(lastsnap));
3165 
3166         /*
3167          * Check that the snapshots are in the same dsl_dir, and firstsnap
3168          * is before lastsnap.
3169          */
3170         if (firstsnap->ds_dir != lastsnap->ds_dir ||
3171             dsl_dataset_phys(firstsnap)->ds_creation_txg >
3172             dsl_dataset_phys(lastsnap)->ds_creation_txg)
3173                 return (SET_ERROR(EINVAL));
3174 
3175         *usedp = *compp = *uncompp = 0;
3176 
3177         snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj;
3178         while (snapobj != firstsnap->ds_object) {
3179                 dsl_dataset_t *ds;
3180                 uint64_t used, comp, uncomp;
3181 
3182                 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
3183                 if (err != 0)
3184                         break;
 
3269 }
3270 
3271 /*
3272  * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
3273  * For example, they could both be snapshots of the same filesystem, and
3274  * 'earlier' is before 'later'.  Or 'earlier' could be the origin of
3275  * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's
3276  * filesystem.  Or 'earlier' could be the origin's origin.
3277  *
3278  * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg.
3279  */
3280 boolean_t
3281 dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
3282         uint64_t earlier_txg)
3283 {
3284         dsl_pool_t *dp = later->ds_dir->dd_pool;
3285         int error;
3286         boolean_t ret;
3287 
3288         ASSERT(dsl_pool_config_held(dp));
3289         ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0);
3290 
3291         if (earlier_txg == 0)
3292                 earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
3293 
3294         if (dsl_dataset_is_snapshot(later) &&
3295             earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
3296                 return (B_FALSE);
3297 
3298         if (later->ds_dir == earlier->ds_dir)
3299                 return (B_TRUE);
3300         if (!dsl_dir_is_clone(later->ds_dir))
3301                 return (B_FALSE);
3302 
3303         if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object)
3304                 return (B_TRUE);
3305         dsl_dataset_t *origin;
3306         error = dsl_dataset_hold_obj(dp,
3307             dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);
3308         if (error != 0)
3309                 return (B_FALSE);
3310         ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
3311         dsl_dataset_rele(origin, FTAG);
3312         return (ret);
3313 }
3314 
 | 
 
 
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  24  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  25  * Copyright (c) 2014 RackTop Systems.
  26  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  27  */
  28 
  29 #include <sys/dmu_objset.h>
  30 #include <sys/dsl_dataset.h>
  31 #include <sys/dsl_dir.h>
  32 #include <sys/dsl_prop.h>
  33 #include <sys/dsl_synctask.h>
  34 #include <sys/dmu_traverse.h>
  35 #include <sys/dmu_impl.h>
  36 #include <sys/dmu_tx.h>
  37 #include <sys/arc.h>
  38 #include <sys/zio.h>
  39 #include <sys/zap.h>
  40 #include <sys/zfeature.h>
  41 #include <sys/unique.h>
  42 #include <sys/zfs_context.h>
  43 #include <sys/zfs_ioctl.h>
  44 #include <sys/spa.h>
  45 #include <sys/zfs_znode.h>
  46 #include <sys/zfs_onexit.h>
 
 
  55  * The SPA supports block sizes up to 16MB.  However, very large blocks
  56  * can have an impact on i/o latency (e.g. tying up a spinning disk for
  57  * ~300ms), and also potentially on the memory allocator.  Therefore,
  58  * we do not allow the recordsize to be set larger than zfs_max_recordsize
  59  * (default 1MB).  Larger blocks can be created by changing this tunable,
  60  * and pools with larger blocks can always be imported and used, regardless
  61  * of this setting.
  62  */
  63 int zfs_max_recordsize = 1 * 1024 * 1024;
  64 
     /*
      * Exchange the values of the lvalues x and y through a uint64_t
      * temporary.  Each argument is expanded twice, so neither may have
      * side effects.  The expansion is a bare brace block rather than a
      * do { } while (0): "SWITCH64(a, b);" is therefore a block followed
      * by an empty statement, and must not be used as the unbraced body
      * of an if that has an else.
      */
  65 #define SWITCH64(x, y) \
  66         { \
  67                 uint64_t __tmp = (x); \
  68                 (x) = (y); \
  69                 (y) = __tmp; \
  70         }
  71 
     /*
      * 2^62.  NOTE(review): no user of this limit is visible in this part
      * of the file; presumably it bounds a per-dataset reference count --
      * confirm against its users.
      */
  72 #define DS_REF_MAX      (1ULL << 62)
  73 
     /*
      * Under C99 inline semantics, declaring an inline function with
      * extern at file scope makes this translation unit provide the
      * function's external definition.  The inline body itself is not in
      * this part of the file; presumably it lives in a header -- confirm.
      */
  74 extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
  75 
  76 /*
  77  * Figure out how much of this delta should be propagated to the dsl_dir
  78  * layer.  If there's a refreservation, that space has already been
  79  * partially accounted for in our ancestors.
  80  */
  81 static int64_t
  82 parent_delta(dsl_dataset_t *ds, int64_t delta)
  83 {
  84         dsl_dataset_phys_t *ds_phys;
  85         uint64_t old_bytes, new_bytes;
  86 
  87         if (ds->ds_reserved == 0)
  88                 return (delta);
  89 
  90         ds_phys = dsl_dataset_phys(ds);
  91         old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved);
  92         new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
  93 
  94         ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
 
 138     boolean_t async)
 139 {
 140         int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
 141         int compressed = BP_GET_PSIZE(bp);
 142         int uncompressed = BP_GET_UCSIZE(bp);
 143 
 144         if (BP_IS_HOLE(bp))
 145                 return (0);
 146 
 147         ASSERT(dmu_tx_is_syncing(tx));
 148         ASSERT(bp->blk_birth <= tx->tx_txg);
 149 
 150         if (ds == NULL) {
 151                 dsl_free(tx->tx_pool, tx->tx_txg, bp);
 152                 dsl_pool_mos_diduse_space(tx->tx_pool,
 153                     -used, -compressed, -uncompressed);
 154                 return (used);
 155         }
 156         ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
 157 
 158         ASSERT(!ds->ds_is_snapshot);
 159         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 160 
 161         if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
 162                 int64_t delta;
 163 
 164                 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
 165                 dsl_free(tx->tx_pool, tx->tx_txg, bp);
 166 
 167                 mutex_enter(&ds->ds_lock);
 168                 ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used ||
 169                     !DS_UNIQUE_IS_ACCURATE(ds));
 170                 delta = parent_delta(ds, -used);
 171                 dsl_dataset_phys(ds)->ds_unique_bytes -= used;
 172                 mutex_exit(&ds->ds_lock);
 173                 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
 174                     delta, -compressed, -uncompressed, tx);
 175                 dsl_dir_transfer_space(ds->ds_dir, -used - delta,
 176                     DD_USED_REFRSRV, DD_USED_HEAD, tx);
 177         } else {
 178                 dprintf_bp(bp, "putting on dead list: %s", "");
 
 236          */
 237         if (ds->ds_trysnap_txg >
 238             spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
 239                 trysnap = ds->ds_trysnap_txg;
 240         return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap));
 241 }
 242 
     /*
      * Report whether a block born in txg blk_birth is newer than
      * everything dsl_dataset_prev_snap_txg(ds) covers.
      *
      * Returns B_FALSE when blk_birth is at or before
      * dsl_dataset_prev_snap_txg(ds), or when bp is non-NULL and is a
      * hole.  Otherwise ddt_prefetch() is called for the block and
      * B_TRUE is returned.
      *
      * bp may be NULL, in which case only the birth txg is tested.
      */
 243 boolean_t
 244 dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
 245     uint64_t blk_birth)
 246 {
 247         if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
 248             (bp != NULL && BP_IS_HOLE(bp)))
 249                 return (B_FALSE);
 250 
             /*
              * NOTE(review): bp is handed to ddt_prefetch() even when it
              * is NULL; presumably ddt_prefetch() tolerates that --
              * confirm.
              */
 251         ddt_prefetch(dsl_dataset_get_spa(ds), bp);
 252 
 253         return (B_TRUE);
 254 }
 255 
     /*
      * dbuf-user eviction callback for a dsl_dataset_t; it is the function
      * registered through dmu_buf_init_user() when the dataset is first
      * instantiated in the hold path.  dbu is used directly as the
      * dsl_dataset_t pointer.
      *
      * Undoes what instantiation set up: drops the fsid guid from the
      * unique table, evicts any attached objset, releases the holds on the
      * previous snapshot and the dsl_dir (both taken with ds as the tag),
      * tears down the deadlists, locks and refcount, then frees the
      * structure.  The dataset must have no owner at this point.
      */
 256 static void
 257 dsl_dataset_evict(void *dbu)
 258 {
 259         dsl_dataset_t *ds = dbu;
 260 
 261         ASSERT(ds->ds_owner == NULL);
 262 
 263         ds->ds_dbuf = NULL;
 264 
 265         unique_remove(ds->ds_fsid_guid);
 266 
 267         if (ds->ds_objset != NULL)
 268                 dmu_objset_evict(ds->ds_objset);
 269 
 270         if (ds->ds_prev) {
 271                 dsl_dataset_rele(ds->ds_prev, ds);
 272                 ds->ds_prev = NULL;
 273         }
 274 
 275         bplist_destroy(&ds->ds_pending_deadlist);
             /* Only close the deadlist if it was actually opened. */
 276         if (ds->ds_deadlist.dl_os != NULL)
 277                 dsl_deadlist_close(&ds->ds_deadlist);
 278         if (ds->ds_dir)
 279                 dsl_dir_async_rele(ds->ds_dir, ds);
 280 
 281         ASSERT(!list_link_active(&ds->ds_synced_link));
 282 
             /*
              * NOTE(review): ds_sendstreams is set up with list_create()
              * when the dataset is instantiated, but there is no matching
              * list_destroy() here -- confirm that is intentional.
              */
 283         mutex_destroy(&ds->ds_lock);
 284         mutex_destroy(&ds->ds_opening_lock);
 285         mutex_destroy(&ds->ds_sendstream_lock);
 286         refcount_destroy(&ds->ds_longholds);
 287 
 288         kmem_free(ds, sizeof (dsl_dataset_t));
 289 }
 290 
 291 int
 292 dsl_dataset_get_snapname(dsl_dataset_t *ds)
 293 {
 294         dsl_dataset_phys_t *headphys;
 295         int err;
 296         dmu_buf_t *headdbuf;
 297         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 298         objset_t *mos = dp->dp_meta_objset;
 299 
 
 373         ASSERT(dsl_pool_config_held(dp));
 374 
 375         err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 376         if (err != 0)
 377                 return (err);
 378 
 379         /* Make sure dsobj has the correct object type. */
 380         dmu_object_info_from_db(dbuf, &doi);
 381         if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
 382                 dmu_buf_rele(dbuf, tag);
 383                 return (SET_ERROR(EINVAL));
 384         }
 385 
 386         ds = dmu_buf_get_user(dbuf);
 387         if (ds == NULL) {
 388                 dsl_dataset_t *winner = NULL;
 389 
 390                 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
 391                 ds->ds_dbuf = dbuf;
 392                 ds->ds_object = dsobj;
 393                 ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
 394 
 395                 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
 396                 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
 397                 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
 398                 refcount_create(&ds->ds_longholds);
 399 
 400                 bplist_create(&ds->ds_pending_deadlist);
 401                 dsl_deadlist_open(&ds->ds_deadlist,
 402                     mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
 403 
 404                 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
 405                     offsetof(dmu_sendarg_t, dsa_link));
 406 
 407                 if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
 408                         err = zap_contains(mos, dsobj, DS_FIELD_LARGE_BLOCKS);
 409                         if (err == 0)
 410                                 ds->ds_large_blocks = B_TRUE;
 411                         else
 412                                 ASSERT3U(err, ==, ENOENT);
 413                 }
 414 
 415                 if (err == 0) {
 416                         err = dsl_dir_hold_obj(dp,
 417                             dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds,
 418                             &ds->ds_dir);
 419                 }
 420                 if (err != 0) {
 421                         mutex_destroy(&ds->ds_lock);
 422                         mutex_destroy(&ds->ds_opening_lock);
 423                         mutex_destroy(&ds->ds_sendstream_lock);
 424                         refcount_destroy(&ds->ds_longholds);
 425                         bplist_destroy(&ds->ds_pending_deadlist);
 426                         dsl_deadlist_close(&ds->ds_deadlist);
 427                         kmem_free(ds, sizeof (dsl_dataset_t));
 428                         dmu_buf_rele(dbuf, tag);
 429                         return (err);
 430                 }
 431 
 432                 if (!ds->ds_is_snapshot) {
 433                         ds->ds_snapname[0] = '\0';
 434                         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
 435                                 err = dsl_dataset_hold_obj(dp,
 436                                     dsl_dataset_phys(ds)->ds_prev_snap_obj,
 437                                     ds, &ds->ds_prev);
 438                         }
 439                         if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
 440                                 int zaperr = zap_lookup(mos, ds->ds_object,
 441                                     DS_FIELD_BOOKMARK_NAMES,
 442                                     sizeof (ds->ds_bookmarks), 1,
 443                                     &ds->ds_bookmarks);
 444                                 if (zaperr != ENOENT)
 445                                         VERIFY0(zaperr);
 446                         }
 447                 } else {
 448                         if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
 449                                 err = dsl_dataset_get_snapname(ds);
 450                         if (err == 0 &&
 451                             dsl_dataset_phys(ds)->ds_userrefs_obj != 0) {
 452                                 err = zap_count(
 453                                     ds->ds_dir->dd_pool->dp_meta_objset,
 454                                     dsl_dataset_phys(ds)->ds_userrefs_obj,
 455                                     &ds->ds_userrefs);
 456                         }
 457                 }
 458 
 459                 if (err == 0 && !ds->ds_is_snapshot) {
 460                         err = dsl_prop_get_int_ds(ds,
 461                             zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
 462                             &ds->ds_reserved);
 463                         if (err == 0) {
 464                                 err = dsl_prop_get_int_ds(ds,
 465                                     zfs_prop_to_name(ZFS_PROP_REFQUOTA),
 466                                     &ds->ds_quota);
 467                         }
 468                 } else {
 469                         ds->ds_reserved = ds->ds_quota = 0;
 470                 }
 471 
 472                 dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
 473                 if (err == 0)
 474                         winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
 475 
 476                 if (err != 0 || winner != NULL) {
 477                         bplist_destroy(&ds->ds_pending_deadlist);
 478                         dsl_deadlist_close(&ds->ds_deadlist);
 479                         if (ds->ds_prev)
 480                                 dsl_dataset_rele(ds->ds_prev, ds);
 481                         dsl_dir_rele(ds->ds_dir, ds);
 482                         mutex_destroy(&ds->ds_lock);
 483                         mutex_destroy(&ds->ds_opening_lock);
 484                         mutex_destroy(&ds->ds_sendstream_lock);
 485                         refcount_destroy(&ds->ds_longholds);
 486                         kmem_free(ds, sizeof (dsl_dataset_t));
 487                         if (err != 0) {
 488                                 dmu_buf_rele(dbuf, tag);
 489                                 return (err);
 490                         }
 491                         ds = winner;
 492                 } else {
 493                         ds->ds_fsid_guid =
 494                             unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
 495                 }
 496         }
 
 836                 dsl_dataset_rele(ds, FTAG);
 837         }
 838 
 839         return (dsobj);
 840 }
 841 
 842 /*
 843  * The unique space in the head dataset can be calculated by subtracting
 844  * the space used in the most recent snapshot, that is still being used
 845  * in this file system, from the space currently in use.  To figure out
 846  * the space in the most recent snapshot still in use, we need to take
 847  * the total space used in the snapshot and subtract out the space that
 848  * has been freed up since the snapshot was taken.
 849  */
 850 void
 851 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
 852 {
 853         uint64_t mrs_used;
 854         uint64_t dlused, dlcomp, dluncomp;
 855 
 856         ASSERT(!ds->ds_is_snapshot);
 857 
             /*
              * mrs_used: bytes referenced by the most recent snapshot, or
              * 0 when this head has no previous snapshot.  ds->ds_prev is
              * dereferenced whenever ds_prev_snap_obj is non-zero, so it
              * must already be populated in that case.
              */
 858         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
 859                 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
 860         else
 861                 mrs_used = 0;
 862 
             /*
              * Only the "used" total of the head's deadlist feeds the
              * calculation; dlcomp and dluncomp are fetched but unused.
              */
 863         dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
 864 
             /*
              * unique = referenced - (snapshot space still in use), where
              * the latter is mrs_used less what is on the deadlist.  The
              * assertion guards the inner unsigned subtraction.
              *
              * NOTE(review): the phys structure is written here without a
              * dmu_buf_will_dirty() call in this function; presumably the
              * caller has already dirtied ds_dbuf -- confirm.
              */
 865         ASSERT3U(dlused, <=, mrs_used);
 866         dsl_dataset_phys(ds)->ds_unique_bytes =
 867             dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused);
 868 
             /* Mark the value trustworthy only on new-enough pools. */
 869         if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
 870             SPA_VERSION_UNIQUE_ACCURATE)
 871                 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
 872 }
 873 
 874 void
 875 dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
 876     dmu_tx_t *tx)
 
1579         nvlist_free(val);
1580         nvlist_free(propval);
1581 }
1582 
1583 void
1584 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1585 {
1586         dsl_pool_t *dp = ds->ds_dir->dd_pool;
1587         uint64_t refd, avail, uobjs, aobjs, ratio;
1588 
1589         ASSERT(dsl_pool_config_held(dp));
1590 
1591         ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 :
1592             (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 /
1593             dsl_dataset_phys(ds)->ds_compressed_bytes);
1594 
1595         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
1596         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
1597             dsl_dataset_phys(ds)->ds_uncompressed_bytes);
1598 
1599         if (ds->ds_is_snapshot) {
1600                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
1601                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1602                     dsl_dataset_phys(ds)->ds_unique_bytes);
1603                 get_clones_stat(ds, nv);
1604         } else {
1605                 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
1606                         char buf[MAXNAMELEN];
1607                         dsl_dataset_name(ds->ds_prev, buf);
1608                         dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
1609                 }
1610 
1611                 dsl_dir_stats(ds->ds_dir, nv);
1612         }
1613 
1614         dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1615         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1616         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1617 
1618         dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1619             dsl_dataset_phys(ds)->ds_creation_time);
 
1647                         dsl_dataset_rele(prev, FTAG);
1648                         if (err == 0) {
1649                                 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
1650                                     written);
1651                         }
1652                 }
1653         }
1654 }
1655 
     /*
      * Fill *stat with the basic identity of ds: creation txg, guid,
      * inconsistent flag, snapshot-or-not, clone count and, for a clone
      * head, the name of its origin snapshot.  dds_origin is left as the
      * empty string in every other case.
      *
      * The pool configuration must be held by the caller.
      */
1656 void
1657 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1658 {
1659         dsl_pool_t *dp = ds->ds_dir->dd_pool;
1660         ASSERT(dsl_pool_config_held(dp));
1661 
1662         stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg;
             /*
              * Stores the masked flag bit itself, not a normalized 0/1
              * value.
              */
1663         stat->dds_inconsistent =
1664             dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
1665         stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
1666         stat->dds_origin[0] = '\0';
1667         if (ds->ds_is_snapshot) {
1668                 stat->dds_is_snapshot = B_TRUE;
                     /*
                      * ds_is_snapshot is derived from ds_num_children
                      * being non-zero, so this subtraction cannot wrap.
                      * NOTE(review): the "- 1" presumably discounts the
                      * snapshot's own successor in its filesystem --
                      * confirm.
                      */
1669                 stat->dds_num_clones =
1670                     dsl_dataset_phys(ds)->ds_num_children - 1;
1671         } else {
1672                 stat->dds_is_snapshot = B_FALSE;
1673                 stat->dds_num_clones = 0;
1674 
1675                 if (dsl_dir_is_clone(ds->ds_dir)) {
1676                         dsl_dataset_t *ods;
1677 
                             /*
                              * Temporarily hold the origin snapshot just
                              * long enough to copy out its name.  The hold
                              * is wrapped in VERIFY0, so failure is not
                              * handled as a recoverable error.
                              */
1678                         VERIFY0(dsl_dataset_hold_obj(dp,
1679                             dsl_dir_phys(ds->ds_dir)->dd_origin_obj,
1680                             FTAG, &ods));
1681                         dsl_dataset_name(ods, stat->dds_origin);
1682                         dsl_dataset_rele(ods, FTAG);
1683                 }
1684         }
1685 }
1686 
1687 uint64_t
 
1907 typedef struct dsl_dataset_rollback_arg {
             /*
              * Argument bundle received by dsl_dataset_rollback_check()
              * through its void *arg parameter.
              */
             /* Name of the dataset to roll back; passed to dsl_dataset_hold(). */
1908         const char *ddra_fsname;
             /*
              * NOTE(review): the two fields below are not referenced in the
              * visible part of the check function.  Presumably ddra_owner is
              * the expected owner tag and ddra_result an nvlist for
              * reporting back to the caller -- confirm.
              */
1909         void *ddra_owner;
1910         nvlist_t *ddra_result;
1911 } dsl_dataset_rollback_arg_t;
1912 
1913 static int
1914 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
1915 {
1916         dsl_dataset_rollback_arg_t *ddra = arg;
1917         dsl_pool_t *dp = dmu_tx_pool(tx);
1918         dsl_dataset_t *ds;
1919         int64_t unused_refres_delta;
1920         int error;
1921 
1922         error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
1923         if (error != 0)
1924                 return (error);
1925 
1926         /* must not be a snapshot */
1927         if (ds->ds_is_snapshot) {
1928                 dsl_dataset_rele(ds, FTAG);
1929                 return (SET_ERROR(EINVAL));
1930         }
1931 
1932         /* must have a most recent snapshot */
1933         if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) {
1934                 dsl_dataset_rele(ds, FTAG);
1935                 return (SET_ERROR(EINVAL));
1936         }
1937 
1938         /* must not have any bookmarks after the most recent snapshot */
1939         nvlist_t *proprequest = fnvlist_alloc();
1940         fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
1941         nvlist_t *bookmarks = fnvlist_alloc();
1942         error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
1943         fnvlist_free(proprequest);
1944         if (error != 0)
1945                 return (error);
1946         for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
1947             pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
 
2479                 list_remove(l, snap);
2480                 dsl_dataset_rele(snap->ds, tag);
2481                 kmem_free(snap, sizeof (*snap));
2482         }
2483         list_destroy(l);
2484 }
2485 
2486 static int
2487 promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
2488 {
2489         int error;
2490         dsl_dir_t *dd;
2491         struct promotenode *snap;
2492 
2493         error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag,
2494             &ddpa->ddpa_clone);
2495         if (error != 0)
2496                 return (error);
2497         dd = ddpa->ddpa_clone->ds_dir;
2498 
2499         if (ddpa->ddpa_clone->ds_is_snapshot ||
2500             !dsl_dir_is_clone(dd)) {
2501                 dsl_dataset_rele(ddpa->ddpa_clone, tag);
2502                 return (SET_ERROR(EINVAL));
2503         }
2504 
2505         error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj,
2506             &ddpa->shared_snaps, tag);
2507         if (error != 0)
2508                 goto out;
2509 
2510         error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object,
2511             &ddpa->clone_snaps, tag);
2512         if (error != 0)
2513                 goto out;
2514 
2515         snap = list_head(&ddpa->shared_snaps);
2516         ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj);
2517         error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj,
2518             dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj,
2519             &ddpa->origin_snaps, tag);
 
2571         dmu_objset_rele(os, FTAG);
2572         if (error != 0)
2573                 return (error);
2574 
2575         ddpa.ddpa_clonename = name;
2576         ddpa.err_ds = conflsnap;
2577         ddpa.cr = CRED();
2578 
2579         return (dsl_sync_task(name, dsl_dataset_promote_check,
2580             dsl_dataset_promote_sync, &ddpa,
2581             2 + numsnaps, ZFS_SPACE_CHECK_RESERVED));
2582 }
2583 
2584 int
2585 dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
2586     dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
2587 {
2588         int64_t unused_refres_delta;
2589 
2590         /* they should both be heads */
2591         if (clone->ds_is_snapshot ||
2592             origin_head->ds_is_snapshot)
2593                 return (SET_ERROR(EINVAL));
2594 
2595         /* if we are not forcing, the branch point should be just before them */
2596         if (!force && clone->ds_prev != origin_head->ds_prev)
2597                 return (SET_ERROR(EINVAL));
2598 
2599         /* clone should be the clone (unless they are unrelated) */
2600         if (clone->ds_prev != NULL &&
2601             clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
2602             origin_head->ds_dir != clone->ds_prev->ds_dir)
2603                 return (SET_ERROR(EINVAL));
2604 
2605         /* the clone should be a child of the origin */
2606         if (clone->ds_dir->dd_parent != origin_head->ds_dir)
2607                 return (SET_ERROR(EINVAL));
2608 
2609         /* origin_head shouldn't be modified unless 'force' */
2610         if (!force &&
2611             dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))
2612                 return (SET_ERROR(ETXTBSY));
 
2851 } dsl_dataset_set_qr_arg_t;
2852 
2853 
2854 /* ARGSUSED */
2855 static int
2856 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
2857 {
2858         dsl_dataset_set_qr_arg_t *ddsqra = arg;
2859         dsl_pool_t *dp = dmu_tx_pool(tx);
2860         dsl_dataset_t *ds;
2861         int error;
2862         uint64_t newval;
2863 
2864         if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA)
2865                 return (SET_ERROR(ENOTSUP));
2866 
2867         error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2868         if (error != 0)
2869                 return (error);
2870 
2871         if (ds->ds_is_snapshot) {
2872                 dsl_dataset_rele(ds, FTAG);
2873                 return (SET_ERROR(EINVAL));
2874         }
2875 
2876         error = dsl_prop_predict(ds->ds_dir,
2877             zfs_prop_to_name(ZFS_PROP_REFQUOTA),
2878             ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2879         if (error != 0) {
2880                 dsl_dataset_rele(ds, FTAG);
2881                 return (error);
2882         }
2883 
2884         if (newval == 0) {
2885                 dsl_dataset_rele(ds, FTAG);
2886                 return (0);
2887         }
2888 
2889         if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes ||
2890             newval < ds->ds_reserved) {
2891                 dsl_dataset_rele(ds, FTAG);
 
2934         return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check,
2935             dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
2936 }
2937 
2938 static int
2939 dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
2940 {
2941         dsl_dataset_set_qr_arg_t *ddsqra = arg;
2942         dsl_pool_t *dp = dmu_tx_pool(tx);
2943         dsl_dataset_t *ds;
2944         int error;
2945         uint64_t newval, unique;
2946 
2947         if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION)
2948                 return (SET_ERROR(ENOTSUP));
2949 
2950         error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
2951         if (error != 0)
2952                 return (error);
2953 
2954         if (ds->ds_is_snapshot) {
2955                 dsl_dataset_rele(ds, FTAG);
2956                 return (SET_ERROR(EINVAL));
2957         }
2958 
2959         error = dsl_prop_predict(ds->ds_dir,
2960             zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
2961             ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
2962         if (error != 0) {
2963                 dsl_dataset_rele(ds, FTAG);
2964                 return (error);
2965         }
2966 
2967         /*
2968          * If we are doing the preliminary check in open context, the
2969          * space estimates may be inaccurate.
2970          */
2971         if (!dmu_tx_is_syncing(tx)) {
2972                 dsl_dataset_rele(ds, FTAG);
2973                 return (0);
2974         }
 
3148  * blocks that would be freed            [---------------------------]
3149  * snapshots                       ---O-------O--------O-------O--------O
3150  *                                        firstsnap        lastsnap
3151  *
3152  * This is the set of blocks that were born after the snap before firstsnap,
3153  * (birth > firstsnap->prev_snap_txg) and died before the snap after the
3154  * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
3155  * We calculate this by iterating over the relevant deadlists (from the snap
3156  * after lastsnap, backward to the snap after firstsnap), summing up the
3157  * space on the deadlist that was born after the snap before firstsnap.
3158  */
3159 int
3160 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
3161     dsl_dataset_t *lastsnap,
3162     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
3163 {
3164         int err = 0;
3165         uint64_t snapobj;
3166         dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
3167 
3168         ASSERT(firstsnap->ds_is_snapshot);
3169         ASSERT(lastsnap->ds_is_snapshot);
3170 
3171         /*
3172          * Check that the snapshots are in the same dsl_dir, and firstsnap
3173          * is before lastsnap.
3174          */
3175         if (firstsnap->ds_dir != lastsnap->ds_dir ||
3176             dsl_dataset_phys(firstsnap)->ds_creation_txg >
3177             dsl_dataset_phys(lastsnap)->ds_creation_txg)
3178                 return (SET_ERROR(EINVAL));
3179 
3180         *usedp = *compp = *uncompp = 0;
3181 
3182         snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj;
3183         while (snapobj != firstsnap->ds_object) {
3184                 dsl_dataset_t *ds;
3185                 uint64_t used, comp, uncomp;
3186 
3187                 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
3188                 if (err != 0)
3189                         break;
 
3274 }
3275 
3276 /*
3277  * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
3278  * For example, they could both be snapshots of the same filesystem, and
3279  * 'earlier' is before 'later'.  Or 'earlier' could be the origin of
3280  * 'later's filesystem.  Or 'earlier' could be an older snapshot in the origin's
3281  * filesystem.  Or 'earlier' could be the origin's origin.
3282  *
3283  * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg.
3284  */
3285 boolean_t
3286 dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
3287         uint64_t earlier_txg)
3288 {
3289         dsl_pool_t *dp = later->ds_dir->dd_pool;
3290         int error;
3291         boolean_t ret;
3292 
3293         ASSERT(dsl_pool_config_held(dp));
             /* A non-snapshot 'earlier' is only allowed with an explicit txg. */
3294         ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);
3295 
3296         if (earlier_txg == 0)
3297                 earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
3298 
             /*
              * A snapshot cannot come after anything created in the same
              * or a later txg.  This test is skipped when 'later' is a
              * head dataset.
              */
3299         if (later->ds_is_snapshot &&
3300             earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
3301                 return (B_FALSE);
3302 
             /* Same dsl_dir: the txg comparison above already decided it. */
3303         if (later->ds_dir == earlier->ds_dir)
3304                 return (B_TRUE);
             /* Different dsl_dir and no origin to follow: unrelated. */
3305         if (!dsl_dir_is_clone(later->ds_dir))
3306                 return (B_FALSE);
3307 
3308         if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object)
3309                 return (B_TRUE);
             /*
              * Otherwise step to the origin snapshot and repeat the test
              * from there, carrying the resolved earlier_txg along.  A
              * failure to hold the origin is reported as "not before"
              * rather than as an error.
              */
3310         dsl_dataset_t *origin;
3311         error = dsl_dataset_hold_obj(dp,
3312             dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);
3313         if (error != 0)
3314                 return (B_FALSE);
3315         ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
3316         dsl_dataset_rele(origin, FTAG);
3317         return (ret);
3318 }
3319 
 |