1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  25  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  26  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  27  * Copyright (c) 2014 Integros [integros.com]
  28  */
  29 
  30 #ifndef _SYS_DSL_DATASET_H
  31 #define _SYS_DSL_DATASET_H
  32 
  33 #include <sys/dmu.h>
  34 #include <sys/spa.h>
  35 #include <sys/txg.h>
  36 #include <sys/zio.h>
  37 #include <sys/bplist.h>
  38 #include <sys/dsl_synctask.h>
  39 #include <sys/zfs_context.h>
  40 #include <sys/dsl_deadlist.h>
  41 #include <sys/refcount.h>
  42 #include <sys/rrwlock.h>
  43 #include <zfeature_common.h>
  44 
  45 #ifdef  __cplusplus
  46 extern "C" {
  47 #endif
  48 
  49 struct dsl_dataset;     /* completed below as dsl_dataset_t */
  50 struct dsl_dir;         /* used only through pointers in this header */
  51 struct dsl_pool;        /* used only through pointers in this header */
  52 
  53 #define DS_FLAG_INCONSISTENT    (1ULL<<0)
     /*
      * Evaluates to a nonzero value when DS_FLAG_INCONSISTENT is set in the
      * ds_flags of the dataset's phys structure.  The result is the masked
      * bit itself rather than a value normalized to 0 or 1.
      */
  54 #define DS_IS_INCONSISTENT(ds)  \
  55         (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT)
  56 
  57 /*
  58  * Do not allow this dataset to be promoted.
  59  */
  60 #define DS_FLAG_NOPROMOTE       (1ULL<<1)
  61 
  62 /*
  63  * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
  64  * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
  65  * refquota/refreservations).
  66  */
  67 #define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
  68 
  69 /*
  70  * DS_FLAG_DEFER_DESTROY is set after 'zfs destroy -d' has been called
  71  * on a dataset. This allows the dataset to be destroyed using 'zfs release'.
  72  */
  73 #define DS_FLAG_DEFER_DESTROY   (1ULL<<3)
     /*
      * Nonzero when DS_FLAG_DEFER_DESTROY is set in the phys ds_flags; like
      * DS_IS_INCONSISTENT() this yields the masked bit, not 0 or 1.
      */
  74 #define DS_IS_DEFER_DESTROY(ds) \
  75         (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_DEFER_DESTROY)
  76 
  77 /*
  78  * DS_FIELD_* are strings that are used in the "extensified" dataset zap object.
  79  * They should be of the format <reverse-dns>:<field>.
  80  */
  81 
  82 /*
  83  * This field's value is the object ID of a zap object which contains the
  84  * bookmarks of this dataset.  If it is present, then this dataset is counted
  85  * in the refcount of the SPA_FEATURE_BOOKMARKS feature.
  86  */
  87 #define DS_FIELD_BOOKMARK_NAMES "com.delphix:bookmarks"
  88 
  89 /*
  90  * These fields are set on datasets that are in the middle of a resumable
  91  * receive, and allow the sender to resume the send if it is interrupted.
      *
      * Note that DS_FIELD_RESUME_LARGEBLOCK maps to the string
      * "...resume_largeblockok": the macro name omits the "OK" suffix that
      * its value and the sibling EMBEDOK/COMPRESSOK macros carry.
  92  */
  93 #define DS_FIELD_RESUME_FROMGUID "com.delphix:resume_fromguid"
  94 #define DS_FIELD_RESUME_TONAME "com.delphix:resume_toname"
  95 #define DS_FIELD_RESUME_TOGUID "com.delphix:resume_toguid"
  96 #define DS_FIELD_RESUME_OBJECT "com.delphix:resume_object"
  97 #define DS_FIELD_RESUME_OFFSET "com.delphix:resume_offset"
  98 #define DS_FIELD_RESUME_BYTES "com.delphix:resume_bytes"
  99 #define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
 100 #define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
 101 #define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
 102 
 103 /*
 104  * This field is set to the object number of the remap deadlist if one exists.
 105  */
 106 #define DS_FIELD_REMAP_DEADLIST "com.delphix:remap_deadlist"
 107 
 108 /*
 109  * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
 110  * name lookups should be performed case-insensitively.
 111  */
 112 #define DS_FLAG_CI_DATASET      (1ULL<<16)
 113 
     /*
      * NOTE(review): this header does not describe DS_CREATE_FLAG_NODIRTY.
      * Presumably it is passed in the "flags" argument of
      * dsl_dataset_create_sync()/dsl_dataset_create_sync_dd() -- confirm.
      * Bit 24 does not collide with the DS_FLAG_* bits defined in this
      * header (bits 0-3 and 16).
      */
 114 #define DS_CREATE_FLAG_NODIRTY  (1ULL<<24)
 115 
 116 typedef struct dsl_dataset_phys {
             /*
              * Layout reached through dsl_dataset_phys(), i.e. the data of
              * the dataset's ds_dbuf.  Apart from ds_bp the members are 24
              * uint64_t words (192 bytes); the 320-byte total stated on
              * ds_pad therefore implies a 128-byte blkptr_t -- TODO confirm.
              * NOTE(review): this looks like a persistent format, so member
              * order and sizes presumably must not change -- confirm.
              */
 117         uint64_t ds_dir_obj;            /* DMU_OT_DSL_DIR */
 118         uint64_t ds_prev_snap_obj;      /* DMU_OT_DSL_DATASET */
 119         uint64_t ds_prev_snap_txg;
 120         uint64_t ds_next_snap_obj;      /* DMU_OT_DSL_DATASET */
 121         uint64_t ds_snapnames_zapobj;   /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */
 122         uint64_t ds_num_children;       /* clone/snap children; ==0 for head */
 123         uint64_t ds_creation_time;      /* seconds since 1970 */
 124         uint64_t ds_creation_txg;
 125         uint64_t ds_deadlist_obj;       /* DMU_OT_DEADLIST */
 126         /*
 127          * ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes
 128          * include all blocks referenced by this dataset, including those
 129          * shared with any other datasets.
 130          */
 131         uint64_t ds_referenced_bytes;
 132         uint64_t ds_compressed_bytes;
 133         uint64_t ds_uncompressed_bytes;
 134         uint64_t ds_unique_bytes;       /* only relevant to snapshots */
 135         /*
 136          * The ds_fsid_guid is a 56-bit ID that can change to avoid
 137          * collisions.  The ds_guid is a 64-bit ID that will never
 138          * change, so there is a small probability that it will collide.
 139          */
 140         uint64_t ds_fsid_guid;
 141         uint64_t ds_guid;
 142         uint64_t ds_flags;              /* DS_FLAG_* */
 143         blkptr_t ds_bp;         /* guarded by dsl_dataset_t's ds_bp_rwlock */
 144         uint64_t ds_next_clones_obj;    /* DMU_OT_DSL_CLONES */
 145         uint64_t ds_props_obj;          /* DMU_OT_DSL_PROPS for snaps */
 146         uint64_t ds_userrefs_obj;       /* DMU_OT_USERREFS */
 147         uint64_t ds_pad[5]; /* pad out to 320 bytes for good measure */
 148 } dsl_dataset_phys_t;
 149 
 150 typedef struct dsl_dataset {
             /*
              * Run-time state for one dataset: it carries locks, list
              * linkage and pointers, and refers to the phys structure
              * through ds_dbuf (see dsl_dataset_phys()).  The comments
              * below group the members by the locking rule that applies.
              */
 151         dmu_buf_user_t ds_dbu;
 152         rrwlock_t ds_bp_rwlock; /* Protects ds_phys->ds_bp */
 153 
 154         /* Immutable: */
 155         struct dsl_dir *ds_dir;
 156         dmu_buf_t *ds_dbuf;     /* db_data is the dsl_dataset_phys_t */
 157         uint64_t ds_object;
 158         uint64_t ds_fsid_guid;
 159         boolean_t ds_is_snapshot;
 160 
 161         /* only used in syncing context, only valid for non-snapshots: */
 162         struct dsl_dataset *ds_prev;
 163         uint64_t ds_bookmarks;  /* DMU_OTN_ZAP_METADATA */
 164 
 165         /* has internal locking: */
 166         dsl_deadlist_t ds_deadlist;
 167         bplist_t ds_pending_deadlist;
 168 
 169         /*
 170          * The remap deadlist contains blocks (DVA's, really) that are
 171          * referenced by the previous snapshot and point to indirect vdevs,
 172          * but in this dataset they have been remapped to point to concrete
 173          * (or at least, less-indirect) vdevs.  In other words, the
 174          * physical DVA is referenced by the previous snapshot but not by
 175          * this dataset.  Logically, the DVA continues to be referenced,
 176          * but we are using a different (less indirect) physical DVA.
 177          * This deadlist is used to determine when physical DVAs that
 178          * point to indirect vdevs are no longer referenced anywhere,
 179          * and thus should be marked obsolete.
 180          *
 181          * This is only used if SPA_FEATURE_OBSOLETE_COUNTS is enabled.
 182          */
 183         dsl_deadlist_t ds_remap_deadlist;
 184         /* protects creation of the ds_remap_deadlist */
 185         kmutex_t ds_remap_deadlist_lock;
 186 
 187         /* protected by lock on pool's dp_dirty_datasets list */
 188         txg_node_t ds_dirty_link;
 189         list_node_t ds_synced_link;
 190 
 191         /*
 192          * ds_phys->ds_<accounting> is also protected by ds_lock.
 193          * Protected by ds_lock:
 194          */
 195         kmutex_t ds_lock;
 196         objset_t *ds_objset;
 197         uint64_t ds_userrefs;
 198         void *ds_owner;
 199 
 200         /*
 201          * Long holds prevent the ds from being destroyed; they allow the
 202          * ds to remain held even after dropping the dp_config_rwlock.
 203          * Owning counts as a long hold.  See the comments above
 204          * dsl_pool_hold() for details.
 205          */
 206         refcount_t ds_longholds;
 207 
 208         /* no locking; only for making guesses */
 209         uint64_t ds_trysnap_txg;
 210 
 211         /* for objset_open() */
 212         kmutex_t ds_opening_lock;
 213 
 214         uint64_t ds_reserved;   /* cached refreservation */
 215         uint64_t ds_quota;      /* cached refquota */
 216 
 217         kmutex_t ds_sendstream_lock;
 218         list_t ds_sendstreams;
 219 
 220         /*
 221          * When in the middle of a resumable receive, tracks how much
 222          * progress we have made.
              *
              * Each array has TXG_SIZE slots.
              * NOTE(review): presumably indexed by (txg & TXG_MASK) --
              * confirm against the users of these members.
 223          */
 224         uint64_t ds_resume_object[TXG_SIZE];
 225         uint64_t ds_resume_offset[TXG_SIZE];
 226         uint64_t ds_resume_bytes[TXG_SIZE];
 227 
 228         /* Protected by our dsl_dir's dd_lock */
 229         list_t ds_prop_cbs;
 230 
 231         /*
 232          * For ZFEATURE_FLAG_PER_DATASET features, set if this dataset
 233          * uses this feature.
              * One entry per feature (SPA_FEATURES entries).
              * NOTE(review): presumably indexed by spa_feature_t -- confirm.
 234          */
 235         uint8_t ds_feature_inuse[SPA_FEATURES];
 236 
 237         /*
 238          * Set if we need to activate the feature on this dataset this txg
 239          * (used only in syncing context).
 240          */
 241         uint8_t ds_feature_activation_needed[SPA_FEATURES];
 242 
 243         /* Protected by ds_lock; keep at end of struct for better locality */
 244         char ds_snapname[ZFS_MAX_DATASET_NAME_LEN];
 245 } dsl_dataset_t;
 246 
 247 static inline dsl_dataset_phys_t *
 248 dsl_dataset_phys(dsl_dataset_t *ds)
 249 {
             /*
              * Return the dataset's phys structure, which is the data pointer
              * of its ds_dbuf buffer.  ds and ds->ds_dbuf must be valid; no
              * checks are made here.
              *
              * Declared static inline so that every file including this
              * header gets a usable definition by itself.  A plain C99
              * "inline" definition in a header additionally needs exactly one
              * external definition elsewhere, or calls that the compiler does
              * not inline fail to link.
              *
              * The cast is explicit so that the header also compiles as C++
              * (see the extern "C" guard), where void * does not convert
              * implicitly.
              * NOTE(review): assumes db_data is declared void * -- confirm.
              */
 250         return ((dsl_dataset_phys_t *)ds->ds_dbuf->db_data);
 251 }
 252 
 253 typedef struct dsl_dataset_promote_arg {
             /*
              * Argument bundle for the promote operation.
              * NOTE(review): presumably the "void *arg" handed to
              * dsl_dataset_promote_check()/dsl_dataset_promote_sync() --
              * confirm.  Only the first two members carry the ddpa_ prefix.
              */
 254         const char *ddpa_clonename;
 255         dsl_dataset_t *ddpa_clone;
 256         list_t shared_snaps, origin_snaps, clone_snaps;
 257         dsl_dataset_t *origin_origin; /* origin of the origin */
 258         uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
 259         nvlist_t *err_ds;
 260         cred_t *cr;
 261 } dsl_dataset_promote_arg_t;
 262 
 263 typedef struct dsl_dataset_rollback_arg {
             /*
              * Argument bundle for rollback.  The four members correspond
              * one-to-one to the parameters of dsl_dataset_rollback()
              * (fsname, tosnap, owner, result).
              * NOTE(review): presumably the "void *arg" handed to
              * dsl_dataset_rollback_check()/dsl_dataset_rollback_sync() --
              * confirm.
              */
 264         const char *ddra_fsname;
 265         const char *ddra_tosnap;
 266         void *ddra_owner;
 267         nvlist_t *ddra_result;
 268 } dsl_dataset_rollback_arg_t;
 269 
 270 typedef struct dsl_dataset_snapshot_arg {
             /*
              * Argument bundle for snapshot creation.  ddsa_snaps,
              * ddsa_props and ddsa_errors correspond to the parameters of
              * dsl_dataset_snapshot(); ddsa_cr has no counterpart in that
              * signature.
              * NOTE(review): presumably the "void *arg" handed to
              * dsl_dataset_snapshot_check()/dsl_dataset_snapshot_sync() --
              * confirm.
              */
 271         nvlist_t *ddsa_snaps;
 272         nvlist_t *ddsa_props;
 273         nvlist_t *ddsa_errors;
 274         cred_t *ddsa_cr;
 275 } dsl_dataset_snapshot_arg_t;
 276 
 277 /*
 278  * The max length of a temporary tag prefix is the number of hex digits
 279  * required to express UINT64_MAX plus one for the hyphen.
      * That is 16 hex digits + 1 = 17.
 280  */
 281 #define MAX_TAG_PREFIX_LEN      17
 282 
     /*
      * True when the phys ds_num_children is nonzero (that member is
      * documented as ==0 for head datasets).  Note that this reads the phys
      * structure rather than the ds_is_snapshot member of dsl_dataset_t.
      */
 283 #define dsl_dataset_is_snapshot(ds) \
 284         (dsl_dataset_phys(ds)->ds_num_children != 0)
 285 
     /*
      * 1 when DS_FLAG_UNIQUE_ACCURATE is set in the phys ds_flags, else 0.
      */
 286 #define DS_UNIQUE_IS_ACCURATE(ds)       \
 287         ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
 288 
 289 int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
 290     dsl_dataset_t **dsp);
     /*
      * Hold/own entry points.  Each takes an opaque "void *tag"; the by-name
      * and by-object variants hand the dataset back through *dsp.
      * NOTE(review): presumably the same tag must be given to the matching
      * dsl_dataset_rele()/dsl_dataset_disown(), and the int results are
      * presumably 0 or an errno value -- confirm in the implementation.
      */
 291 boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
 292     void *tag);
 293 int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
 294     dsl_dataset_t **dsp);
 295 void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
 296 int dsl_dataset_own(struct dsl_pool *dp, const char *name,
 297     void *tag, dsl_dataset_t **dsp);
 298 int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
 299     void *tag, dsl_dataset_t **dsp);
 300 void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
     /*
      * NOTE(review): dsl_dataset_name() takes no buffer length; dprintf_ds()
      * in this header passes a ZFS_MAX_DATASET_NAME_LEN byte buffer --
      * confirm that this is the required size.
      */
 301 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
 302 boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
 303 int dsl_dataset_namelen(dsl_dataset_t *ds);
 304 boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
 305 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
 306     dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx);
 307 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
 308     uint64_t flags, dmu_tx_t *tx);
     /*
      * The *_check/*_sync pairs below share the (void *arg, dmu_tx_t *tx)
      * signature.
      * NOTE(review): presumably arg points to the dsl_dataset_snapshot_arg_t
      * or dsl_dataset_promote_arg_t defined in this header -- confirm.
      */
 309 void dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx);
 310 int dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx);
 311 int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
 312 void dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx);
 313 int dsl_dataset_promote_check(void *arg, dmu_tx_t *tx);
 314 int dsl_dataset_promote(const char *name, char *conflsnap);
 315 int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
 316     boolean_t force);
 317 int dsl_dataset_rename_snapshot(const char *fsname,
 318     const char *oldsnapname, const char *newsnapname, boolean_t recursive);
 319 int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
 320     minor_t cleanup_minor, const char *htag);
 321 
 322 blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
     /*
      * NOTE(review): presumably returns the address of the phys ds_bp, which
      * dsl_dataset_t documents as protected by ds_bp_rwlock -- confirm what
      * the caller has to hold.
      */
 323 
 324 spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
 325 
 326 boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds,
 327     dsl_dataset_t *snap);
 328 
     /*
      * In the two prototypes below the parameter is named "os" although its
      * type is dsl_dataset_t *, not objset_t *.
      */
 329 void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
 330 void dsl_dataset_sync_done(dsl_dataset_t *os, dmu_tx_t *tx);
 331 
     /*
      * Block accounting hooks; each takes the transaction it runs in.
      * NOTE(review): the meaning of dsl_dataset_block_kill()'s int result is
      * not stated in this header -- check the implementation.
      */
 332 void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
 333     dmu_tx_t *tx);
 334 int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
 335     dmu_tx_t *tx, boolean_t async);
 336 void dsl_dataset_block_remapped(dsl_dataset_t *ds, uint64_t vdev,
 337     uint64_t offset, uint64_t size, uint64_t birth, dmu_tx_t *tx);
 338 
 339 void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
 340 
 341 int get_clones_stat_impl(dsl_dataset_t *ds, nvlist_t *val);
     /*
      * NOTE(review): ownership of the char * results of the next two
      * functions is not stated here; presumably the caller must free them --
      * confirm in the implementation.
      */
 342 char *get_receive_resume_stats_impl(dsl_dataset_t *ds);
 343 char *get_child_receive_stats(dsl_dataset_t *ds);
     /*
      * Per-property getters.  Those returning uint64_t take only the
      * dataset.  dsl_get_written(), dsl_get_prev_snap() and
      * dsl_get_mountpoint() return int and take non-const pointer/buffer
      * arguments, apparently for the result.
      * NOTE(review): the int is presumably 0 or an errno value, and the
      * required sizes of the char buffers are not stated here -- confirm.
      */
 344 uint64_t dsl_get_refratio(dsl_dataset_t *ds);
 345 uint64_t dsl_get_logicalreferenced(dsl_dataset_t *ds);
 346 uint64_t dsl_get_compressratio(dsl_dataset_t *ds);
 347 uint64_t dsl_get_used(dsl_dataset_t *ds);
 348 uint64_t dsl_get_creation(dsl_dataset_t *ds);
 349 uint64_t dsl_get_creationtxg(dsl_dataset_t *ds);
 350 uint64_t dsl_get_refquota(dsl_dataset_t *ds);
 351 uint64_t dsl_get_refreservation(dsl_dataset_t *ds);
 352 uint64_t dsl_get_guid(dsl_dataset_t *ds);
 353 uint64_t dsl_get_unique(dsl_dataset_t *ds);
 354 uint64_t dsl_get_objsetid(dsl_dataset_t *ds);
 355 uint64_t dsl_get_userrefs(dsl_dataset_t *ds);
 356 uint64_t dsl_get_defer_destroy(dsl_dataset_t *ds);
 357 uint64_t dsl_get_referenced(dsl_dataset_t *ds);
 358 uint64_t dsl_get_numclones(dsl_dataset_t *ds);
 359 uint64_t dsl_get_inconsistent(dsl_dataset_t *ds);
 360 uint64_t dsl_get_available(dsl_dataset_t *ds);
 361 int dsl_get_written(dsl_dataset_t *ds, uint64_t *written);
 362 int dsl_get_prev_snap(dsl_dataset_t *ds, char *snap);
 363 int dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value,
 364     char *source);
 365 
 366 void get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv);
 367 
     /* The parameter is named "os" although its type is dsl_dataset_t *. */
 368 void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
 369 
 370 void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
     /*
      * Space queries.  The uint64_t * arguments are non-const pointers,
      * apparently for the results.
      * NOTE(review): whether any of them may be NULL is not stated here --
      * confirm in the implementation.
      */
 371 void dsl_dataset_space(dsl_dataset_t *ds,
 372     uint64_t *refdbytesp, uint64_t *availbytesp,
 373     uint64_t *usedobjsp, uint64_t *availobjsp);
 374 uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
     /*
      * The second parameter is named "newds" rather than "new": "new" is a
      * C++ keyword and this header is wrapped in extern "C" for C++ users.
      * Prototype parameter names do not affect callers or the definition.
      */
 375 int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *newds,
 376     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
 377 int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
 378     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
 379 boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds);
 380 
 381 int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
     /*
      * NOTE(review): no length is passed for buf above; the required buffer
      * size is not stated in this header -- confirm in the implementation.
      */
 382 
     /*
      * Reference quota / reservation interfaces.  The two setters take the
      * dataset by name together with a zprop_source_t and the new value.
      */
 383 int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
 384     uint64_t asize, uint64_t inflight, uint64_t *used,
 385     uint64_t *ref_rsrv);
 386 int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
 387     uint64_t quota);
 388 int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
 389     uint64_t reservation);
 390 
 391 boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
 392     uint64_t earlier_txg);
     /*
      * Long holds: see the ds_longholds member of dsl_dataset_t, which
      * documents that they prevent the dataset from being destroyed.
      */
 393 void dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag);
 394 void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag);
 395 boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);
 396 
 397 int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
 398     dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx);
     /*
      * The *_impl functions in this group all take an explicit dmu_tx_t.
      * NOTE(review): presumably they are meant to be called from within
      * other check/sync callbacks -- confirm.
      */
 399 void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
 400     dsl_dataset_t *origin_head, dmu_tx_t *tx);
 401 int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
 402     dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr);
 403 void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
 404     dmu_tx_t *tx);
 405 
 406 void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
 407     dmu_tx_t *tx);
 408 void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds);
     /*
      * NOTE(review): dsl_dataset_get_snapname() has no output argument;
      * presumably it fills ds->ds_snapname -- confirm.
      */
 409 int dsl_dataset_get_snapname(dsl_dataset_t *ds);
 410 int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
 411     uint64_t *value);
 412 int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
 413     boolean_t adj_cnt);
 414 void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
 415     zprop_source_t source, uint64_t value, dmu_tx_t *tx);
 416 void dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx);
 417 boolean_t dsl_dataset_is_zapified(dsl_dataset_t *ds);
 418 boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
 419 
     /*
      * Rollback.  The parameters of dsl_dataset_rollback() correspond
      * one-to-one to the members of dsl_dataset_rollback_arg_t.
      * NOTE(review): presumably that structure is the "arg" of the
      * check/sync pair -- confirm.
      */
 420 int dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx);
 421 void dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx);
 422 int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
 423     nvlist_t *result);
 424 
     /*
      * Remap deadlist management; see the ds_remap_deadlist member of
      * dsl_dataset_t and DS_FIELD_REMAP_DEADLIST.
      */
 425 uint64_t dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds);
 426 void dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx);
 427 boolean_t dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds);
 428 void dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx);
 429 
     /*
      * Unlike most functions in this header this one identifies the dataset
      * by object number (dsobj), not by dsl_dataset_t pointer.
      */
 430 void dsl_dataset_deactivate_feature(uint64_t dsobj,
 431     spa_feature_t f, dmu_tx_t *tx);
 432 
 433 #ifdef ZFS_DEBUG
     /*
      * dprintf_ds(ds, fmt, ...): debug print prefixed with the dataset name.
      *
      * When the ZFS_DEBUG_DPRINTF bit is set in zfs_flags, a
      * ZFS_MAX_DATASET_NAME_LEN byte buffer is allocated with KM_SLEEP,
      * filled by dsl_dataset_name(), printed through dprintf() as
      * "ds=<name> " followed by the caller's format, and freed again.
      *
      * fmt must be a string literal because it is concatenated with
      * "ds=%s ", and at least one variadic argument has to be supplied
      * because __VA_ARGS__ follows a comma.  Without ZFS_DEBUG the macro
      * expands to nothing, so its arguments are not evaluated.
      */
 434 #define dprintf_ds(ds, fmt, ...) do { \
 435         if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
 436         char *__ds_name = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); \
 437         dsl_dataset_name(ds, __ds_name); \
 438         dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
 439         kmem_free(__ds_name, ZFS_MAX_DATASET_NAME_LEN); \
 440         } \
 441 _NOTE(CONSTCOND) } while (0)
 442 #else
 443 #define dprintf_ds(ds, fmt, ...)
 444 #endif
 445 
 446 #ifdef  __cplusplus
 447 }
 448 #endif
 449 
 450 #endif /* _SYS_DSL_DATASET_H */