Print this page
NEX-6855 System fails to boot up after a large number of datasets created
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-9301 BAD Trap: Double Fault panic on zfs destroy snapshot
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-7641 Impossible to remove special vdev from pool if WBC-ed dataset was removed before disabling WBC
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-5795 Rename 'wrc' as 'wbc' in the source and in the tech docs
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
2605 want to resume interrupted zfs send
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Reviewed by: Arne Jansen <sensille@gmx.net>
Approved by: Dan McDonald <danmcd@omniti.com>
6047 SPARC boot should support feature@embedded_data
Reviewed by: Igor Kozhukhov <ikozhukhov@gmail.com>
Approved by: Dan McDonald <danmcd@omniti.com>
5959 clean up per-dataset feature count code
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Alex Reece <alex@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-4582 update wrc test cases for allow to use write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
NEX-4476 WRC: Allow to use write back cache per tree of datasets
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
Revert "NEX-4476 WRC: Allow to use write back cache per tree of datasets"
This reverts commit fe97b74444278a6f36fec93179133641296312da.
NEX-4476 WRC: Allow to use write back cache per tree of datasets
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
NEX-3964 It should not be allowed to rename a snapshot that its new name is matched to the prefix of in-kernel autosnapshots (lint)
NEX-3964 It should not be allowed to rename a snapshot that its new name is matched to the prefix of in-kernel autosnapshots
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3558 KRRP Integration
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
@@ -18,16 +18,18 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2013 by Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
+#include <sys/autosnap.h>
#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_destroy.h>
@@ -40,10 +42,11 @@
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
+#include <sys/wbc.h>
#include <sys/zcp.h>
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
@@ -182,81 +185,50 @@
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
- zap_cursor_t zc;
- zap_attribute_t za;
+ zap_cursor_t *zc;
+ zap_attribute_t *za;
/*
* If it is the old version, dd_clones doesn't exist so we can't
* find the clones, but dsl_deadlist_remove_key() is a no-op so it
* doesn't matter.
*/
if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0)
return;
+ zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
+ za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
- for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
- zap_cursor_retrieve(&zc, &za) == 0;
- zap_cursor_advance(&zc)) {
+ for (zap_cursor_init(zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
+ zap_cursor_retrieve(zc, za) == 0;
+ zap_cursor_advance(zc)) {
dsl_dataset_t *clone;
VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
- za.za_first_integer, FTAG, &clone));
+ za->za_first_integer, FTAG, &clone));
if (clone->ds_dir->dd_origin_txg > mintxg) {
dsl_deadlist_remove_key(&clone->ds_deadlist,
mintxg, tx);
- if (dsl_dataset_remap_deadlist_exists(clone)) {
- dsl_deadlist_remove_key(
- &clone->ds_remap_deadlist, mintxg, tx);
- }
dsl_dataset_remove_clones_key(clone, mintxg, tx);
}
dsl_dataset_rele(clone, FTAG);
}
- zap_cursor_fini(&zc);
+ zap_cursor_fini(zc);
+ kmem_free(zc, sizeof (zap_cursor_t));
+ kmem_free(za, sizeof (zap_attribute_t));
}
-static void
-dsl_destroy_snapshot_handle_remaps(dsl_dataset_t *ds, dsl_dataset_t *ds_next,
- dmu_tx_t *tx)
-{
- dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
- /* Move blocks to be obsoleted to pool's obsolete list. */
- if (dsl_dataset_remap_deadlist_exists(ds_next)) {
- if (!bpobj_is_open(&dp->dp_obsolete_bpobj))
- dsl_pool_create_obsolete_bpobj(dp, tx);
-
- dsl_deadlist_move_bpobj(&ds_next->ds_remap_deadlist,
- &dp->dp_obsolete_bpobj,
- dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
- }
-
- /* Merge our deadlist into next's and free it. */
- if (dsl_dataset_remap_deadlist_exists(ds)) {
- uint64_t remap_deadlist_object =
- dsl_dataset_get_remap_deadlist_object(ds);
- ASSERT(remap_deadlist_object != 0);
-
- mutex_enter(&ds_next->ds_remap_deadlist_lock);
- if (!dsl_dataset_remap_deadlist_exists(ds_next))
- dsl_dataset_create_remap_deadlist(ds_next, tx);
- mutex_exit(&ds_next->ds_remap_deadlist_lock);
-
- dsl_deadlist_merge(&ds_next->ds_remap_deadlist,
- remap_deadlist_object, tx);
- dsl_dataset_destroy_remap_deadlist(ds, tx);
- }
-}
-
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
int err;
int after_branch_point = FALSE;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ spa_t *spa = dp->dp_spa;
+ wbc_data_t *wbc_data = spa_get_wbc_data(spa);
objset_t *mos = dp->dp_meta_objset;
dsl_dataset_t *ds_prev = NULL;
uint64_t obj;
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
@@ -263,10 +235,19 @@
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(refcount_is_zero(&ds->ds_longholds));
+ /*
+ * If an edge snapshot of the WBC window is destroyed, the window must be
+ * aborted.
+ */
+ mutex_enter(&wbc_data->wbc_lock);
+ if (dsl_dataset_phys(ds)->ds_creation_txg == wbc_data->wbc_finish_txg)
+ wbc_purge_window(spa, tx);
+ mutex_exit(&wbc_data->wbc_lock);
+
if (defer &&
(ds->ds_userrefs > 0 ||
dsl_dataset_phys(ds)->ds_num_children > 1)) {
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
@@ -362,18 +343,15 @@
/* Merge our deadlist into next's and free it. */
dsl_deadlist_merge(&ds_next->ds_deadlist,
dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
}
-
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
- dsl_destroy_snapshot_handle_remaps(ds, ds_next, tx);
-
/* Collapse range in clone heads */
dsl_dataset_remove_clones_key(ds,
dsl_dataset_phys(ds)->ds_creation_txg, tx);
if (ds_next->ds_is_snapshot) {
@@ -403,14 +381,10 @@
dsl_dataset_t *hds;
VERIFY0(dsl_dataset_hold_obj(dp,
dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
dsl_deadlist_remove_key(&hds->ds_deadlist,
dsl_dataset_phys(ds)->ds_creation_txg, tx);
- if (dsl_dataset_remap_deadlist_exists(hds)) {
- dsl_deadlist_remove_key(&hds->ds_remap_deadlist,
- dsl_dataset_phys(ds)->ds_creation_txg, tx);
- }
dsl_dataset_rele(hds, FTAG);
} else {
ASSERT3P(ds_next->ds_prev, ==, ds);
dsl_dataset_rele(ds_next->ds_prev, ds_next);
@@ -506,10 +480,14 @@
int error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
if (error == ENOENT)
return;
ASSERT0(error);
+
+ if (autosnap_check_name(strchr(dsname, '@')))
+ autosnap_exempt_snapshot(dp->dp_spa, dsname);
+
dsl_destroy_snapshot_sync_impl(ds, defer, tx);
dsl_dataset_rele(ds, FTAG);
}
/*
@@ -855,25 +833,35 @@
dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
}
/*
* Destroy the deadlist. Unless it's a clone, the
- * deadlist should be empty since the dataset has no snapshots.
- * (If it's a clone, it's safe to ignore the deadlist contents
- * since they are still referenced by the origin snapshot.)
+ * deadlist should be empty. (If it's a clone, it's
+ * safe to ignore the deadlist contents.)
*/
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
- if (dsl_dataset_remap_deadlist_exists(ds))
- dsl_dataset_destroy_remap_deadlist(ds, tx);
-
objset_t *os;
VERIFY0(dmu_objset_from_ds(ds, &os));
+ if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_WBC)) {
+ wbc_process_objset(spa_get_wbc_data(dp->dp_spa), os, B_TRUE);
+
+ /*
+ * If WBC was activated for this dataset and it is the root of a
+ * WBC-ed tree of datasets, the WBC feature refcount must be
+ * decremented so that 'feature@wbc' keeps reporting the correct
+ * WBC status.
+ */
+ if (os->os_wbc_root_ds_obj != 0 &&
+ ds->ds_object == os->os_wbc_root_ds_obj)
+ spa_feature_decr(os->os_spa, SPA_FEATURE_WBC, tx);
+ }
+
if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
old_synchronous_dataset_destroy(ds, tx);
} else {
/*
* Move the bptree into the pool's list of trees to
@@ -1011,10 +999,11 @@
error = spa_open(name, &spa, FTAG);
if (error != 0)
return (error);
isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
+
spa_close(spa, FTAG);
ddha.ddha_name = name;
if (!isenabled) {
@@ -1048,33 +1037,257 @@
return (dsl_sync_task(name, dsl_destroy_head_check,
dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_NONE));
}
-/*
- * Note, this function is used as the callback for dmu_objset_find(). We
- * always return 0 so that we will continue to find and process
- * inconsistent datasets, even if we encounter an error trying to
- * process one of them.
- */
+typedef struct {
+ kmutex_t lock;
+ list_t list;
+} dsl_inconsistent_walker_cb_t;
+
+typedef struct {
+ char name[ZFS_MAX_DATASET_NAME_LEN];
+ list_node_t node;
+} dsl_inconsistent_node_t;
+
/* ARGSUSED */
-int
-dsl_destroy_inconsistent(const char *dsname, void *arg)
+static int
+dsl_collect_inconsistent_datasets_cb(dsl_pool_t *dp,
+ dsl_dataset_t *ds, void *arg)
{
- objset_t *os;
+ dsl_inconsistent_node_t *ds_node;
+ dsl_inconsistent_walker_cb_t *walker =
+ (dsl_inconsistent_walker_cb_t *)arg;
- if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
- boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));
+ if (!DS_IS_INCONSISTENT(ds))
+ return (0);
/*
* If the dataset is inconsistent because a resumable receive
* has failed, then do not destroy it.
*/
- if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
- need_destroy = B_FALSE;
+ if (dsl_dataset_has_resume_receive_state(ds))
+ return (0);
- dmu_objset_rele(os, FTAG);
- if (need_destroy)
- (void) dsl_destroy_head(dsname);
+ ds_node = kmem_alloc(sizeof (dsl_inconsistent_node_t), KM_SLEEP);
+ dsl_dataset_name(ds, ds_node->name);
+
+ mutex_enter(&walker->lock);
+ list_insert_tail(&walker->list, ds_node);
+ mutex_exit(&walker->lock);
+
+ return (0);
+}
+
+/*
+ * Walk in parallel over the entire pool, gather the inconsistent datasets
+ * that have no resumable-receive state, and then destroy them.
+ */
+void
+dsl_destroy_inconsistent(dsl_pool_t *dp)
+{
+ dsl_inconsistent_walker_cb_t walker;
+ dsl_inconsistent_node_t *ds_node;
+
+ mutex_init(&walker.lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&walker.list, sizeof (dsl_inconsistent_node_t),
+ offsetof(dsl_inconsistent_node_t, node));
+
+ VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+ dsl_collect_inconsistent_datasets_cb,
+ &walker, DS_FIND_CHILDREN));
+
+ while ((ds_node = list_remove_head(&walker.list)) != NULL) {
+ (void) dsl_destroy_head(ds_node->name);
+ kmem_free(ds_node, sizeof (dsl_inconsistent_node_t));
}
+
+ list_destroy(&walker.list);
+ mutex_destroy(&walker.lock);
+}
+
+typedef struct {
+ const char *from_ds;
+ boolean_t defer;
+} dmu_destroy_atomically_arg_t;
+
+static int
+dsl_destroy_atomically_sync(void *arg, dmu_tx_t *tx)
+{
+ dmu_destroy_atomically_arg_t *ddaa = arg;
+ boolean_t defer = ddaa->defer;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ zfs_ds_collector_entry_t *tail;
+ list_t namestack;
+ int err = 0;
+
+ /* do not perform checks in ioctl (non-syncing) context */
+ if (!dmu_tx_is_syncing(tx))
return (0);
+
+ ASSERT(dsl_pool_config_held(dp));
+
+ if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY))
+ return (SET_ERROR(ENOTSUP));
+
+ /* It is possible that autosnap watches the DS */
+ if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_WBC)) {
+ objset_t *os = NULL;
+ dsl_dataset_t *ds = NULL;
+
+ err = dsl_dataset_hold(dp, ddaa->from_ds, FTAG, &ds);
+ if (err != 0)
+ return (err);
+
+ err = dmu_objset_from_ds(ds, &os);
+ if (err != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ return (err);
+ }
+
+ if (!dmu_objset_is_snapshot(os)) {
+ wbc_process_objset(spa_get_wbc_data(dp->dp_spa),
+ os, B_TRUE);
+ }
+
+ dsl_dataset_rele(ds, FTAG);
+ }
+
+ /* initialize the stack of datasets */
+ list_create(&namestack, sizeof (zfs_ds_collector_entry_t),
+ offsetof(zfs_ds_collector_entry_t, node));
+ tail = dsl_dataset_collector_cache_alloc();
+
+ /* push the head */
+ tail->cookie = 0;
+ tail->cookie_is_snap = B_FALSE;
+ (void) strcpy(tail->name, ddaa->from_ds);
+ list_insert_tail(&namestack, tail);
+
+ /* the head is processed at the very end, after everything else is done */
+ while (err == 0 && ((tail = list_tail(&namestack)) != NULL)) {
+ zfs_ds_collector_entry_t *el;
+ objset_t *os;
+ dsl_dataset_t *ds;
+ char *p;
+
+ /* init new entry */
+ el = dsl_dataset_collector_cache_alloc();
+ el->cookie = 0;
+ el->cookie_is_snap = B_FALSE;
+ (void) strcpy(el->name, tail->name);
+ p = el->name + strlen(el->name);
+
+ /* hold the current dataset to traverse its children */
+ err = dsl_dataset_hold(dp, tail->name, FTAG, &ds);
+ if (err != 0) {
+ dsl_dataset_collector_cache_free(el);
+ break;
+ }
+
+ err = dmu_objset_from_ds(ds, &os);
+ if (err != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_collector_cache_free(el);
+ break;
+ }
+
+ if (dmu_objset_is_snapshot(os)) {
+ /* traverse clones for snapshots */
+ err = dmu_clone_list_next(os, MAXNAMELEN,
+ el->name, NULL, &tail->cookie);
+ } else {
+ /* for filesystems traverse fs first, then snaps */
+ if (!tail->cookie_is_snap) {
+ *p++ = '/';
+ do {
+ *p = '\0';
+ err = dmu_dir_list_next(os,
+ MAXNAMELEN - (p - el->name),
+ p, NULL, &tail->cookie);
+ } while (err == 0 &&
+ dataset_name_hidden(el->name));
+
+ /* no more fs, move to snapshots */
+ if (err == ENOENT) {
+ *(--p) = '\0';
+ tail->cookie_is_snap = 1;
+ tail->cookie = 0;
+ err = 0;
+ }
+ }
+
+ if (err == 0 && tail->cookie_is_snap) {
+ *p++ = '@';
+ *p = '\0';
+ err = dmu_snapshot_list_next(os,
+ MAXNAMELEN - (p - el->name),
+ p, NULL, &tail->cookie, NULL);
+ }
+ }
+
+ if (err == 0) {
+ /* a child was found, push it onto the stack and continue */
+ list_insert_tail(&namestack, el);
+ dsl_dataset_rele(ds, FTAG);
+ continue;
+ }
+
+ dsl_dataset_collector_cache_free(el);
+
+ if (err != ENOENT) {
+ dsl_dataset_rele(ds, FTAG);
+ break;
+ }
+
+ /*
+ * The dataset has no more children: pop it from the stack
+ * and destroy it.
+ */
+
+ err = 0;
+
+ list_remove(&namestack, tail);
+
+ if (dmu_objset_is_snapshot(os)) {
+ err = dsl_destroy_snapshot_check_impl(ds, defer);
+ if (err == 0)
+ dsl_destroy_snapshot_sync_impl(ds, defer, tx);
+ } else if (strchr(tail->name, '/') != NULL) {
+ err = dsl_destroy_head_check_impl(ds, 0);
+ if (err == 0)
+ dsl_destroy_head_sync_impl(ds, tx);
+ }
+
+ dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_collector_cache_free(tail);
+ }
+
+ if (err != 0) {
+ while ((tail = list_remove_tail(&namestack)) != NULL)
+ dsl_dataset_collector_cache_free(tail);
+ }
+
+ ASSERT(list_head(&namestack) == NULL);
+
+ list_destroy(&namestack);
+
+ return (err);
+}
+
+/*ARGSUSED*/
+void
+dsl_destroy_atomically_sync_dummy(void *arg, dmu_tx_t *tx)
+{
+}
+
+int
+dsl_destroy_atomically(const char *name, boolean_t defer)
+{
+ dmu_destroy_atomically_arg_t ddaa;
+
+ ddaa.from_ds = name;
+ ddaa.defer = defer;
+
+ return (dsl_sync_task(name, dsl_destroy_atomically_sync,
+ dsl_destroy_atomically_sync_dummy, &ddaa, 0, ZFS_SPACE_CHECK_NONE));
}