Print this page
NEX-6855 System fails to boot up after a large number of datasets created
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-9301 BAD Trap: Double Fault panic on zfs destroy snapshot
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-7641 Impossible to remove special vdev from pool if WBC-ed dataset was removed before disabling WBC
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-5795 Rename 'wrc' as 'wbc' in the source and in the tech docs
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
2605 want to resume interrupted zfs send
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Reviewed by: Arne Jansen <sensille@gmx.net>
Approved by: Dan McDonald <danmcd@omniti.com>
6047 SPARC boot should support feature@embedded_data
Reviewed by: Igor Kozhukhov <ikozhukhov@gmail.com>
Approved by: Dan McDonald <danmcd@omniti.com>
5959 clean up per-dataset feature count code
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Alex Reece <alex@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-4582 update wrc test cases for allow to use write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
NEX-4476 WRC: Allow to use write back cache per tree of datasets
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
Revert "NEX-4476 WRC: Allow to use write back cache per tree of datasets"
This reverts commit fe97b74444278a6f36fec93179133641296312da.
NEX-4476 WRC: Allow to use write back cache per tree of datasets
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
NEX-3964 It should not be allowed to rename a snapshot that its new name is matched to the prefix of in-kernel autosnapshots (lint)
NEX-3964 It should not be allowed to rename a snapshot that its new name is matched to the prefix of in-kernel autosnapshots
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3558 KRRP Integration
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/zfs/dsl_destroy.c
+++ new/usr/src/uts/common/fs/zfs/dsl_destroy.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
23 + * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
24 24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 25 * Copyright (c) 2013 by Joyent, Inc. All rights reserved.
26 26 * Copyright (c) 2014 Integros [integros.com]
27 + * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
27 28 */
28 29
30 +#include <sys/autosnap.h>
29 31 #include <sys/zfs_context.h>
30 32 #include <sys/dsl_userhold.h>
31 33 #include <sys/dsl_dataset.h>
32 34 #include <sys/dsl_synctask.h>
33 35 #include <sys/dsl_destroy.h>
34 36 #include <sys/dmu_tx.h>
35 37 #include <sys/dsl_pool.h>
36 38 #include <sys/dsl_dir.h>
37 39 #include <sys/dmu_traverse.h>
38 40 #include <sys/dsl_scan.h>
39 41 #include <sys/dmu_objset.h>
40 42 #include <sys/zap.h>
41 43 #include <sys/zfeature.h>
42 44 #include <sys/zfs_ioctl.h>
43 45 #include <sys/dsl_deleg.h>
44 46 #include <sys/dmu_impl.h>
47 +#include <sys/wbc.h>
45 48 #include <sys/zcp.h>
46 49
/*
 * Decide whether snapshot 'ds' may be destroyed (or defer-destroyed).
 * Returns 0 if destruction may proceed, otherwise:
 *   EINVAL  - 'ds' is not a snapshot
 *   EBUSY   - snapshot is long-held, or has user holds (non-defer case)
 *   ENOTSUP - defer requested on a pool older than SPA_VERSION_USERREFS
 *   EEXIST  - snapshot is a branch point (more than one child)
 * Note: with defer=B_TRUE, user holds and extra children are acceptable,
 * so those checks are skipped after the version check.
 */
47 50 int
48 51 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
49 52 {
50 53 if (!ds->ds_is_snapshot)
51 54 return (SET_ERROR(EINVAL));
52 55
53 56 if (dsl_dataset_long_held(ds))
54 57 return (SET_ERROR(EBUSY));
55 58
56 59 /*
57 60 * Only allow deferred destroy on pools that support it.
58 61 * NOTE: deferred destroy is only supported on snapshots.
59 62 */
60 63 if (defer) {
61 64 if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
62 65 SPA_VERSION_USERREFS)
63 66 return (SET_ERROR(ENOTSUP));
64 67 return (0);
65 68 }
66 69
67 70 /*
68 71 * If this snapshot has an elevated user reference count,
69 72 * we can't destroy it yet.
70 73 */
71 74 if (ds->ds_userrefs > 0)
72 75 return (SET_ERROR(EBUSY));
73 76
74 77 /*
75 78 * Can't delete a branch point.
76 79 */
77 80 if (dsl_dataset_phys(ds)->ds_num_children > 1)
78 81 return (SET_ERROR(EEXIST));
79 82
80 83 return (0);
81 84 }
82 85
/*
 * DSL synctask "check" hook for destroying a single snapshot by name.
 * arg is a dsl_destroy_snapshot_arg_t (name + defer flag).  A missing
 * snapshot is treated as success (already destroyed); otherwise defers
 * to dsl_destroy_snapshot_check_impl() for the real policy checks.
 */
83 86 int
84 87 dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
85 88 {
86 89 dsl_destroy_snapshot_arg_t *ddsa = arg;
87 90 const char *dsname = ddsa->ddsa_name;
88 91 boolean_t defer = ddsa->ddsa_defer;
89 92
90 93 dsl_pool_t *dp = dmu_tx_pool(tx);
91 94 int error = 0;
92 95 dsl_dataset_t *ds;
93 96
94 97 error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
95 98
96 99 /*
97 100 * If the snapshot does not exist, silently ignore it, and
98 101 * dsl_destroy_snapshot_sync() will be a no-op
99 102 * (it's "already destroyed").
100 103 */
101 104 if (error == ENOENT)
102 105 return (0);
103 106
104 107 if (error == 0) {
105 108 error = dsl_destroy_snapshot_check_impl(ds, defer);
106 109 dsl_dataset_rele(ds, FTAG);
107 110 }
108 111
109 112 return (error);
110 113 }
111 114
/*
 * State shared between process_old_deadlist() and its per-blkptr
 * callback process_old_cb(): the snapshot being destroyed, its previous
 * snapshot (if any), whether we are past a branch point, a root zio for
 * the async frees, and running space totals for blocks actually freed.
 */
112 115 struct process_old_arg {
113 116 dsl_dataset_t *ds;
114 117 dsl_dataset_t *ds_prev;
115 118 boolean_t after_branch_point;
116 119 zio_t *pio;
117 120 uint64_t used, comp, uncomp;
118 121 };
119 122
/*
 * bpobj_iterate() callback for old-format deadlists.  For each block in
 * ds_next's deadlist: if it was born at or before ds's previous
 * snapshot, it outlives ds, so move it onto ds's deadlist (and credit
 * ds_prev's unique bytes when appropriate); otherwise the block dies
 * with ds, so free it asynchronously via poa->pio and accumulate the
 * freed space totals.  Always returns 0 (iteration never aborts).
 */
120 123 static int
121 124 process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
122 125 {
123 126 struct process_old_arg *poa = arg;
124 127 dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
125 128
126 129 ASSERT(!BP_IS_HOLE(bp));
127 130
128 131 if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
129 132 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
130 133 if (poa->ds_prev && !poa->after_branch_point &&
131 134 bp->blk_birth >
132 135 dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
133 136 dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
134 137 bp_get_dsize_sync(dp->dp_spa, bp);
135 138 }
136 139 } else {
137 140 poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
138 141 poa->comp += BP_GET_PSIZE(bp);
139 142 poa->uncomp += BP_GET_UCSIZE(bp);
140 143 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
141 144 }
142 145 return (0);
143 146 }
144 147
/*
 * Handle snapshot destruction when the deadlists are still in the
 * pre-SPA_VERSION_DEADLISTS (bpobj-only) format: walk ds_next's
 * deadlist with process_old_cb(), wait for the async frees, adjust
 * DD_USED_SNAP accounting, and then swap the (now combined) deadlist
 * object from ds_next onto ds so that ds_next inherits ds's entries.
 */
145 148 static void
146 149 process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
147 150 dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
148 151 {
149 152 struct process_old_arg poa = { 0 };
150 153 dsl_pool_t *dp = ds->ds_dir->dd_pool;
151 154 objset_t *mos = dp->dp_meta_objset;
152 155 uint64_t deadlist_obj;
153 156
154 157 ASSERT(ds->ds_deadlist.dl_oldfmt);
155 158 ASSERT(ds_next->ds_deadlist.dl_oldfmt);
156 159
157 160 poa.ds = ds;
158 161 poa.ds_prev = ds_prev;
159 162 poa.after_branch_point = after_branch_point;
160 163 poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
161 164 VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
162 165 process_old_cb, &poa, tx));
163 166 VERIFY0(zio_wait(poa.pio));
164 167 ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes);
165 168
166 169 /* change snapused */
167 170 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
168 171 -poa.used, -poa.comp, -poa.uncomp, tx);
169 172
170 173 /* swap next's deadlist to our deadlist */
171 174 dsl_deadlist_close(&ds->ds_deadlist);
172 175 dsl_deadlist_close(&ds_next->ds_deadlist);
173 176 deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
174 177 dsl_dataset_phys(ds)->ds_deadlist_obj =
175 178 dsl_dataset_phys(ds_next)->ds_deadlist_obj;
176 179 dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj;
|
↓ open down ↓ |
122 lines elided |
↑ open up ↑ |
177 180 dsl_deadlist_open(&ds->ds_deadlist, mos,
178 181 dsl_dataset_phys(ds)->ds_deadlist_obj);
179 182 dsl_deadlist_open(&ds_next->ds_deadlist, mos,
180 183 dsl_dataset_phys(ds_next)->ds_deadlist_obj);
181 184 }
182 185
/*
 * Remove the deadlist key 'mintxg' from every clone of 'ds' (recursing
 * into clones of clones) so their deadlists collapse the range that the
 * destroyed snapshot used to delimit.
 *
 * NOTE(review): this diff moves the zap cursor/attribute from the stack
 * to kmem_alloc'd memory.  Since this function is recursive (one frame
 * per level of clone nesting), this presumably cuts per-frame stack use
 * to avoid kernel stack exhaustion — consistent with the NEX-9301
 * "double fault on zfs destroy snapshot" fix in the commit log; confirm.
 * It also drops the remap-deadlist handling present in the old version.
 */
183 186 static void
184 187 dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
185 188 {
186 189 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
187 - zap_cursor_t zc;
188 - zap_attribute_t za;
190 + zap_cursor_t *zc;
191 + zap_attribute_t *za;
189 192
190 193 /*
191 194 * If it is the old version, dd_clones doesn't exist so we can't
192 195 * find the clones, but dsl_deadlist_remove_key() is a no-op so it
193 196 * doesn't matter.
194 197 */
195 198 if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0)
196 199 return;
200 + zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
201 + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
197 202
198 - for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
199 - zap_cursor_retrieve(&zc, &za) == 0;
200 - zap_cursor_advance(&zc)) {
203 + for (zap_cursor_init(zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
204 + zap_cursor_retrieve(zc, za) == 0;
205 + zap_cursor_advance(zc)) {
201 206 dsl_dataset_t *clone;
202 207
203 208 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
204 - za.za_first_integer, FTAG, &clone));
205 209 + za->za_first_integer, FTAG, &clone));
205 210 if (clone->ds_dir->dd_origin_txg > mintxg) {
206 211 dsl_deadlist_remove_key(&clone->ds_deadlist,
207 212 mintxg, tx);
208 - if (dsl_dataset_remap_deadlist_exists(clone)) {
209 - dsl_deadlist_remove_key(
210 - &clone->ds_remap_deadlist, mintxg, tx);
211 - }
212 213 dsl_dataset_remove_clones_key(clone, mintxg, tx);
213 214 }
214 215 dsl_dataset_rele(clone, FTAG);
215 216 }
216 - zap_cursor_fini(&zc);
217 + zap_cursor_fini(zc);
218 + kmem_free(zc, sizeof (zap_cursor_t));
219 + kmem_free(za, sizeof (zap_attribute_t));
217 220 }
218 221
/*
 * NOTE(review): this entire helper is deleted by the diff (every line
 * is marked '-').  It handled remap deadlists (device-removal support):
 * moving soon-to-be-obsolete blocks to the pool's obsolete bpobj and
 * merging ds's remap deadlist into ds_next's.  Its removal matches the
 * removal of the corresponding calls in dsl_destroy_snapshot_sync_impl
 * elsewhere in this diff — confirm device removal is not supported in
 * this branch before relying on that.
 */
219 -static void
220 -dsl_destroy_snapshot_handle_remaps(dsl_dataset_t *ds, dsl_dataset_t *ds_next,
221 - dmu_tx_t *tx)
222 -{
223 - dsl_pool_t *dp = ds->ds_dir->dd_pool;
224 -
225 - /* Move blocks to be obsoleted to pool's obsolete list. */
226 - if (dsl_dataset_remap_deadlist_exists(ds_next)) {
227 - if (!bpobj_is_open(&dp->dp_obsolete_bpobj))
228 - dsl_pool_create_obsolete_bpobj(dp, tx);
229 -
230 - dsl_deadlist_move_bpobj(&ds_next->ds_remap_deadlist,
231 - &dp->dp_obsolete_bpobj,
232 - dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
233 - }
234 -
235 - /* Merge our deadlist into next's and free it. */
236 - if (dsl_dataset_remap_deadlist_exists(ds)) {
237 - uint64_t remap_deadlist_object =
238 - dsl_dataset_get_remap_deadlist_object(ds);
239 - ASSERT(remap_deadlist_object != 0);
240 -
241 - mutex_enter(&ds_next->ds_remap_deadlist_lock);
242 - if (!dsl_dataset_remap_deadlist_exists(ds_next))
243 - dsl_dataset_create_remap_deadlist(ds_next, tx);
244 - mutex_exit(&ds_next->ds_remap_deadlist_lock);
245 -
246 - dsl_deadlist_merge(&ds_next->ds_remap_deadlist,
247 - remap_deadlist_object, tx);
248 - dsl_dataset_destroy_remap_deadlist(ds, tx);
249 - }
250 -}
251 -
251 -
/*
 * Synchronous-context workhorse for destroying snapshot 'ds' in txg
 * 'tx'.  Handles deferred destroy (just sets DS_FLAG_DEFER_DESTROY when
 * holds/children remain), unlinks ds from the snapshot chain, migrates
 * or frees its deadlist blocks, fixes unique/snapused accounting, and
 * finally removes the snapshot from the namespace and frees its object.
 *
 * This diff adds a Nexenta write-back-cache (WBC) hook at the top: if
 * the snapshot being destroyed is the closing edge of the current WBC
 * window (creation txg == wbc_finish_txg), the window is purged under
 * wbc_lock before destruction proceeds.  It also removes the upstream
 * remap-deadlist handling (see the deleted helper above).
 */
252 222 void
253 223 dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
254 224 {
255 225 int err;
256 226 int after_branch_point = FALSE;
257 227 dsl_pool_t *dp = ds->ds_dir->dd_pool;
228 + spa_t *spa = dp->dp_spa;
229 + wbc_data_t *wbc_data = spa_get_wbc_data(spa);
258 230 objset_t *mos = dp->dp_meta_objset;
259 231 dsl_dataset_t *ds_prev = NULL;
260 232 uint64_t obj;
261 233
262 234 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
263 235 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
264 236 ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
265 237 rrw_exit(&ds->ds_bp_rwlock, FTAG);
266 238 ASSERT(refcount_is_zero(&ds->ds_longholds));
267 239
240 + /*
241 + * if an edge snapshot of WBC window is destroyed, the window must be
242 + * aborted
243 + */
244 + mutex_enter(&wbc_data->wbc_lock);
245 + if (dsl_dataset_phys(ds)->ds_creation_txg == wbc_data->wbc_finish_txg)
246 + wbc_purge_window(spa, tx);
247 + mutex_exit(&wbc_data->wbc_lock);
248 +
268 249 if (defer &&
269 250 (ds->ds_userrefs > 0 ||
270 251 dsl_dataset_phys(ds)->ds_num_children > 1)) {
271 252 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS)
272 253 dmu_buf_will_dirty(ds->ds_dbuf, tx);
273 254 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
274 255 spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
275 256 return;
276 257 }
277 258
278 259 ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
279 260
280 261 /* We need to log before removing it from the namespace. */
281 262 spa_history_log_internal_ds(ds, "destroy", tx, "");
282 263
283 264 dsl_scan_ds_destroyed(ds, tx);
284 265
285 266 obj = ds->ds_object;
286 267
287 268 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
288 269 if (ds->ds_feature_inuse[f]) {
289 270 dsl_dataset_deactivate_feature(obj, f, tx);
290 271 ds->ds_feature_inuse[f] = B_FALSE;
291 272 }
292 273 }
293 274 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
294 275 ASSERT3P(ds->ds_prev, ==, NULL);
295 276 VERIFY0(dsl_dataset_hold_obj(dp,
296 277 dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
297 278 after_branch_point =
298 279 (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);
299 280
300 281 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
301 282 if (after_branch_point &&
302 283 dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
303 284 dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
304 285 if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
305 286 VERIFY0(zap_add_int(mos,
306 287 dsl_dataset_phys(ds_prev)->
307 288 ds_next_clones_obj,
308 289 dsl_dataset_phys(ds)->ds_next_snap_obj,
309 290 tx));
310 291 }
311 292 }
312 293 if (!after_branch_point) {
313 294 dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
314 295 dsl_dataset_phys(ds)->ds_next_snap_obj;
315 296 }
316 297 }
317 298
318 299 dsl_dataset_t *ds_next;
319 300 uint64_t old_unique;
320 301 uint64_t used = 0, comp = 0, uncomp = 0;
321 302
322 303 VERIFY0(dsl_dataset_hold_obj(dp,
323 304 dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next));
324 305 ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj);
325 306
326 307 old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes;
327 308
328 309 dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
329 310 dsl_dataset_phys(ds_next)->ds_prev_snap_obj =
330 311 dsl_dataset_phys(ds)->ds_prev_snap_obj;
331 312 dsl_dataset_phys(ds_next)->ds_prev_snap_txg =
332 313 dsl_dataset_phys(ds)->ds_prev_snap_txg;
333 314 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
334 315 ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0);
335 316
336 317 if (ds_next->ds_deadlist.dl_oldfmt) {
337 318 process_old_deadlist(ds, ds_prev, ds_next,
338 319 after_branch_point, tx);
339 320 } else {
340 321 /* Adjust prev's unique space. */
341 322 if (ds_prev && !after_branch_point) {
342 323 dsl_deadlist_space_range(&ds_next->ds_deadlist,
343 324 dsl_dataset_phys(ds_prev)->ds_prev_snap_txg,
344 325 dsl_dataset_phys(ds)->ds_prev_snap_txg,
345 326 &used, &comp, &uncomp);
346 327 dsl_dataset_phys(ds_prev)->ds_unique_bytes += used;
347 328 }
348 329
349 330 /* Adjust snapused. */
350 331 dsl_deadlist_space_range(&ds_next->ds_deadlist,
351 332 dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX,
352 333 &used, &comp, &uncomp);
353 334 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
354 335 -used, -comp, -uncomp, tx);
355 336
356 337 /* Move blocks to be freed to pool's free list. */
|
↓ open down ↓ |
79 lines elided |
↑ open up ↑ |
357 338 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
358 339 &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg,
359 340 tx);
360 341 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
361 342 DD_USED_HEAD, used, comp, uncomp, tx);
362 343
363 344 /* Merge our deadlist into next's and free it. */
364 345 dsl_deadlist_merge(&ds_next->ds_deadlist,
365 346 dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
366 347 }
367 -
368 348 dsl_deadlist_close(&ds->ds_deadlist);
369 349 dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
370 350 dmu_buf_will_dirty(ds->ds_dbuf, tx);
371 351 dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
372 352
373 - dsl_destroy_snapshot_handle_remaps(ds, ds_next, tx);
374 -
375 353 /* Collapse range in clone heads */
376 354 dsl_dataset_remove_clones_key(ds,
377 355 dsl_dataset_phys(ds)->ds_creation_txg, tx);
378 356
379 357 if (ds_next->ds_is_snapshot) {
380 358 dsl_dataset_t *ds_nextnext;
381 359
382 360 /*
383 361 * Update next's unique to include blocks which
384 362 * were previously shared by only this snapshot
385 363 * and it. Those blocks will be born after the
386 364 * prev snap and before this snap, and will have
387 365 * died after the next snap and before the one
388 366 * after that (ie. be on the snap after next's
389 367 * deadlist).
390 368 */
391 369 VERIFY0(dsl_dataset_hold_obj(dp,
392 370 dsl_dataset_phys(ds_next)->ds_next_snap_obj,
393 371 FTAG, &ds_nextnext));
394 372 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
395 373 dsl_dataset_phys(ds)->ds_prev_snap_txg,
396 374 dsl_dataset_phys(ds)->ds_creation_txg,
397 375 &used, &comp, &uncomp);
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
398 376 dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
399 377 dsl_dataset_rele(ds_nextnext, FTAG);
400 378 ASSERT3P(ds_next->ds_prev, ==, NULL);
401 379
402 380 /* Collapse range in this head. */
403 381 dsl_dataset_t *hds;
404 382 VERIFY0(dsl_dataset_hold_obj(dp,
405 383 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
406 384 dsl_deadlist_remove_key(&hds->ds_deadlist,
407 385 dsl_dataset_phys(ds)->ds_creation_txg, tx);
408 - if (dsl_dataset_remap_deadlist_exists(hds)) {
409 - dsl_deadlist_remove_key(&hds->ds_remap_deadlist,
410 - dsl_dataset_phys(ds)->ds_creation_txg, tx);
411 - }
412 386 dsl_dataset_rele(hds, FTAG);
413 387
414 388 } else {
415 389 ASSERT3P(ds_next->ds_prev, ==, ds);
416 390 dsl_dataset_rele(ds_next->ds_prev, ds_next);
417 391 ds_next->ds_prev = NULL;
418 392 if (ds_prev) {
419 393 VERIFY0(dsl_dataset_hold_obj(dp,
420 394 dsl_dataset_phys(ds)->ds_prev_snap_obj,
421 395 ds_next, &ds_next->ds_prev));
422 396 }
423 397
424 398 dsl_dataset_recalc_head_uniq(ds_next);
425 399
426 400 /*
427 401 * Reduce the amount of our unconsumed refreservation
428 402 * being charged to our parent by the amount of
429 403 * new unique data we have gained.
430 404 */
431 405 if (old_unique < ds_next->ds_reserved) {
432 406 int64_t mrsdelta;
433 407 uint64_t new_unique =
434 408 dsl_dataset_phys(ds_next)->ds_unique_bytes;
435 409
436 410 ASSERT(old_unique <= new_unique);
437 411 mrsdelta = MIN(new_unique - old_unique,
438 412 ds_next->ds_reserved - old_unique);
439 413 dsl_dir_diduse_space(ds->ds_dir,
440 414 DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
441 415 }
442 416 }
443 417 dsl_dataset_rele(ds_next, FTAG);
444 418
445 419 /*
446 420 * This must be done after the dsl_traverse(), because it will
447 421 * re-open the objset.
448 422 */
449 423 if (ds->ds_objset) {
450 424 dmu_objset_evict(ds->ds_objset);
451 425 ds->ds_objset = NULL;
452 426 }
453 427
454 428 /* remove from snapshot namespace */
455 429 dsl_dataset_t *ds_head;
456 430 ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
457 431 VERIFY0(dsl_dataset_hold_obj(dp,
458 432 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
459 433 VERIFY0(dsl_dataset_get_snapname(ds));
460 434 #ifdef ZFS_DEBUG
461 435 {
462 436 uint64_t val;
463 437
464 438 err = dsl_dataset_snap_lookup(ds_head,
465 439 ds->ds_snapname, &val);
466 440 ASSERT0(err);
467 441 ASSERT3U(val, ==, obj);
468 442 }
469 443 #endif
470 444 VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE));
471 445 dsl_dataset_rele(ds_head, FTAG);
472 446
473 447 if (ds_prev != NULL)
474 448 dsl_dataset_rele(ds_prev, FTAG);
475 449
476 450 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
477 451
478 452 if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
479 453 uint64_t count;
480 454 ASSERT0(zap_count(mos,
481 455 dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
482 456 count == 0);
483 457 VERIFY0(dmu_object_free(mos,
484 458 dsl_dataset_phys(ds)->ds_next_clones_obj, tx));
485 459 }
486 460 if (dsl_dataset_phys(ds)->ds_props_obj != 0)
487 461 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj,
488 462 tx));
489 463 if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
490 464 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
491 465 tx));
492 466 dsl_dir_rele(ds->ds_dir, ds);
493 467 ds->ds_dir = NULL;
494 468 dmu_object_free_zapified(mos, obj, tx);
495 469 }
496 470
/*
 * DSL synctask "sync" hook for destroying one snapshot by name.
 * A vanished snapshot (ENOENT) is a no-op, matching the check hook.
 * This diff adds a Nexenta autosnap hook: if the snapshot name (the
 * part after '@') matches the in-kernel autosnap naming scheme, the
 * snapshot is exempted from autosnap tracking before being destroyed.
 */
497 471 void
498 472 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
499 473 {
500 474 dsl_destroy_snapshot_arg_t *ddsa = arg;
|
↓ open down ↓ |
79 lines elided |
↑ open up ↑ |
501 475 const char *dsname = ddsa->ddsa_name;
502 476 boolean_t defer = ddsa->ddsa_defer;
503 477
504 478 dsl_pool_t *dp = dmu_tx_pool(tx);
505 479 dsl_dataset_t *ds;
506 480
507 481 int error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
508 482 if (error == ENOENT)
509 483 return;
510 484 ASSERT0(error);
485 +
486 + if (autosnap_check_name(strchr(dsname, '@')))
487 + autosnap_exempt_snapshot(dp->dp_spa, dsname);
488 +
511 489 dsl_destroy_snapshot_sync_impl(ds, defer, tx);
512 490 dsl_dataset_rele(ds, FTAG);
513 491 }
514 492
515 493 /*
516 494 * The semantics of this function are described in the comment above
517 495 * lzc_destroy_snaps(). To summarize:
518 496 *
519 497 * The snapshots must all be in the same pool.
520 498 *
521 499 * Snapshots that don't exist will be silently ignored (considered to be
522 500 * "already deleted").
523 501 *
524 502 * On success, all snaps will be destroyed and this will return 0.
525 503 * On failure, no snaps will be destroyed, the errlist will be filled in,
526 504 * and this will return an errno.
527 505 */
/*
 * Destroy a batch of snapshots atomically via a ZFS channel program
 * (LUA): normalize the caller's nvlist, run a check pass over every
 * snapshot (collecting per-snapshot errnos), and only if all checks
 * pass run the destroy pass.  Per-snapshot errors are returned to the
 * caller in 'errlist' as int32 values.
 *
 * NOTE(review): on the zcp_eval() error path below, the function
 * returns without fnvlist_free(wrapper) or fnvlist_free(result) —
 * looks like an nvlist leak on failure; confirm against upstream.
 */
528 506 int
529 507 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
530 508 nvlist_t *errlist)
531 509 {
532 510 if (nvlist_next_nvpair(snaps, NULL) == NULL)
533 511 return (0);
534 512
535 513 /*
536 514 * lzc_destroy_snaps() is documented to take an nvlist whose
537 515 * values "don't matter". We need to convert that nvlist to
538 516 * one that we know can be converted to LUA. We also don't
539 517 * care about any duplicate entries because the nvlist will
540 518 * be converted to a LUA table which should take care of this.
541 519 */
542 520 nvlist_t *snaps_normalized;
543 521 VERIFY0(nvlist_alloc(&snaps_normalized, 0, KM_SLEEP));
544 522 for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL);
545 523 pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
546 524 fnvlist_add_boolean_value(snaps_normalized,
547 525 nvpair_name(pair), B_TRUE);
548 526 }
549 527
550 528 nvlist_t *arg;
551 529 VERIFY0(nvlist_alloc(&arg, 0, KM_SLEEP));
552 530 fnvlist_add_nvlist(arg, "snaps", snaps_normalized);
553 531 fnvlist_free(snaps_normalized);
554 532 fnvlist_add_boolean_value(arg, "defer", defer);
555 533
556 534 nvlist_t *wrapper;
557 535 VERIFY0(nvlist_alloc(&wrapper, 0, KM_SLEEP));
558 536 fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg);
559 537 fnvlist_free(arg);
560 538
561 539 const char *program =
562 540 "arg = ...\n"
563 541 "snaps = arg['snaps']\n"
564 542 "defer = arg['defer']\n"
565 543 "errors = { }\n"
566 544 "has_errors = false\n"
567 545 "for snap, v in pairs(snaps) do\n"
568 546 " errno = zfs.check.destroy{snap, defer=defer}\n"
569 547 " zfs.debug('snap: ' .. snap .. ' errno: ' .. errno)\n"
570 548 " if errno == ENOENT then\n"
571 549 " snaps[snap] = nil\n"
572 550 " elseif errno ~= 0 then\n"
573 551 " errors[snap] = errno\n"
574 552 " has_errors = true\n"
575 553 " end\n"
576 554 "end\n"
577 555 "if has_errors then\n"
578 556 " return errors\n"
579 557 "end\n"
580 558 "for snap, v in pairs(snaps) do\n"
581 559 " errno = zfs.sync.destroy{snap, defer=defer}\n"
582 560 " assert(errno == 0)\n"
583 561 "end\n"
584 562 "return { }\n";
585 563
586 564 nvlist_t *result = fnvlist_alloc();
587 565 int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)),
588 566 program,
589 567 B_TRUE,
590 568 0,
591 569 zfs_lua_max_memlimit,
592 570 nvlist_next_nvpair(wrapper, NULL), result);
593 571 if (error != 0) {
594 572 char *errorstr = NULL;
595 573 (void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr);
596 574 if (errorstr != NULL) {
597 575 zfs_dbgmsg(errorstr);
598 576 }
599 577 return (error);
600 578 }
601 579 fnvlist_free(wrapper);
602 580
603 581 /*
604 582 * lzc_destroy_snaps() is documented to fill the errlist with
605 583 * int32 values, so we need to covert the int64 values that are
606 584 * returned from LUA.
607 585 */
608 586 int rv = 0;
609 587 nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN);
610 588 for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL);
611 589 pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) {
612 590 int32_t val = (int32_t)fnvpair_value_int64(pair);
613 591 if (rv == 0)
614 592 rv = val;
615 593 fnvlist_add_int32(errlist, nvpair_name(pair), val);
616 594 }
617 595 fnvlist_free(result);
618 596 return (rv);
619 597 }
620 598
/*
 * Convenience wrapper: destroy a single snapshot by wrapping 'name' in
 * an nvlist and calling dsl_destroy_snapshots_nvl().  The per-snapshot
 * errlist is discarded; only the overall errno is returned.
 */
621 599 int
622 600 dsl_destroy_snapshot(const char *name, boolean_t defer)
623 601 {
624 602 int error;
625 603 nvlist_t *nvl = fnvlist_alloc();
626 604 nvlist_t *errlist = fnvlist_alloc();
627 605
628 606 fnvlist_add_boolean(nvl, name);
629 607 error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
630 608 fnvlist_free(errlist);
631 609 fnvlist_free(nvl);
632 610 return (error);
633 611 }
634 612
/* Context passed to kill_blkptr(): the dataset being destroyed + tx. */
635 613 struct killarg {
636 614 dsl_dataset_t *ds;
637 615 dmu_tx_t *tx;
638 616 };
639 617
/*
 * traverse_dataset() callback used by old_synchronous_dataset_destroy:
 * free every non-hole, non-embedded block.  ZIL blocks carry no space
 * accounting and are freed directly; all other blocks go through
 * dsl_dataset_block_kill() so the dataset's accounting is updated.
 */
640 618 /* ARGSUSED */
641 619 static int
642 620 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
643 621 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
644 622 {
645 623 struct killarg *ka = arg;
646 624 dmu_tx_t *tx = ka->tx;
647 625
648 626 if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
649 627 return (0);
650 628
651 629 if (zb->zb_level == ZB_ZIL_LEVEL) {
652 630 ASSERT(zilog != NULL);
653 631 /*
654 632 * It's a block in the intent log. It has no
655 633 * accounting, so just free it.
656 634 */
657 635 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
658 636 } else {
659 637 ASSERT(zilog == NULL);
660 638 ASSERT3U(bp->blk_birth, >,
661 639 dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
662 640 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
663 641 }
664 642
665 643 return (0);
666 644 }
667 645
/*
 * Legacy (pre-async_destroy) path: synchronously walk and free every
 * block born after the dataset's previous snapshot, in a single txg,
 * via traverse_dataset() + kill_blkptr().
 */
668 646 static void
669 647 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
670 648 {
671 649 struct killarg ka;
672 650
673 651 /*
674 652 * Free everything that we point to (that's born after
675 653 * the previous snapshot, if we are a clone)
676 654 *
677 655 * NB: this should be very quick, because we already
678 656 * freed all the objects in open context.
679 657 */
680 658 ka.ds = ds;
681 659 ka.tx = tx;
682 660 VERIFY0(traverse_dataset(ds,
683 661 dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST,
684 662 kill_blkptr, &ka));
685 663 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
686 664 dsl_dataset_phys(ds)->ds_unique_bytes == 0);
687 665 }
688 666
/*
 * Decide whether head (non-snapshot) dataset 'ds' may be destroyed.
 * 'expected_holds' is the number of long holds the caller legitimately
 * owns.  Returns 0 if destroyable, otherwise:
 *   EINVAL - 'ds' is a snapshot
 *   EBUSY  - unexpected long holds, own snapshots exist, or the origin
 *            snapshot (which would also need removing) is long-held
 *   EEXIST - child filesystems exist
 */
689 667 int
690 668 dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
691 669 {
692 670 int error;
693 671 uint64_t count;
694 672 objset_t *mos;
695 673
696 674 ASSERT(!ds->ds_is_snapshot);
697 675 if (ds->ds_is_snapshot)
698 676 return (SET_ERROR(EINVAL));
699 677
700 678 if (refcount_count(&ds->ds_longholds) != expected_holds)
701 679 return (SET_ERROR(EBUSY));
702 680
703 681 mos = ds->ds_dir->dd_pool->dp_meta_objset;
704 682
705 683 /*
706 684 * Can't delete a head dataset if there are snapshots of it.
707 685 * (Except if the only snapshots are from the branch we cloned
708 686 * from.)
709 687 */
710 688 if (ds->ds_prev != NULL &&
711 689 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object)
712 690 return (SET_ERROR(EBUSY));
713 691
714 692 /*
715 693 * Can't delete if there are children of this fs.
716 694 */
717 695 error = zap_count(mos,
718 696 dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count);
719 697 if (error != 0)
720 698 return (error);
721 699 if (count != 0)
722 700 return (SET_ERROR(EEXIST));
723 701
724 702 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
725 703 dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
726 704 ds->ds_prev->ds_userrefs == 0) {
727 705 /* We need to remove the origin snapshot as well. */
728 706 if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
729 707 return (SET_ERROR(EBUSY));
730 708 }
731 709 return (0);
732 710 }
733 711
/*
 * DSL synctask "check" hook for destroying a head dataset by name;
 * holds the dataset and delegates to dsl_destroy_head_check_impl()
 * with zero expected long holds.
 */
734 712 int
735 713 dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
736 714 {
737 715 dsl_destroy_head_arg_t *ddha = arg;
738 716 dsl_pool_t *dp = dmu_tx_pool(tx);
739 717 dsl_dataset_t *ds;
740 718 int error;
741 719
742 720 error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
743 721 if (error != 0)
744 722 return (error);
745 723
746 724 error = dsl_destroy_head_check_impl(ds, 0);
747 725 dsl_dataset_rele(ds, FTAG);
748 726 return (error);
749 727 }
750 728
/*
 * Tear down the dsl_dir with object number 'ddobj' after its head
 * dataset has already been destroyed: adjust parent filesystem counts,
 * drop the reservation, verify all space accounting is zero, destroy
 * the dir's child/props/delegation ZAPs, unlink it from its parent's
 * child-dir ZAP, and free the object itself.
 */
751 729 static void
752 730 dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
753 731 {
754 732 dsl_dir_t *dd;
755 733 dsl_pool_t *dp = dmu_tx_pool(tx);
756 734 objset_t *mos = dp->dp_meta_objset;
757 735 dd_used_t t;
758 736
759 737 ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));
760 738
761 739 VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
762 740
763 741 ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);
764 742
765 743 /*
766 744 * Decrement the filesystem count for all parent filesystems.
767 745 *
768 746 * When we receive an incremental stream into a filesystem that already
769 747 * exists, a temporary clone is created. We never count this temporary
770 748 * clone, whose name begins with a '%'.
771 749 */
772 750 if (dd->dd_myname[0] != '%' && dd->dd_parent != NULL)
773 751 dsl_fs_ss_count_adjust(dd->dd_parent, -1,
774 752 DD_FIELD_FILESYSTEM_COUNT, tx);
775 753
776 754 /*
777 755 * Remove our reservation. The impl() routine avoids setting the
778 756 * actual property, which would require the (already destroyed) ds.
779 757 */
780 758 dsl_dir_set_reservation_sync_impl(dd, 0, tx);
781 759
782 760 ASSERT0(dsl_dir_phys(dd)->dd_used_bytes);
783 761 ASSERT0(dsl_dir_phys(dd)->dd_reserved);
784 762 for (t = 0; t < DD_USED_NUM; t++)
785 763 ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);
786 764
787 765 VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
788 766 VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
789 767 VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
790 768 VERIFY0(zap_remove(mos,
791 769 dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
792 770 dd->dd_myname, tx));
793 771
794 772 dsl_dir_rele(dd, FTAG);
795 773 dmu_object_free_zapified(mos, ddobj, tx);
796 774 }
797 775
798 776 void
799 777 dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
800 778 {
801 779 dsl_pool_t *dp = dmu_tx_pool(tx);
802 780 objset_t *mos = dp->dp_meta_objset;
803 781 uint64_t obj, ddobj, prevobj = 0;
804 782 boolean_t rmorigin;
805 783
806 784 ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
807 785 ASSERT(ds->ds_prev == NULL ||
808 786 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
809 787 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
810 788 ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
811 789 rrw_exit(&ds->ds_bp_rwlock, FTAG);
812 790 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
813 791
814 792 /* We need to log before removing it from the namespace. */
815 793 spa_history_log_internal_ds(ds, "destroy", tx, "");
816 794
817 795 rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
818 796 DS_IS_DEFER_DESTROY(ds->ds_prev) &&
819 797 dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
820 798 ds->ds_prev->ds_userrefs == 0);
821 799
822 800 /* Remove our reservation. */
823 801 if (ds->ds_reserved != 0) {
824 802 dsl_dataset_set_refreservation_sync_impl(ds,
825 803 (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
826 804 0, tx);
827 805 ASSERT0(ds->ds_reserved);
828 806 }
829 807
830 808 obj = ds->ds_object;
831 809
832 810 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
833 811 if (ds->ds_feature_inuse[f]) {
834 812 dsl_dataset_deactivate_feature(obj, f, tx);
835 813 ds->ds_feature_inuse[f] = B_FALSE;
836 814 }
837 815 }
838 816
839 817 dsl_scan_ds_destroyed(ds, tx);
840 818
841 819 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
842 820 /* This is a clone */
843 821 ASSERT(ds->ds_prev != NULL);
844 822 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=,
845 823 obj);
846 824 ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);
847 825
848 826 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
849 827 if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) {
|
↓ open down ↓ |
329 lines elided |
↑ open up ↑ |
850 828 dsl_dataset_remove_from_next_clones(ds->ds_prev,
851 829 obj, tx);
852 830 }
853 831
854 832 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1);
855 833 dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
856 834 }
857 835
858 836 /*
859 837 * Destroy the deadlist. Unless it's a clone, the
860 - * deadlist should be empty since the dataset has no snapshots.
861 - * (If it's a clone, it's safe to ignore the deadlist contents
862 - * since they are still referenced by the origin snapshot.)
838 + * deadlist should be empty. (If it's a clone, it's
839 + * safe to ignore the deadlist contents.)
863 840 */
864 841 dsl_deadlist_close(&ds->ds_deadlist);
865 842 dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
866 843 dmu_buf_will_dirty(ds->ds_dbuf, tx);
867 844 dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
868 845
869 - if (dsl_dataset_remap_deadlist_exists(ds))
870 - dsl_dataset_destroy_remap_deadlist(ds, tx);
871 -
872 846 objset_t *os;
873 847 VERIFY0(dmu_objset_from_ds(ds, &os));
874 848
849 + if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_WBC)) {
850 + wbc_process_objset(spa_get_wbc_data(dp->dp_spa), os, B_TRUE);
851 +
852 + /*
853 + * If WBC was activated for this dataset and it is a root
854 + * of WBC-ed tree of datasets then need to decrement WBC
855 + * feature flag refcounter, to be sure that 'feature@wbc'
856 + * shows correct information about the status of WBC
857 + */
858 + if (os->os_wbc_root_ds_obj != 0 &&
859 + ds->ds_object == os->os_wbc_root_ds_obj)
860 + spa_feature_decr(os->os_spa, SPA_FEATURE_WBC, tx);
861 + }
862 +
875 863 if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
876 864 old_synchronous_dataset_destroy(ds, tx);
877 865 } else {
878 866 /*
879 867 * Move the bptree into the pool's list of trees to
880 868 * clean up and update space accounting information.
881 869 */
882 870 uint64_t used, comp, uncomp;
883 871
884 872 zil_destroy_sync(dmu_objset_zil(os), tx);
885 873
886 874 if (!spa_feature_is_active(dp->dp_spa,
887 875 SPA_FEATURE_ASYNC_DESTROY)) {
888 876 dsl_scan_t *scn = dp->dp_scan;
889 877 spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
890 878 tx);
891 879 dp->dp_bptree_obj = bptree_alloc(mos, tx);
892 880 VERIFY0(zap_add(mos,
893 881 DMU_POOL_DIRECTORY_OBJECT,
894 882 DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
895 883 &dp->dp_bptree_obj, tx));
896 884 ASSERT(!scn->scn_async_destroying);
897 885 scn->scn_async_destroying = B_TRUE;
898 886 }
899 887
900 888 used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
901 889 comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
902 890 uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;
903 891
904 892 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
905 893 dsl_dataset_phys(ds)->ds_unique_bytes == used);
906 894
907 895 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
908 896 bptree_add(mos, dp->dp_bptree_obj,
909 897 &dsl_dataset_phys(ds)->ds_bp,
910 898 dsl_dataset_phys(ds)->ds_prev_snap_txg,
911 899 used, comp, uncomp, tx);
912 900 rrw_exit(&ds->ds_bp_rwlock, FTAG);
913 901 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
914 902 -used, -comp, -uncomp, tx);
915 903 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
916 904 used, comp, uncomp, tx);
917 905 }
918 906
919 907 if (ds->ds_prev != NULL) {
920 908 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
921 909 VERIFY0(zap_remove_int(mos,
922 910 dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones,
923 911 ds->ds_object, tx));
924 912 }
925 913 prevobj = ds->ds_prev->ds_object;
926 914 dsl_dataset_rele(ds->ds_prev, ds);
927 915 ds->ds_prev = NULL;
928 916 }
929 917
930 918 /*
931 919 * This must be done after the dsl_traverse(), because it will
932 920 * re-open the objset.
933 921 */
934 922 if (ds->ds_objset) {
935 923 dmu_objset_evict(ds->ds_objset);
936 924 ds->ds_objset = NULL;
937 925 }
938 926
939 927 /* Erase the link in the dir */
940 928 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
941 929 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0;
942 930 ddobj = ds->ds_dir->dd_object;
943 931 ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0);
944 932 VERIFY0(zap_destroy(mos,
945 933 dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));
946 934
947 935 if (ds->ds_bookmarks != 0) {
948 936 VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
949 937 spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
950 938 }
951 939
952 940 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
953 941
954 942 ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj);
955 943 ASSERT0(dsl_dataset_phys(ds)->ds_props_obj);
956 944 ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj);
957 945 dsl_dir_rele(ds->ds_dir, ds);
958 946 ds->ds_dir = NULL;
959 947 dmu_object_free_zapified(mos, obj, tx);
960 948
961 949 dsl_dir_destroy_sync(ddobj, tx);
962 950
963 951 if (rmorigin) {
964 952 dsl_dataset_t *prev;
965 953 VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
966 954 dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
967 955 dsl_dataset_rele(prev, FTAG);
968 956 }
969 957 }
970 958
971 959 void
972 960 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
973 961 {
974 962 dsl_destroy_head_arg_t *ddha = arg;
975 963 dsl_pool_t *dp = dmu_tx_pool(tx);
976 964 dsl_dataset_t *ds;
977 965
978 966 VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
979 967 dsl_destroy_head_sync_impl(ds, tx);
980 968 dsl_dataset_rele(ds, FTAG);
981 969 }
982 970
983 971 static void
984 972 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
985 973 {
986 974 dsl_destroy_head_arg_t *ddha = arg;
987 975 dsl_pool_t *dp = dmu_tx_pool(tx);
988 976 dsl_dataset_t *ds;
989 977
990 978 VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
991 979
992 980 /* Mark it as inconsistent on-disk, in case we crash */
993 981 dmu_buf_will_dirty(ds->ds_dbuf, tx);
994 982 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
995 983
996 984 spa_history_log_internal_ds(ds, "destroy begin", tx, "");
997 985 dsl_dataset_rele(ds, FTAG);
998 986 }
999 987
1000 988 int
1001 989 dsl_destroy_head(const char *name)
1002 990 {
1003 991 dsl_destroy_head_arg_t ddha;
1004 992 int error;
1005 993 spa_t *spa;
|
↓ open down ↓ |
121 lines elided |
↑ open up ↑ |
1006 994 boolean_t isenabled;
1007 995
1008 996 #ifdef _KERNEL
1009 997 zfs_destroy_unmount_origin(name);
1010 998 #endif
1011 999
1012 1000 error = spa_open(name, &spa, FTAG);
1013 1001 if (error != 0)
1014 1002 return (error);
1015 1003 isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
1004 +
1016 1005 spa_close(spa, FTAG);
1017 1006
1018 1007 ddha.ddha_name = name;
1019 1008
1020 1009 if (!isenabled) {
1021 1010 objset_t *os;
1022 1011
1023 1012 error = dsl_sync_task(name, dsl_destroy_head_check,
1024 1013 dsl_destroy_head_begin_sync, &ddha,
1025 1014 0, ZFS_SPACE_CHECK_NONE);
1026 1015 if (error != 0)
1027 1016 return (error);
1028 1017
1029 1018 /*
1030 1019 * Head deletion is processed in one txg on old pools;
1031 1020 * remove the objects from open context so that the txg sync
1032 1021 * is not too long.
1033 1022 */
1034 1023 error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
1035 1024 if (error == 0) {
1036 1025 uint64_t prev_snap_txg =
1037 1026 dsl_dataset_phys(dmu_objset_ds(os))->
1038 1027 ds_prev_snap_txg;
1039 1028 for (uint64_t obj = 0; error == 0;
1040 1029 error = dmu_object_next(os, &obj, FALSE,
1041 1030 prev_snap_txg))
1042 1031 (void) dmu_free_long_object(os, obj);
|
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
1043 1032 /* sync out all frees */
1044 1033 txg_wait_synced(dmu_objset_pool(os), 0);
1045 1034 dmu_objset_disown(os, FTAG);
1046 1035 }
1047 1036 }
1048 1037
1049 1038 return (dsl_sync_task(name, dsl_destroy_head_check,
1050 1039 dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_NONE));
1051 1040 }
1052 1041
1042 +typedef struct {
1043 + kmutex_t lock;
1044 + list_t list;
1045 +} dsl_inconsistent_walker_cb_t;
1046 +
1047 +typedef struct {
1048 + char name[ZFS_MAX_DATASET_NAME_LEN];
1049 + list_node_t node;
1050 +} dsl_inconsistent_node_t;
1051 +
1052 +/* ARGSUSED */
1053 +static int
1054 +dsl_collect_inconsistent_datasets_cb(dsl_pool_t *dp,
1055 + dsl_dataset_t *ds, void *arg)
1056 +{
1057 + dsl_inconsistent_node_t *ds_node;
1058 + dsl_inconsistent_walker_cb_t *walker =
1059 + (dsl_inconsistent_walker_cb_t *)arg;
1060 +
1061 + if (!DS_IS_INCONSISTENT(ds))
1062 + return (0);
1063 +
1064 + /*
1065 + * If the dataset is inconsistent because a resumable receive
1066 + * has failed, then do not destroy it.
1067 + */
1068 + if (dsl_dataset_has_resume_receive_state(ds))
1069 + return (0);
1070 +
1071 + ds_node = kmem_alloc(sizeof (dsl_inconsistent_node_t), KM_SLEEP);
1072 + dsl_dataset_name(ds, ds_node->name);
1073 +
1074 + mutex_enter(&walker->lock);
1075 + list_insert_tail(&walker->list, ds_node);
1076 + mutex_exit(&walker->lock);
1077 +
1078 + return (0);
1079 +}
1080 +
1053 1081 /*
1054 - * Note, this function is used as the callback for dmu_objset_find(). We
1055 - * always return 0 so that we will continue to find and process
1056 - * inconsistent datasets, even if we encounter an error trying to
1057 - * process one of them.
1082 + * Walk in parallel over the entire pool and gather inconsistent
1083 + * datasets namely, those that don't have resume token and destroy them.
1058 1084 */
1059 -/* ARGSUSED */
1060 -int
1061 -dsl_destroy_inconsistent(const char *dsname, void *arg)
1085 +void
1086 +dsl_destroy_inconsistent(dsl_pool_t *dp)
1062 1087 {
1063 - objset_t *os;
1088 + dsl_inconsistent_walker_cb_t walker;
1089 + dsl_inconsistent_node_t *ds_node;
1064 1090
1065 - if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
1066 - boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));
1091 + mutex_init(&walker.lock, NULL, MUTEX_DEFAULT, NULL);
1092 + list_create(&walker.list, sizeof (dsl_inconsistent_node_t),
1093 + offsetof(dsl_inconsistent_node_t, node));
1067 1094
1095 + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
1096 + dsl_collect_inconsistent_datasets_cb,
1097 + &walker, DS_FIND_CHILDREN));
1098 +
1099 + while ((ds_node = list_remove_head(&walker.list)) != NULL) {
1100 + (void) dsl_destroy_head(ds_node->name);
1101 + kmem_free(ds_node, sizeof (dsl_inconsistent_node_t));
1102 + }
1103 +
1104 + list_destroy(&walker.list);
1105 + mutex_destroy(&walker.lock);
1106 +}
1107 +
1108 +typedef struct {
1109 + const char *from_ds;
1110 + boolean_t defer;
1111 +} dmu_destroy_atomically_arg_t;
1112 +
1113 +static int
1114 +dsl_destroy_atomically_sync(void *arg, dmu_tx_t *tx)
1115 +{
1116 + dmu_destroy_atomically_arg_t *ddaa = arg;
1117 + boolean_t defer = ddaa->defer;
1118 + dsl_pool_t *dp = dmu_tx_pool(tx);
1119 + zfs_ds_collector_entry_t *tail;
1120 + list_t namestack;
1121 + int err = 0;
1122 +
1123 + /* do not perfrom checks in ioctl */
1124 + if (!dmu_tx_is_syncing(tx))
1125 + return (0);
1126 +
1127 + ASSERT(dsl_pool_config_held(dp));
1128 +
1129 + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY))
1130 + return (SET_ERROR(ENOTSUP));
1131 +
1132 + /* It is possible than autosnap watches the DS */
1133 + if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_WBC)) {
1134 + objset_t *os = NULL;
1135 + dsl_dataset_t *ds = NULL;
1136 +
1137 + err = dsl_dataset_hold(dp, ddaa->from_ds, FTAG, &ds);
1138 + if (err != 0)
1139 + return (err);
1140 +
1141 + err = dmu_objset_from_ds(ds, &os);
1142 + if (err != 0) {
1143 + dsl_dataset_rele(ds, FTAG);
1144 + return (err);
1145 + }
1146 +
1147 + if (!dmu_objset_is_snapshot(os)) {
1148 + wbc_process_objset(spa_get_wbc_data(dp->dp_spa),
1149 + os, B_TRUE);
1150 + }
1151 +
1152 + dsl_dataset_rele(ds, FTAG);
1153 + }
1154 +
1155 + /* initialize the stack of datasets */
1156 + list_create(&namestack, sizeof (zfs_ds_collector_entry_t),
1157 + offsetof(zfs_ds_collector_entry_t, node));
1158 + tail = dsl_dataset_collector_cache_alloc();
1159 +
1160 + /* push the head */
1161 + tail->cookie = 0;
1162 + tail->cookie_is_snap = B_FALSE;
1163 + (void) strcpy(tail->name, ddaa->from_ds);
1164 + list_insert_tail(&namestack, tail);
1165 +
1166 + /* the head is processed at the very end and after all is done */
1167 + while (err == 0 && ((tail = list_tail(&namestack)) != NULL)) {
1168 + zfs_ds_collector_entry_t *el;
1169 + objset_t *os;
1170 + dsl_dataset_t *ds;
1171 + char *p;
1172 +
1173 + /* init new entry */
1174 + el = dsl_dataset_collector_cache_alloc();
1175 + el->cookie = 0;
1176 + el->cookie_is_snap = B_FALSE;
1177 + (void) strcpy(el->name, tail->name);
1178 + p = el->name + strlen(el->name);
1179 +
1180 + /* hold the current dataset to traverse its children */
1181 + err = dsl_dataset_hold(dp, tail->name, FTAG, &ds);
1182 + if (err != 0) {
1183 + dsl_dataset_collector_cache_free(el);
1184 + break;
1185 + }
1186 +
1187 + err = dmu_objset_from_ds(ds, &os);
1188 + if (err != 0) {
1189 + dsl_dataset_rele(ds, FTAG);
1190 + dsl_dataset_collector_cache_free(el);
1191 + break;
1192 + }
1193 +
1194 + if (dmu_objset_is_snapshot(os)) {
1195 + /* traverse clones for snapshots */
1196 + err = dmu_clone_list_next(os, MAXNAMELEN,
1197 + el->name, NULL, &tail->cookie);
1198 + } else {
1199 + /* for filesystems traverse fs first, then snaps */
1200 + if (!tail->cookie_is_snap) {
1201 + *p++ = '/';
1202 + do {
1203 + *p = '\0';
1204 + err = dmu_dir_list_next(os,
1205 + MAXNAMELEN - (p - el->name),
1206 + p, NULL, &tail->cookie);
1207 + } while (err == 0 &&
1208 + dataset_name_hidden(el->name));
1209 +
1210 + /* no more fs, move to snapshots */
1211 + if (err == ENOENT) {
1212 + *(--p) = '\0';
1213 + tail->cookie_is_snap = 1;
1214 + tail->cookie = 0;
1215 + err = 0;
1216 + }
1217 + }
1218 +
1219 + if (err == 0 && tail->cookie_is_snap) {
1220 + *p++ = '@';
1221 + *p = '\0';
1222 + err = dmu_snapshot_list_next(os,
1223 + MAXNAMELEN - (p - el->name),
1224 + p, NULL, &tail->cookie, NULL);
1225 + }
1226 + }
1227 +
1228 + if (err == 0) {
1229 + /* a children found, add it and continue */
1230 + list_insert_tail(&namestack, el);
1231 + dsl_dataset_rele(ds, FTAG);
1232 + continue;
1233 + }
1234 +
1235 + dsl_dataset_collector_cache_free(el);
1236 +
1237 + if (err != ENOENT) {
1238 + dsl_dataset_rele(ds, FTAG);
1239 + break;
1240 + }
1241 +
1068 1242 /*
1069 - * If the dataset is inconsistent because a resumable receive
1070 - * has failed, then do not destroy it.
1243 + * There are no more children of the dataset, pop it from stack
1244 + * and destroy it
1071 1245 */
1072 - if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
1073 - need_destroy = B_FALSE;
1074 1246
1075 - dmu_objset_rele(os, FTAG);
1076 - if (need_destroy)
1077 - (void) dsl_destroy_head(dsname);
1247 + err = 0;
1248 +
1249 + list_remove(&namestack, tail);
1250 +
1251 + if (dmu_objset_is_snapshot(os)) {
1252 + err = dsl_destroy_snapshot_check_impl(ds, defer);
1253 + if (err == 0)
1254 + dsl_destroy_snapshot_sync_impl(ds, defer, tx);
1255 + } else if (strchr(tail->name, '/') != NULL) {
1256 + err = dsl_destroy_head_check_impl(ds, 0);
1257 + if (err == 0)
1258 + dsl_destroy_head_sync_impl(ds, tx);
1259 + }
1260 +
1261 + dsl_dataset_rele(ds, FTAG);
1262 + dsl_dataset_collector_cache_free(tail);
1078 1263 }
1079 - return (0);
1264 +
1265 + if (err != 0) {
1266 + while ((tail = list_remove_tail(&namestack)) != NULL)
1267 + dsl_dataset_collector_cache_free(tail);
1268 + }
1269 +
1270 + ASSERT(list_head(&namestack) == NULL);
1271 +
1272 + list_destroy(&namestack);
1273 +
1274 + return (err);
1275 +}
1276 +
1277 +/*ARGSUSED*/
1278 +void
1279 +dsl_destroy_atomically_sync_dummy(void *arg, dmu_tx_t *tx)
1280 +{
1281 +}
1282 +
1283 +int
1284 +dsl_destroy_atomically(const char *name, boolean_t defer)
1285 +{
1286 + dmu_destroy_atomically_arg_t ddaa;
1287 +
1288 + ddaa.from_ds = name;
1289 + ddaa.defer = defer;
1290 +
1291 + return (dsl_sync_task(name, dsl_destroy_atomically_sync,
1292 + dsl_destroy_atomically_sync_dummy, &ddaa, 0, ZFS_SPACE_CHECK_NONE));
1080 1293 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX