2619 asynchronous destruction of ZFS file systems
2747 SPA versioning with zfs feature flags
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <gwilson@delphix.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>
Approved by: Dan McDonald <danmcd@nexenta.com>
--- old/usr/src/uts/common/fs/zfs/dnode_sync.c
+++ new/usr/src/uts/common/fs/zfs/dnode_sync.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 +
21 22 /*
22 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 + * Copyright (c) 2012 by Delphix. All rights reserved.
23 25 */
24 26
25 27 #include <sys/zfs_context.h>
26 28 #include <sys/dbuf.h>
27 29 #include <sys/dnode.h>
28 30 #include <sys/dmu.h>
29 31 #include <sys/dmu_tx.h>
30 32 #include <sys/dmu_objset.h>
31 33 #include <sys/dsl_dataset.h>
32 34 #include <sys/spa.h>
33 35
34 36 static void
35 37 dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
36 38 {
37 39 dmu_buf_impl_t *db;
38 40 int txgoff = tx->tx_txg & TXG_MASK;
39 41 int nblkptr = dn->dn_phys->dn_nblkptr;
40 42 int old_toplvl = dn->dn_phys->dn_nlevels - 1;
41 43 int new_level = dn->dn_next_nlevels[txgoff];
42 44 int i;
43 45
44 46 rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
45 47
46 48 /* this dnode can't be paged out because it's dirty */
47 49 ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
48 50 ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
49 51 ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);
50 52
51 53 db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
52 54 ASSERT(db != NULL);
53 55
54 56 dn->dn_phys->dn_nlevels = new_level;
55 57 dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
56 58 dn->dn_object, dn->dn_phys->dn_nlevels);
57 59
58 60 /* check for existing blkptrs in the dnode */
59 61 for (i = 0; i < nblkptr; i++)
60 62 if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
61 63 break;
62 64 if (i != nblkptr) {
63 65 /* transfer dnode's block pointers to new indirect block */
64 66 (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
65 67 ASSERT(db->db.db_data);
66 68 ASSERT(arc_released(db->db_buf));
67 69 ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
68 70 bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
69 71 sizeof (blkptr_t) * nblkptr);
70 72 arc_buf_freeze(db->db_buf);
71 73 }
72 74
73 75 /* set dbuf's parent pointers to new indirect buf */
74 76 for (i = 0; i < nblkptr; i++) {
75 77 dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i);
76 78
77 79 if (child == NULL)
78 80 continue;
79 81 #ifdef DEBUG
80 82 DB_DNODE_ENTER(child);
81 83 ASSERT3P(DB_DNODE(child), ==, dn);
82 84 DB_DNODE_EXIT(child);
83 85 #endif /* DEBUG */
84 86 if (child->db_parent && child->db_parent != dn->dn_dbuf) {
85 87 ASSERT(child->db_parent->db_level == db->db_level);
86 88 ASSERT(child->db_blkptr !=
87 89 &dn->dn_phys->dn_blkptr[child->db_blkid]);
88 90 mutex_exit(&child->db_mtx);
89 91 continue;
90 92 }
91 93 ASSERT(child->db_parent == NULL ||
92 94 child->db_parent == dn->dn_dbuf);
93 95
94 96 child->db_parent = db;
95 97 dbuf_add_ref(db, child);
96 98 if (db->db.db_data)
97 99 child->db_blkptr = (blkptr_t *)db->db.db_data + i;
98 100 else
99 101 child->db_blkptr = NULL;
100 102 dprintf_dbuf_bp(child, child->db_blkptr,
101 103 "changed db_blkptr to new indirect %s", "");
102 104
103 105 mutex_exit(&child->db_mtx);
104 106 }
105 107
106 108 bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);
107 109
108 110 dbuf_rele(db, FTAG);
109 111
110 112 rw_exit(&dn->dn_struct_rwlock);
111 113 }
112 114
113 115 static int
114 116 free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
115 117 {
116 118 dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
117 119 uint64_t bytesfreed = 0;
118 120 int i, blocks_freed = 0;
119 121
120 122 dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);
121 123
122 124 for (i = 0; i < num; i++, bp++) {
123 125 if (BP_IS_HOLE(bp))
124 126 continue;
125 127
126 128 bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
127 129 ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
128 130 bzero(bp, sizeof (blkptr_t));
129 131 blocks_freed += 1;
130 132 }
131 133 dnode_diduse_space(dn, -bytesfreed);
132 134 return (blocks_freed);
133 135 }
134 136
135 137 #ifdef ZFS_DEBUG
136 138 static void
137 139 free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
138 140 {
139 141 int off, num;
140 142 int i, err, epbs;
141 143 uint64_t txg = tx->tx_txg;
142 144 dnode_t *dn;
143 145
144 146 DB_DNODE_ENTER(db);
145 147 dn = DB_DNODE(db);
146 148 epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
147 149 off = start - (db->db_blkid * 1<<epbs);
148 150 num = end - start + 1;
149 151
150 152 ASSERT3U(off, >=, 0);
151 153 ASSERT3U(num, >=, 0);
152 154 ASSERT3U(db->db_level, >, 0);
153 155 ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
154 156 ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
155 157 ASSERT(db->db_blkptr != NULL);
156 158
157 159 for (i = off; i < off+num; i++) {
158 160 uint64_t *buf;
159 161 dmu_buf_impl_t *child;
160 162 dbuf_dirty_record_t *dr;
161 163 int j;
162 164
163 165 ASSERT(db->db_level == 1);
164 166
165 167 rw_enter(&dn->dn_struct_rwlock, RW_READER);
166 168 err = dbuf_hold_impl(dn, db->db_level-1,
167 169 (db->db_blkid << epbs) + i, TRUE, FTAG, &child);
168 170 rw_exit(&dn->dn_struct_rwlock);
169 171 if (err == ENOENT)
170 172 continue;
171 173 ASSERT(err == 0);
172 174 ASSERT(child->db_level == 0);
173 175 dr = child->db_last_dirty;
174 176 while (dr && dr->dr_txg > txg)
175 177 dr = dr->dr_next;
176 178 ASSERT(dr == NULL || dr->dr_txg == txg);
177 179
178 180 /* data_old better be zeroed */
179 181 if (dr) {
180 182 buf = dr->dt.dl.dr_data->b_data;
181 183 for (j = 0; j < child->db.db_size >> 3; j++) {
182 184 if (buf[j] != 0) {
183 185 panic("freed data not zero: "
184 186 "child=%p i=%d off=%d num=%d\n",
185 187 (void *)child, i, off, num);
186 188 }
187 189 }
188 190 }
189 191
190 192 /*
191 193 * db_data better be zeroed unless it's dirty in a
192 194 * future txg.
193 195 */
194 196 mutex_enter(&child->db_mtx);
195 197 buf = child->db.db_data;
196 198 if (buf != NULL && child->db_state != DB_FILL &&
197 199 child->db_last_dirty == NULL) {
198 200 for (j = 0; j < child->db.db_size >> 3; j++) {
199 201 if (buf[j] != 0) {
200 202 panic("freed data not zero: "
201 203 "child=%p i=%d off=%d num=%d\n",
202 204 (void *)child, i, off, num);
203 205 }
204 206 }
205 207 }
206 208 mutex_exit(&child->db_mtx);
207 209
208 210 dbuf_rele(child, FTAG);
209 211 }
210 212 DB_DNODE_EXIT(db);
211 213 }
212 214 #endif
213 215
214 216 #define ALL -1
215 217
216 218 static int
217 219 free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
218 220 dmu_tx_t *tx)
219 221 {
220 222 dnode_t *dn;
221 223 blkptr_t *bp;
222 224 dmu_buf_impl_t *subdb;
223 225 uint64_t start, end, dbstart, dbend, i;
224 226 int epbs, shift, err;
225 227 int all = TRUE;
226 228 int blocks_freed = 0;
227 229
228 230 /*
229 231 * There is a small possibility that this block will not be cached:
230 232 * 1 - if level > 1 and there are no children with level <= 1
231 233 * 2 - if we didn't get a dirty hold (because this block had just
232 234 * finished being written -- and so had no holds), and then this
233 235 * block got evicted before we got here.
234 236 */
235 237 if (db->db_state != DB_CACHED)
236 238 (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
237 239
238 240 dbuf_release_bp(db);
239 241 bp = (blkptr_t *)db->db.db_data;
240 242
241 243 DB_DNODE_ENTER(db);
242 244 dn = DB_DNODE(db);
243 245 epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
244 246 shift = (db->db_level - 1) * epbs;
245 247 dbstart = db->db_blkid << epbs;
246 248 start = blkid >> shift;
247 249 if (dbstart < start) {
248 250 bp += start - dbstart;
249 251 all = FALSE;
250 252 } else {
251 253 start = dbstart;
252 254 }
253 255 dbend = ((db->db_blkid + 1) << epbs) - 1;
254 256 end = (blkid + nblks - 1) >> shift;
255 257 if (dbend <= end)
256 258 end = dbend;
257 259 else if (all)
258 260 all = trunc;
259 261 ASSERT3U(start, <=, end);
260 262
261 263 if (db->db_level == 1) {
262 264 FREE_VERIFY(db, start, end, tx);
263 265 blocks_freed = free_blocks(dn, bp, end-start+1, tx);
264 266 arc_buf_freeze(db->db_buf);
265 267 ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
266 268 DB_DNODE_EXIT(db);
267 269 return (all ? ALL : blocks_freed);
268 270 }
269 271
270 272 for (i = start; i <= end; i++, bp++) {
271 273 if (BP_IS_HOLE(bp))
272 274 continue;
273 275 rw_enter(&dn->dn_struct_rwlock, RW_READER);
274 276 err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
275 277 ASSERT3U(err, ==, 0);
276 278 rw_exit(&dn->dn_struct_rwlock);
277 279
278 280 if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
279 281 ASSERT3P(subdb->db_blkptr, ==, bp);
280 282 blocks_freed += free_blocks(dn, bp, 1, tx);
281 283 } else {
282 284 all = FALSE;
283 285 }
284 286 dbuf_rele(subdb, FTAG);
285 287 }
286 288 DB_DNODE_EXIT(db);
287 289 arc_buf_freeze(db->db_buf);
288 290 #ifdef ZFS_DEBUG
289 291 bp -= (end-start)+1;
290 292 for (i = start; i <= end; i++, bp++) {
291 293 if (i == start && blkid != 0)
292 294 continue;
293 295 else if (i == end && !trunc)
294 296 continue;
295 297 ASSERT3U(bp->blk_birth, ==, 0);
296 298 }
297 299 #endif
298 300 ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
299 301 return (all ? ALL : blocks_freed);
300 302 }
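
The start/end arithmetic in free_children() is easier to follow with concrete numbers. Below is a minimal user-level sketch of the same slot computation; the 16K indirect block size and the blkid are assumed example values, not taken from this change (SPA_BLKPTRSHIFT is 7 because a blkptr_t is 128 bytes):

/*
 * Illustration only: how free_children() maps a level-0 block id to an
 * entry in a higher-level indirect block.
 */
#include <stdio.h>

#define	SPA_BLKPTRSHIFT	7			/* a blkptr_t is 128 bytes */

int
main(void)
{
	int indblkshift = 14;			/* assumed: 16K indirect blocks */
	int epbs = indblkshift - SPA_BLKPTRSHIFT; /* 7: 128 blkptrs per block */
	int level = 2;				/* level of the indirect block */
	unsigned long long blkid = 300000;	/* assumed level-0 block id */

	/* convert the level-0 blkid into a level-(level-1) block number */
	int shift = (level - 1) * epbs;
	unsigned long long start = blkid >> shift;

	/* 300000 >> 7 == 2343: level-1 block 2343 covers this blkid */
	printf("blkid %llu -> level-%d entry %llu\n", blkid, level, start);
	return (0);
}
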
301 303
302 304 /*
303 305 * free_range: Traverse the indicated range of the provided file
304 306 * and "free" all the blocks contained there.
305 307 */
306 308 static void
307 309 dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
308 310 {
309 311 blkptr_t *bp = dn->dn_phys->dn_blkptr;
310 312 dmu_buf_impl_t *db;
311 313 int trunc, start, end, shift, i, err;
312 314 int dnlevel = dn->dn_phys->dn_nlevels;
313 315
314 316 if (blkid > dn->dn_phys->dn_maxblkid)
315 317 return;
316 318
317 319 ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX);
318 320 trunc = blkid + nblks > dn->dn_phys->dn_maxblkid;
319 321 if (trunc)
320 322 nblks = dn->dn_phys->dn_maxblkid - blkid + 1;
321 323
322 324 /* There are no indirect blocks in the object */
323 325 if (dnlevel == 1) {
324 326 if (blkid >= dn->dn_phys->dn_nblkptr) {
325 327 /* this range was never made persistent */
326 328 return;
327 329 }
328 330 ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr);
329 331 (void) free_blocks(dn, bp + blkid, nblks, tx);
330 332 if (trunc) {
331 333 uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
332 334 (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
333 335 dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
334 336 ASSERT(off < dn->dn_phys->dn_maxblkid ||
335 337 dn->dn_phys->dn_maxblkid == 0 ||
336 338 dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
337 339 }
338 340 return;
339 341 }
340 342
341 343 shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
342 344 start = blkid >> shift;
343 345 ASSERT(start < dn->dn_phys->dn_nblkptr);
344 346 end = (blkid + nblks - 1) >> shift;
345 347 bp += start;
346 348 for (i = start; i <= end; i++, bp++) {
347 349 if (BP_IS_HOLE(bp))
348 350 continue;
349 351 rw_enter(&dn->dn_struct_rwlock, RW_READER);
350 352 err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
351 353 ASSERT3U(err, ==, 0);
352 354 rw_exit(&dn->dn_struct_rwlock);
353 355
354 356 if (free_children(db, blkid, nblks, trunc, tx) == ALL) {
355 357 ASSERT3P(db->db_blkptr, ==, bp);
356 358 (void) free_blocks(dn, bp, 1, tx);
357 359 }
358 360 dbuf_rele(db, FTAG);
359 361 }
360 362 if (trunc) {
361 363 uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
362 364 (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
363 365 dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
364 366 ASSERT(off < dn->dn_phys->dn_maxblkid ||
365 367 dn->dn_phys->dn_maxblkid == 0 ||
366 368 dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
367 369 }
368 370 }
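
The truncation case above turns dn_maxblkid back into a byte offset before probing with dnode_next_offset(); dn_datablkszsec counts 512-byte sectors (SPA_MINBLOCKSHIFT is 9). A worked sketch with assumed example values:

/*
 * Illustration only: the first byte past the end of an object whose
 * last block is maxblkid, as computed in dnode_sync_free_range().
 */
#include <stdio.h>

#define	SPA_MINBLOCKSHIFT	9		/* 512-byte sectors */

int
main(void)
{
	unsigned long long maxblkid = 7;	/* assumed: old last block id */
	int datablkszsec = 256;			/* assumed: 128K data blocks */

	unsigned long long off =
	    (maxblkid + 1) * (datablkszsec << SPA_MINBLOCKSHIFT);

	/* 8 blocks * 131072 bytes = 1048576 */
	printf("first freed byte offset: %llu\n", off);
	return (0);
}
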
369 371
370 372 /*
371 373 	 * Try to kick all the dnode's dbufs out of the cache...
372 374 */
373 375 void
374 376 dnode_evict_dbufs(dnode_t *dn)
375 377 {
376 378 int progress;
377 379 int pass = 0;
378 380
379 381 do {
380 382 dmu_buf_impl_t *db, marker;
381 383 int evicting = FALSE;
382 384
383 385 progress = FALSE;
384 386 mutex_enter(&dn->dn_dbufs_mtx);
385 387 list_insert_tail(&dn->dn_dbufs, &marker);
386 388 db = list_head(&dn->dn_dbufs);
387 389 	for (; db != &marker; db = list_head(&dn->dn_dbufs)) {
388 390 list_remove(&dn->dn_dbufs, db);
389 391 list_insert_tail(&dn->dn_dbufs, db);
390 392 #ifdef DEBUG
391 393 DB_DNODE_ENTER(db);
392 394 ASSERT3P(DB_DNODE(db), ==, dn);
393 395 DB_DNODE_EXIT(db);
394 396 #endif /* DEBUG */
395 397
396 398 mutex_enter(&db->db_mtx);
397 399 if (db->db_state == DB_EVICTING) {
398 400 progress = TRUE;
399 401 evicting = TRUE;
400 402 mutex_exit(&db->db_mtx);
401 403 } else if (refcount_is_zero(&db->db_holds)) {
402 404 progress = TRUE;
403 405 dbuf_clear(db); /* exits db_mtx for us */
404 406 } else {
405 407 mutex_exit(&db->db_mtx);
406 408 }
407 409
408 410 }
409 411 list_remove(&dn->dn_dbufs, &marker);
410 412 /*
411 413 * NB: we need to drop dn_dbufs_mtx between passes so
412 414 * that any DB_EVICTING dbufs can make progress.
413 415 * Ideally, we would have some cv we could wait on, but
414 416 * since we don't, just wait a bit to give the other
415 417 * thread a chance to run.
416 418 */
417 419 mutex_exit(&dn->dn_dbufs_mtx);
418 420 if (evicting)
419 421 delay(1);
420 422 pass++;
421 423 ASSERT(pass < 100); /* sanity check */
422 424 } while (progress);
423 425
424 426 rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
425 427 if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
426 428 mutex_enter(&dn->dn_bonus->db_mtx);
427 429 dbuf_evict(dn->dn_bonus);
428 430 dn->dn_bonus = NULL;
429 431 }
430 432 rw_exit(&dn->dn_struct_rwlock);
431 433 }
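
dnode_evict_dbufs() terminates its walk with a marker node: every visited dbuf is rotated to the tail, so the loop is done when the list head is the marker, and the walk stays well-defined even though dbuf_clear() drops locks and other threads may add or remove dbufs meanwhile. A user-level sketch of that pattern on a hand-rolled circular doubly-linked list (hypothetical node type; the kernel code uses list_t):

/*
 * Illustration only: marker-based traversal that tolerates concurrent
 * list surgery between iterations.
 */
#include <stdio.h>

typedef struct node {
	struct node *prev, *next;
	int val;
} node_t;

static void
insert_tail(node_t *head, node_t *n)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static void
remove_node(node_t *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

int
main(void)
{
	node_t head = { &head, &head, 0 };
	node_t a = { NULL, NULL, 1 }, b = { NULL, NULL, 2 };
	node_t marker = { NULL, NULL, -1 };

	insert_tail(&head, &a);
	insert_tail(&head, &b);
	insert_tail(&head, &marker);	/* marker goes at the tail */

	/* rotate each node past the marker until the marker is next */
	for (node_t *n = head.next; n != &marker; n = head.next) {
		remove_node(n);
		insert_tail(&head, n);	/* re-queued behind the marker */
		printf("visited %d\n", n->val);	/* "process" the node */
	}
	remove_node(&marker);
	return (0);
}
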
432 434
433 435 static void
434 436 dnode_undirty_dbufs(list_t *list)
435 437 {
436 438 dbuf_dirty_record_t *dr;
437 439
438 440 while (dr = list_head(list)) {
439 441 dmu_buf_impl_t *db = dr->dr_dbuf;
440 442 uint64_t txg = dr->dr_txg;
441 443
442 444 if (db->db_level != 0)
443 445 dnode_undirty_dbufs(&dr->dt.di.dr_children);
444 446
445 447 mutex_enter(&db->db_mtx);
446 448 /* XXX - use dbuf_undirty()? */
447 449 list_remove(list, dr);
448 450 ASSERT(db->db_last_dirty == dr);
449 451 db->db_last_dirty = NULL;
450 452 db->db_dirtycnt -= 1;
451 453 if (db->db_level == 0) {
452 454 ASSERT(db->db_blkid == DMU_BONUS_BLKID ||
453 455 dr->dt.dl.dr_data == db->db_buf);
454 456 dbuf_unoverride(dr);
455 457 }
456 458 kmem_free(dr, sizeof (dbuf_dirty_record_t));
457 459 dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
458 460 }
459 461 }
460 462
461 463 static void
462 464 dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
463 465 {
464 466 int txgoff = tx->tx_txg & TXG_MASK;
465 467
466 468 ASSERT(dmu_tx_is_syncing(tx));
467 469
468 470 /*
469 471 * Our contents should have been freed in dnode_sync() by the
470 472 * free range record inserted by the caller of dnode_free().
471 473 */
472 474 ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0);
473 475 ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr));
474 476
475 477 dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
476 478 dnode_evict_dbufs(dn);
477 479 ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
478 480
479 481 /*
480 482 * XXX - It would be nice to assert this, but we may still
481 483 * have residual holds from async evictions from the arc...
482 484 *
483 485 * zfs_obj_to_path() also depends on this being
484 486 * commented out.
485 487 *
486 488 * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1);
487 489 */
488 490
489 491 /* Undirty next bits */
490 492 dn->dn_next_nlevels[txgoff] = 0;
491 493 dn->dn_next_indblkshift[txgoff] = 0;
492 494 dn->dn_next_blksz[txgoff] = 0;
493 495
494 496 /* ASSERT(blkptrs are zero); */
495 497 ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
496 498 ASSERT(dn->dn_type != DMU_OT_NONE);
497 499
498 500 ASSERT(dn->dn_free_txg > 0);
499 501 if (dn->dn_allocated_txg != dn->dn_free_txg)
500 502 dbuf_will_dirty(dn->dn_dbuf, tx);
501 503 bzero(dn->dn_phys, sizeof (dnode_phys_t));
502 504
503 505 mutex_enter(&dn->dn_mtx);
504 506 dn->dn_type = DMU_OT_NONE;
505 507 dn->dn_maxblkid = 0;
506 508 dn->dn_allocated_txg = 0;
507 509 dn->dn_free_txg = 0;
508 510 dn->dn_have_spill = B_FALSE;
509 511 mutex_exit(&dn->dn_mtx);
510 512
511 513 ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
512 514
513 515 dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
514 516 /*
515 517 * Now that we've released our hold, the dnode may
516 518 	 * be evicted, so we mustn't access it.
517 519 */
518 520 }
519 521
520 522 /*
521 523 * Write out the dnode's dirty buffers.
522 524 */
523 525 void
524 526 dnode_sync(dnode_t *dn, dmu_tx_t *tx)
525 527 {
526 528 free_range_t *rp;
527 529 dnode_phys_t *dnp = dn->dn_phys;
528 530 int txgoff = tx->tx_txg & TXG_MASK;
529 531 list_t *list = &dn->dn_dirty_records[txgoff];
530 532 static const dnode_phys_t zerodn = { 0 };
531 533 boolean_t kill_spill = B_FALSE;
532 534
533 535 ASSERT(dmu_tx_is_syncing(tx));
534 536 ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
535 537 ASSERT(dnp->dn_type != DMU_OT_NONE ||
536 538 bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
537 539 DNODE_VERIFY(dn);
538 540
539 541 ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
540 542
541 543 if (dmu_objset_userused_enabled(dn->dn_objset) &&
542 544 !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
543 545 mutex_enter(&dn->dn_mtx);
544 546 dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
545 547 dn->dn_oldflags = dn->dn_phys->dn_flags;
546 548 dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
547 549 mutex_exit(&dn->dn_mtx);
548 550 dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
549 551 } else {
550 552 /* Once we account for it, we should always account for it. */
551 553 ASSERT(!(dn->dn_phys->dn_flags &
552 554 DNODE_FLAG_USERUSED_ACCOUNTED));
553 555 }
554 556
555 557 mutex_enter(&dn->dn_mtx);
556 558 if (dn->dn_allocated_txg == tx->tx_txg) {
557 559 /* The dnode is newly allocated or reallocated */
558 560 if (dnp->dn_type == DMU_OT_NONE) {
559 561 /* this is a first alloc, not a realloc */
560 562 dnp->dn_nlevels = 1;
561 563 dnp->dn_nblkptr = dn->dn_nblkptr;
562 564 }
563 565
564 566 dnp->dn_type = dn->dn_type;
565 567 dnp->dn_bonustype = dn->dn_bonustype;
566 568 dnp->dn_bonuslen = dn->dn_bonuslen;
567 569 }
568 570
569 571 ASSERT(dnp->dn_nlevels > 1 ||
570 572 BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
571 573 BP_GET_LSIZE(&dnp->dn_blkptr[0]) ==
572 574 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
573 575
574 576 if (dn->dn_next_blksz[txgoff]) {
575 577 ASSERT(P2PHASE(dn->dn_next_blksz[txgoff],
576 578 SPA_MINBLOCKSIZE) == 0);
577 579 ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
578 580 dn->dn_maxblkid == 0 || list_head(list) != NULL ||
579 581 avl_last(&dn->dn_ranges[txgoff]) ||
580 582 dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
581 583 dnp->dn_datablkszsec);
582 584 dnp->dn_datablkszsec =
583 585 dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT;
584 586 dn->dn_next_blksz[txgoff] = 0;
585 587 }
586 588
587 589 if (dn->dn_next_bonuslen[txgoff]) {
588 590 if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN)
589 591 dnp->dn_bonuslen = 0;
590 592 else
591 593 dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff];
592 594 ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN);
593 595 dn->dn_next_bonuslen[txgoff] = 0;
594 596 }
595 597
596 598 if (dn->dn_next_bonustype[txgoff]) {
597 - ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
599 + ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff]));
598 600 dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
599 601 dn->dn_next_bonustype[txgoff] = 0;
600 602 }
601 603
602 604 /*
603 605 * We will either remove a spill block when a file is being removed
604 606 * or we have been asked to remove it.
605 607 */
606 608 if (dn->dn_rm_spillblk[txgoff] ||
607 609 ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
608 610 dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
609 611 if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
610 612 kill_spill = B_TRUE;
611 613 dn->dn_rm_spillblk[txgoff] = 0;
612 614 }
613 615
614 616 if (dn->dn_next_indblkshift[txgoff]) {
615 617 ASSERT(dnp->dn_nlevels == 1);
616 618 dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff];
617 619 dn->dn_next_indblkshift[txgoff] = 0;
618 620 }
619 621
620 622 /*
621 623 * Just take the live (open-context) values for checksum and compress.
622 624 * Strictly speaking it's a future leak, but nothing bad happens if we
623 625 * start using the new checksum or compress algorithm a little early.
624 626 */
625 627 dnp->dn_checksum = dn->dn_checksum;
626 628 dnp->dn_compress = dn->dn_compress;
627 629
628 630 mutex_exit(&dn->dn_mtx);
629 631
630 632 if (kill_spill) {
631 633 (void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
632 634 mutex_enter(&dn->dn_mtx);
633 635 dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
634 636 mutex_exit(&dn->dn_mtx);
635 637 }
636 638
637 639 /* process all the "freed" ranges in the file */
638 640 while (rp = avl_last(&dn->dn_ranges[txgoff])) {
639 641 dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx);
640 642 /* grab the mutex so we don't race with dnode_block_freed() */
641 643 mutex_enter(&dn->dn_mtx);
642 644 avl_remove(&dn->dn_ranges[txgoff], rp);
643 645 mutex_exit(&dn->dn_mtx);
644 646 kmem_free(rp, sizeof (free_range_t));
645 647 }
646 648
647 649 if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) {
648 650 dnode_sync_free(dn, tx);
649 651 return;
650 652 }
651 653
652 654 if (dn->dn_next_nblkptr[txgoff]) {
653 655 /* this should only happen on a realloc */
654 656 ASSERT(dn->dn_allocated_txg == tx->tx_txg);
655 657 if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) {
656 658 /* zero the new blkptrs we are gaining */
657 659 bzero(dnp->dn_blkptr + dnp->dn_nblkptr,
658 660 sizeof (blkptr_t) *
659 661 (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr));
660 662 #ifdef ZFS_DEBUG
661 663 } else {
662 664 int i;
663 665 ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr);
664 666 /* the blkptrs we are losing better be unallocated */
665 667 for (i = dn->dn_next_nblkptr[txgoff];
666 668 i < dnp->dn_nblkptr; i++)
667 669 ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i]));
668 670 #endif
669 671 }
670 672 mutex_enter(&dn->dn_mtx);
671 673 dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff];
672 674 dn->dn_next_nblkptr[txgoff] = 0;
673 675 mutex_exit(&dn->dn_mtx);
674 676 }
675 677
676 678 if (dn->dn_next_nlevels[txgoff]) {
677 679 dnode_increase_indirection(dn, tx);
678 680 dn->dn_next_nlevels[txgoff] = 0;
679 681 }
680 682
681 683 dbuf_sync_list(list, tx);
682 684
683 685 if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
684 686 ASSERT3P(list_head(list), ==, NULL);
685 687 dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
686 688 }
687 689
688 690 /*
689 691 * Although we have dropped our reference to the dnode, it
690 692 	 * can't be evicted until it's written, and we haven't yet
691 693 * initiated the IO for the dnode's dbuf.
692 694 */
693 695 }
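
A final note on the indexing used throughout this file: every per-txg field (dn_next_nlevels, dn_next_blksz, dn_dirty_records, dn_ranges) lives in a small array indexed by txgoff = tx->tx_txg & TXG_MASK, because ZFS keeps TXG_SIZE (four) transaction groups in flight at once. A standalone sketch with an arbitrary example txg:

/*
 * Illustration only: open context records a pending change in the slot
 * for its txg; syncing context for that same txg reads the same slot.
 */
#include <stdio.h>

#define	TXG_SIZE	4			/* in-flight txgs */
#define	TXG_MASK	(TXG_SIZE - 1)		/* txg % TXG_SIZE, cheaply */

int
main(void)
{
	unsigned long long txg = 12345;		/* assumed example txg */
	int txgoff = (int)(txg & TXG_MASK);	/* 12345 % 4 == 1 */

	int dn_next_blksz[TXG_SIZE] = { 0 };
	dn_next_blksz[txgoff] = 131072;		/* dirtied in open context */

	printf("txg %llu -> slot %d, pending blksz %d\n",
	    txg, txgoff, dn_next_blksz[txgoff]);
	return (0);
}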