 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/spa.h>
#include <sys/range_tree.h>
#include <sys/zfeature.h>

static void
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
{
        dmu_buf_impl_t *db;
        int txgoff = tx->tx_txg & TXG_MASK;
        int nblkptr = dn->dn_phys->dn_nblkptr;
        int old_toplvl = dn->dn_phys->dn_nlevels - 1;

        /* ... lines 45-378 elided ... */

        dmu_tx_t *dsfra_tx;
} dnode_sync_free_range_arg_t;

static void
dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
{
        dnode_sync_free_range_arg_t *dsfra = arg;
        dnode_t *dn = dsfra->dsfra_dnode;

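        /*
         * dnode_sync() invokes this callback while holding dn_mtx (the
         * lock that protects the free range tree).  The actual free has
         * to take other locks, so drop dn_mtx around the call and
         * reacquire it before returning to the range tree iteration.
         */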
        mutex_exit(&dn->dn_mtx);
        dnode_sync_free_range_impl(dn, blkid, nblks, dsfra->dsfra_tx);
        mutex_enter(&dn->dn_mtx);
}

/*
 * Try to kick all the dnode's dbufs out of the cache...
 */
void
dnode_evict_dbufs(dnode_t *dn)
{
        int progress;
        int pass = 0;

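        /*
         * Evict in passes: a dbuf already in DB_EVICTING state is being
         * torn down by another thread and cannot be cleared here, so keep
         * looping until a full pass over dn_dbufs makes no progress.
         */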
        do {
                dmu_buf_impl_t *db, *db_next;
                int evicting = FALSE;

                progress = FALSE;
                mutex_enter(&dn->dn_dbufs_mtx);
                for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
                        db_next = AVL_NEXT(&dn->dn_dbufs, db);
#ifdef DEBUG
                        DB_DNODE_ENTER(db);
                        ASSERT3P(DB_DNODE(db), ==, dn);
                        DB_DNODE_EXIT(db);
#endif /* DEBUG */

                        mutex_enter(&db->db_mtx);
                        if (db->db_state == DB_EVICTING) {
                                progress = TRUE;
                                evicting = TRUE;
                                mutex_exit(&db->db_mtx);
                        } else if (refcount_is_zero(&db->db_holds)) {
                                progress = TRUE;
                                dbuf_clear(db); /* exits db_mtx for us */
                        } else {
                                mutex_exit(&db->db_mtx);
                        }

                }
                /*
                 * NB: we need to drop dn_dbufs_mtx between passes so
                 * that any DB_EVICTING dbufs can make progress.
                 * Ideally, we would have some cv we could wait on, but
                 * since we don't, just wait a bit to give the other
                 * thread a chance to run.
                 */
                mutex_exit(&dn->dn_dbufs_mtx);
                if (evicting)
                        delay(1);
                pass++;
                ASSERT(pass < 100); /* sanity check */
        } while (progress);

        rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
        if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
                mutex_enter(&dn->dn_bonus->db_mtx);
                dbuf_evict(dn->dn_bonus);
                dn->dn_bonus = NULL;
        }
        rw_exit(&dn->dn_struct_rwlock);
}

static void
dnode_undirty_dbufs(list_t *list)
{
        dbuf_dirty_record_t *dr;

        while ((dr = list_head(list)) != NULL) {
                dmu_buf_impl_t *db = dr->dr_dbuf;
                uint64_t txg = dr->dr_txg;

                if (db->db_level != 0)
                        /* ... lines 462-479 elided ... */
        }
}

static void
dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
{
        int txgoff = tx->tx_txg & TXG_MASK;

        ASSERT(dmu_tx_is_syncing(tx));

        /*
         * Our contents should have been freed in dnode_sync() by the
         * free range record inserted by the caller of dnode_free().
         */
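        /*
         * For example, dmu_object_free() frees the object's contents
         * before freeing the dnode itself:
         *
         *      dnode_free_range(dn, 0, DMU_OBJECT_END, tx);
         *      dnode_free(dn, tx);
         *
         * so by the time dnode_sync() reaches dnode_sync_free(), the
         * recorded free range has already released the object's blocks.
         */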
        ASSERT0(DN_USED_BYTES(dn->dn_phys));
        ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr));

        dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
        dnode_evict_dbufs(dn);
        ASSERT(avl_is_empty(&dn->dn_dbufs));
        ASSERT3P(dn->dn_bonus, ==, NULL);

        /*
         * XXX - It would be nice to assert this, but we may still
         * have residual holds from async evictions from the arc...
         *
         * zfs_obj_to_path() also depends on this being
         * commented out.
         *
         * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1);
         */

        /* Undirty next bits */
        dn->dn_next_nlevels[txgoff] = 0;
        dn->dn_next_indblkshift[txgoff] = 0;
        dn->dn_next_blksz[txgoff] = 0;

        /* ASSERT(blkptrs are zero); */
        ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
        ASSERT(dn->dn_type != DMU_OT_NONE);

        /* ... remainder of dnode_sync_free() elided ... */
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/spa.h>
#include <sys/range_tree.h>
#include <sys/zfeature.h>

static void
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
{
        dmu_buf_impl_t *db;
        int txgoff = tx->tx_txg & TXG_MASK;
        int nblkptr = dn->dn_phys->dn_nblkptr;
        int old_toplvl = dn->dn_phys->dn_nlevels - 1;

        /* ... lines 46-379 elided ... */

        dmu_tx_t *dsfra_tx;
} dnode_sync_free_range_arg_t;

static void
dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
{
        dnode_sync_free_range_arg_t *dsfra = arg;
        dnode_t *dn = dsfra->dsfra_dnode;

        mutex_exit(&dn->dn_mtx);
        dnode_sync_free_range_impl(dn, blkid, nblks, dsfra->dsfra_tx);
        mutex_enter(&dn->dn_mtx);
}

/*
 * Try to kick all the dnode's dbufs out of the cache...
 */
void
dnode_evict_dbufs(dnode_t *dn)
{
        dmu_buf_impl_t db_marker;
        dmu_buf_impl_t *db, *db_next;

        mutex_enter(&dn->dn_dbufs_mtx);
        for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {

#ifdef DEBUG
                DB_DNODE_ENTER(db);
                ASSERT3P(DB_DNODE(db), ==, dn);
                DB_DNODE_EXIT(db);
#endif /* DEBUG */

                mutex_enter(&db->db_mtx);
                if (db->db_state != DB_EVICTING &&
                    refcount_is_zero(&db->db_holds)) {
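                        /*
                         * dbuf_clear() drops db_mtx and can remove more
                         * entries than just db from dn_dbufs (eviction may
                         * ripple up to now-unreferenced parent dbufs), so a
                         * next pointer fetched before the call could name a
                         * node that is no longer in the tree.  Park a marker
                         * (DB_SEARCH tags it as not a real dbuf) just before
                         * db and resume the walk from it afterwards.
                         */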
                        db_marker.db_level = db->db_level;
                        db_marker.db_blkid = db->db_blkid;
                        db_marker.db_state = DB_SEARCH;
                        avl_insert_here(&dn->dn_dbufs, &db_marker, db,
                            AVL_BEFORE);

                        dbuf_clear(db);

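                        /*
                         * The marker is still linked into dn_dbufs even if
                         * db (and possibly its parents) went away, so it is
                         * a safe spot to pick the iteration back up.
                         */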
                        db_next = AVL_NEXT(&dn->dn_dbufs, &db_marker);
                        avl_remove(&dn->dn_dbufs, &db_marker);
                } else {
                        mutex_exit(&db->db_mtx);
                        db_next = AVL_NEXT(&dn->dn_dbufs, db);
                }
        }
        mutex_exit(&dn->dn_dbufs_mtx);

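        /*
         * The bonus buffer is not linked into dn_dbufs, so evict it
         * separately, under dn_struct_rwlock, if nothing else holds it.
         */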
        rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
        if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
                mutex_enter(&dn->dn_bonus->db_mtx);
                dbuf_evict(dn->dn_bonus);
                dn->dn_bonus = NULL;
        }
        rw_exit(&dn->dn_struct_rwlock);
}

static void
dnode_undirty_dbufs(list_t *list)
{
        dbuf_dirty_record_t *dr;

        while ((dr = list_head(list)) != NULL) {
                dmu_buf_impl_t *db = dr->dr_dbuf;
                uint64_t txg = dr->dr_txg;

                if (db->db_level != 0)
                        /* ... lines 451-468 elided ... */
        }
}

static void
dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
{
        int txgoff = tx->tx_txg & TXG_MASK;

        ASSERT(dmu_tx_is_syncing(tx));

        /*
         * Our contents should have been freed in dnode_sync() by the
         * free range record inserted by the caller of dnode_free().
         */
        ASSERT0(DN_USED_BYTES(dn->dn_phys));
        ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr));

        dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
        dnode_evict_dbufs(dn);
        ASSERT(avl_is_empty(&dn->dn_dbufs));

        /*
         * XXX - It would be nice to assert this, but we may still
         * have residual holds from async evictions from the arc...
         *
         * zfs_obj_to_path() also depends on this being
         * commented out.
         *
         * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1);
         */

        /* Undirty next bits */
        dn->dn_next_nlevels[txgoff] = 0;
        dn->dn_next_indblkshift[txgoff] = 0;
        dn->dn_next_blksz[txgoff] = 0;

        /* ASSERT(blkptrs are zero); */
        ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
        ASSERT(dn->dn_type != DMU_OT_NONE);

        /* ... remainder of dnode_sync_free() elided ... */