Print this page
4374 dn_free_ranges should use range_tree_t
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Max Grossman <max.grossman@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>

*** 18,28 **** * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright (c) 2013 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> #include <sys/dbuf.h> #include <sys/dnode.h> --- 18,28 ---- * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> #include <sys/dbuf.h> #include <sys/dnode.h>
*** 33,45 **** #include <sys/dsl_dir.h> #include <sys/dsl_dataset.h> #include <sys/spa.h> #include <sys/zio.h> #include <sys/dmu_zfetch.h> - static int free_range_compar(const void *node1, const void *node2); - static kmem_cache_t *dnode_cache; /* * Define DNODE_STATS to turn on statistic gathering. By default, it is only * turned on when DEBUG is also defined. */ --- 33,44 ---- #include <sys/dsl_dir.h> #include <sys/dsl_dataset.h> #include <sys/spa.h> #include <sys/zio.h> #include <sys/dmu_zfetch.h> + #include <sys/range_tree.h> static kmem_cache_t *dnode_cache; /* * Define DNODE_STATS to turn on statistic gathering. By default, it is only * turned on when DEBUG is also defined. */
*** 88,100 **** bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); for (i = 0; i < TXG_SIZE; i++) { list_link_init(&dn->dn_dirty_link[i]); ! avl_create(&dn->dn_ranges[i], free_range_compar, ! sizeof (free_range_t), ! offsetof(struct free_range, fr_node)); list_create(&dn->dn_dirty_records[i], sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); } --- 87,97 ---- bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); for (i = 0; i < TXG_SIZE; i++) { list_link_init(&dn->dn_dirty_link[i]); ! dn->dn_free_ranges[i] = NULL; list_create(&dn->dn_dirty_records[i], sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); }
*** 138,148 **** refcount_destroy(&dn->dn_tx_holds); ASSERT(!list_link_active(&dn->dn_link)); for (i = 0; i < TXG_SIZE; i++) { ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ! avl_destroy(&dn->dn_ranges[i]); list_destroy(&dn->dn_dirty_records[i]); ASSERT0(dn->dn_next_nblkptr[i]); ASSERT0(dn->dn_next_nlevels[i]); ASSERT0(dn->dn_next_indblkshift[i]); ASSERT0(dn->dn_next_bonustype[i]); --- 135,145 ---- refcount_destroy(&dn->dn_tx_holds); ASSERT(!list_link_active(&dn->dn_link)); for (i = 0; i < TXG_SIZE; i++) { ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ! ASSERT3P(dn->dn_free_ranges[i], ==, NULL); list_destroy(&dn->dn_dirty_records[i]); ASSERT0(dn->dn_next_nblkptr[i]); ASSERT0(dn->dn_next_nlevels[i]); ASSERT0(dn->dn_next_indblkshift[i]); ASSERT0(dn->dn_next_bonustype[i]);
*** 311,333 **** dnode_byteswap(buf); buf++; } } - static int - free_range_compar(const void *node1, const void *node2) - { - const free_range_t *rp1 = node1; - const free_range_t *rp2 = node2; - - if (rp1->fr_blkid < rp2->fr_blkid) - return (-1); - else if (rp1->fr_blkid > rp2->fr_blkid) - return (1); - else return (0); - } - void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx) { ASSERT3U(refcount_count(&dn->dn_holds), >=, 1); --- 308,317 ----
*** 372,382 **** ASSERT3U(size, >=, SPA_MINBLOCKSIZE); ASSERT3U(size >> SPA_MINBLOCKSHIFT, <, 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8)); dn->dn_datablksz = size; dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT; ! dn->dn_datablkshift = ISP2(size) ? highbit(size - 1) : 0; } static dnode_t * dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, uint64_t object, dnode_handle_t *dnh) --- 356,366 ---- ASSERT3U(size, >=, SPA_MINBLOCKSIZE); ASSERT3U(size >> SPA_MINBLOCKSHIFT, <, 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8)); dn->dn_datablksz = size; dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT; ! dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0; } static dnode_t * dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, uint64_t object, dnode_handle_t *dnh)
*** 528,538 **** ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ! ASSERT0(avl_numnodes(&dn->dn_ranges[i])); } dn->dn_type = ot; dnode_setdblksz(dn, blocksize); dn->dn_indblkshift = ibs; --- 512,522 ---- ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ! ASSERT3P(dn->dn_free_ranges[i], ==, NULL); } dn->dn_type = ot; dnode_setdblksz(dn, blocksize); dn->dn_indblkshift = ibs;
*** 692,702 **** sizeof (odn->dn_next_blksz)); for (i = 0; i < TXG_SIZE; i++) { list_move_tail(&ndn->dn_dirty_records[i], &odn->dn_dirty_records[i]); } ! bcopy(&odn->dn_ranges[0], &ndn->dn_ranges[0], sizeof (odn->dn_ranges)); ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; ndn->dn_dirtyctx = odn->dn_dirtyctx; ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; --- 676,687 ---- sizeof (odn->dn_next_blksz)); for (i = 0; i < TXG_SIZE; i++) { list_move_tail(&ndn->dn_dirty_records[i], &odn->dn_dirty_records[i]); } ! bcopy(&odn->dn_free_ranges[0], &ndn->dn_free_ranges[0], ! sizeof (odn->dn_free_ranges)); ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; ndn->dn_dirtyctx = odn->dn_dirtyctx; ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
*** 755,766 **** */ for (i = 0; i < TXG_SIZE; i++) { list_create(&odn->dn_dirty_records[i], sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); ! odn->dn_ranges[i].avl_root = NULL; ! odn->dn_ranges[i].avl_numnodes = 0; odn->dn_next_nlevels[i] = 0; odn->dn_next_indblkshift[i] = 0; odn->dn_next_bonustype[i] = 0; odn->dn_rm_spillblk[i] = 0; odn->dn_next_bonuslen[i] = 0; --- 740,750 ---- */ for (i = 0; i < TXG_SIZE; i++) { list_create(&odn->dn_dirty_records[i], sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); ! odn->dn_free_ranges[i] = NULL; odn->dn_next_nlevels[i] = 0; odn->dn_next_indblkshift[i] = 0; odn->dn_next_bonustype[i] = 0; odn->dn_rm_spillblk[i] = 0; odn->dn_next_bonuslen[i] = 0;
*** 1460,1522 **** if (have_read) rw_downgrade(&dn->dn_struct_rwlock); } void - dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) - { - avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; - avl_index_t where; - free_range_t *rp; - free_range_t rp_tofind; - uint64_t endblk = blkid + nblks; - - ASSERT(MUTEX_HELD(&dn->dn_mtx)); - ASSERT(nblks <= UINT64_MAX - blkid); /* no overflow */ - - dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", - blkid, nblks, tx->tx_txg); - rp_tofind.fr_blkid = blkid; - rp = avl_find(tree, &rp_tofind, &where); - if (rp == NULL) - rp = avl_nearest(tree, where, AVL_BEFORE); - if (rp == NULL) - rp = avl_nearest(tree, where, AVL_AFTER); - - while (rp && (rp->fr_blkid <= blkid + nblks)) { - uint64_t fr_endblk = rp->fr_blkid + rp->fr_nblks; - free_range_t *nrp = AVL_NEXT(tree, rp); - - if (blkid <= rp->fr_blkid && endblk >= fr_endblk) { - /* clear this entire range */ - avl_remove(tree, rp); - kmem_free(rp, sizeof (free_range_t)); - } else if (blkid <= rp->fr_blkid && - endblk > rp->fr_blkid && endblk < fr_endblk) { - /* clear the beginning of this range */ - rp->fr_blkid = endblk; - rp->fr_nblks = fr_endblk - endblk; - } else if (blkid > rp->fr_blkid && blkid < fr_endblk && - endblk >= fr_endblk) { - /* clear the end of this range */ - rp->fr_nblks = blkid - rp->fr_blkid; - } else if (blkid > rp->fr_blkid && endblk < fr_endblk) { - /* clear a chunk out of this range */ - free_range_t *new_rp = - kmem_alloc(sizeof (free_range_t), KM_SLEEP); - - new_rp->fr_blkid = endblk; - new_rp->fr_nblks = fr_endblk - endblk; - avl_insert_here(tree, new_rp, rp, AVL_AFTER); - rp->fr_nblks = blkid - rp->fr_blkid; - } - /* there may be no overlap */ - rp = nrp; - } - } - - void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) { dmu_buf_impl_t *db; uint64_t blkoff, blkid, nblks; int blksz, blkshift, head, tail; --- 1444,1453 ----
*** 1662,1687 **** /* * Add this range to the dnode range list. * We will finish up this free operation in the syncing phase. */ mutex_enter(&dn->dn_mtx); ! dnode_clear_range(dn, blkid, nblks, tx); ! { ! free_range_t *rp, *found; ! avl_index_t where; ! avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; ! ! /* Add new range to dn_ranges */ ! rp = kmem_alloc(sizeof (free_range_t), KM_SLEEP); ! rp->fr_blkid = blkid; ! rp->fr_nblks = nblks; ! found = avl_find(tree, rp, &where); ! ASSERT(found == NULL); ! avl_insert(tree, rp, where); dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", blkid, nblks, tx->tx_txg); - } mutex_exit(&dn->dn_mtx); dbuf_free_range(dn, blkid, blkid + nblks - 1, tx); dnode_setdirty(dn, tx); out: --- 1593,1611 ---- /* * Add this range to the dnode range list. * We will finish up this free operation in the syncing phase. */ mutex_enter(&dn->dn_mtx); ! int txgoff = tx->tx_txg & TXG_MASK; ! if (dn->dn_free_ranges[txgoff] == NULL) { ! dn->dn_free_ranges[txgoff] = ! range_tree_create(NULL, NULL, &dn->dn_mtx); ! } ! range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks); ! range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks); dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", blkid, nblks, tx->tx_txg); mutex_exit(&dn->dn_mtx); dbuf_free_range(dn, blkid, blkid + nblks - 1, tx); dnode_setdirty(dn, tx); out:
*** 1705,1715 **** /* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */ uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid) { - free_range_t range_tofind; void *dp = spa_get_dsl(dn->dn_objset->os_spa); int i; if (blkid == DMU_BONUS_BLKID) return (FALSE); --- 1629,1638 ----
*** 1725,1750 **** return (TRUE); if (blkid == DMU_SPILL_BLKID) return (dnode_spill_freed(dn)); - range_tofind.fr_blkid = blkid; mutex_enter(&dn->dn_mtx); for (i = 0; i < TXG_SIZE; i++) { ! free_range_t *range_found; ! avl_index_t idx; ! ! range_found = avl_find(&dn->dn_ranges[i], &range_tofind, &idx); ! if (range_found) { ! ASSERT(range_found->fr_nblks > 0); break; } - range_found = avl_nearest(&dn->dn_ranges[i], idx, AVL_BEFORE); - if (range_found && - range_found->fr_blkid + range_found->fr_nblks > blkid) - break; - } mutex_exit(&dn->dn_mtx); return (i < TXG_SIZE); } /* call from syncing context when we actually write/free space for this dnode */ --- 1648,1663 ---- return (TRUE); if (blkid == DMU_SPILL_BLKID) return (dnode_spill_freed(dn)); mutex_enter(&dn->dn_mtx); for (i = 0; i < TXG_SIZE; i++) { ! if (dn->dn_free_ranges[i] != NULL && ! range_tree_contains(dn->dn_free_ranges[i], blkid, 1)) break; } mutex_exit(&dn->dn_mtx); return (i < TXG_SIZE); } /* call from syncing context when we actually write/free space for this dnode */