Print this page
10592 misc. metaslab and vdev related ZoL bug fixes
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Tony Hutter <hutter2@llnl.gov>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
*** 21,31 ****
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
! * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
--- 21,31 ----
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
! * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
*** 79,111 ****
return (SM_PREFIX_DECODE(e) == SM2_PREFIX);
}
/*
* Iterate through the space map, invoking the callback on each (non-debug)
! * space map entry.
*/
int
! space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg)
{
! uint64_t sm_len = space_map_length(sm);
! ASSERT3U(sm->sm_blksz, !=, 0);
! dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, sm_len,
ZIO_PRIORITY_SYNC_READ);
- uint64_t blksz = sm->sm_blksz;
int error = 0;
! for (uint64_t block_base = 0; block_base < sm_len && error == 0;
block_base += blksz) {
dmu_buf_t *db;
error = dmu_buf_hold(sm->sm_os, space_map_object(sm),
block_base, FTAG, &db, DMU_READ_PREFETCH);
if (error != 0)
return (error);
uint64_t *block_start = db->db_data;
! uint64_t block_length = MIN(sm_len - block_base, blksz);
uint64_t *block_end = block_start +
(block_length / sizeof (uint64_t));
VERIFY0(P2PHASE(block_length, sizeof (uint64_t)));
VERIFY3U(block_length, !=, 0);
--- 79,113 ----
return (SM_PREFIX_DECODE(e) == SM2_PREFIX);
}
/*
* Iterate through the space map, invoking the callback on each (non-debug)
! * space map entry. Stop after reading 'end' bytes of the space map.
*/
int
! space_map_iterate(space_map_t *sm, uint64_t end, sm_cb_t callback, void *arg)
{
! uint64_t blksz = sm->sm_blksz;
! ASSERT3U(blksz, !=, 0);
! ASSERT3U(end, <=, space_map_length(sm));
! ASSERT0(P2PHASE(end, sizeof (uint64_t)));
!
! dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, end,
ZIO_PRIORITY_SYNC_READ);
int error = 0;
! for (uint64_t block_base = 0; block_base < end && error == 0;
block_base += blksz) {
dmu_buf_t *db;
error = dmu_buf_hold(sm->sm_os, space_map_object(sm),
block_base, FTAG, &db, DMU_READ_PREFETCH);
if (error != 0)
return (error);
uint64_t *block_start = db->db_data;
! uint64_t block_length = MIN(end - block_base, blksz);
uint64_t *block_end = block_start +
(block_length / sizeof (uint64_t));
VERIFY0(P2PHASE(block_length, sizeof (uint64_t)));
VERIFY3U(block_length, !=, 0);
*** 184,194 ****
* Find the offset of the last word in the space map and use
* that to read the last block of the space map with
* dmu_buf_hold().
*/
uint64_t last_word_offset =
! sm->sm_phys->smp_objsize - sizeof (uint64_t);
error = dmu_buf_hold(sm->sm_os, space_map_object(sm), last_word_offset,
FTAG, &db, DMU_READ_NO_PREFETCH);
if (error != 0)
return (error);
--- 186,196 ----
* Find the offset of the last word in the space map and use
* that to read the last block of the space map with
* dmu_buf_hold().
*/
uint64_t last_word_offset =
! sm->sm_phys->smp_length - sizeof (uint64_t);
error = dmu_buf_hold(sm->sm_os, space_map_object(sm), last_word_offset,
FTAG, &db, DMU_READ_NO_PREFETCH);
if (error != 0)
return (error);
*** 197,207 ****
ASSERT3U(bufsz, >=, db->db_size);
ASSERT(nwords != NULL);
uint64_t *words = db->db_data;
*nwords =
! (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);
ASSERT3U(*nwords, <=, bufsz / sizeof (uint64_t));
uint64_t n = *nwords;
uint64_t j = n - 1;
--- 199,209 ----
ASSERT3U(bufsz, >=, db->db_size);
ASSERT(nwords != NULL);
uint64_t *words = db->db_data;
*nwords =
! (sm->sm_phys->smp_length - db->db_offset) / sizeof (uint64_t);
ASSERT3U(*nwords, <=, bufsz / sizeof (uint64_t));
uint64_t n = *nwords;
uint64_t j = n - 1;
*** 296,307 ****
for (uint64_t i = 0; i < nwords; i++) {
uint64_t e = buf[i];
if (sm_entry_is_debug(e)) {
! sm->sm_phys->smp_objsize -= sizeof (uint64_t);
! space_map_update(sm);
continue;
}
int words = 1;
uint64_t raw_offset, raw_run, vdev_id;
--- 298,308 ----
for (uint64_t i = 0; i < nwords; i++) {
uint64_t e = buf[i];
if (sm_entry_is_debug(e)) {
! sm->sm_phys->smp_length -= sizeof (uint64_t);
continue;
}
int words = 1;
uint64_t raw_offset, raw_run, vdev_id;
*** 352,370 ****
if (type == SM_ALLOC)
sm->sm_phys->smp_alloc -= entry_run;
else
sm->sm_phys->smp_alloc += entry_run;
! sm->sm_phys->smp_objsize -= words * sizeof (uint64_t);
! space_map_update(sm);
}
}
if (space_map_length(sm) == 0) {
ASSERT0(error);
! ASSERT0(sm->sm_phys->smp_objsize);
! ASSERT0(sm->sm_alloc);
}
zio_buf_free(buf, bufsz);
return (error);
}
--- 353,369 ----
if (type == SM_ALLOC)
sm->sm_phys->smp_alloc -= entry_run;
else
sm->sm_phys->smp_alloc += entry_run;
! sm->sm_phys->smp_length -= words * sizeof (uint64_t);
}
}
if (space_map_length(sm) == 0) {
ASSERT0(error);
! ASSERT0(space_map_allocated(sm));
}
zio_buf_free(buf, bufsz);
return (error);
}
*** 389,430 ****
return (0);
}
/*
! * Load the space map disk into the specified range tree. Segments of maptype
! * are added to the range tree, other segment types are removed.
*/
int
! space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
{
- uint64_t space;
- int err;
space_map_load_arg_t smla;
VERIFY0(range_tree_space(rt));
- space = space_map_allocated(sm);
! if (maptype == SM_FREE) {
range_tree_add(rt, sm->sm_start, sm->sm_size);
- space = sm->sm_size - space;
- }
smla.smla_rt = rt;
smla.smla_sm = sm;
smla.smla_type = maptype;
! err = space_map_iterate(sm, space_map_load_callback, &smla);
! if (err == 0) {
! VERIFY3U(range_tree_space(rt), ==, space);
! } else {
range_tree_vacate(rt, NULL, NULL);
- }
return (err);
}
void
space_map_histogram_clear(space_map_t *sm)
{
if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
return;
--- 388,433 ----
return (0);
}
/*
! * Load the spacemap into the rangetree, like space_map_load. But only
! * read the first 'length' bytes of the spacemap.
*/
int
! space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
! uint64_t length)
{
space_map_load_arg_t smla;
VERIFY0(range_tree_space(rt));
! if (maptype == SM_FREE)
range_tree_add(rt, sm->sm_start, sm->sm_size);
smla.smla_rt = rt;
smla.smla_sm = sm;
smla.smla_type = maptype;
! int err = space_map_iterate(sm, length,
! space_map_load_callback, &smla);
! if (err != 0)
range_tree_vacate(rt, NULL, NULL);
return (err);
}
+ /*
+ * Load the space map disk into the specified range tree. Segments of maptype
+ * are added to the range tree, other segment types are removed.
+ */
+ int
+ space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
+ {
+ return (space_map_load_length(sm, rt, maptype, space_map_length(sm)));
+ }
+
void
space_map_histogram_clear(space_map_t *sm)
{
if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
return;
*** 504,517 ****
uint64_t dentry = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
SM_DEBUG_ACTION_ENCODE(maptype) |
SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(tx->tx_pool->dp_spa)) |
SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
! dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_objsize,
sizeof (dentry), &dentry, tx);
! sm->sm_phys->smp_objsize += sizeof (dentry);
}
/*
* Writes one or more entries given a segment.
*
--- 507,520 ----
uint64_t dentry = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
SM_DEBUG_ACTION_ENCODE(maptype) |
SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(tx->tx_pool->dp_spa)) |
SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
! dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_length,
sizeof (dentry), &dentry, tx);
! sm->sm_phys->smp_length += sizeof (dentry);
}
/*
* Writes one or more entries given a segment.
*
*** 539,549 ****
ASSERT3U(db->db_size, ==, sm->sm_blksz);
uint64_t *block_base = db->db_data;
uint64_t *block_end = block_base + (sm->sm_blksz / sizeof (uint64_t));
uint64_t *block_cursor = block_base +
! (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);
ASSERT3P(block_cursor, <=, block_end);
uint64_t size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
uint64_t start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
--- 542,552 ----
ASSERT3U(db->db_size, ==, sm->sm_blksz);
uint64_t *block_base = db->db_data;
uint64_t *block_end = block_base + (sm->sm_blksz / sizeof (uint64_t));
uint64_t *block_cursor = block_base +
! (sm->sm_phys->smp_length - db->db_offset) / sizeof (uint64_t);
ASSERT3P(block_cursor, <=, block_end);
uint64_t size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
uint64_t start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
*** 562,572 ****
* writing again from the beginning.
*/
if (block_cursor == block_end) {
dmu_buf_rele(db, tag);
! uint64_t next_word_offset = sm->sm_phys->smp_objsize;
VERIFY0(dmu_buf_hold(sm->sm_os,
space_map_object(sm), next_word_offset,
tag, &db, DMU_READ_PREFETCH));
dmu_buf_will_dirty(db, tx);
--- 565,575 ----
* writing again from the beginning.
*/
if (block_cursor == block_end) {
dmu_buf_rele(db, tag);
! uint64_t next_word_offset = sm->sm_phys->smp_length;
VERIFY0(dmu_buf_hold(sm->sm_os,
space_map_object(sm), next_word_offset,
tag, &db, DMU_READ_PREFETCH));
dmu_buf_will_dirty(db, tx);
*** 592,602 ****
*block_cursor = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
SM_DEBUG_ACTION_ENCODE(0) |
SM_DEBUG_SYNCPASS_ENCODE(0) |
SM_DEBUG_TXG_ENCODE(0);
block_cursor++;
! sm->sm_phys->smp_objsize += sizeof (uint64_t);
ASSERT3P(block_cursor, ==, block_end);
continue;
}
uint64_t run_len = MIN(size, run_max);
--- 595,605 ----
*block_cursor = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
SM_DEBUG_ACTION_ENCODE(0) |
SM_DEBUG_SYNCPASS_ENCODE(0) |
SM_DEBUG_TXG_ENCODE(0);
block_cursor++;
! sm->sm_phys->smp_length += sizeof (uint64_t);
ASSERT3P(block_cursor, ==, block_end);
continue;
}
uint64_t run_len = MIN(size, run_max);
*** 623,633 ****
default:
panic("%d-word space map entries are not supported",
words);
break;
}
! sm->sm_phys->smp_objsize += words * sizeof (uint64_t);
start += run_len;
size -= run_len;
}
ASSERT0(size);
--- 626,636 ----
default:
panic("%d-word space map entries are not supported",
words);
break;
}
! sm->sm_phys->smp_length += words * sizeof (uint64_t);
start += run_len;
size -= run_len;
}
ASSERT0(size);
*** 650,660 ****
#ifdef DEBUG
/*
* We do this right after we write the intro debug entry
* because the estimate does not take it into account.
*/
! uint64_t initial_objsize = sm->sm_phys->smp_objsize;
uint64_t estimated_growth =
space_map_estimate_optimal_size(sm, rt, SM_NO_VDEVID);
uint64_t estimated_final_objsize = initial_objsize + estimated_growth;
#endif
--- 653,663 ----
#ifdef DEBUG
/*
* We do this right after we write the intro debug entry
* because the estimate does not take it into account.
*/
! uint64_t initial_objsize = sm->sm_phys->smp_length;
uint64_t estimated_growth =
space_map_estimate_optimal_size(sm, rt, SM_NO_VDEVID);
uint64_t estimated_final_objsize = initial_objsize + estimated_growth;
#endif
*** 661,671 ****
/*
* Find the offset right after the last word in the space map
* and use that to get a hold of the last block, so we can
* start appending to it.
*/
! uint64_t next_word_offset = sm->sm_phys->smp_objsize;
VERIFY0(dmu_buf_hold(sm->sm_os, space_map_object(sm),
next_word_offset, FTAG, &db, DMU_READ_PREFETCH));
ASSERT3U(db->db_size, ==, sm->sm_blksz);
dmu_buf_will_dirty(db, tx);
--- 664,674 ----
/*
* Find the offset right after the last word in the space map
* and use that to get a hold of the last block, so we can
* start appending to it.
*/
! uint64_t next_word_offset = sm->sm_phys->smp_length;
VERIFY0(dmu_buf_hold(sm->sm_os, space_map_object(sm),
next_word_offset, FTAG, &db, DMU_READ_PREFETCH));
ASSERT3U(db->db_size, ==, sm->sm_blksz);
dmu_buf_will_dirty(db, tx);
*** 709,719 ****
* We expect our estimation to be based on the worst case
* scenario [see comment in space_map_estimate_optimal_size()].
* Therefore we expect the actual objsize to be equal or less
* than whatever we estimated it to be.
*/
! ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_objsize);
#endif
}
/*
* Note: This function manipulates the state of the given space map but
--- 712,722 ----
* We expect our estimation to be based on the worst case
* scenario [see comment in space_map_estimate_optimal_size()].
* Therefore we expect the actual objsize to be equal or less
* than whatever we estimated it to be.
*/
! ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_length);
#endif
}
/*
* Note: This function manipulates the state of the given space map but
*** 865,891 ****
bzero(sm->sm_phys->smp_histogram,
sizeof (sm->sm_phys->smp_histogram));
}
dmu_buf_will_dirty(sm->sm_dbuf, tx);
! sm->sm_phys->smp_objsize = 0;
sm->sm_phys->smp_alloc = 0;
}
- /*
- * Update the in-core space_map allocation and length values.
- */
- void
- space_map_update(space_map_t *sm)
- {
- if (sm == NULL)
- return;
-
- sm->sm_alloc = sm->sm_phys->smp_alloc;
- sm->sm_length = sm->sm_phys->smp_objsize;
- }
-
uint64_t
space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
{
spa_t *spa = dmu_objset_spa(os);
uint64_t object;
--- 868,881 ----
bzero(sm->sm_phys->smp_histogram,
sizeof (sm->sm_phys->smp_histogram));
}
dmu_buf_will_dirty(sm->sm_dbuf, tx);
! sm->sm_phys->smp_length = 0;
sm->sm_phys->smp_alloc = 0;
}
uint64_t
space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
{
spa_t *spa = dmu_objset_spa(os);
uint64_t object;
*** 1063,1096 ****
space_map_object(space_map_t *sm)
{
return (sm != NULL ? sm->sm_object : 0);
}
! /*
! * Returns the already synced, on-disk allocated space.
! */
! uint64_t
space_map_allocated(space_map_t *sm)
{
! return (sm != NULL ? sm->sm_alloc : 0);
}
- /*
- * Returns the already synced, on-disk length;
- */
uint64_t
space_map_length(space_map_t *sm)
{
! return (sm != NULL ? sm->sm_length : 0);
! }
!
! /*
! * Returns the allocated space that is currently syncing.
! */
! int64_t
! space_map_alloc_delta(space_map_t *sm)
! {
! if (sm == NULL)
! return (0);
! ASSERT(sm->sm_dbuf != NULL);
! return (sm->sm_phys->smp_alloc - space_map_allocated(sm));
}
--- 1053,1068 ----
space_map_object(space_map_t *sm)
{
return (sm != NULL ? sm->sm_object : 0);
}
! int64_t
space_map_allocated(space_map_t *sm)
{
! return (sm != NULL ? sm->sm_phys->smp_alloc : 0);
}
uint64_t
space_map_length(space_map_t *sm)
{
! return (sm != NULL ? sm->sm_phys->smp_length : 0);
}