Print this page
10592 misc. metaslab and vdev related ZoL bug fixes
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Tony Hutter <hutter2@llnl.gov>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>

*** 21,31 ****
  /*
   * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
   * Use is subject to license terms.
   */
  /*
!  * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
   */

  #include <sys/zfs_context.h>
  #include <sys/spa.h>
  #include <sys/dmu.h>
--- 21,31 ----
  /*
   * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
   * Use is subject to license terms.
   */
  /*
!  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
   */

  #include <sys/zfs_context.h>
  #include <sys/spa.h>
  #include <sys/dmu.h>
*** 79,111 ****
  	return (SM_PREFIX_DECODE(e) == SM2_PREFIX);
  }

  /*
   * Iterate through the space map, invoking the callback on each (non-debug)
!  * space map entry.
   */
  int
! space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg)
  {
! 	uint64_t sm_len = space_map_length(sm);
! 	ASSERT3U(sm->sm_blksz, !=, 0);

! 	dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, sm_len,
  	    ZIO_PRIORITY_SYNC_READ);

- 	uint64_t blksz = sm->sm_blksz;
  	int error = 0;
! 	for (uint64_t block_base = 0; block_base < sm_len && error == 0;
  	    block_base += blksz) {
  		dmu_buf_t *db;
  		error = dmu_buf_hold(sm->sm_os, space_map_object(sm),
  		    block_base, FTAG, &db, DMU_READ_PREFETCH);
  		if (error != 0)
  			return (error);

  		uint64_t *block_start = db->db_data;
! 		uint64_t block_length = MIN(sm_len - block_base, blksz);
  		uint64_t *block_end = block_start +
  		    (block_length / sizeof (uint64_t));

  		VERIFY0(P2PHASE(block_length, sizeof (uint64_t)));
  		VERIFY3U(block_length, !=, 0);
--- 79,113 ----
  	return (SM_PREFIX_DECODE(e) == SM2_PREFIX);
  }

  /*
   * Iterate through the space map, invoking the callback on each (non-debug)
!  * space map entry. Stop after reading 'end' bytes of the space map.
   */
  int
! space_map_iterate(space_map_t *sm, uint64_t end, sm_cb_t callback, void *arg)
  {
! 	uint64_t blksz = sm->sm_blksz;

! 	ASSERT3U(blksz, !=, 0);
! 	ASSERT3U(end, <=, space_map_length(sm));
! 	ASSERT0(P2PHASE(end, sizeof (uint64_t)));
!
! 	dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, end,
  	    ZIO_PRIORITY_SYNC_READ);

  	int error = 0;
! 	for (uint64_t block_base = 0; block_base < end && error == 0;
  	    block_base += blksz) {
  		dmu_buf_t *db;
  		error = dmu_buf_hold(sm->sm_os, space_map_object(sm),
  		    block_base, FTAG, &db, DMU_READ_PREFETCH);
  		if (error != 0)
  			return (error);

  		uint64_t *block_start = db->db_data;
! 		uint64_t block_length = MIN(end - block_base, blksz);
  		uint64_t *block_end = block_start +
  		    (block_length / sizeof (uint64_t));

  		VERIFY0(P2PHASE(block_length, sizeof (uint64_t)));
  		VERIFY3U(block_length, !=, 0);
*** 184,194 ****
  	 * Find the offset of the last word in the space map and use
  	 * that to read the last block of the space map with
  	 * dmu_buf_hold().
  	 */
  	uint64_t last_word_offset =
! 	    sm->sm_phys->smp_objsize - sizeof (uint64_t);
  	error = dmu_buf_hold(sm->sm_os, space_map_object(sm), last_word_offset,
  	    FTAG, &db, DMU_READ_NO_PREFETCH);
  	if (error != 0)
  		return (error);

--- 186,196 ----
  	 * Find the offset of the last word in the space map and use
  	 * that to read the last block of the space map with
  	 * dmu_buf_hold().
  	 */
  	uint64_t last_word_offset =
! 	    sm->sm_phys->smp_length - sizeof (uint64_t);
  	error = dmu_buf_hold(sm->sm_os, space_map_object(sm), last_word_offset,
  	    FTAG, &db, DMU_READ_NO_PREFETCH);
  	if (error != 0)
  		return (error);

*** 197,207 ****
  	ASSERT3U(bufsz, >=, db->db_size);
  	ASSERT(nwords != NULL);

  	uint64_t *words = db->db_data;
  	*nwords =
! 	    (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);

  	ASSERT3U(*nwords, <=, bufsz / sizeof (uint64_t));

  	uint64_t n = *nwords;
  	uint64_t j = n - 1;
--- 199,209 ----
  	ASSERT3U(bufsz, >=, db->db_size);
  	ASSERT(nwords != NULL);

  	uint64_t *words = db->db_data;
  	*nwords =
! 	    (sm->sm_phys->smp_length - db->db_offset) / sizeof (uint64_t);

  	ASSERT3U(*nwords, <=, bufsz / sizeof (uint64_t));

  	uint64_t n = *nwords;
  	uint64_t j = n - 1;
*** 296,307 ****

  		for (uint64_t i = 0; i < nwords; i++) {
  			uint64_t e = buf[i];

  			if (sm_entry_is_debug(e)) {
! 				sm->sm_phys->smp_objsize -= sizeof (uint64_t);
! 				space_map_update(sm);
  				continue;
  			}

  			int words = 1;
  			uint64_t raw_offset, raw_run, vdev_id;
--- 298,308 ----

  		for (uint64_t i = 0; i < nwords; i++) {
  			uint64_t e = buf[i];

  			if (sm_entry_is_debug(e)) {
! 				sm->sm_phys->smp_length -= sizeof (uint64_t);
  				continue;
  			}

  			int words = 1;
  			uint64_t raw_offset, raw_run, vdev_id;
*** 352,370 ****

  			if (type == SM_ALLOC)
  				sm->sm_phys->smp_alloc -= entry_run;
  			else
  				sm->sm_phys->smp_alloc += entry_run;
! 			sm->sm_phys->smp_objsize -= words * sizeof (uint64_t);
! 			space_map_update(sm);
  		}
  	}

  	if (space_map_length(sm) == 0) {
  		ASSERT0(error);
! 		ASSERT0(sm->sm_phys->smp_objsize);
! 		ASSERT0(sm->sm_alloc);
  	}

  	zio_buf_free(buf, bufsz);
  	return (error);
  }
--- 353,369 ----

  			if (type == SM_ALLOC)
  				sm->sm_phys->smp_alloc -= entry_run;
  			else
  				sm->sm_phys->smp_alloc += entry_run;
! 			sm->sm_phys->smp_length -= words * sizeof (uint64_t);
  		}
  	}

  	if (space_map_length(sm) == 0) {
  		ASSERT0(error);
! 		ASSERT0(space_map_allocated(sm));
  	}

  	zio_buf_free(buf, bufsz);
  	return (error);
  }
*** 389,430 ****

  	return (0);
  }

  /*
!  * Load the space map disk into the specified range tree. Segments of maptype
!  * are added to the range tree, other segment types are removed.
   */
  int
! space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
  {
- 	uint64_t space;
- 	int err;
  	space_map_load_arg_t smla;

  	VERIFY0(range_tree_space(rt));
- 	space = space_map_allocated(sm);

! 	if (maptype == SM_FREE) {
  		range_tree_add(rt, sm->sm_start, sm->sm_size);
- 		space = sm->sm_size - space;
- 	}

  	smla.smla_rt = rt;
  	smla.smla_sm = sm;
  	smla.smla_type = maptype;
! 	err = space_map_iterate(sm, space_map_load_callback, &smla);

! 	if (err == 0) {
! 		VERIFY3U(range_tree_space(rt), ==, space);
! 	} else {
  		range_tree_vacate(rt, NULL, NULL);
- 	}

  	return (err);
  }

  void
  space_map_histogram_clear(space_map_t *sm)
  {
  	if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
  		return;
--- 388,433 ----

  	return (0);
  }

  /*
!  * Load the spacemap into the rangetree, like space_map_load. But only
!  * read the first 'length' bytes of the spacemap.
   */
  int
! space_map_load_length(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
!     uint64_t length)
  {
  	space_map_load_arg_t smla;

  	VERIFY0(range_tree_space(rt));

! 	if (maptype == SM_FREE)
  		range_tree_add(rt, sm->sm_start, sm->sm_size);

  	smla.smla_rt = rt;
  	smla.smla_sm = sm;
  	smla.smla_type = maptype;
! 	int err = space_map_iterate(sm, length,
! 	    space_map_load_callback, &smla);

! 	if (err != 0)
  		range_tree_vacate(rt, NULL, NULL);

  	return (err);
  }

+ /*
+  * Load the space map disk into the specified range tree. Segments of maptype
+  * are added to the range tree, other segment types are removed.
+  */
+ int
+ space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
+ {
+ 	return (space_map_load_length(sm, rt, maptype, space_map_length(sm)));
+ }
+
  void
  space_map_histogram_clear(space_map_t *sm)
  {
  	if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
  		return;
*** 504,517 ****
  	uint64_t dentry = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
  	    SM_DEBUG_ACTION_ENCODE(maptype) |
  	    SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(tx->tx_pool->dp_spa)) |
  	    SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));

! 	dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_objsize,
  	    sizeof (dentry), &dentry, tx);

! 	sm->sm_phys->smp_objsize += sizeof (dentry);
  }

  /*
   * Writes one or more entries given a segment.
   *
--- 507,520 ----
  	uint64_t dentry = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
  	    SM_DEBUG_ACTION_ENCODE(maptype) |
  	    SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(tx->tx_pool->dp_spa)) |
  	    SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));

! 	dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_length,
  	    sizeof (dentry), &dentry, tx);

! 	sm->sm_phys->smp_length += sizeof (dentry);
  }

  /*
   * Writes one or more entries given a segment.
   *
*** 539,549 ****
  	ASSERT3U(db->db_size, ==, sm->sm_blksz);

  	uint64_t *block_base = db->db_data;
  	uint64_t *block_end = block_base + (sm->sm_blksz / sizeof (uint64_t));
  	uint64_t *block_cursor = block_base +
! 	    (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);

  	ASSERT3P(block_cursor, <=, block_end);

  	uint64_t size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
  	uint64_t start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
--- 542,552 ----
  	ASSERT3U(db->db_size, ==, sm->sm_blksz);

  	uint64_t *block_base = db->db_data;
  	uint64_t *block_end = block_base + (sm->sm_blksz / sizeof (uint64_t));
  	uint64_t *block_cursor = block_base +
! 	    (sm->sm_phys->smp_length - db->db_offset) / sizeof (uint64_t);

  	ASSERT3P(block_cursor, <=, block_end);

  	uint64_t size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
  	uint64_t start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
*** 562,572 ****
  		 * writing again from the beginning.
  		 */
  		if (block_cursor == block_end) {
  			dmu_buf_rele(db, tag);

! 			uint64_t next_word_offset = sm->sm_phys->smp_objsize;
  			VERIFY0(dmu_buf_hold(sm->sm_os,
  			    space_map_object(sm), next_word_offset,
  			    tag, &db, DMU_READ_PREFETCH));
  			dmu_buf_will_dirty(db, tx);

--- 565,575 ----
  		 * writing again from the beginning.
  		 */
  		if (block_cursor == block_end) {
  			dmu_buf_rele(db, tag);

! 			uint64_t next_word_offset = sm->sm_phys->smp_length;
  			VERIFY0(dmu_buf_hold(sm->sm_os,
  			    space_map_object(sm), next_word_offset,
  			    tag, &db, DMU_READ_PREFETCH));
  			dmu_buf_will_dirty(db, tx);

*** 592,602 ****
  			*block_cursor = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
  			    SM_DEBUG_ACTION_ENCODE(0) |
  			    SM_DEBUG_SYNCPASS_ENCODE(0) |
  			    SM_DEBUG_TXG_ENCODE(0);
  			block_cursor++;
! 			sm->sm_phys->smp_objsize += sizeof (uint64_t);
  			ASSERT3P(block_cursor, ==, block_end);
  			continue;
  		}

  		uint64_t run_len = MIN(size, run_max);
--- 595,605 ----
  			*block_cursor = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
  			    SM_DEBUG_ACTION_ENCODE(0) |
  			    SM_DEBUG_SYNCPASS_ENCODE(0) |
  			    SM_DEBUG_TXG_ENCODE(0);
  			block_cursor++;
! 			sm->sm_phys->smp_length += sizeof (uint64_t);
  			ASSERT3P(block_cursor, ==, block_end);
  			continue;
  		}

  		uint64_t run_len = MIN(size, run_max);
*** 623,633 ****
  		default:
  			panic("%d-word space map entries are not supported",
  			    words);
  			break;
  		}
! 		sm->sm_phys->smp_objsize += words * sizeof (uint64_t);

  		start += run_len;
  		size -= run_len;
  	}
  	ASSERT0(size);
--- 626,636 ----
  		default:
  			panic("%d-word space map entries are not supported",
  			    words);
  			break;
  		}
! 		sm->sm_phys->smp_length += words * sizeof (uint64_t);

  		start += run_len;
  		size -= run_len;
  	}
  	ASSERT0(size);
*** 650,660 ****
  #ifdef DEBUG
  	/*
  	 * We do this right after we write the intro debug entry
  	 * because the estimate does not take it into account.
  	 */
! 	uint64_t initial_objsize = sm->sm_phys->smp_objsize;
  	uint64_t estimated_growth =
  	    space_map_estimate_optimal_size(sm, rt, SM_NO_VDEVID);
  	uint64_t estimated_final_objsize = initial_objsize + estimated_growth;
  #endif

--- 653,663 ----
  #ifdef DEBUG
  	/*
  	 * We do this right after we write the intro debug entry
  	 * because the estimate does not take it into account.
  	 */
! 	uint64_t initial_objsize = sm->sm_phys->smp_length;
  	uint64_t estimated_growth =
  	    space_map_estimate_optimal_size(sm, rt, SM_NO_VDEVID);
  	uint64_t estimated_final_objsize = initial_objsize + estimated_growth;
  #endif

*** 661,671 ****
  	/*
  	 * Find the offset right after the last word in the space map
  	 * and use that to get a hold of the last block, so we can
  	 * start appending to it.
  	 */
! 	uint64_t next_word_offset = sm->sm_phys->smp_objsize;
  	VERIFY0(dmu_buf_hold(sm->sm_os, space_map_object(sm),
  	    next_word_offset, FTAG, &db, DMU_READ_PREFETCH));
  	ASSERT3U(db->db_size, ==, sm->sm_blksz);

  	dmu_buf_will_dirty(db, tx);
--- 664,674 ----
  	/*
  	 * Find the offset right after the last word in the space map
  	 * and use that to get a hold of the last block, so we can
  	 * start appending to it.
  	 */
! 	uint64_t next_word_offset = sm->sm_phys->smp_length;
  	VERIFY0(dmu_buf_hold(sm->sm_os, space_map_object(sm),
  	    next_word_offset, FTAG, &db, DMU_READ_PREFETCH));
  	ASSERT3U(db->db_size, ==, sm->sm_blksz);

  	dmu_buf_will_dirty(db, tx);
*** 709,719 ****
  	 * We expect our estimation to be based on the worst case
  	 * scenario [see comment in space_map_estimate_optimal_size()].
  	 * Therefore we expect the actual objsize to be equal or less
  	 * than whatever we estimated it to be.
  	 */
! 	ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_objsize);
  #endif
  }

  /*
   * Note: This function manipulates the state of the given space map but
--- 712,722 ----
  	 * We expect our estimation to be based on the worst case
  	 * scenario [see comment in space_map_estimate_optimal_size()].
  	 * Therefore we expect the actual objsize to be equal or less
  	 * than whatever we estimated it to be.
  	 */
! 	ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_length);
  #endif
  }

  /*
   * Note: This function manipulates the state of the given space map but
*** 865,891 ****
  		bzero(sm->sm_phys->smp_histogram,
  		    sizeof (sm->sm_phys->smp_histogram));
  	}

  	dmu_buf_will_dirty(sm->sm_dbuf, tx);
! 	sm->sm_phys->smp_objsize = 0;
  	sm->sm_phys->smp_alloc = 0;
  }

- /*
-  * Update the in-core space_map allocation and length values.
-  */
- void
- space_map_update(space_map_t *sm)
- {
- 	if (sm == NULL)
- 		return;
-
- 	sm->sm_alloc = sm->sm_phys->smp_alloc;
- 	sm->sm_length = sm->sm_phys->smp_objsize;
- }
-
  uint64_t
  space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
  {
  	spa_t *spa = dmu_objset_spa(os);
  	uint64_t object;
--- 868,881 ----
  		bzero(sm->sm_phys->smp_histogram,
  		    sizeof (sm->sm_phys->smp_histogram));
  	}

  	dmu_buf_will_dirty(sm->sm_dbuf, tx);
! 	sm->sm_phys->smp_length = 0;
  	sm->sm_phys->smp_alloc = 0;
  }

  uint64_t
  space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
  {
  	spa_t *spa = dmu_objset_spa(os);
  	uint64_t object;
*** 1063,1096 ****
  space_map_object(space_map_t *sm)
  {
  	return (sm != NULL ? sm->sm_object : 0);
  }

! /*
!  * Returns the already synced, on-disk allocated space.
!  */
! uint64_t
  space_map_allocated(space_map_t *sm)
  {
! 	return (sm != NULL ? sm->sm_alloc : 0);
  }

- /*
-  * Returns the already synced, on-disk length;
-  */
  uint64_t
  space_map_length(space_map_t *sm)
  {
! 	return (sm != NULL ? sm->sm_length : 0);
! }
!
! /*
!  * Returns the allocated space that is currently syncing.
!  */
! int64_t
! space_map_alloc_delta(space_map_t *sm)
! {
! 	if (sm == NULL)
! 		return (0);
! 	ASSERT(sm->sm_dbuf != NULL);
! 	return (sm->sm_phys->smp_alloc - space_map_allocated(sm));
  }
--- 1053,1068 ----
  space_map_object(space_map_t *sm)
  {
  	return (sm != NULL ? sm->sm_object : 0);
  }

! int64_t
  space_map_allocated(space_map_t *sm)
  {
! 	return (sm != NULL ? sm->sm_phys->smp_alloc : 0);
  }

  uint64_t
  space_map_length(space_map_t *sm)
  {
! 	return (sm != NULL ? sm->sm_phys->smp_length : 0);
  }