768 if (expected_refcount != actual_refcount) {
769 (void) printf("space map refcount mismatch: expected %lld != "
770 "actual %lld\n",
771 (longlong_t)expected_refcount,
772 (longlong_t)actual_refcount);
773 return (2);
774 }
775 return (0);
776 }
777
778 static void
779 dump_spacemap(objset_t *os, space_map_t *sm)
780 {
781 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
782 "INVALID", "INVALID", "INVALID", "INVALID" };
783
784 if (sm == NULL)
785 return;
786
787 (void) printf("space map object %llu:\n",
788 (longlong_t)sm->sm_phys->smp_object);
789 (void) printf(" smp_objsize = 0x%llx\n",
790 (longlong_t)sm->sm_phys->smp_objsize);
791 (void) printf(" smp_alloc = 0x%llx\n",
792 (longlong_t)sm->sm_phys->smp_alloc);
793
794 /*
795 * Print out the freelist entries in both encoded and decoded form.
796 */
797 uint8_t mapshift = sm->sm_shift;
798 int64_t alloc = 0;
799 uint64_t word;
800 for (uint64_t offset = 0; offset < space_map_length(sm);
801 offset += sizeof (word)) {
802
803 VERIFY0(dmu_read(os, space_map_object(sm), offset,
804 sizeof (word), &word, DMU_READ_PREFETCH));
805
806 if (sm_entry_is_debug(word)) {
807 (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
808 (u_longlong_t)(offset / sizeof (word)),
809 ddata[SM_DEBUG_ACTION_DECODE(word)],
810 (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
811 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
812 continue;
813 }
814
815 uint8_t words;
816 char entry_type;
817 uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
818
819 if (sm_entry_is_single_word(word)) {
820 entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
821 'A' : 'F';
822 entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
823 sm->sm_start;
824 entry_run = SM_RUN_DECODE(word) << mapshift;
825 words = 1;
826 } else {
827 /* it is a two-word entry so we read another word */
828 ASSERT(sm_entry_is_double_word(word));
829
830 uint64_t extra_word;
831 offset += sizeof (extra_word);
832 VERIFY0(dmu_read(os, space_map_object(sm), offset,
833 sizeof (extra_word), &extra_word,
834 DMU_READ_PREFETCH));
835
836 ASSERT3U(offset, <=, space_map_length(sm));
837
838 entry_run = SM2_RUN_DECODE(word) << mapshift;
839 entry_vdev = SM2_VDEV_DECODE(word);
840 entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
841 'A' : 'F';
842 entry_off = (SM2_OFFSET_DECODE(extra_word) <<
843 mapshift) + sm->sm_start;
844 words = 2;
845 }
846
847 (void) printf("\t [%6llu] %c range:"
848 " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
849 (u_longlong_t)(offset / sizeof (word)),
850 entry_type, (u_longlong_t)entry_off,
851 (u_longlong_t)(entry_off + entry_run),
852 (u_longlong_t)entry_run,
853 (u_longlong_t)entry_vdev, words);
854
855 if (entry_type == 'A')
856 alloc += entry_run;
857 else
858 alloc -= entry_run;
859 }
860 if ((uint64_t)alloc != space_map_allocated(sm)) {
861 (void) printf("space_map_object alloc (%lld) INCONSISTENT "
862 "with space map summary (%lld)\n",
863 (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
864 }
865 }
866
867 static void
868 dump_metaslab_stats(metaslab_t *msp)
869 {
870 char maxbuf[32];
871 range_tree_t *rt = msp->ms_allocatable;
872 avl_tree_t *t = &msp->ms_allocatable_by_size;
873 int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
874
875 	/* make sure nicenum has enough space */
876 CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
877
878 zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
879
880 (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
904 mutex_enter(&msp->ms_lock);
905 VERIFY0(metaslab_load(msp));
906 range_tree_stat_verify(msp->ms_allocatable);
907 dump_metaslab_stats(msp);
908 metaslab_unload(msp);
909 mutex_exit(&msp->ms_lock);
910 }
911
912 if (dump_opt['m'] > 1 && sm != NULL &&
913 spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
914 /*
915 * The space map histogram represents free space in chunks
916 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
917 */
918 (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
919 (u_longlong_t)msp->ms_fragmentation);
920 dump_histogram(sm->sm_phys->smp_histogram,
921 SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
922 }
923
924 if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
925 ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
926
927 dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
928 }
929 }
930
931 static void
932 print_vdev_metaslab_header(vdev_t *vd)
933 {
934 vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
935 const char *bias_str;
936
937 bias_str = (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) ?
938 VDEV_ALLOC_BIAS_LOG :
939 (alloc_bias == VDEV_BIAS_SPECIAL) ? VDEV_ALLOC_BIAS_SPECIAL :
940 (alloc_bias == VDEV_BIAS_DEDUP) ? VDEV_ALLOC_BIAS_DEDUP :
941 vd->vdev_islog ? "log" : "";
942
943 (void) printf("\tvdev %10llu %s\n"
944 "\t%-10s%5llu %-19s %-15s %-12s\n",
945 (u_longlong_t)vd->vdev_id, bias_str,
946 "metaslabs", (u_longlong_t)vd->vdev_ms_count,
947 "offset", "spacemap", "free");
948 (void) printf("\t%15s %19s %15s %12s\n",
3081 static void
3082 zdb_leak(void *arg, uint64_t start, uint64_t size)
3083 {
3084 vdev_t *vd = arg;
3085
3086 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
3087 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
3088 }
3089
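/*
 * Metaslab ops with no allocator: installed on the normal and log classes
 * so the allocator cannot use the ms_allocatable trees that zdb overloads
 * for leak detection.
 */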
3090 static metaslab_ops_t zdb_metaslab_ops = {
3091 NULL /* alloc */
3092 };
3093
3094 static void
3095 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
3096 {
3097 ddt_bookmark_t ddb;
3098 ddt_entry_t dde;
3099 int error;
3100
3101 bzero(&ddb, sizeof (ddb));
3102 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
3103 blkptr_t blk;
3104 ddt_phys_t *ddp = dde.dde_phys;
3105
3106 if (ddb.ddb_class == DDT_CLASS_UNIQUE)
3107 return;
3108
3109 ASSERT(ddt_phys_total_refcnt(&dde) > 1);
3110
3111 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
3112 if (ddp->ddp_phys_birth == 0)
3113 continue;
3114 ddt_bp_create(ddb.ddb_checksum,
3115 &dde.dde_key, ddp, &blk);
3116 if (p == DDT_PHYS_DITTO) {
3117 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
3118 } else {
3119 zcb->zcb_dedup_asize +=
3120 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
3121 zcb->zcb_dedup_blocks++;
3122 }
3123 }
3124 if (!dump_opt['L']) {
3125 ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
3126 ddt_enter(ddt);
3127 VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
3128 ddt_exit(ddt);
3129 }
3130 }
3131
3132 ASSERT(error == ENOENT);
3133 }
3134
3135 /* ARGSUSED */
3136 static void
3137 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
3138 uint64_t size, void *arg)
3139 {
3140 /*
3141 * This callback was called through a remap from
3142 * a device being removed. Therefore, the vdev that
3143 * this callback is applied to is a concrete
3144 * vdev.
3145 */
3146 ASSERT(vdev_is_concrete(vd));
3147
3148 VERIFY0(metaslab_claim_impl(vd, offset, size,
3149 spa_min_claim_txg(vd->vdev_spa)));
3150 }
3151
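/*
 * Remap a segment of an indirect vdev and claim each concrete segment
 * that the mapping points to.
 */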
3152 static void
3153 claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
3154 {
3155 vdev_t *vd = arg;
3156
3157 vdev_indirect_ops.vdev_op_remap(vd, offset, size,
3158 claim_segment_impl_cb, NULL);
3159 }
3160
3161 /*
3162 * After accounting for all allocated blocks that are directly referenced,
3163 * we might have missed a reference to a block from a partially complete
3164 * (and thus unused) indirect mapping object. We perform a secondary pass
3165 * through the metaslabs we have already mapped and claim the destination
3166 * blocks.
3167 */
3168 static void
3169 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
3170 {
3171 if (spa->spa_vdev_removal == NULL)
3172 return;
3173
3174 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3175
3176 spa_vdev_removal_t *svr = spa->spa_vdev_removal;
3177 vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
3178 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3179
3180 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
3181 metaslab_t *msp = vd->vdev_ms[msi];
3182
3183 if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim))
3184 break;
3185
3186 ASSERT0(range_tree_space(svr->svr_allocd_segs));
3187
3188 if (msp->ms_sm != NULL) {
3189 VERIFY0(space_map_load(msp->ms_sm,
3190 svr->svr_allocd_segs, SM_ALLOC));
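/*
 * Return the obsolete reference counts for the vdev's indirect mapping,
 * folding in its obsolete space map and, if this vdev is being condensed,
 * the previous obsolete space map as well.
 */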
3242 static uint32_t *
3243 zdb_load_obsolete_counts(vdev_t *vd)
3244 {
3245 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3246 spa_t *spa = vd->vdev_spa;
3247 spa_condensing_indirect_phys_t *scip =
3248 &spa->spa_condensing_indirect_phys;
3249 uint32_t *counts;
3250
3251 EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL);
3252 counts = vdev_indirect_mapping_load_obsolete_counts(vim);
3253 if (vd->vdev_obsolete_sm != NULL) {
3254 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3255 vd->vdev_obsolete_sm);
3256 }
3257 if (scip->scip_vdev == vd->vdev_id &&
3258 scip->scip_prev_obsolete_sm_object != 0) {
3259 space_map_t *prev_obsolete_sm = NULL;
3260 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
3261 scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
3262 space_map_update(prev_obsolete_sm);
3263 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3264 prev_obsolete_sm);
3265 space_map_close(prev_obsolete_sm);
3266 }
3267 return (counts);
3268 }
3269
3270 typedef struct checkpoint_sm_exclude_entry_arg {
3271 vdev_t *cseea_vd;
3272 uint64_t cseea_checkpoint_size;
3273 } checkpoint_sm_exclude_entry_arg_t;
3274
3275 static int
3276 checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
3277 {
3278 checkpoint_sm_exclude_entry_arg_t *cseea = arg;
3279 vdev_t *vd = cseea->cseea_vd;
3280 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
3281 uint64_t end = sme->sme_offset + sme->sme_run;
3282
3336 * 2] There is a checkpoint, but no checkpointed blocks
3337 * have been freed yet
3338 * 3] The current vdev is indirect
3339 *
3340 * In these cases we return immediately.
3341 */
3342 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
3343 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
3344 return;
3345
3346 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
3347 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1,
3348 &checkpoint_sm_obj));
3349
3350 checkpoint_sm_exclude_entry_arg_t cseea;
3351 cseea.cseea_vd = vd;
3352 cseea.cseea_checkpoint_size = 0;
3353
3354 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
3355 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
3356 space_map_update(checkpoint_sm);
3357
3358 VERIFY0(space_map_iterate(checkpoint_sm,
3359 checkpoint_sm_exclude_entry_cb, &cseea));
3360 space_map_close(checkpoint_sm);
3361
3362 zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size;
3363 }
3364
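/*
 * Exclude the checkpointed entries of every top-level vdev from the
 * ms_allocatable trees loaded for leak detection.
 */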
3365 static void
3366 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
3367 {
3368 vdev_t *rvd = spa->spa_root_vdev;
3369 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
3370 ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
3371 zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb);
3372 }
3373 }
3374
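/*
 * Populate the ms_allocatable tree of each metaslab on every concrete
 * (non-indirect) top-level vdev with the requested type of space map
 * entries.
 */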
3375 static void
3376 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype)
3377 {
3378 vdev_t *rvd = spa->spa_root_vdev;
3379 for (uint64_t i = 0; i < rvd->vdev_children; i++) {
3380 vdev_t *vd = rvd->vdev_child[i];
3381
3382 ASSERT3U(i, ==, vd->vdev_id);
3383
3384 if (vd->vdev_ops == &vdev_indirect_ops)
3385 continue;
3386
3387 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3444 if (ent_offset >= msp->ms_start + msp->ms_size)
3445 break;
3446
3447 /*
3448 * Mappings do not cross metaslab boundaries,
3449 * because we create them by walking the metaslabs.
3450 */
3451 ASSERT3U(ent_offset + ent_len, <=,
3452 msp->ms_start + msp->ms_size);
3453 range_tree_add(msp->ms_allocatable, ent_offset, ent_len);
3454 }
3455
3456 if (!msp->ms_loaded)
3457 msp->ms_loaded = B_TRUE;
3458 mutex_exit(&msp->ms_lock);
3459 }
3460
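/*
 * For each indirect vdev, load its obsolete counts and build per-metaslab
 * ms_allocatable trees from its mapping so the mapped segments can later
 * be claimed.
 */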
3461 static void
3462 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
3463 {
3464 vdev_t *rvd = spa->spa_root_vdev;
3465 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
3466 vdev_t *vd = rvd->vdev_child[c];
3467
3468 ASSERT3U(c, ==, vd->vdev_id);
3469
3470 if (vd->vdev_ops != &vdev_indirect_ops)
3471 continue;
3472
3473 /*
3474 * Note: we don't check for mapping leaks on
3475 	 * removing vdevs because their ms_allocatable trees
3476 * are used to look for leaks in allocated space.
3477 */
3478 zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd);
3479
3480 /*
3481 * Normally, indirect vdevs don't have any
3482 * metaslabs. We want to set them up for
3483 * zio_claim().
3490
3491 (void) fprintf(stderr,
3492 "\rloading indirect vdev %llu, "
3493 "metaslab %llu of %llu ...",
3494 (longlong_t)vd->vdev_id,
3495 (longlong_t)vd->vdev_ms[m]->ms_id,
3496 (longlong_t)vd->vdev_ms_count);
3497
3498 load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m],
3499 &vim_idx);
3500 }
3501 ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim));
3502 }
3503 }
3504
3505 static void
3506 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
3507 {
3508 zcb->zcb_spa = spa;
3509
3510 if (!dump_opt['L']) {
3511 dsl_pool_t *dp = spa->spa_dsl_pool;
3512 vdev_t *rvd = spa->spa_root_vdev;
3513
3514 /*
3515 * We are going to be changing the meaning of the metaslab's
3516 * ms_allocatable. Ensure that the allocator doesn't try to
3517 * use the tree.
3518 */
3519 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
3520 spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
3521
3522 zcb->zcb_vd_obsolete_counts =
3523 umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
3524 UMEM_NOFAIL);
3525
3526 /*
3527 * For leak detection, we overload the ms_allocatable trees
3528 * to contain allocated segments instead of free segments.
3529 * As a result, we can't use the normal metaslab_load/unload
3530 * interfaces.
3535 /*
3536 * On load_concrete_ms_allocatable_trees() we loaded all the
3537 * allocated entries from the ms_sm to the ms_allocatable for
3538 * each metaslab. If the pool has a checkpoint or is in the
3539 * middle of discarding a checkpoint, some of these blocks
3540 * may have been freed but their ms_sm may not have been
3541 * updated because they are referenced by the checkpoint. In
3542 * order to avoid false-positives during leak-detection, we
3543 * go through the vdev's checkpoint space map and exclude all
3544 * its entries from their relevant ms_allocatable.
3545 *
3546 * We also aggregate the space held by the checkpoint and add
3547 * it to zcb_checkpoint_size.
3548 *
3549 * Note that at this point we are also verifying that all the
3550 * entries on the checkpoint_sm are marked as allocated in
3551 * the ms_sm of their relevant metaslab.
3552 * [see comment in checkpoint_sm_exclude_entry_cb()]
3553 */
3554 zdb_leak_init_exclude_checkpoint(spa, zcb);
3555
3556 /* for cleaner progress output */
3557 (void) fprintf(stderr, "\n");
3558
3559 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
3560 ASSERT(spa_feature_is_enabled(spa,
3561 SPA_FEATURE_DEVICE_REMOVAL));
3562 (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
3563 increment_indirect_mapping_cb, zcb, NULL);
3564 }
3565 } else {
3566 /*
3567 * If leak tracing is disabled, we still need to consider
3568 * any checkpointed space in our space verification.
3569 */
3570 zcb->zcb_checkpoint_size += spa_get_checkpoint_space(spa);
3571 }
3572
3573 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3574 zdb_ddt_leak_init(spa, zcb);
3575 spa_config_exit(spa, SCL_CONFIG, FTAG);
3576 }
3577
3578 static boolean_t
3579 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
3580 {
3581 boolean_t leaks = B_FALSE;
3582 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3583 uint64_t total_leaked = 0;
3584
3585 ASSERT(vim != NULL);
3586
3587 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
3588 vdev_indirect_mapping_entry_phys_t *vimep =
3589 &vim->vim_entries[i];
3590 uint64_t obsolete_bytes = 0;
3591 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
3631 (u_longlong_t)vd->vdev_id, pct_leaked,
3632 (u_longlong_t)total_leaked);
3633 } else if (total_leaked > 0) {
3634 (void) printf("obsolete indirect mapping count mismatch "
3635 "for vdev %llu -- %llx total bytes mismatched\n",
3636 (u_longlong_t)vd->vdev_id,
3637 (u_longlong_t)total_leaked);
3638 leaks |= B_TRUE;
3639 }
3640
3641 vdev_indirect_mapping_free_obsolete_counts(vim,
3642 zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
3643 zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;
3644
3645 return (leaks);
3646 }
3647
3648 static boolean_t
3649 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
3650 {
3651 boolean_t leaks = B_FALSE;
3652 if (!dump_opt['L']) {
3653 vdev_t *rvd = spa->spa_root_vdev;
3654 for (unsigned c = 0; c < rvd->vdev_children; c++) {
3655 vdev_t *vd = rvd->vdev_child[c];
3656 metaslab_group_t *mg = vd->vdev_mg;
3657
3658 if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
3659 leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
3660 }
3661
3662 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3663 metaslab_t *msp = vd->vdev_ms[m];
3664 ASSERT3P(mg, ==, msp->ms_group);
3665
3666 /*
3667 * ms_allocatable has been overloaded
3668 * to contain allocated segments. Now that
3669 * we finished traversing all blocks, any
3670 * block that remains in the ms_allocatable
3671 * represents an allocated block that we
3672 * did not claim during the traversal.
3673 * Claimed blocks would have been removed
3674 * from the ms_allocatable. For indirect
3675 * vdevs, space remaining in the tree
3676 * represents parts of the mapping that are
3677 * not referenced, which is not a bug.
3678 */
3679 if (vd->vdev_ops == &vdev_indirect_ops) {
3680 range_tree_vacate(msp->ms_allocatable,
3681 NULL, NULL);
3682 } else {
3683 range_tree_vacate(msp->ms_allocatable,
3684 zdb_leak, vd);
3685 }
3686
3687 if (msp->ms_loaded) {
3688 msp->ms_loaded = B_FALSE;
3689 }
3690 }
3691 }
3692
3693 umem_free(zcb->zcb_vd_obsolete_counts,
3694 rvd->vdev_children * sizeof (uint32_t *));
3695 zcb->zcb_vd_obsolete_counts = NULL;
3696 }
3697 return (leaks);
3698 }
3699
3700 /* ARGSUSED */
3701 static int
3702 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
3703 {
3704 zdb_cb_t *zcb = arg;
3705
3706 if (dump_opt['b'] >= 5) {
3707 char blkbuf[BP_SPRINTF_LEN];
3708 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3709 (void) printf("[%s] %s\n",
3710 "deferred free", blkbuf);
3711 }
3712 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
3713 return (0);
3714 }
3715
3716 static int
3717 dump_block_stats(spa_t *spa)
3718 {
3719 zdb_cb_t zcb;
3720 zdb_blkstats_t *zb, *tzb;
3721 uint64_t norm_alloc, norm_space, total_alloc, total_found;
3722 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
3723 boolean_t leaks = B_FALSE;
3724 int err;
3725
3726 bzero(&zcb, sizeof (zcb));
3727 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
3728 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
3729 (dump_opt['c'] == 1) ? "metadata " : "",
3730 dump_opt['c'] ? "checksums " : "",
3731 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
3732 !dump_opt['L'] ? "nothing leaked " : "");
3733
3734 /*
3735 * Load all space maps as SM_ALLOC maps, then traverse the pool
3736 * claiming each block we discover. If the pool is perfectly
3737 * consistent, the space maps will be empty when we're done.
3738 * Anything left over is a leak; any block we can't claim (because
3739 * it's not part of any space map) is a double allocation,
3740 * reference to a freed block, or an unclaimed log block.
3741 */
3742 zdb_leak_init(spa, &zcb);
3743
3744 /*
3745 * If there's a deferred-free bplist, process that first.
3746 */
3747 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
3748 count_block_cb, &zcb, NULL);
3749
3750 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
3751 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
3752 count_block_cb, &zcb, NULL);
3753 }
3754
3755 zdb_claim_removing(spa, &zcb);
3756
3757 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
3758 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
3759 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
3760 &zcb, NULL));
3761 }
3800 }
3801 }
3802
3803 /*
3804 * Report any leaked segments.
3805 */
3806 leaks |= zdb_leak_fini(spa, &zcb);
3807
3808 tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
3809
3810 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
3811 norm_space = metaslab_class_get_space(spa_normal_class(spa));
3812
3813 total_alloc = norm_alloc +
3814 metaslab_class_get_alloc(spa_log_class(spa)) +
3815 metaslab_class_get_alloc(spa_special_class(spa)) +
3816 metaslab_class_get_alloc(spa_dedup_class(spa));
3817 total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
3818 zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
3819
3820 if (total_found == total_alloc) {
3821 if (!dump_opt['L'])
3822 (void) printf("\n\tNo leaks (block sum matches space"
3823 " maps exactly)\n");
3824 } else {
3825 (void) printf("block traversal size %llu != alloc %llu "
3826 "(%s %lld)\n",
3827 (u_longlong_t)total_found,
3828 (u_longlong_t)total_alloc,
3829 (dump_opt['L']) ? "unreachable" : "leaked",
3830 (longlong_t)(total_alloc - total_found));
3831 leaks = B_TRUE;
3832 }
3833
3834 if (tzb->zb_count == 0)
3835 return (2);
3836
3837 (void) printf("\n");
3838 (void) printf("\t%-16s %14llu\n", "bp count:",
3839 (u_longlong_t)tzb->zb_count);
3840 (void) printf("\t%-16s %14llu\n", "ganged count:",
3841 (longlong_t)tzb->zb_gangs);
3842 (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:",
3843 (u_longlong_t)tzb->zb_lsize,
3844 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
4144 int ret = 0;
4145
4146 spa_condensing_indirect_phys_t *scip =
4147 &spa->spa_condensing_indirect_phys;
4148 if (scip->scip_next_mapping_object != 0) {
4149 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev];
4150 ASSERT(scip->scip_prev_obsolete_sm_object != 0);
4151 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
4152
4153 (void) printf("Condensing indirect vdev %llu: new mapping "
4154 "object %llu, prev obsolete sm %llu\n",
4155 (u_longlong_t)scip->scip_vdev,
4156 (u_longlong_t)scip->scip_next_mapping_object,
4157 (u_longlong_t)scip->scip_prev_obsolete_sm_object);
4158 if (scip->scip_prev_obsolete_sm_object != 0) {
4159 space_map_t *prev_obsolete_sm = NULL;
4160 VERIFY0(space_map_open(&prev_obsolete_sm,
4161 spa->spa_meta_objset,
4162 scip->scip_prev_obsolete_sm_object,
4163 0, vd->vdev_asize, 0));
4164 space_map_update(prev_obsolete_sm);
4165 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
4166 (void) printf("\n");
4167 space_map_close(prev_obsolete_sm);
4168 }
4169
4170 scip_count += 2;
4171 }
4172
4173 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
4174 vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
4175 vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
4176
4177 if (vic->vic_mapping_object != 0) {
4178 ASSERT(vd->vdev_ops == &vdev_indirect_ops ||
4179 vd->vdev_removing);
4180 indirect_vdev_count++;
4181
4182 if (vd->vdev_indirect_mapping->vim_havecounts) {
4183 obsolete_counts_count++;
4184 }
4350 (void) fprintf(stderr,
4351 "\rverifying vdev %llu, space map entry %llu of %llu ...",
4352 (longlong_t)vd->vdev_id,
4353 (longlong_t)vcsec->vcsec_entryid,
4354 (longlong_t)vcsec->vcsec_num_entries);
4355 }
4356 vcsec->vcsec_entryid++;
4357
4358 /*
4359 * See comment in checkpoint_sm_exclude_entry_cb()
4360 */
4361 VERIFY3U(sme->sme_offset, >=, ms->ms_start);
4362 VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
4363
4364 /*
4365 * The entries in the vdev_checkpoint_sm should be marked as
4366 * allocated in the checkpointed state of the pool, therefore
4367 	 * their respective ms_allocatable trees should not contain them.
4368 */
4369 mutex_enter(&ms->ms_lock);
4370 range_tree_verify(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
4371 mutex_exit(&ms->ms_lock);
4372
4373 return (0);
4374 }
4375
4376 /*
4377 * Verify that all segments in the vdev_checkpoint_sm are allocated
4378 * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's
4379 * ms_allocatable).
4380 *
4381 * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of
4382 * each vdev in the current state of the pool to the metaslab space maps
4383 * (ms_sm) of the checkpointed state of the pool.
4384 *
4385 * Note that the function changes the state of the ms_allocatable
4386 * trees of the current spa_t. The entries of these ms_allocatable
4387 	 * trees are cleared out and then repopulated with the free
4388 * entries of their respective ms_sm space maps.
4389 */
4390 static void
4413 continue;
4414 }
4415
4416 /*
4417 * If the checkpoint space map doesn't exist, then nothing
4418 * here is checkpointed so there's nothing to verify.
4419 */
4420 if (current_vd->vdev_top_zap == 0 ||
4421 zap_contains(spa_meta_objset(current),
4422 current_vd->vdev_top_zap,
4423 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
4424 continue;
4425
4426 VERIFY0(zap_lookup(spa_meta_objset(current),
4427 current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
4428 sizeof (uint64_t), 1, &checkpoint_sm_obj));
4429
4430 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
4431 checkpoint_sm_obj, 0, current_vd->vdev_asize,
4432 current_vd->vdev_ashift));
4433 space_map_update(checkpoint_sm);
4434
4435 verify_checkpoint_sm_entry_cb_arg_t vcsec;
4436 vcsec.vcsec_vd = ckpoint_vd;
4437 vcsec.vcsec_entryid = 0;
4438 vcsec.vcsec_num_entries =
4439 space_map_length(checkpoint_sm) / sizeof (uint64_t);
4440 VERIFY0(space_map_iterate(checkpoint_sm,
4441 verify_checkpoint_sm_entry_cb, &vcsec));
4442 dump_spacemap(current->spa_meta_objset, checkpoint_sm);
4443 space_map_close(checkpoint_sm);
4444 }
4445
4446 /*
4447 * If we've added vdevs since we took the checkpoint, ensure
4448 * that their checkpoint space maps are empty.
4449 */
4450 if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) {
4451 for (uint64_t c = ckpoint_rvd->vdev_children;
4452 c < current_rvd->vdev_children; c++) {
4453 vdev_t *current_vd = current_rvd->vdev_child[c];
4454 ASSERT3P(current_vd->vdev_checkpoint_sm, ==, NULL);
4455 }
4456 }
4457
4458 /* for cleaner progress output */
4459 (void) fprintf(stderr, "\n");
4460 }
4500 "\rverifying vdev %llu of %llu, "
4501 "metaslab %llu of %llu ...",
4502 (longlong_t)current_vd->vdev_id,
4503 (longlong_t)current_rvd->vdev_children,
4504 (longlong_t)current_vd->vdev_ms[m]->ms_id,
4505 (longlong_t)current_vd->vdev_ms_count);
4506
4507 /*
4508 * We walk through the ms_allocatable trees that
4509 * are loaded with the allocated blocks from the
4510 			 * ms_sm spacemaps of the checkpoint. For each
4511 			 * of these ranges we ensure that it does not exist
4512 			 * in the ms_allocatable trees of the
4513 			 * current state, which are loaded with the ranges
4514 			 * that are currently free.
4515 *
4516 * This way we ensure that none of the blocks that
4517 * are part of the checkpoint were freed by mistake.
4518 */
4519 range_tree_walk(ckpoint_msp->ms_allocatable,
4520 (range_tree_func_t *)range_tree_verify,
4521 current_msp->ms_allocatable);
4522 }
4523 }
4524
4525 /* for cleaner progress output */
4526 (void) fprintf(stderr, "\n");
4527 }
4528
4529 static void
4530 verify_checkpoint_blocks(spa_t *spa)
4531 {
4532 spa_t *checkpoint_spa;
4533 char *checkpoint_pool;
4534 nvlist_t *config = NULL;
4535 int error = 0;
4536
4537 /*
4538 * We import the checkpointed state of the pool (under a different
4539 * name) so we can do verification on it against the current state
4540 * of the pool.
4541 */
4542 checkpoint_pool = import_checkpointed_state(spa->spa_name, config,
4543 NULL);
4544 ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
4545
4546 error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG);
4547 if (error != 0) {
4548 fatal("Tried to open pool \"%s\" but spa_open() failed with "
4549 "error %d\n", checkpoint_pool, error);
4550 }
4551
4577
4578 for (uint64_t i = 0; i < rvd->vdev_children; i++) {
4579 vdev_t *vd = rvd->vdev_child[i];
4580
4581 space_map_t *checkpoint_sm = NULL;
4582 uint64_t checkpoint_sm_obj;
4583
4584 if (vd->vdev_top_zap == 0)
4585 continue;
4586
4587 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
4588 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
4589 continue;
4590
4591 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
4592 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
4593 sizeof (uint64_t), 1, &checkpoint_sm_obj));
4594
4595 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
4596 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
4597 space_map_update(checkpoint_sm);
4598 dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
4599 space_map_close(checkpoint_sm);
4600 }
4601 }
4602
4603 static int
4604 verify_checkpoint(spa_t *spa)
4605 {
4606 uberblock_t checkpoint;
4607 int error;
4608
4609 if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
4610 return (0);
4611
4612 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
4613 DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
4614 sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
4615
4616 if (error == ENOENT && !dump_opt['L']) {
4617 /*
|
768 if (expected_refcount != actual_refcount) {
769 (void) printf("space map refcount mismatch: expected %lld != "
770 "actual %lld\n",
771 (longlong_t)expected_refcount,
772 (longlong_t)actual_refcount);
773 return (2);
774 }
775 return (0);
776 }
777
778 static void
779 dump_spacemap(objset_t *os, space_map_t *sm)
780 {
781 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
782 "INVALID", "INVALID", "INVALID", "INVALID" };
783
784 if (sm == NULL)
785 return;
786
787 (void) printf("space map object %llu:\n",
788 (longlong_t)sm->sm_object);
789 (void) printf(" smp_length = 0x%llx\n",
790 (longlong_t)sm->sm_phys->smp_length);
791 (void) printf(" smp_alloc = 0x%llx\n",
792 (longlong_t)sm->sm_phys->smp_alloc);
793
794 if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
795 return;
796
797 /*
798 * Print out the freelist entries in both encoded and decoded form.
799 */
800 uint8_t mapshift = sm->sm_shift;
801 int64_t alloc = 0;
802 uint64_t word, entry_id = 0;
803 for (uint64_t offset = 0; offset < space_map_length(sm);
804 offset += sizeof (word)) {
805
806 VERIFY0(dmu_read(os, space_map_object(sm), offset,
807 sizeof (word), &word, DMU_READ_PREFETCH));
808
809 if (sm_entry_is_debug(word)) {
810 (void) printf("\t [%6llu] %s: txg %llu pass %llu\n",
811 (u_longlong_t)entry_id,
812 ddata[SM_DEBUG_ACTION_DECODE(word)],
813 (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
814 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
815 entry_id++;
816 continue;
817 }
818
819 uint8_t words;
820 char entry_type;
821 uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
822
823 if (sm_entry_is_single_word(word)) {
824 entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
825 'A' : 'F';
826 entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
827 sm->sm_start;
828 entry_run = SM_RUN_DECODE(word) << mapshift;
829 words = 1;
830 } else {
831 /* it is a two-word entry so we read another word */
832 ASSERT(sm_entry_is_double_word(word));
833
834 uint64_t extra_word;
835 offset += sizeof (extra_word);
836 VERIFY0(dmu_read(os, space_map_object(sm), offset,
837 sizeof (extra_word), &extra_word,
838 DMU_READ_PREFETCH));
839
840 ASSERT3U(offset, <=, space_map_length(sm));
841
842 entry_run = SM2_RUN_DECODE(word) << mapshift;
843 entry_vdev = SM2_VDEV_DECODE(word);
844 entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
845 'A' : 'F';
846 entry_off = (SM2_OFFSET_DECODE(extra_word) <<
847 mapshift) + sm->sm_start;
848 words = 2;
849 }
850
851 (void) printf("\t [%6llu] %c range:"
852 " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
853 (u_longlong_t)entry_id,
854 entry_type, (u_longlong_t)entry_off,
855 (u_longlong_t)(entry_off + entry_run),
856 (u_longlong_t)entry_run,
857 (u_longlong_t)entry_vdev, words);
858
859 if (entry_type == 'A')
860 alloc += entry_run;
861 else
862 alloc -= entry_run;
863 entry_id++;
864 }
865 if (alloc != space_map_allocated(sm)) {
866 (void) printf("space_map_object alloc (%lld) INCONSISTENT "
867 "with space map summary (%lld)\n",
868 (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
869 }
870 }
871
872 static void
873 dump_metaslab_stats(metaslab_t *msp)
874 {
875 char maxbuf[32];
876 range_tree_t *rt = msp->ms_allocatable;
877 avl_tree_t *t = &msp->ms_allocatable_by_size;
878 int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
879
880 	/* make sure nicenum has enough space */
881 CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
882
883 zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
884
885 (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
909 mutex_enter(&msp->ms_lock);
910 VERIFY0(metaslab_load(msp));
911 range_tree_stat_verify(msp->ms_allocatable);
912 dump_metaslab_stats(msp);
913 metaslab_unload(msp);
914 mutex_exit(&msp->ms_lock);
915 }
916
917 if (dump_opt['m'] > 1 && sm != NULL &&
918 spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
919 /*
920 * The space map histogram represents free space in chunks
921 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
922 */
923 (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
924 (u_longlong_t)msp->ms_fragmentation);
925 dump_histogram(sm->sm_phys->smp_histogram,
926 SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
927 }
928
929 ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
930 dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
931 }
932
933 static void
934 print_vdev_metaslab_header(vdev_t *vd)
935 {
936 vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
937 const char *bias_str;
938
939 bias_str = (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) ?
940 VDEV_ALLOC_BIAS_LOG :
941 (alloc_bias == VDEV_BIAS_SPECIAL) ? VDEV_ALLOC_BIAS_SPECIAL :
942 (alloc_bias == VDEV_BIAS_DEDUP) ? VDEV_ALLOC_BIAS_DEDUP :
943 vd->vdev_islog ? "log" : "";
944
945 (void) printf("\tvdev %10llu %s\n"
946 "\t%-10s%5llu %-19s %-15s %-12s\n",
947 (u_longlong_t)vd->vdev_id, bias_str,
948 "metaslabs", (u_longlong_t)vd->vdev_ms_count,
949 "offset", "spacemap", "free");
950 (void) printf("\t%15s %19s %15s %12s\n",
3083 static void
3084 zdb_leak(void *arg, uint64_t start, uint64_t size)
3085 {
3086 vdev_t *vd = arg;
3087
3088 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
3089 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
3090 }
3091
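/*
 * Metaslab ops with no allocator: installed on the normal and log classes
 * so the allocator cannot use the ms_allocatable trees that zdb overloads
 * for leak detection.
 */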
3092 static metaslab_ops_t zdb_metaslab_ops = {
3093 NULL /* alloc */
3094 };
3095
3096 static void
3097 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
3098 {
3099 ddt_bookmark_t ddb;
3100 ddt_entry_t dde;
3101 int error;
3102
3103 ASSERT(!dump_opt['L']);
3104
3105 bzero(&ddb, sizeof (ddb));
3106 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
3107 blkptr_t blk;
3108 ddt_phys_t *ddp = dde.dde_phys;
3109
3110 if (ddb.ddb_class == DDT_CLASS_UNIQUE)
3111 return;
3112
3113 ASSERT(ddt_phys_total_refcnt(&dde) > 1);
3114
3115 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
3116 if (ddp->ddp_phys_birth == 0)
3117 continue;
3118 ddt_bp_create(ddb.ddb_checksum,
3119 &dde.dde_key, ddp, &blk);
3120 if (p == DDT_PHYS_DITTO) {
3121 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
3122 } else {
3123 zcb->zcb_dedup_asize +=
3124 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
3125 zcb->zcb_dedup_blocks++;
3126 }
3127 }
3128 ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
3129 ddt_enter(ddt);
3130 VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
3131 ddt_exit(ddt);
3132 }
3133
3134 ASSERT(error == ENOENT);
3135 }
3136
3137 /* ARGSUSED */
3138 static void
3139 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
3140 uint64_t size, void *arg)
3141 {
3142 /*
3143 * This callback was called through a remap from
3144 * a device being removed. Therefore, the vdev that
3145 * this callback is applied to is a concrete
3146 * vdev.
3147 */
3148 ASSERT(vdev_is_concrete(vd));
3149
3150 VERIFY0(metaslab_claim_impl(vd, offset, size,
3151 spa_min_claim_txg(vd->vdev_spa)));
3152 }
3153
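/*
 * Remap a segment of an indirect vdev and claim each concrete segment
 * that the mapping points to.
 */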
3154 static void
3155 claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
3156 {
3157 vdev_t *vd = arg;
3158
3159 vdev_indirect_ops.vdev_op_remap(vd, offset, size,
3160 claim_segment_impl_cb, NULL);
3161 }
3162
3163 /*
3164 * After accounting for all allocated blocks that are directly referenced,
3165 * we might have missed a reference to a block from a partially complete
3166 * (and thus unused) indirect mapping object. We perform a secondary pass
3167 * through the metaslabs we have already mapped and claim the destination
3168 * blocks.
3169 */
3170 static void
3171 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
3172 {
3173 if (dump_opt['L'])
3174 return;
3175
3176 if (spa->spa_vdev_removal == NULL)
3177 return;
3178
3179 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3180
3181 spa_vdev_removal_t *svr = spa->spa_vdev_removal;
3182 vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
3183 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3184
3185 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
3186 metaslab_t *msp = vd->vdev_ms[msi];
3187
3188 if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim))
3189 break;
3190
3191 ASSERT0(range_tree_space(svr->svr_allocd_segs));
3192
3193 if (msp->ms_sm != NULL) {
3194 VERIFY0(space_map_load(msp->ms_sm,
3195 svr->svr_allocd_segs, SM_ALLOC));
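/*
 * Return the obsolete reference counts for the vdev's indirect mapping,
 * folding in its obsolete space map and, if this vdev is being condensed,
 * the previous obsolete space map as well.
 */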
3247 static uint32_t *
3248 zdb_load_obsolete_counts(vdev_t *vd)
3249 {
3250 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3251 spa_t *spa = vd->vdev_spa;
3252 spa_condensing_indirect_phys_t *scip =
3253 &spa->spa_condensing_indirect_phys;
3254 uint32_t *counts;
3255
3256 EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL);
3257 counts = vdev_indirect_mapping_load_obsolete_counts(vim);
3258 if (vd->vdev_obsolete_sm != NULL) {
3259 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3260 vd->vdev_obsolete_sm);
3261 }
3262 if (scip->scip_vdev == vd->vdev_id &&
3263 scip->scip_prev_obsolete_sm_object != 0) {
3264 space_map_t *prev_obsolete_sm = NULL;
3265 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
3266 scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
3267 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3268 prev_obsolete_sm);
3269 space_map_close(prev_obsolete_sm);
3270 }
3271 return (counts);
3272 }
3273
3274 typedef struct checkpoint_sm_exclude_entry_arg {
3275 vdev_t *cseea_vd;
3276 uint64_t cseea_checkpoint_size;
3277 } checkpoint_sm_exclude_entry_arg_t;
3278
3279 static int
3280 checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
3281 {
3282 checkpoint_sm_exclude_entry_arg_t *cseea = arg;
3283 vdev_t *vd = cseea->cseea_vd;
3284 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
3285 uint64_t end = sme->sme_offset + sme->sme_run;
3286
3340 * 2] There is a checkpoint, but no checkpointed blocks
3341 * have been freed yet
3342 * 3] The current vdev is indirect
3343 *
3344 * In these cases we return immediately.
3345 */
3346 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
3347 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
3348 return;
3349
3350 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
3351 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1,
3352 &checkpoint_sm_obj));
3353
3354 checkpoint_sm_exclude_entry_arg_t cseea;
3355 cseea.cseea_vd = vd;
3356 cseea.cseea_checkpoint_size = 0;
3357
3358 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
3359 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
3360
3361 VERIFY0(space_map_iterate(checkpoint_sm,
3362 space_map_length(checkpoint_sm),
3363 checkpoint_sm_exclude_entry_cb, &cseea));
3364 space_map_close(checkpoint_sm);
3365
3366 zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size;
3367 }
3368
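/*
 * Exclude the checkpointed entries of every top-level vdev from the
 * ms_allocatable trees loaded for leak detection.
 */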
3369 static void
3370 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
3371 {
3372 ASSERT(!dump_opt['L']);
3373
3374 vdev_t *rvd = spa->spa_root_vdev;
3375 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
3376 ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
3377 zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb);
3378 }
3379 }
3380
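/*
 * Populate the ms_allocatable tree of each metaslab on every concrete
 * (non-indirect) top-level vdev with the requested type of space map
 * entries.
 */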
3381 static void
3382 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype)
3383 {
3384 vdev_t *rvd = spa->spa_root_vdev;
3385 for (uint64_t i = 0; i < rvd->vdev_children; i++) {
3386 vdev_t *vd = rvd->vdev_child[i];
3387
3388 ASSERT3U(i, ==, vd->vdev_id);
3389
3390 if (vd->vdev_ops == &vdev_indirect_ops)
3391 continue;
3392
3393 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3450 if (ent_offset >= msp->ms_start + msp->ms_size)
3451 break;
3452
3453 /*
3454 * Mappings do not cross metaslab boundaries,
3455 * because we create them by walking the metaslabs.
3456 */
3457 ASSERT3U(ent_offset + ent_len, <=,
3458 msp->ms_start + msp->ms_size);
3459 range_tree_add(msp->ms_allocatable, ent_offset, ent_len);
3460 }
3461
3462 if (!msp->ms_loaded)
3463 msp->ms_loaded = B_TRUE;
3464 mutex_exit(&msp->ms_lock);
3465 }
3466
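/*
 * For each indirect vdev, load its obsolete counts and build per-metaslab
 * ms_allocatable trees from its mapping so the mapped segments can later
 * be claimed.
 */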
3467 static void
3468 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
3469 {
3470 ASSERT(!dump_opt['L']);
3471
3472 vdev_t *rvd = spa->spa_root_vdev;
3473 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
3474 vdev_t *vd = rvd->vdev_child[c];
3475
3476 ASSERT3U(c, ==, vd->vdev_id);
3477
3478 if (vd->vdev_ops != &vdev_indirect_ops)
3479 continue;
3480
3481 /*
3482 * Note: we don't check for mapping leaks on
3483 	 * removing vdevs because their ms_allocatable trees
3484 * are used to look for leaks in allocated space.
3485 */
3486 zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd);
3487
3488 /*
3489 * Normally, indirect vdevs don't have any
3490 * metaslabs. We want to set them up for
3491 * zio_claim().
3498
3499 (void) fprintf(stderr,
3500 "\rloading indirect vdev %llu, "
3501 "metaslab %llu of %llu ...",
3502 (longlong_t)vd->vdev_id,
3503 (longlong_t)vd->vdev_ms[m]->ms_id,
3504 (longlong_t)vd->vdev_ms_count);
3505
3506 load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m],
3507 &vim_idx);
3508 }
3509 ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim));
3510 }
3511 }
3512
3513 static void
3514 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
3515 {
3516 zcb->zcb_spa = spa;
3517
3518 if (dump_opt['L'])
3519 return;
3520
3521 dsl_pool_t *dp = spa->spa_dsl_pool;
3522 vdev_t *rvd = spa->spa_root_vdev;
3523
3524 /*
3525 * We are going to be changing the meaning of the metaslab's
3526 * ms_allocatable. Ensure that the allocator doesn't try to
3527 * use the tree.
3528 */
3529 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
3530 spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
3531
3532 zcb->zcb_vd_obsolete_counts =
3533 umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
3534 UMEM_NOFAIL);
3535
3536 /*
3537 * For leak detection, we overload the ms_allocatable trees
3538 * to contain allocated segments instead of free segments.
3539 * As a result, we can't use the normal metaslab_load/unload
3540 * interfaces.
3545 /*
3546 * On load_concrete_ms_allocatable_trees() we loaded all the
3547 * allocated entries from the ms_sm to the ms_allocatable for
3548 * each metaslab. If the pool has a checkpoint or is in the
3549 * middle of discarding a checkpoint, some of these blocks
3550 * may have been freed but their ms_sm may not have been
3551 * updated because they are referenced by the checkpoint. In
3552 * order to avoid false-positives during leak-detection, we
3553 * go through the vdev's checkpoint space map and exclude all
3554 * its entries from their relevant ms_allocatable.
3555 *
3556 * We also aggregate the space held by the checkpoint and add
3557 * it to zcb_checkpoint_size.
3558 *
3559 * Note that at this point we are also verifying that all the
3560 * entries on the checkpoint_sm are marked as allocated in
3561 * the ms_sm of their relevant metaslab.
3562 * [see comment in checkpoint_sm_exclude_entry_cb()]
3563 */
3564 zdb_leak_init_exclude_checkpoint(spa, zcb);
3565 ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));
3566
3567 /* for cleaner progress output */
3568 (void) fprintf(stderr, "\n");
3569
3570 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
3571 ASSERT(spa_feature_is_enabled(spa,
3572 SPA_FEATURE_DEVICE_REMOVAL));
3573 (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
3574 increment_indirect_mapping_cb, zcb, NULL);
3575 }
3576
3577 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3578 zdb_ddt_leak_init(spa, zcb);
3579 spa_config_exit(spa, SCL_CONFIG, FTAG);
3580 }
3581
3582 static boolean_t
3583 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
3584 {
3585 boolean_t leaks = B_FALSE;
3586 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3587 uint64_t total_leaked = 0;
3588
3589 ASSERT(vim != NULL);
3590
3591 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
3592 vdev_indirect_mapping_entry_phys_t *vimep =
3593 &vim->vim_entries[i];
3594 uint64_t obsolete_bytes = 0;
3595 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
3635 (u_longlong_t)vd->vdev_id, pct_leaked,
3636 (u_longlong_t)total_leaked);
3637 } else if (total_leaked > 0) {
3638 (void) printf("obsolete indirect mapping count mismatch "
3639 "for vdev %llu -- %llx total bytes mismatched\n",
3640 (u_longlong_t)vd->vdev_id,
3641 (u_longlong_t)total_leaked);
3642 leaks |= B_TRUE;
3643 }
3644
3645 vdev_indirect_mapping_free_obsolete_counts(vim,
3646 zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
3647 zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;
3648
3649 return (leaks);
3650 }
3651
3652 static boolean_t
3653 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
3654 {
3655 if (dump_opt['L'])
3656 return (B_FALSE);
3657
3658 boolean_t leaks = B_FALSE;
3659
3660 vdev_t *rvd = spa->spa_root_vdev;
3661 for (unsigned c = 0; c < rvd->vdev_children; c++) {
3662 vdev_t *vd = rvd->vdev_child[c];
3663 #if DEBUG
3664 metaslab_group_t *mg = vd->vdev_mg;
3665 #endif
3666
3667 if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
3668 leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
3669 }
3670
3671 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3672 metaslab_t *msp = vd->vdev_ms[m];
3673 ASSERT3P(mg, ==, msp->ms_group);
3674
3675 /*
3676 * ms_allocatable has been overloaded
3677 * to contain allocated segments. Now that
3678 * we finished traversing all blocks, any
3679 * block that remains in the ms_allocatable
3680 * represents an allocated block that we
3681 * did not claim during the traversal.
3682 * Claimed blocks would have been removed
3683 * from the ms_allocatable. For indirect
3684 * vdevs, space remaining in the tree
3685 * represents parts of the mapping that are
3686 * not referenced, which is not a bug.
3687 */
3688 if (vd->vdev_ops == &vdev_indirect_ops) {
3689 range_tree_vacate(msp->ms_allocatable,
3690 NULL, NULL);
3691 } else {
3692 range_tree_vacate(msp->ms_allocatable,
3693 zdb_leak, vd);
3694 }
3695
3696 if (msp->ms_loaded) {
3697 msp->ms_loaded = B_FALSE;
3698 }
3699 }
3700
3701 }
3702
3703 umem_free(zcb->zcb_vd_obsolete_counts,
3704 rvd->vdev_children * sizeof (uint32_t *));
3705 zcb->zcb_vd_obsolete_counts = NULL;
3706
3707 return (leaks);
3708 }
3709
3710 /* ARGSUSED */
3711 static int
3712 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
3713 {
3714 zdb_cb_t *zcb = arg;
3715
3716 if (dump_opt['b'] >= 5) {
3717 char blkbuf[BP_SPRINTF_LEN];
3718 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3719 (void) printf("[%s] %s\n",
3720 "deferred free", blkbuf);
3721 }
3722 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
3723 return (0);
3724 }
3725
3726 static int
3727 dump_block_stats(spa_t *spa)
3728 {
3729 zdb_cb_t zcb;
3730 zdb_blkstats_t *zb, *tzb;
3731 uint64_t norm_alloc, norm_space, total_alloc, total_found;
3732 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
3733 boolean_t leaks = B_FALSE;
3734 int err;
3735
3736 bzero(&zcb, sizeof (zcb));
3737 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
3738 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
3739 (dump_opt['c'] == 1) ? "metadata " : "",
3740 dump_opt['c'] ? "checksums " : "",
3741 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
3742 !dump_opt['L'] ? "nothing leaked " : "");
3743
3744 /*
3745 * When leak detection is enabled we load all space maps as SM_ALLOC
3746 * maps, then traverse the pool claiming each block we discover. If
3747 * the pool is perfectly consistent, the segment trees will be empty
3748 * when we're done. Anything left over is a leak; any block we can't
3749 * claim (because it's not part of any space map) is a double
3750 * allocation, reference to a freed block, or an unclaimed log block.
3751 *
3752 * When leak detection is disabled (-L option) we still traverse the
3753 * pool claiming each block we discover, but we skip opening any space
3754 * maps.
3755 */
3757 zdb_leak_init(spa, &zcb);
3758
3759 /*
3760 * If there's a deferred-free bplist, process that first.
3761 */
3762 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
3763 count_block_cb, &zcb, NULL);
3764
3765 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
3766 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
3767 count_block_cb, &zcb, NULL);
3768 }
3769
3770 zdb_claim_removing(spa, &zcb);
3771
3772 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
3773 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
3774 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
3775 &zcb, NULL));
3776 }
3815 }
3816 }
3817
3818 /*
3819 * Report any leaked segments.
3820 */
3821 leaks |= zdb_leak_fini(spa, &zcb);
3822
3823 tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
3824
3825 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
3826 norm_space = metaslab_class_get_space(spa_normal_class(spa));
3827
3828 total_alloc = norm_alloc +
3829 metaslab_class_get_alloc(spa_log_class(spa)) +
3830 metaslab_class_get_alloc(spa_special_class(spa)) +
3831 metaslab_class_get_alloc(spa_dedup_class(spa));
3832 total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
3833 zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
3834
3835 if (total_found == total_alloc && !dump_opt['L']) {
3836 (void) printf("\n\tNo leaks (block sum matches space"
3837 " maps exactly)\n");
3838 } else if (!dump_opt['L']) {
3839 (void) printf("block traversal size %llu != alloc %llu "
3840 "(%s %lld)\n",
3841 (u_longlong_t)total_found,
3842 (u_longlong_t)total_alloc,
3843 (dump_opt['L']) ? "unreachable" : "leaked",
3844 (longlong_t)(total_alloc - total_found));
3845 leaks = B_TRUE;
3846 }
3847
3848 if (tzb->zb_count == 0)
3849 return (2);
3850
3851 (void) printf("\n");
3852 (void) printf("\t%-16s %14llu\n", "bp count:",
3853 (u_longlong_t)tzb->zb_count);
3854 (void) printf("\t%-16s %14llu\n", "ganged count:",
3855 (longlong_t)tzb->zb_gangs);
3856 (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:",
3857 (u_longlong_t)tzb->zb_lsize,
3858 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
4158 int ret = 0;
4159
4160 spa_condensing_indirect_phys_t *scip =
4161 &spa->spa_condensing_indirect_phys;
4162 if (scip->scip_next_mapping_object != 0) {
4163 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev];
4164 ASSERT(scip->scip_prev_obsolete_sm_object != 0);
4165 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
4166
4167 (void) printf("Condensing indirect vdev %llu: new mapping "
4168 "object %llu, prev obsolete sm %llu\n",
4169 (u_longlong_t)scip->scip_vdev,
4170 (u_longlong_t)scip->scip_next_mapping_object,
4171 (u_longlong_t)scip->scip_prev_obsolete_sm_object);
4172 if (scip->scip_prev_obsolete_sm_object != 0) {
4173 space_map_t *prev_obsolete_sm = NULL;
4174 VERIFY0(space_map_open(&prev_obsolete_sm,
4175 spa->spa_meta_objset,
4176 scip->scip_prev_obsolete_sm_object,
4177 0, vd->vdev_asize, 0));
4178 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
4179 (void) printf("\n");
4180 space_map_close(prev_obsolete_sm);
4181 }
4182
4183 scip_count += 2;
4184 }
4185
4186 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
4187 vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
4188 vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
4189
4190 if (vic->vic_mapping_object != 0) {
4191 ASSERT(vd->vdev_ops == &vdev_indirect_ops ||
4192 vd->vdev_removing);
4193 indirect_vdev_count++;
4194
4195 if (vd->vdev_indirect_mapping->vim_havecounts) {
4196 obsolete_counts_count++;
4197 }
4363 (void) fprintf(stderr,
4364 "\rverifying vdev %llu, space map entry %llu of %llu ...",
4365 (longlong_t)vd->vdev_id,
4366 (longlong_t)vcsec->vcsec_entryid,
4367 (longlong_t)vcsec->vcsec_num_entries);
4368 }
4369 vcsec->vcsec_entryid++;
4370
4371 /*
4372 * See comment in checkpoint_sm_exclude_entry_cb()
4373 */
4374 VERIFY3U(sme->sme_offset, >=, ms->ms_start);
4375 VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
4376
4377 /*
4378 * The entries in the vdev_checkpoint_sm should be marked as
4379 * allocated in the checkpointed state of the pool, therefore
4380 	 * their respective ms_allocatable trees should not contain them.
4381 */
4382 mutex_enter(&ms->ms_lock);
4383 range_tree_verify_not_present(ms->ms_allocatable,
4384 sme->sme_offset, sme->sme_run);
4385 mutex_exit(&ms->ms_lock);
4386
4387 return (0);
4388 }
4389
4390 /*
4391 * Verify that all segments in the vdev_checkpoint_sm are allocated
4392 * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's
4393 * ms_allocatable).
4394 *
4395 * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of
4396 * each vdev in the current state of the pool to the metaslab space maps
4397 * (ms_sm) of the checkpointed state of the pool.
4398 *
4399 * Note that the function changes the state of the ms_allocatable
4400 * trees of the current spa_t. The entries of these ms_allocatable
4401 	 * trees are cleared out and then repopulated with the free
4402 * entries of their respective ms_sm space maps.
4403 */
4404 static void
4427 continue;
4428 }
4429
4430 /*
4431 * If the checkpoint space map doesn't exist, then nothing
4432 * here is checkpointed so there's nothing to verify.
4433 */
4434 if (current_vd->vdev_top_zap == 0 ||
4435 zap_contains(spa_meta_objset(current),
4436 current_vd->vdev_top_zap,
4437 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
4438 continue;
4439
4440 VERIFY0(zap_lookup(spa_meta_objset(current),
4441 current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
4442 sizeof (uint64_t), 1, &checkpoint_sm_obj));
4443
4444 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
4445 checkpoint_sm_obj, 0, current_vd->vdev_asize,
4446 current_vd->vdev_ashift));
4447
4448 verify_checkpoint_sm_entry_cb_arg_t vcsec;
4449 vcsec.vcsec_vd = ckpoint_vd;
4450 vcsec.vcsec_entryid = 0;
4451 vcsec.vcsec_num_entries =
4452 space_map_length(checkpoint_sm) / sizeof (uint64_t);
4453 VERIFY0(space_map_iterate(checkpoint_sm,
4454 space_map_length(checkpoint_sm),
4455 verify_checkpoint_sm_entry_cb, &vcsec));
4456 dump_spacemap(current->spa_meta_objset, checkpoint_sm);
4457 space_map_close(checkpoint_sm);
4458 }
4459
4460 /*
4461 * If we've added vdevs since we took the checkpoint, ensure
4462 * that their checkpoint space maps are empty.
4463 */
4464 if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) {
4465 for (uint64_t c = ckpoint_rvd->vdev_children;
4466 c < current_rvd->vdev_children; c++) {
4467 vdev_t *current_vd = current_rvd->vdev_child[c];
4468 ASSERT3P(current_vd->vdev_checkpoint_sm, ==, NULL);
4469 }
4470 }
4471
4472 /* for cleaner progress output */
4473 (void) fprintf(stderr, "\n");
4474 }
4514 "\rverifying vdev %llu of %llu, "
4515 "metaslab %llu of %llu ...",
4516 (longlong_t)current_vd->vdev_id,
4517 (longlong_t)current_rvd->vdev_children,
4518 (longlong_t)current_vd->vdev_ms[m]->ms_id,
4519 (longlong_t)current_vd->vdev_ms_count);
4520
4521 /*
4522 * We walk through the ms_allocatable trees that
4523 * are loaded with the allocated blocks from the
4524 			 * ms_sm spacemaps of the checkpoint. For each
4525 			 * of these ranges we ensure that it does not exist
4526 			 * in the ms_allocatable trees of the
4527 			 * current state, which are loaded with the ranges
4528 			 * that are currently free.
4529 *
4530 * This way we ensure that none of the blocks that
4531 * are part of the checkpoint were freed by mistake.
4532 */
4533 range_tree_walk(ckpoint_msp->ms_allocatable,
4534 (range_tree_func_t *)range_tree_verify_not_present,
4535 current_msp->ms_allocatable);
4536 }
4537 }
4538
4539 /* for cleaner progress output */
4540 (void) fprintf(stderr, "\n");
4541 }
4542
4543 static void
4544 verify_checkpoint_blocks(spa_t *spa)
4545 {
4546 ASSERT(!dump_opt['L']);
4547
4548 spa_t *checkpoint_spa;
4549 char *checkpoint_pool;
4550 nvlist_t *config = NULL;
4551 int error = 0;
4552
4553 /*
4554 * We import the checkpointed state of the pool (under a different
4555 * name) so we can do verification on it against the current state
4556 * of the pool.
4557 */
4558 checkpoint_pool = import_checkpointed_state(spa->spa_name, config,
4559 NULL);
4560 ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
4561
4562 error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG);
4563 if (error != 0) {
4564 fatal("Tried to open pool \"%s\" but spa_open() failed with "
4565 "error %d\n", checkpoint_pool, error);
4566 }
4567
4593
4594 for (uint64_t i = 0; i < rvd->vdev_children; i++) {
4595 vdev_t *vd = rvd->vdev_child[i];
4596
4597 space_map_t *checkpoint_sm = NULL;
4598 uint64_t checkpoint_sm_obj;
4599
4600 if (vd->vdev_top_zap == 0)
4601 continue;
4602
4603 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
4604 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
4605 continue;
4606
4607 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
4608 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
4609 sizeof (uint64_t), 1, &checkpoint_sm_obj));
4610
4611 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
4612 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
4613 dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
4614 space_map_close(checkpoint_sm);
4615 }
4616 }
4617
4618 static int
4619 verify_checkpoint(spa_t *spa)
4620 {
4621 uberblock_t checkpoint;
4622 int error;
4623
4624 if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
4625 return (0);
4626
4627 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
4628 DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
4629 sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
4630
4631 if (error == ENOENT && !dump_opt['L']) {
4632 /*
|