1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  25  * Copyright (c) 2014 Integros [integros.com]
  26  * Copyright 2017 Nexenta Systems, Inc.
  27  * Copyright 2017 RackTop Systems.
  28  */
  29 
  30 #include <stdio.h>
  31 #include <unistd.h>
  32 #include <stdio_ext.h>
  33 #include <stdlib.h>
  34 #include <ctype.h>
  35 #include <sys/zfs_context.h>
  36 #include <sys/spa.h>
  37 #include <sys/spa_impl.h>
  38 #include <sys/dmu.h>
  39 #include <sys/zap.h>
  40 #include <sys/fs/zfs.h>
  41 #include <sys/zfs_znode.h>
  42 #include <sys/zfs_sa.h>
  43 #include <sys/sa.h>
  44 #include <sys/sa_impl.h>
  45 #include <sys/vdev.h>
  46 #include <sys/vdev_impl.h>
  47 #include <sys/metaslab_impl.h>
  48 #include <sys/dmu_objset.h>
  49 #include <sys/dsl_dir.h>
  50 #include <sys/dsl_dataset.h>
  51 #include <sys/dsl_pool.h>
  52 #include <sys/dbuf.h>
  53 #include <sys/zil.h>
  54 #include <sys/zil_impl.h>
  55 #include <sys/stat.h>
  56 #include <sys/resource.h>
  57 #include <sys/dmu_traverse.h>
  58 #include <sys/zio_checksum.h>
  59 #include <sys/zio_compress.h>
  60 #include <sys/zfs_fuid.h>
  61 #include <sys/arc.h>
  62 #include <sys/ddt.h>
  63 #include <sys/zfeature.h>
  64 #include <sys/abd.h>
  65 #include <sys/blkptr.h>
  66 #include <zfs_comutil.h>
  67 #include <libcmdutils.h>
  68 #undef verify
  69 #include <libzfs.h>
  70 
  71 #include "zdb.h"
  72 
/*
 * Name of compression function 'idx' taken from zio_compress_table, or
 * "UNKNOWN" when idx is not below ZIO_COMPRESS_FUNCTIONS.
 * Note: 'idx' is expanded more than once.
 */
#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ?     \
        zio_compress_table[(idx)].ci_name : "UNKNOWN")
/*
 * Name of checksum function 'idx' taken from zio_checksum_table, or
 * "UNKNOWN" when idx is not below ZIO_CHECKSUM_FUNCTIONS.
 */
#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ?     \
        zio_checksum_table[(idx)].ci_name : "UNKNOWN")
/*
 * Name of object type 'idx': the dmu_ot[] name when idx is below
 * DMU_OT_NUMTYPES; otherwise, if DMU_OT_IS_VALID(idx), the name of its
 * byteswap class from dmu_ot_byteswap[]; otherwise "UNKNOWN".
 */
#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ?  \
        dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ?  \
        dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
/*
 * Map object type 'idx' to a value no greater than DMU_OT_NUMTYPES:
 *   - idx itself when it is below DMU_OT_NUMTYPES;
 *   - DMU_OT_ZAP_OTHER for DMU_OTN_ZAP_DATA / DMU_OTN_ZAP_METADATA;
 *   - DMU_OT_UINT64_OTHER for DMU_OTN_UINT64_DATA / DMU_OTN_UINT64_METADATA;
 *   - DMU_OT_NUMTYPES for anything else.
 */
#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) :          \
        (idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ?    \
        DMU_OT_ZAP_OTHER : \
        (idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
        DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
  85 
/*
 * Global variables this file refers to.  In a normal build they are only
 * declared here (extern); when 'lint' is defined they are given local
 * definitions instead.
 * NOTE(review): presumably the real definitions live in the ZFS libraries
 * this program links against -- confirm.
 */
#ifndef lint
extern int reference_tracking_enable;
extern boolean_t zfs_recover;
extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
extern int zfs_vdev_async_read_max_active;
extern int aok;
extern boolean_t spa_load_verify_dryrun;
#else
int reference_tracking_enable;
boolean_t zfs_recover;
uint64_t zfs_arc_max, zfs_arc_meta_limit;
int zfs_vdev_async_read_max_active;
int aok;
boolean_t spa_load_verify_dryrun;
#endif
 101 
/* Program name used as the prefix of usage and error messages. */
static const char cmdname[] = "zdb";
/*
 * Per-option state, indexed by the option character (e.g. dump_opt['P'],
 * dump_opt['d']).  Code in this file tests entries both for being non-zero
 * and against numeric thresholds.
 */
uint8_t dump_opt[256];

/*
 * Signature shared by the dump_*() object viewer functions in this file:
 * (objset, object number, data pointer, size of that data).
 */
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

/*
 * NOTE(review): presumably the object numbers given on the command line and
 * how many of them there are -- confirm against the argument parser.
 */
uint64_t *zopt_object = NULL;
static unsigned zopt_objects = 0;
/* libzfs library handle. */
libzfs_handle_t *g_zfs;
/*
 * NOTE(review): appears to be the limit behind the -I option ("maximum
 * number of checksumming I/Os" in usage()) -- confirm where it is consumed.
 */
uint64_t max_inflight = 1000;

/* Defined later in the file; needed by dump_bpobj(). */
static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
 113 
 114 /*
 115  * These libumem hooks provide a reasonable set of defaults for the allocator's
 116  * debugging facilities.
 117  */
 118 const char *
 119 _umem_debug_init()
 120 {
 121         return ("default,verbose"); /* $UMEM_DEBUG setting */
 122 }
 123 
 124 const char *
 125 _umem_logging_init(void)
 126 {
 127         return ("fail,contents"); /* $UMEM_LOGGING setting */
 128 }
 129 
 130 static void
 131 usage(void)
 132 {
 133         (void) fprintf(stderr,
 134             "Usage:\t%s [-AbcdDFGhiLMPsvX] [-e [-V] [-p <path> ...]] "
 135             "[-I <inflight I/Os>]\n"
 136             "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
 137             "\t\t[<poolname> [<object> ...]]\n"
 138             "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> "
 139             "[<object> ...]\n"
 140             "\t%s -C [-A] [-U <cache>]\n"
 141             "\t%s -l [-Aqu] <device>\n"
 142             "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
 143             "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
 144             "\t%s -O <dataset> <path>\n"
 145             "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
 146             "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
 147             "\t%s -E [-A] word0:word1:...:word15\n"
 148             "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
 149             "<poolname>\n\n",
 150             cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
 151             cmdname, cmdname);
 152 
 153         (void) fprintf(stderr, "    Dataset name must include at least one "
 154             "separator character '/' or '@'\n");
 155         (void) fprintf(stderr, "    If dataset name is specified, only that "
 156             "dataset is dumped\n");
 157         (void) fprintf(stderr, "    If object numbers are specified, only "
 158             "those objects are dumped\n\n");
 159         (void) fprintf(stderr, "    Options to control amount of output:\n");
 160         (void) fprintf(stderr, "        -b block statistics\n");
 161         (void) fprintf(stderr, "        -c checksum all metadata (twice for "
 162             "all data) blocks\n");
 163         (void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
 164         (void) fprintf(stderr, "        -d dataset(s)\n");
 165         (void) fprintf(stderr, "        -D dedup statistics\n");
 166         (void) fprintf(stderr, "        -E decode and display block from an "
 167             "embedded block pointer\n");
 168         (void) fprintf(stderr, "        -h pool history\n");
 169         (void) fprintf(stderr, "        -i intent logs\n");
 170         (void) fprintf(stderr, "        -l read label contents\n");
 171         (void) fprintf(stderr, "        -L disable leak tracking (do not "
 172             "load spacemaps)\n");
 173         (void) fprintf(stderr, "        -m metaslabs\n");
 174         (void) fprintf(stderr, "        -M metaslab groups\n");
 175         (void) fprintf(stderr, "        -O perform object lookups by path\n");
 176         (void) fprintf(stderr, "        -R read and display block from a "
 177             "device\n");
 178         (void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
 179         (void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
 180         (void) fprintf(stderr, "        -v verbose (applies to all "
 181             "others)\n\n");
 182         (void) fprintf(stderr, "    Below options are intended for use "
 183             "with other options:\n");
 184         (void) fprintf(stderr, "        -A ignore assertions (-A), enable "
 185             "panic recovery (-AA) or both (-AAA)\n");
 186         (void) fprintf(stderr, "        -e pool is exported/destroyed/"
 187             "has altroot/not in a cachefile\n");
 188         (void) fprintf(stderr, "        -F attempt automatic rewind within "
 189             "safe range of transaction groups\n");
 190         (void) fprintf(stderr, "        -G dump zfs_dbgmsg buffer before "
 191             "exiting\n");
 192         (void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
 193             "specify the maximum number of "
 194             "checksumming I/Os [default is 200]\n");
 195         (void) fprintf(stderr, "        -o <variable>=<value> set global "
 196             "variable to an unsigned 32-bit integer value\n");
 197         (void) fprintf(stderr, "        -p <path> -- use one or more with "
 198             "-e to specify path to vdev dir\n");
 199         (void) fprintf(stderr, "        -P print numbers in parseable form\n");
 200         (void) fprintf(stderr, "        -q don't print label contents\n");
 201         (void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
 202             "searching for uberblocks\n");
 203         (void) fprintf(stderr, "        -u uberblock\n");
 204         (void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
 205             "cachefile\n");
 206         (void) fprintf(stderr, "        -V do verbatim import\n");
 207         (void) fprintf(stderr, "        -x <dumpdir> -- "
 208             "dump all read blocks into specified directory\n");
 209         (void) fprintf(stderr, "        -X attempt extreme rewind (does not "
 210             "work with dataset)\n\n");
 211         (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
 212             "to make only that option verbose\n");
 213         (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
 214         exit(1);
 215 }
 216 
 217 static void
 218 dump_debug_buffer()
 219 {
 220         if (dump_opt['G']) {
 221                 (void) printf("\n");
 222                 zfs_dbgmsg_print("zdb");
 223         }
 224 }
 225 
 226 /*
 227  * Called for usage errors that are discovered after a call to spa_open(),
 228  * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
 229  */
 230 
 231 static void
 232 fatal(const char *fmt, ...)
 233 {
 234         va_list ap;
 235 
 236         va_start(ap, fmt);
 237         (void) fprintf(stderr, "%s: ", cmdname);
 238         (void) vfprintf(stderr, fmt, ap);
 239         va_end(ap);
 240         (void) fprintf(stderr, "\n");
 241 
 242         dump_debug_buffer();
 243 
 244         exit(1);
 245 }
 246 
 247 /* ARGSUSED */
 248 static void
 249 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
 250 {
 251         nvlist_t *nv;
 252         size_t nvsize = *(uint64_t *)data;
 253         char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
 254 
 255         VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
 256 
 257         VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
 258 
 259         umem_free(packed, nvsize);
 260 
 261         dump_nvlist(nv, 8);
 262 
 263         nvlist_free(nv);
 264 }
 265 
 266 /* ARGSUSED */
 267 static void
 268 dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
 269 {
 270         spa_history_phys_t *shp = data;
 271 
 272         if (shp == NULL)
 273                 return;
 274 
 275         (void) printf("\t\tpool_create_len = %llu\n",
 276             (u_longlong_t)shp->sh_pool_create_len);
 277         (void) printf("\t\tphys_max_off = %llu\n",
 278             (u_longlong_t)shp->sh_phys_max_off);
 279         (void) printf("\t\tbof = %llu\n",
 280             (u_longlong_t)shp->sh_bof);
 281         (void) printf("\t\teof = %llu\n",
 282             (u_longlong_t)shp->sh_eof);
 283         (void) printf("\t\trecords_lost = %llu\n",
 284             (u_longlong_t)shp->sh_records_lost);
 285 }
 286 
 287 static void
 288 zdb_nicenum(uint64_t num, char *buf, size_t buflen)
 289 {
 290         if (dump_opt['P'])
 291                 (void) snprintf(buf, buflen, "%llu", (longlong_t)num);
 292         else
 293                 nicenum(num, buf, sizeof (buf));
 294 }
 295 
 296 static const char histo_stars[] = "****************************************";
 297 static const uint64_t histo_width = sizeof (histo_stars) - 1;
 298 
 299 static void
 300 dump_histogram(const uint64_t *histo, int size, int offset)
 301 {
 302         int i;
 303         int minidx = size - 1;
 304         int maxidx = 0;
 305         uint64_t max = 0;
 306 
 307         for (i = 0; i < size; i++) {
 308                 if (histo[i] > max)
 309                         max = histo[i];
 310                 if (histo[i] > 0 && i > maxidx)
 311                         maxidx = i;
 312                 if (histo[i] > 0 && i < minidx)
 313                         minidx = i;
 314         }
 315 
 316         if (max < histo_width)
 317                 max = histo_width;
 318 
 319         for (i = minidx; i <= maxidx; i++) {
 320                 (void) printf("\t\t\t%3u: %6llu %s\n",
 321                     i + offset, (u_longlong_t)histo[i],
 322                     &histo_stars[(max - histo[i]) * histo_width / max]);
 323         }
 324 }
 325 
 326 static void
 327 dump_zap_stats(objset_t *os, uint64_t object)
 328 {
 329         int error;
 330         zap_stats_t zs;
 331 
 332         error = zap_get_stats(os, object, &zs);
 333         if (error)
 334                 return;
 335 
 336         if (zs.zs_ptrtbl_len == 0) {
 337                 ASSERT(zs.zs_num_blocks == 1);
 338                 (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
 339                     (u_longlong_t)zs.zs_blocksize,
 340                     (u_longlong_t)zs.zs_num_entries);
 341                 return;
 342         }
 343 
 344         (void) printf("\tFat ZAP stats:\n");
 345 
 346         (void) printf("\t\tPointer table:\n");
 347         (void) printf("\t\t\t%llu elements\n",
 348             (u_longlong_t)zs.zs_ptrtbl_len);
 349         (void) printf("\t\t\tzt_blk: %llu\n",
 350             (u_longlong_t)zs.zs_ptrtbl_zt_blk);
 351         (void) printf("\t\t\tzt_numblks: %llu\n",
 352             (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
 353         (void) printf("\t\t\tzt_shift: %llu\n",
 354             (u_longlong_t)zs.zs_ptrtbl_zt_shift);
 355         (void) printf("\t\t\tzt_blks_copied: %llu\n",
 356             (u_longlong_t)zs.zs_ptrtbl_blks_copied);
 357         (void) printf("\t\t\tzt_nextblk: %llu\n",
 358             (u_longlong_t)zs.zs_ptrtbl_nextblk);
 359 
 360         (void) printf("\t\tZAP entries: %llu\n",
 361             (u_longlong_t)zs.zs_num_entries);
 362         (void) printf("\t\tLeaf blocks: %llu\n",
 363             (u_longlong_t)zs.zs_num_leafs);
 364         (void) printf("\t\tTotal blocks: %llu\n",
 365             (u_longlong_t)zs.zs_num_blocks);
 366         (void) printf("\t\tzap_block_type: 0x%llx\n",
 367             (u_longlong_t)zs.zs_block_type);
 368         (void) printf("\t\tzap_magic: 0x%llx\n",
 369             (u_longlong_t)zs.zs_magic);
 370         (void) printf("\t\tzap_salt: 0x%llx\n",
 371             (u_longlong_t)zs.zs_salt);
 372 
 373         (void) printf("\t\tLeafs with 2^n pointers:\n");
 374         dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
 375 
 376         (void) printf("\t\tBlocks with n*5 entries:\n");
 377         dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
 378 
 379         (void) printf("\t\tBlocks n/10 full:\n");
 380         dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
 381 
 382         (void) printf("\t\tEntries with n chunks:\n");
 383         dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
 384 
 385         (void) printf("\t\tBuckets with n entries:\n");
 386         dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
 387 }
 388 
/*
 * Object viewer with an empty body: prints nothing and ignores all of its
 * arguments.
 */
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}
 394 
/*
 * Object viewer that only prints a fixed "UNKNOWN OBJECT TYPE" line; all of
 * its arguments are ignored.
 */
/*ARGSUSED*/
static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
        (void) printf("\tUNKNOWN OBJECT TYPE\n");
}
 401 
/*
 * Object viewer with an empty body: prints nothing and ignores all of its
 * arguments.
 */
/*ARGSUSED*/
static void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}
 407 
/*
 * Object viewer with an empty body: prints nothing and ignores all of its
 * arguments.
 */
/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
}
 413 
 414 /*ARGSUSED*/
 415 static void
 416 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
 417 {
 418         zap_cursor_t zc;
 419         zap_attribute_t attr;
 420         void *prop;
 421         unsigned i;
 422 
 423         dump_zap_stats(os, object);
 424         (void) printf("\n");
 425 
 426         for (zap_cursor_init(&zc, os, object);
 427             zap_cursor_retrieve(&zc, &attr) == 0;
 428             zap_cursor_advance(&zc)) {
 429                 (void) printf("\t\t%s = ", attr.za_name);
 430                 if (attr.za_num_integers == 0) {
 431                         (void) printf("\n");
 432                         continue;
 433                 }
 434                 prop = umem_zalloc(attr.za_num_integers *
 435                     attr.za_integer_length, UMEM_NOFAIL);
 436                 (void) zap_lookup(os, object, attr.za_name,
 437                     attr.za_integer_length, attr.za_num_integers, prop);
 438                 if (attr.za_integer_length == 1) {
 439                         (void) printf("%s", (char *)prop);
 440                 } else {
 441                         for (i = 0; i < attr.za_num_integers; i++) {
 442                                 switch (attr.za_integer_length) {
 443                                 case 2:
 444                                         (void) printf("%u ",
 445                                             ((uint16_t *)prop)[i]);
 446                                         break;
 447                                 case 4:
 448                                         (void) printf("%u ",
 449                                             ((uint32_t *)prop)[i]);
 450                                         break;
 451                                 case 8:
 452                                         (void) printf("%lld ",
 453                                             (u_longlong_t)((int64_t *)prop)[i]);
 454                                         break;
 455                                 }
 456                         }
 457                 }
 458                 (void) printf("\n");
 459                 umem_free(prop, attr.za_num_integers * attr.za_integer_length);
 460         }
 461         zap_cursor_fini(&zc);
 462 }
 463 
 464 static void
 465 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
 466 {
 467         bpobj_phys_t *bpop = data;
 468         char bytes[32], comp[32], uncomp[32];
 469 
 470         /* make sure the output won't get truncated */
 471         CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
 472         CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
 473         CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
 474 
 475         if (bpop == NULL)
 476                 return;
 477 
 478         zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
 479         zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
 480         zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));
 481 
 482         (void) printf("\t\tnum_blkptrs = %llu\n",
 483             (u_longlong_t)bpop->bpo_num_blkptrs);
 484         (void) printf("\t\tbytes = %s\n", bytes);
 485         if (size >= BPOBJ_SIZE_V1) {
 486                 (void) printf("\t\tcomp = %s\n", comp);
 487                 (void) printf("\t\tuncomp = %s\n", uncomp);
 488         }
 489         if (size >= sizeof (*bpop)) {
 490                 (void) printf("\t\tsubobjs = %llu\n",
 491                     (u_longlong_t)bpop->bpo_subobjs);
 492                 (void) printf("\t\tnum_subobjs = %llu\n",
 493                     (u_longlong_t)bpop->bpo_num_subobjs);
 494         }
 495 
 496         if (dump_opt['d'] < 5)
 497                 return;
 498 
 499         for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) {
 500                 char blkbuf[BP_SPRINTF_LEN];
 501                 blkptr_t bp;
 502 
 503                 int err = dmu_read(os, object,
 504                     i * sizeof (bp), sizeof (bp), &bp, 0);
 505                 if (err != 0) {
 506                         (void) printf("got error %u from dmu_read\n", err);
 507                         break;
 508                 }
 509                 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
 510                 (void) printf("\t%s\n", blkbuf);
 511         }
 512 }
 513 
 514 /* ARGSUSED */
 515 static void
 516 dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
 517 {
 518         dmu_object_info_t doi;
 519 
 520         VERIFY0(dmu_object_info(os, object, &doi));
 521         uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
 522 
 523         int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
 524         if (err != 0) {
 525                 (void) printf("got error %u from dmu_read\n", err);
 526                 kmem_free(subobjs, doi.doi_max_offset);
 527                 return;
 528         }
 529 
 530         int64_t last_nonzero = -1;
 531         for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) {
 532                 if (subobjs[i] != 0)
 533                         last_nonzero = i;
 534         }
 535 
 536         for (int64_t i = 0; i <= last_nonzero; i++) {
 537                 (void) printf("\t%llu\n", (longlong_t)subobjs[i]);
 538         }
 539         kmem_free(subobjs, doi.doi_max_offset);
 540 }
 541 
/*
 * Object viewer that prints only the ZAP statistics of the object; the
 * entries themselves are not listed here.  'data' and 'size' are not used.
 */
/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
        dump_zap_stats(os, object);
        /* contents are printed elsewhere, properly decoded */
}
 549 
 550 /*ARGSUSED*/
 551 static void
 552 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
 553 {
 554         zap_cursor_t zc;
 555         zap_attribute_t attr;
 556 
 557         dump_zap_stats(os, object);
 558         (void) printf("\n");
 559 
 560         for (zap_cursor_init(&zc, os, object);
 561             zap_cursor_retrieve(&zc, &attr) == 0;
 562             zap_cursor_advance(&zc)) {
 563                 (void) printf("\t\t%s = ", attr.za_name);
 564                 if (attr.za_num_integers == 0) {
 565                         (void) printf("\n");
 566                         continue;
 567                 }
 568                 (void) printf(" %llx : [%d:%d:%d]\n",
 569                     (u_longlong_t)attr.za_first_integer,
 570                     (int)ATTR_LENGTH(attr.za_first_integer),
 571                     (int)ATTR_BSWAP(attr.za_first_integer),
 572                     (int)ATTR_NUM(attr.za_first_integer));
 573         }
 574         zap_cursor_fini(&zc);
 575 }
 576 
 577 /*ARGSUSED*/
 578 static void
 579 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
 580 {
 581         zap_cursor_t zc;
 582         zap_attribute_t attr;
 583         uint16_t *layout_attrs;
 584         unsigned i;
 585 
 586         dump_zap_stats(os, object);
 587         (void) printf("\n");
 588 
 589         for (zap_cursor_init(&zc, os, object);
 590             zap_cursor_retrieve(&zc, &attr) == 0;
 591             zap_cursor_advance(&zc)) {
 592                 (void) printf("\t\t%s = [", attr.za_name);
 593                 if (attr.za_num_integers == 0) {
 594                         (void) printf("\n");
 595                         continue;
 596                 }
 597 
 598                 VERIFY(attr.za_integer_length == 2);
 599                 layout_attrs = umem_zalloc(attr.za_num_integers *
 600                     attr.za_integer_length, UMEM_NOFAIL);
 601 
 602                 VERIFY(zap_lookup(os, object, attr.za_name,
 603                     attr.za_integer_length,
 604                     attr.za_num_integers, layout_attrs) == 0);
 605 
 606                 for (i = 0; i != attr.za_num_integers; i++)
 607                         (void) printf(" %d ", (int)layout_attrs[i]);
 608                 (void) printf("]\n");
 609                 umem_free(layout_attrs,
 610                     attr.za_num_integers * attr.za_integer_length);
 611         }
 612         zap_cursor_fini(&zc);
 613 }
 614 
 615 /*ARGSUSED*/
 616 static void
 617 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
 618 {
 619         zap_cursor_t zc;
 620         zap_attribute_t attr;
 621         const char *typenames[] = {
 622                 /* 0 */ "not specified",
 623                 /* 1 */ "FIFO",
 624                 /* 2 */ "Character Device",
 625                 /* 3 */ "3 (invalid)",
 626                 /* 4 */ "Directory",
 627                 /* 5 */ "5 (invalid)",
 628                 /* 6 */ "Block Device",
 629                 /* 7 */ "7 (invalid)",
 630                 /* 8 */ "Regular File",
 631                 /* 9 */ "9 (invalid)",
 632                 /* 10 */ "Symbolic Link",
 633                 /* 11 */ "11 (invalid)",
 634                 /* 12 */ "Socket",
 635                 /* 13 */ "Door",
 636                 /* 14 */ "Event Port",
 637                 /* 15 */ "15 (invalid)",
 638         };
 639 
 640         dump_zap_stats(os, object);
 641         (void) printf("\n");
 642 
 643         for (zap_cursor_init(&zc, os, object);
 644             zap_cursor_retrieve(&zc, &attr) == 0;
 645             zap_cursor_advance(&zc)) {
 646                 (void) printf("\t\t%s = %lld (type: %s)\n",
 647                     attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
 648                     typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
 649         }
 650         zap_cursor_fini(&zc);
 651 }
 652 
 653 static int
 654 get_dtl_refcount(vdev_t *vd)
 655 {
 656         int refcount = 0;
 657 
 658         if (vd->vdev_ops->vdev_op_leaf) {
 659                 space_map_t *sm = vd->vdev_dtl_sm;
 660 
 661                 if (sm != NULL &&
 662                     sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
 663                         return (1);
 664                 return (0);
 665         }
 666 
 667         for (unsigned c = 0; c < vd->vdev_children; c++)
 668                 refcount += get_dtl_refcount(vd->vdev_child[c]);
 669         return (refcount);
 670 }
 671 
 672 static int
 673 get_metaslab_refcount(vdev_t *vd)
 674 {
 675         int refcount = 0;
 676 
 677         if (vd->vdev_top == vd) {
 678                 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 679                         space_map_t *sm = vd->vdev_ms[m]->ms_sm;
 680 
 681                         if (sm != NULL &&
 682                             sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
 683                                 refcount++;
 684                 }
 685         }
 686         for (unsigned c = 0; c < vd->vdev_children; c++)
 687                 refcount += get_metaslab_refcount(vd->vdev_child[c]);
 688 
 689         return (refcount);
 690 }
 691 
 692 static int
 693 get_obsolete_refcount(vdev_t *vd)
 694 {
 695         int refcount = 0;
 696 
 697         uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd);
 698         if (vd->vdev_top == vd && obsolete_sm_obj != 0) {
 699                 dmu_object_info_t doi;
 700                 VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
 701                     obsolete_sm_obj, &doi));
 702                 if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
 703                         refcount++;
 704                 }
 705         } else {
 706                 ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
 707                 ASSERT3U(obsolete_sm_obj, ==, 0);
 708         }
 709         for (unsigned c = 0; c < vd->vdev_children; c++) {
 710                 refcount += get_obsolete_refcount(vd->vdev_child[c]);
 711         }
 712 
 713         return (refcount);
 714 }
 715 
 716 static int
 717 get_prev_obsolete_spacemap_refcount(spa_t *spa)
 718 {
 719         uint64_t prev_obj =
 720             spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
 721         if (prev_obj != 0) {
 722                 dmu_object_info_t doi;
 723                 VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
 724                 if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
 725                         return (1);
 726                 }
 727         }
 728         return (0);
 729 }
 730 
 731 static int
 732 verify_spacemap_refcounts(spa_t *spa)
 733 {
 734         uint64_t expected_refcount = 0;
 735         uint64_t actual_refcount;
 736 
 737         (void) feature_get_refcount(spa,
 738             &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
 739             &expected_refcount);
 740         actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
 741         actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
 742         actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
 743         actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
 744 
 745         if (expected_refcount != actual_refcount) {
 746                 (void) printf("space map refcount mismatch: expected %lld != "
 747                     "actual %lld\n",
 748                     (longlong_t)expected_refcount,
 749                     (longlong_t)actual_refcount);
 750                 return (2);
 751         }
 752         return (0);
 753 }
 754 
 755 static void
 756 dump_spacemap(objset_t *os, space_map_t *sm)
 757 {
 758         uint64_t alloc, offset, entry;
 759         char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 760             "INVALID", "INVALID", "INVALID", "INVALID" };
 761 
 762         if (sm == NULL)
 763                 return;
 764 
 765         (void) printf("space map object %llu:\n",
 766             (longlong_t)sm->sm_phys->smp_object);
 767         (void) printf("  smp_objsize = 0x%llx\n",
 768             (longlong_t)sm->sm_phys->smp_objsize);
 769         (void) printf("  smp_alloc = 0x%llx\n",
 770             (longlong_t)sm->sm_phys->smp_alloc);
 771 
 772         /*
 773          * Print out the freelist entries in both encoded and decoded form.
 774          */
 775         alloc = 0;
 776         for (offset = 0; offset < space_map_length(sm);
 777             offset += sizeof (entry)) {
 778                 uint8_t mapshift = sm->sm_shift;
 779 
 780                 VERIFY0(dmu_read(os, space_map_object(sm), offset,
 781                     sizeof (entry), &entry, DMU_READ_PREFETCH));
 782                 if (SM_DEBUG_DECODE(entry)) {
 783 
 784                         (void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
 785                             (u_longlong_t)(offset / sizeof (entry)),
 786                             ddata[SM_DEBUG_ACTION_DECODE(entry)],
 787                             (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
 788                             (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
 789                 } else {
 790                         (void) printf("\t    [%6llu]    %c  range:"
 791                             " %010llx-%010llx  size: %06llx\n",
 792                             (u_longlong_t)(offset / sizeof (entry)),
 793                             SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
 794                             (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
 795                             mapshift) + sm->sm_start),
 796                             (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
 797                             mapshift) + sm->sm_start +
 798                             (SM_RUN_DECODE(entry) << mapshift)),
 799                             (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
 800                         if (SM_TYPE_DECODE(entry) == SM_ALLOC)
 801                                 alloc += SM_RUN_DECODE(entry) << mapshift;
 802                         else
 803                                 alloc -= SM_RUN_DECODE(entry) << mapshift;
 804                 }
 805         }
 806         if (alloc != space_map_allocated(sm)) {
 807                 (void) printf("space_map_object alloc (%llu) INCONSISTENT "
 808                     "with space map summary (%llu)\n",
 809                     (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
 810         }
 811 }
 812 
 813 static void
 814 dump_metaslab_stats(metaslab_t *msp)
 815 {
 816         char maxbuf[32];
 817         range_tree_t *rt = msp->ms_tree;
 818         avl_tree_t *t = &msp->ms_size_tree;
 819         int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
 820 
 821         /* max sure nicenum has enough space */
 822         CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
 823 
 824         zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
 825 
 826         (void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
 827             "segments", avl_numnodes(t), "maxsize", maxbuf,
 828             "freepct", free_pct);
 829         (void) printf("\tIn-memory histogram:\n");
 830         dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 831 }
 832 
 833 static void
 834 dump_metaslab(metaslab_t *msp)
 835 {
 836         vdev_t *vd = msp->ms_group->mg_vd;
 837         spa_t *spa = vd->vdev_spa;
 838         space_map_t *sm = msp->ms_sm;
 839         char freebuf[32];
 840 
 841         zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
 842             sizeof (freebuf));
 843 
 844         (void) printf(
 845             "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
 846             (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
 847             (u_longlong_t)space_map_object(sm), freebuf);
 848 
 849         if (dump_opt['m'] > 2 && !dump_opt['L']) {
 850                 mutex_enter(&msp->ms_lock);
 851                 metaslab_load_wait(msp);
 852                 if (!msp->ms_loaded) {
 853                         VERIFY0(metaslab_load(msp));
 854                         range_tree_stat_verify(msp->ms_tree);
 855                 }
 856                 dump_metaslab_stats(msp);
 857                 metaslab_unload(msp);
 858                 mutex_exit(&msp->ms_lock);
 859         }
 860 
 861         if (dump_opt['m'] > 1 && sm != NULL &&
 862             spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
 863                 /*
 864                  * The space map histogram represents free space in chunks
 865                  * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
 866                  */
 867                 (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
 868                     (u_longlong_t)msp->ms_fragmentation);
 869                 dump_histogram(sm->sm_phys->smp_histogram,
 870                     SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
 871         }
 872 
 873         if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
 874                 ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
 875 
 876                 dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
 877         }
 878 }
 879 
 880 static void
 881 print_vdev_metaslab_header(vdev_t *vd)
 882 {
 883         (void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
 884             (u_longlong_t)vd->vdev_id,
 885             "metaslabs", (u_longlong_t)vd->vdev_ms_count,
 886             "offset", "spacemap", "free");
 887         (void) printf("\t%15s   %19s   %15s   %10s\n",
 888             "---------------", "-------------------",
 889             "---------------", "-------------");
 890 }
 891 
 892 static void
 893 dump_metaslab_groups(spa_t *spa)
 894 {
 895         vdev_t *rvd = spa->spa_root_vdev;
 896         metaslab_class_t *mc = spa_normal_class(spa);
 897         uint64_t fragmentation;
 898 
 899         metaslab_class_histogram_verify(mc);
 900 
 901         for (unsigned c = 0; c < rvd->vdev_children; c++) {
 902                 vdev_t *tvd = rvd->vdev_child[c];
 903                 metaslab_group_t *mg = tvd->vdev_mg;
 904 
 905                 if (mg->mg_class != mc)
 906                         continue;
 907 
 908                 metaslab_group_histogram_verify(mg);
 909                 mg->mg_fragmentation = metaslab_group_fragmentation(mg);
 910 
 911                 (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
 912                     "fragmentation",
 913                     (u_longlong_t)tvd->vdev_id,
 914                     (u_longlong_t)tvd->vdev_ms_count);
 915                 if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
 916                         (void) printf("%3s\n", "-");
 917                 } else {
 918                         (void) printf("%3llu%%\n",
 919                             (u_longlong_t)mg->mg_fragmentation);
 920                 }
 921                 dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 922         }
 923 
 924         (void) printf("\tpool %s\tfragmentation", spa_name(spa));
 925         fragmentation = metaslab_class_fragmentation(mc);
 926         if (fragmentation == ZFS_FRAG_INVALID)
 927                 (void) printf("\t%3s\n", "-");
 928         else
 929                 (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
 930         dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 931 }
 932 
 933 static void
 934 print_vdev_indirect(vdev_t *vd)
 935 {
 936         vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
 937         vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 938         vdev_indirect_births_t *vib = vd->vdev_indirect_births;
 939 
 940         if (vim == NULL) {
 941                 ASSERT3P(vib, ==, NULL);
 942                 return;
 943         }
 944 
 945         ASSERT3U(vdev_indirect_mapping_object(vim), ==,
 946             vic->vic_mapping_object);
 947         ASSERT3U(vdev_indirect_births_object(vib), ==,
 948             vic->vic_births_object);
 949 
 950         (void) printf("indirect births obj %llu:\n",
 951             (longlong_t)vic->vic_births_object);
 952         (void) printf("    vib_count = %llu\n",
 953             (longlong_t)vdev_indirect_births_count(vib));
 954         for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
 955                 vdev_indirect_birth_entry_phys_t *cur_vibe =
 956                     &vib->vib_entries[i];
 957                 (void) printf("\toffset %llx -> txg %llu\n",
 958                     (longlong_t)cur_vibe->vibe_offset,
 959                     (longlong_t)cur_vibe->vibe_phys_birth_txg);
 960         }
 961         (void) printf("\n");
 962 
 963         (void) printf("indirect mapping obj %llu:\n",
 964             (longlong_t)vic->vic_mapping_object);
 965         (void) printf("    vim_max_offset = 0x%llx\n",
 966             (longlong_t)vdev_indirect_mapping_max_offset(vim));
 967         (void) printf("    vim_bytes_mapped = 0x%llx\n",
 968             (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
 969         (void) printf("    vim_count = %llu\n",
 970             (longlong_t)vdev_indirect_mapping_num_entries(vim));
 971 
 972         if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
 973                 return;
 974 
 975         uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);
 976 
 977         for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
 978                 vdev_indirect_mapping_entry_phys_t *vimep =
 979                     &vim->vim_entries[i];
 980                 (void) printf("\t<%llx:%llx:%llx> -> "
 981                     "<%llx:%llx:%llx> (%x obsolete)\n",
 982                     (longlong_t)vd->vdev_id,
 983                     (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
 984                     (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
 985                     (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
 986                     (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
 987                     (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
 988                     counts[i]);
 989         }
 990         (void) printf("\n");
 991 
 992         uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
 993         if (obsolete_sm_object != 0) {
 994                 objset_t *mos = vd->vdev_spa->spa_meta_objset;
 995                 (void) printf("obsolete space map object %llu:\n",
 996                     (u_longlong_t)obsolete_sm_object);
 997                 ASSERT(vd->vdev_obsolete_sm != NULL);
 998                 ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
 999                     obsolete_sm_object);
1000                 dump_spacemap(mos, vd->vdev_obsolete_sm);
1001                 (void) printf("\n");
1002         }
1003 }
1004 
1005 static void
1006 dump_metaslabs(spa_t *spa)
1007 {
1008         vdev_t *vd, *rvd = spa->spa_root_vdev;
1009         uint64_t m, c = 0, children = rvd->vdev_children;
1010 
1011         (void) printf("\nMetaslabs:\n");
1012 
1013         if (!dump_opt['d'] && zopt_objects > 0) {
1014                 c = zopt_object[0];
1015 
1016                 if (c >= children)
1017                         (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
1018 
1019                 if (zopt_objects > 1) {
1020                         vd = rvd->vdev_child[c];
1021                         print_vdev_metaslab_header(vd);
1022 
1023                         for (m = 1; m < zopt_objects; m++) {
1024                                 if (zopt_object[m] < vd->vdev_ms_count)
1025                                         dump_metaslab(
1026                                             vd->vdev_ms[zopt_object[m]]);
1027                                 else
1028                                         (void) fprintf(stderr, "bad metaslab "
1029                                             "number %llu\n",
1030                                             (u_longlong_t)zopt_object[m]);
1031                         }
1032                         (void) printf("\n");
1033                         return;
1034                 }
1035                 children = c + 1;
1036         }
1037         for (; c < children; c++) {
1038                 vd = rvd->vdev_child[c];
1039                 print_vdev_metaslab_header(vd);
1040 
1041                 print_vdev_indirect(vd);
1042 
1043                 for (m = 0; m < vd->vdev_ms_count; m++)
1044                         dump_metaslab(vd->vdev_ms[m]);
1045                 (void) printf("\n");
1046         }
1047 }
1048 
1049 static void
1050 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
1051 {
1052         const ddt_phys_t *ddp = dde->dde_phys;
1053         const ddt_key_t *ddk = &dde->dde_key;
1054         const char *types[4] = { "ditto", "single", "double", "triple" };
1055         char blkbuf[BP_SPRINTF_LEN];
1056         blkptr_t blk;
1057 
1058         for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1059                 if (ddp->ddp_phys_birth == 0)
1060                         continue;
1061                 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
1062                 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
1063                 (void) printf("index %llx refcnt %llu %s %s\n",
1064                     (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
1065                     types[p], blkbuf);
1066         }
1067 }
1068 
1069 static void
1070 dump_dedup_ratio(const ddt_stat_t *dds)
1071 {
1072         double rL, rP, rD, D, dedup, compress, copies;
1073 
1074         if (dds->dds_blocks == 0)
1075                 return;
1076 
1077         rL = (double)dds->dds_ref_lsize;
1078         rP = (double)dds->dds_ref_psize;
1079         rD = (double)dds->dds_ref_dsize;
1080         D = (double)dds->dds_dsize;
1081 
1082         dedup = rD / D;
1083         compress = rL / rP;
1084         copies = rD / rP;
1085 
1086         (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
1087             "dedup * compress / copies = %.2f\n\n",
1088             dedup, compress, copies, dedup * compress / copies);
1089 }
1090 
1091 static void
1092 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
1093 {
1094         char name[DDT_NAMELEN];
1095         ddt_entry_t dde;
1096         uint64_t walk = 0;
1097         dmu_object_info_t doi;
1098         uint64_t count, dspace, mspace;
1099         int error;
1100 
1101         error = ddt_object_info(ddt, type, class, &doi);
1102 
1103         if (error == ENOENT)
1104                 return;
1105         ASSERT(error == 0);
1106 
1107         if ((count = ddt_object_count(ddt, type, class)) == 0)
1108                 return;
1109 
1110         dspace = doi.doi_physical_blocks_512 << 9;
1111         mspace = doi.doi_fill_count * doi.doi_data_block_size;
1112 
1113         ddt_object_name(ddt, type, class, name);
1114 
1115         (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
1116             name,
1117             (u_longlong_t)count,
1118             (u_longlong_t)(dspace / count),
1119             (u_longlong_t)(mspace / count));
1120 
1121         if (dump_opt['D'] < 3)
1122                 return;
1123 
1124         zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
1125 
1126         if (dump_opt['D'] < 4)
1127                 return;
1128 
1129         if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
1130                 return;
1131 
1132         (void) printf("%s contents:\n\n", name);
1133 
1134         while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
1135                 dump_dde(ddt, &dde, walk);
1136 
1137         ASSERT(error == ENOENT);
1138 
1139         (void) printf("\n");
1140 }
1141 
1142 static void
1143 dump_all_ddts(spa_t *spa)
1144 {
1145         ddt_histogram_t ddh_total;
1146         ddt_stat_t dds_total;
1147 
1148         bzero(&ddh_total, sizeof (ddh_total));
1149         bzero(&dds_total, sizeof (dds_total));
1150 
1151         for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
1152                 ddt_t *ddt = spa->spa_ddt[c];
1153                 for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
1154                         for (enum ddt_class class = 0; class < DDT_CLASSES;
1155                             class++) {
1156                                 dump_ddt(ddt, type, class);
1157                         }
1158                 }
1159         }
1160 
1161         ddt_get_dedup_stats(spa, &dds_total);
1162 
1163         if (dds_total.dds_blocks == 0) {
1164                 (void) printf("All DDTs are empty\n");
1165                 return;
1166         }
1167 
1168         (void) printf("\n");
1169 
1170         if (dump_opt['D'] > 1) {
1171                 (void) printf("DDT histogram (aggregated over all DDTs):\n");
1172                 ddt_get_dedup_histogram(spa, &ddh_total);
1173                 zpool_dump_ddt(&dds_total, &ddh_total);
1174         }
1175 
1176         dump_dedup_ratio(&dds_total);
1177 }
1178 
1179 static void
1180 dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
1181 {
1182         char *prefix = arg;
1183 
1184         (void) printf("%s [%llu,%llu) length %llu\n",
1185             prefix,
1186             (u_longlong_t)start,
1187             (u_longlong_t)(start + size),
1188             (u_longlong_t)(size));
1189 }
1190 
1191 static void
1192 dump_dtl(vdev_t *vd, int indent)
1193 {
1194         spa_t *spa = vd->vdev_spa;
1195         boolean_t required;
1196         const char *name[DTL_TYPES] = { "missing", "partial", "scrub",
1197                 "outage" };
1198         char prefix[256];
1199 
1200         spa_vdev_state_enter(spa, SCL_NONE);
1201         required = vdev_dtl_required(vd);
1202         (void) spa_vdev_state_exit(spa, NULL, 0);
1203 
1204         if (indent == 0)
1205                 (void) printf("\nDirty time logs:\n\n");
1206 
1207         (void) printf("\t%*s%s [%s]\n", indent, "",
1208             vd->vdev_path ? vd->vdev_path :
1209             vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
1210             required ? "DTL-required" : "DTL-expendable");
1211 
1212         for (int t = 0; t < DTL_TYPES; t++) {
1213                 range_tree_t *rt = vd->vdev_dtl[t];
1214                 if (range_tree_space(rt) == 0)
1215                         continue;
1216                 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
1217                     indent + 2, "", name[t]);
1218                 range_tree_walk(rt, dump_dtl_seg, prefix);
1219                 if (dump_opt['d'] > 5 && vd->vdev_children == 0)
1220                         dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
1221         }
1222 
1223         for (unsigned c = 0; c < vd->vdev_children; c++)
1224                 dump_dtl(vd->vdev_child[c], indent + 4);
1225 }
1226 
1227 static void
1228 dump_history(spa_t *spa)
1229 {
1230         nvlist_t **events = NULL;
1231         uint64_t resid, len, off = 0;
1232         uint_t num = 0;
1233         int error;
1234         time_t tsec;
1235         struct tm t;
1236         char tbuf[30];
1237         char internalstr[MAXPATHLEN];
1238 
1239         char *buf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
1240         do {
1241                 len = SPA_MAXBLOCKSIZE;
1242 
1243                 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
1244                         (void) fprintf(stderr, "Unable to read history: "
1245                             "error %d\n", error);
1246                         umem_free(buf, SPA_MAXBLOCKSIZE);
1247                         return;
1248                 }
1249 
1250                 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
1251                         break;
1252 
1253                 off -= resid;
1254         } while (len != 0);
1255         umem_free(buf, SPA_MAXBLOCKSIZE);
1256 
1257         (void) printf("\nHistory:\n");
1258         for (unsigned i = 0; i < num; i++) {
1259                 uint64_t time, txg, ievent;
1260                 char *cmd, *intstr;
1261                 boolean_t printed = B_FALSE;
1262 
1263                 if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
1264                     &time) != 0)
1265                         goto next;
1266                 if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
1267                     &cmd) != 0) {
1268                         if (nvlist_lookup_uint64(events[i],
1269                             ZPOOL_HIST_INT_EVENT, &ievent) != 0)
1270                                 goto next;
1271                         verify(nvlist_lookup_uint64(events[i],
1272                             ZPOOL_HIST_TXG, &txg) == 0);
1273                         verify(nvlist_lookup_string(events[i],
1274                             ZPOOL_HIST_INT_STR, &intstr) == 0);
1275                         if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
1276                                 goto next;
1277 
1278                         (void) snprintf(internalstr,
1279                             sizeof (internalstr),
1280                             "[internal %s txg:%ju] %s",
1281                             zfs_history_event_names[ievent], (uintmax_t)txg,
1282                             intstr);
1283                         cmd = internalstr;
1284                 }
1285                 tsec = time;
1286                 (void) localtime_r(&tsec, &t);
1287                 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
1288                 (void) printf("%s %s\n", tbuf, cmd);
1289                 printed = B_TRUE;
1290 
1291 next:
1292                 if (dump_opt['h'] > 1) {
1293                         if (!printed)
1294                                 (void) printf("unrecognized record:\n");
1295                         dump_nvlist(events[i], 2);
1296                 }
1297         }
1298 }
1299 
/*
 * Object dump routine that intentionally prints nothing: the body is empty
 * and all four arguments are ignored.  It has the same
 * (os, object, data, size) signature as dump_dsl_dir() and
 * dump_dsl_dataset().
 */
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1305 
1306 static uint64_t
1307 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
1308     const zbookmark_phys_t *zb)
1309 {
1310         if (dnp == NULL) {
1311                 ASSERT(zb->zb_level < 0);
1312                 if (zb->zb_object == 0)
1313                         return (zb->zb_blkid);
1314                 return (zb->zb_blkid * BP_GET_LSIZE(bp));
1315         }
1316 
1317         ASSERT(zb->zb_level >= 0);
1318 
1319         return ((zb->zb_blkid <<
1320             (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
1321             dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
1322 }
1323 
1324 static void
1325 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
1326 {
1327         const dva_t *dva = bp->blk_dva;
1328         int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
1329 
1330         if (dump_opt['b'] >= 6) {
1331                 snprintf_blkptr(blkbuf, buflen, bp);
1332                 return;
1333         }
1334 
1335         if (BP_IS_EMBEDDED(bp)) {
1336                 (void) sprintf(blkbuf,
1337                     "EMBEDDED et=%u %llxL/%llxP B=%llu",
1338                     (int)BPE_GET_ETYPE(bp),
1339                     (u_longlong_t)BPE_GET_LSIZE(bp),
1340                     (u_longlong_t)BPE_GET_PSIZE(bp),
1341                     (u_longlong_t)bp->blk_birth);
1342                 return;
1343         }
1344 
1345         blkbuf[0] = '\0';
1346         for (int i = 0; i < ndvas; i++)
1347                 (void) snprintf(blkbuf + strlen(blkbuf),
1348                     buflen - strlen(blkbuf), "%llu:%llx:%llx ",
1349                     (u_longlong_t)DVA_GET_VDEV(&dva[i]),
1350                     (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
1351                     (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
1352 
1353         if (BP_IS_HOLE(bp)) {
1354                 (void) snprintf(blkbuf + strlen(blkbuf),
1355                     buflen - strlen(blkbuf),
1356                     "%llxL B=%llu",
1357                     (u_longlong_t)BP_GET_LSIZE(bp),
1358                     (u_longlong_t)bp->blk_birth);
1359         } else {
1360                 (void) snprintf(blkbuf + strlen(blkbuf),
1361                     buflen - strlen(blkbuf),
1362                     "%llxL/%llxP F=%llu B=%llu/%llu",
1363                     (u_longlong_t)BP_GET_LSIZE(bp),
1364                     (u_longlong_t)BP_GET_PSIZE(bp),
1365                     (u_longlong_t)BP_GET_FILL(bp),
1366                     (u_longlong_t)bp->blk_birth,
1367                     (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
1368         }
1369 }
1370 
1371 static void
1372 print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
1373     const dnode_phys_t *dnp)
1374 {
1375         char blkbuf[BP_SPRINTF_LEN];
1376         int l;
1377 
1378         if (!BP_IS_EMBEDDED(bp)) {
1379                 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1380                 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1381         }
1382 
1383         (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
1384 
1385         ASSERT(zb->zb_level >= 0);
1386 
1387         for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
1388                 if (l == zb->zb_level) {
1389                         (void) printf("L%llx", (u_longlong_t)zb->zb_level);
1390                 } else {
1391                         (void) printf(" ");
1392                 }
1393         }
1394 
1395         snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1396         (void) printf("%s\n", blkbuf);
1397 }
1398 
1399 static int
1400 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
1401     blkptr_t *bp, const zbookmark_phys_t *zb)
1402 {
1403         int err = 0;
1404 
1405         if (bp->blk_birth == 0)
1406                 return (0);
1407 
1408         print_indirect(bp, zb, dnp);
1409 
1410         if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
1411                 arc_flags_t flags = ARC_FLAG_WAIT;
1412                 int i;
1413                 blkptr_t *cbp;
1414                 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
1415                 arc_buf_t *buf;
1416                 uint64_t fill = 0;
1417 
1418                 err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
1419                     ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
1420                 if (err)
1421                         return (err);
1422                 ASSERT(buf->b_data);
1423 
1424                 /* recursively visit blocks below this */
1425                 cbp = buf->b_data;
1426                 for (i = 0; i < epb; i++, cbp++) {
1427                         zbookmark_phys_t czb;
1428 
1429                         SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
1430                             zb->zb_level - 1,
1431                             zb->zb_blkid * epb + i);
1432                         err = visit_indirect(spa, dnp, cbp, &czb);
1433                         if (err)
1434                                 break;
1435                         fill += BP_GET_FILL(cbp);
1436                 }
1437                 if (!err)
1438                         ASSERT3U(fill, ==, BP_GET_FILL(bp));
1439                 arc_buf_destroy(buf, &buf);
1440         }
1441 
1442         return (err);
1443 }
1444 
1445 /*ARGSUSED*/
1446 static void
1447 dump_indirect(dnode_t *dn)
1448 {
1449         dnode_phys_t *dnp = dn->dn_phys;
1450         int j;
1451         zbookmark_phys_t czb;
1452 
1453         (void) printf("Indirect blocks:\n");
1454 
1455         SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1456             dn->dn_object, dnp->dn_nlevels - 1, 0);
1457         for (j = 0; j < dnp->dn_nblkptr; j++) {
1458                 czb.zb_blkid = j;
1459                 (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1460                     &dnp->dn_blkptr[j], &czb);
1461         }
1462 
1463         (void) printf("\n");
1464 }
1465 
1466 /*ARGSUSED*/
1467 static void
1468 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1469 {
1470         dsl_dir_phys_t *dd = data;
1471         time_t crtime;
1472         char nice[32];
1473 
1474         /* make sure nicenum has enough space */
1475         CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ);
1476 
1477         if (dd == NULL)
1478                 return;
1479 
1480         ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1481 
1482         crtime = dd->dd_creation_time;
1483         (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1484         (void) printf("\t\thead_dataset_obj = %llu\n",
1485             (u_longlong_t)dd->dd_head_dataset_obj);
1486         (void) printf("\t\tparent_dir_obj = %llu\n",
1487             (u_longlong_t)dd->dd_parent_obj);
1488         (void) printf("\t\torigin_obj = %llu\n",
1489             (u_longlong_t)dd->dd_origin_obj);
1490         (void) printf("\t\tchild_dir_zapobj = %llu\n",
1491             (u_longlong_t)dd->dd_child_dir_zapobj);
1492         zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
1493         (void) printf("\t\tused_bytes = %s\n", nice);
1494         zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
1495         (void) printf("\t\tcompressed_bytes = %s\n", nice);
1496         zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
1497         (void) printf("\t\tuncompressed_bytes = %s\n", nice);
1498         zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
1499         (void) printf("\t\tquota = %s\n", nice);
1500         zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
1501         (void) printf("\t\treserved = %s\n", nice);
1502         (void) printf("\t\tprops_zapobj = %llu\n",
1503             (u_longlong_t)dd->dd_props_zapobj);
1504         (void) printf("\t\tdeleg_zapobj = %llu\n",
1505             (u_longlong_t)dd->dd_deleg_zapobj);
1506         (void) printf("\t\tflags = %llx\n",
1507             (u_longlong_t)dd->dd_flags);
1508 
1509 #define DO(which) \
1510         zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
1511             sizeof (nice)); \
1512         (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1513         DO(HEAD);
1514         DO(SNAP);
1515         DO(CHILD);
1516         DO(CHILD_RSRV);
1517         DO(REFRSRV);
1518 #undef DO
1519 }
1520 
1521 /*ARGSUSED*/
1522 static void
1523 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1524 {
1525         dsl_dataset_phys_t *ds = data;
1526         time_t crtime;
1527         char used[32], compressed[32], uncompressed[32], unique[32];
1528         char blkbuf[BP_SPRINTF_LEN];
1529 
1530         /* make sure nicenum has enough space */
1531         CTASSERT(sizeof (used) >= NN_NUMBUF_SZ);
1532         CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ);
1533         CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ);
1534         CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ);
1535 
1536         if (ds == NULL)
1537                 return;
1538 
1539         ASSERT(size == sizeof (*ds));
1540         crtime = ds->ds_creation_time;
1541         zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
1542         zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
1543         zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
1544             sizeof (uncompressed));
1545         zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
1546         snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
1547 
1548         (void) printf("\t\tdir_obj = %llu\n",
1549             (u_longlong_t)ds->ds_dir_obj);
1550         (void) printf("\t\tprev_snap_obj = %llu\n",
1551             (u_longlong_t)ds->ds_prev_snap_obj);
1552         (void) printf("\t\tprev_snap_txg = %llu\n",
1553             (u_longlong_t)ds->ds_prev_snap_txg);
1554         (void) printf("\t\tnext_snap_obj = %llu\n",
1555             (u_longlong_t)ds->ds_next_snap_obj);
1556         (void) printf("\t\tsnapnames_zapobj = %llu\n",
1557             (u_longlong_t)ds->ds_snapnames_zapobj);
1558         (void) printf("\t\tnum_children = %llu\n",
1559             (u_longlong_t)ds->ds_num_children);
1560         (void) printf("\t\tuserrefs_obj = %llu\n",
1561             (u_longlong_t)ds->ds_userrefs_obj);
1562         (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1563         (void) printf("\t\tcreation_txg = %llu\n",
1564             (u_longlong_t)ds->ds_creation_txg);
1565         (void) printf("\t\tdeadlist_obj = %llu\n",
1566             (u_longlong_t)ds->ds_deadlist_obj);
1567         (void) printf("\t\tused_bytes = %s\n", used);
1568         (void) printf("\t\tcompressed_bytes = %s\n", compressed);
1569         (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1570         (void) printf("\t\tunique = %s\n", unique);
1571         (void) printf("\t\tfsid_guid = %llu\n",
1572             (u_longlong_t)ds->ds_fsid_guid);
1573         (void) printf("\t\tguid = %llu\n",
1574             (u_longlong_t)ds->ds_guid);
1575         (void) printf("\t\tflags = %llx\n",
1576             (u_longlong_t)ds->ds_flags);
1577         (void) printf("\t\tnext_clones_obj = %llu\n",
1578             (u_longlong_t)ds->ds_next_clones_obj);
1579         (void) printf("\t\tprops_obj = %llu\n",
1580             (u_longlong_t)ds->ds_props_obj);
1581         (void) printf("\t\tbp = %s\n", blkbuf);
1582 }
1583 
1584 /* ARGSUSED */
1585 static int
1586 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1587 {
1588         char blkbuf[BP_SPRINTF_LEN];
1589 
1590         if (bp->blk_birth != 0) {
1591                 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
1592                 (void) printf("\t%s\n", blkbuf);
1593         }
1594         return (0);
1595 }
1596 
1597 static void
1598 dump_bptree(objset_t *os, uint64_t obj, const char *name)
1599 {
1600         char bytes[32];
1601         bptree_phys_t *bt;
1602         dmu_buf_t *db;
1603 
1604         /* make sure nicenum has enough space */
1605         CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1606 
1607         if (dump_opt['d'] < 3)
1608                 return;
1609 
1610         VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1611         bt = db->db_data;
1612         zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
1613         (void) printf("\n    %s: %llu datasets, %s\n",
1614             name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1615         dmu_buf_rele(db, FTAG);
1616 
1617         if (dump_opt['d'] < 5)
1618                 return;
1619 
1620         (void) printf("\n");
1621 
1622         (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1623 }
1624 
1625 /* ARGSUSED */
1626 static int
1627 dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1628 {
1629         char blkbuf[BP_SPRINTF_LEN];
1630 
1631         ASSERT(bp->blk_birth != 0);
1632         snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1633         (void) printf("\t%s\n", blkbuf);
1634         return (0);
1635 }
1636 
1637 static void
1638 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
1639 {
1640         char bytes[32];
1641         char comp[32];
1642         char uncomp[32];
1643 
1644         /* make sure nicenum has enough space */
1645         CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1646         CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
1647         CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
1648 
1649         if (dump_opt['d'] < 3)
1650                 return;
1651 
1652         zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes));
1653         if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1654                 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
1655                 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
1656                 (void) printf("    %*s: object %llu, %llu local blkptrs, "
1657                     "%llu subobjs in object %llu, %s (%s/%s comp)\n",
1658                     indent * 8, name,
1659                     (u_longlong_t)bpo->bpo_object,
1660                     (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1661                     (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1662                     (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
1663                     bytes, comp, uncomp);
1664 
1665                 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1666                         uint64_t subobj;
1667                         bpobj_t subbpo;
1668                         int error;
1669                         VERIFY0(dmu_read(bpo->bpo_os,
1670                             bpo->bpo_phys->bpo_subobjs,
1671                             i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1672                         error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1673                         if (error != 0) {
1674                                 (void) printf("ERROR %u while trying to open "
1675                                     "subobj id %llu\n",
1676                                     error, (u_longlong_t)subobj);
1677                                 continue;
1678                         }
1679                         dump_full_bpobj(&subbpo, "subobj", indent + 1);
1680                         bpobj_close(&subbpo);
1681                 }
1682         } else {
1683                 (void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
1684                     indent * 8, name,
1685                     (u_longlong_t)bpo->bpo_object,
1686                     (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1687                     bytes);
1688         }
1689 
1690         if (dump_opt['d'] < 5)
1691                 return;
1692 
1693 
1694         if (indent == 0) {
1695                 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1696                 (void) printf("\n");
1697         }
1698 }
1699 
1700 static void
1701 dump_deadlist(dsl_deadlist_t *dl)
1702 {
1703         dsl_deadlist_entry_t *dle;
1704         uint64_t unused;
1705         char bytes[32];
1706         char comp[32];
1707         char uncomp[32];
1708 
1709         /* make sure nicenum has enough space */
1710         CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1711         CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
1712         CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
1713 
1714         if (dump_opt['d'] < 3)
1715                 return;
1716 
1717         if (dl->dl_oldfmt) {
1718                 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
1719                 return;
1720         }
1721 
1722         zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes));
1723         zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
1724         zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
1725         (void) printf("\n    Deadlist: %s (%s/%s comp)\n",
1726             bytes, comp, uncomp);
1727 
1728         if (dump_opt['d'] < 4)
1729                 return;
1730 
1731         (void) printf("\n");
1732 
1733         /* force the tree to be loaded */
1734         dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
1735 
1736         for (dle = avl_first(&dl->dl_tree); dle;
1737             dle = AVL_NEXT(&dl->dl_tree, dle)) {
1738                 if (dump_opt['d'] >= 5) {
1739                         char buf[128];
1740                         (void) snprintf(buf, sizeof (buf),
1741                             "mintxg %llu -> obj %llu",
1742                             (longlong_t)dle->dle_mintxg,
1743                             (longlong_t)dle->dle_bpobj.bpo_object);
1744 
1745                         dump_full_bpobj(&dle->dle_bpobj, buf, 0);
1746                 } else {
1747                         (void) printf("mintxg %llu -> obj %llu\n",
1748                             (longlong_t)dle->dle_mintxg,
1749                             (longlong_t)dle->dle_bpobj.bpo_object);
1750 
1751                 }
1752         }
1753 }
1754 
/*
 * FUID lookup state.  idx_tree and domain_tree are created and loaded by
 * dump_uidgid() the first time an id with a non-zero FUID index is seen,
 * and released by fuid_table_destroy(); fuid_table_loaded records whether
 * they are currently populated.
 */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
/*
 * sa_os is the objset opened through open_objset() (NULL when none is
 * open); sa_attr_table is the attribute table sa_setup() filled in for it.
 * Both are reset to NULL by close_objset().
 */
static objset_t *sa_os = NULL;
static sa_attr_type_t *sa_attr_table = NULL;
1760 
1761 static int
1762 open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
1763 {
1764         int err;
1765         uint64_t sa_attrs = 0;
1766         uint64_t version = 0;
1767 
1768         VERIFY3P(sa_os, ==, NULL);
1769         err = dmu_objset_own(path, type, B_TRUE, tag, osp);
1770         if (err != 0) {
1771                 (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
1772                     strerror(err));
1773                 return (err);
1774         }
1775 
1776         if (dmu_objset_type(*osp) == DMU_OST_ZFS) {
1777                 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1778                     8, 1, &version);
1779                 if (version >= ZPL_VERSION_SA) {
1780                         (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1781                             8, 1, &sa_attrs);
1782                 }
1783                 err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
1784                     &sa_attr_table);
1785                 if (err != 0) {
1786                         (void) fprintf(stderr, "sa_setup failed: %s\n",
1787                             strerror(err));
1788                         dmu_objset_disown(*osp, tag);
1789                         *osp = NULL;
1790                 }
1791         }
1792         sa_os = *osp;
1793 
1794         return (0);
1795 }
1796 
1797 static void
1798 close_objset(objset_t *os, void *tag)
1799 {
1800         VERIFY3P(os, ==, sa_os);
1801         if (os->os_sa != NULL)
1802                 sa_tear_down(os);
1803         dmu_objset_disown(os, tag);
1804         sa_attr_table = NULL;
1805         sa_os = NULL;
1806 }
1807 
1808 static void
1809 fuid_table_destroy()
1810 {
1811         if (fuid_table_loaded) {
1812                 zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1813                 fuid_table_loaded = B_FALSE;
1814         }
1815 }
1816 
1817 /*
1818  * print uid or gid information.
1819  * For normal POSIX id just the id is printed in decimal format.
1820  * For CIFS files with FUID the fuid is printed in hex followed by
1821  * the domain-rid string.
1822  */
1823 static void
1824 print_idstr(uint64_t id, const char *id_type)
1825 {
1826         if (FUID_INDEX(id)) {
1827                 char *domain;
1828 
1829                 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1830                 (void) printf("\t%s     %llx [%s-%d]\n", id_type,
1831                     (u_longlong_t)id, domain, (int)FUID_RID(id));
1832         } else {
1833                 (void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1834         }
1835 
1836 }
1837 
1838 static void
1839 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1840 {
1841         uint32_t uid_idx, gid_idx;
1842 
1843         uid_idx = FUID_INDEX(uid);
1844         gid_idx = FUID_INDEX(gid);
1845 
1846         /* Load domain table, if not already loaded */
1847         if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1848                 uint64_t fuid_obj;
1849 
1850                 /* first find the fuid object.  It lives in the master node */
1851                 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1852                     8, 1, &fuid_obj) == 0);
1853                 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1854                 (void) zfs_fuid_table_load(os, fuid_obj,
1855                     &idx_tree, &domain_tree);
1856                 fuid_table_loaded = B_TRUE;
1857         }
1858 
1859         print_idstr(uid, "uid");
1860         print_idstr(gid, "gid");
1861 }
1862 
1863 /*ARGSUSED*/
1864 static void
1865 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1866 {
1867         char path[MAXPATHLEN * 2];      /* allow for xattr and failure prefix */
1868         sa_handle_t *hdl;
1869         uint64_t xattr, rdev, gen;
1870         uint64_t uid, gid, mode, fsize, parent, links;
1871         uint64_t pflags;
1872         uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1873         time_t z_crtime, z_atime, z_mtime, z_ctime;
1874         sa_bulk_attr_t bulk[12];
1875         int idx = 0;
1876         int error;
1877 
1878         VERIFY3P(os, ==, sa_os);
1879         if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1880                 (void) printf("Failed to get handle for SA znode\n");
1881                 return;
1882         }
1883 
1884         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1885         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1886         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1887             &links, 8);
1888         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1889         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1890             &mode, 8);
1891         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1892             NULL, &parent, 8);
1893         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1894             &fsize, 8);
1895         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1896             acctm, 16);
1897         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1898             modtm, 16);
1899         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1900             crtm, 16);
1901         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1902             chgtm, 16);
1903         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
1904             &pflags, 8);
1905 
1906         if (sa_bulk_lookup(hdl, bulk, idx)) {
1907                 (void) sa_handle_destroy(hdl);
1908                 return;
1909         }
1910 
1911         z_crtime = (time_t)crtm[0];
1912         z_atime = (time_t)acctm[0];
1913         z_mtime = (time_t)modtm[0];
1914         z_ctime = (time_t)chgtm[0];
1915 
1916         if (dump_opt['d'] > 4) {
1917                 error = zfs_obj_to_path(os, object, path, sizeof (path));
1918                 if (error != 0) {
1919                         (void) snprintf(path, sizeof (path),
1920                             "\?\?\?<object#%llu>", (u_longlong_t)object);
1921                 }
1922                 (void) printf("\tpath   %s\n", path);
1923         }
1924         dump_uidgid(os, uid, gid);
1925         (void) printf("\tatime  %s", ctime(&z_atime));
1926         (void) printf("\tmtime  %s", ctime(&z_mtime));
1927         (void) printf("\tctime  %s", ctime(&z_ctime));
1928         (void) printf("\tcrtime %s", ctime(&z_crtime));
1929         (void) printf("\tgen    %llu\n", (u_longlong_t)gen);
1930         (void) printf("\tmode   %llo\n", (u_longlong_t)mode);
1931         (void) printf("\tsize   %llu\n", (u_longlong_t)fsize);
1932         (void) printf("\tparent %llu\n", (u_longlong_t)parent);
1933         (void) printf("\tlinks  %llu\n", (u_longlong_t)links);
1934         (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
1935         if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1936             sizeof (uint64_t)) == 0)
1937                 (void) printf("\txattr  %llu\n", (u_longlong_t)xattr);
1938         if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1939             sizeof (uint64_t)) == 0)
1940                 (void) printf("\trdev   0x%016llx\n", (u_longlong_t)rdev);
1941         sa_handle_destroy(hdl);
1942 }
1943 
/*
 * Object viewer that prints nothing.  It fills the "ZFS V0 ACL" and
 * "ZFS ACL" slots of object_viewer[]; all four parameters are unused.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1949 
/*
 * Object viewer that prints nothing.  It fills the "DMU objset" slot of
 * object_viewer[]; all four parameters are unused.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1955 
/*
 * Table of per-object-type dump routines.  dump_object() indexes it with
 * ZDB_OT_TYPE() of an object's bonus type and of its data type, and calls
 * the entry as viewer(os, object, data, size).  The table is positional:
 * the comment on each row names the object type that slot serves, and the
 * extra final slot (hence DMU_OT_NUMTYPES + 1) handles unknown types.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
        dump_none,              /* unallocated                  */
        dump_zap,               /* object directory             */
        dump_uint64,            /* object array                 */
        dump_none,              /* packed nvlist                */
        dump_packed_nvlist,     /* packed nvlist size           */
        dump_none,              /* bpobj                        */
        dump_bpobj,             /* bpobj header                 */
        dump_none,              /* SPA space map header         */
        dump_none,              /* SPA space map                */
        dump_none,              /* ZIL intent log               */
        dump_dnode,             /* DMU dnode                    */
        dump_dmu_objset,        /* DMU objset                   */
        dump_dsl_dir,           /* DSL directory                */
        dump_zap,               /* DSL directory child map      */
        dump_zap,               /* DSL dataset snap map         */
        dump_zap,               /* DSL props                    */
        dump_dsl_dataset,       /* DSL dataset                  */
        dump_znode,             /* ZFS znode                    */
        dump_acl,               /* ZFS V0 ACL                   */
        dump_uint8,             /* ZFS plain file               */
        dump_zpldir,            /* ZFS directory                */
        dump_zap,               /* ZFS master node              */
        dump_zap,               /* ZFS delete queue             */
        dump_uint8,             /* zvol object                  */
        dump_zap,               /* zvol prop                    */
        dump_uint8,             /* other uint8[]                */
        dump_uint64,            /* other uint64[]               */
        dump_zap,               /* other ZAP                    */
        dump_zap,               /* persistent error log         */
        dump_uint8,             /* SPA history                  */
        dump_history_offsets,   /* SPA history offsets          */
        dump_zap,               /* Pool properties              */
        dump_zap,               /* DSL permissions              */
        dump_acl,               /* ZFS ACL                      */
        dump_uint8,             /* ZFS SYSACL                   */
        dump_none,              /* FUID nvlist                  */
        dump_packed_nvlist,     /* FUID nvlist size             */
        dump_zap,               /* DSL dataset next clones      */
        dump_zap,               /* DSL scrub queue              */
        dump_zap,               /* ZFS user/group used          */
        dump_zap,               /* ZFS user/group quota         */
        dump_zap,               /* snapshot refcount tags       */
        dump_ddt_zap,           /* DDT ZAP object               */
        dump_zap,               /* DDT statistics               */
        dump_znode,             /* SA object                    */
        dump_zap,               /* SA Master Node               */
        dump_sa_attrs,          /* SA attribute registration    */
        dump_sa_layouts,        /* SA attribute layouts         */
        dump_zap,               /* DSL scrub translations       */
        dump_none,              /* fake dedup BP                */
        dump_zap,               /* deadlist                     */
        dump_none,              /* deadlist hdr                 */
        dump_zap,               /* dsl clones                   */
        dump_bpobj_subobjs,     /* bpobj subobjs                */
        dump_unknown,           /* Unknown type, must be last   */
};
2013 
2014 static void
2015 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
2016 {
2017         dmu_buf_t *db = NULL;
2018         dmu_object_info_t doi;
2019         dnode_t *dn;
2020         void *bonus = NULL;
2021         size_t bsize = 0;
2022         char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
2023         char bonus_size[32];
2024         char aux[50];
2025         int error;
2026 
2027         /* make sure nicenum has enough space */
2028         CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ);
2029         CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ);
2030         CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
2031         CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
2032         CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
2033 
2034         if (*print_header) {
2035                 (void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
2036                     "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
2037                     "%full", "type");
2038                 *print_header = 0;
2039         }
2040 
2041         if (object == 0) {
2042                 dn = DMU_META_DNODE(os);
2043         } else {
2044                 error = dmu_bonus_hold(os, object, FTAG, &db);
2045                 if (error)
2046                         fatal("dmu_bonus_hold(%llu) failed, errno %u",
2047                             object, error);
2048                 bonus = db->db_data;
2049                 bsize = db->db_size;
2050                 dn = DB_DNODE((dmu_buf_impl_t *)db);
2051         }
2052         dmu_object_info_from_dnode(dn, &doi);
2053 
2054         zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
2055         zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
2056         zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
2057         zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
2058         zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
2059         (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
2060             doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
2061             doi.doi_max_offset);
2062 
2063         aux[0] = '\0';
2064 
2065         if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
2066                 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
2067                     ZDB_CHECKSUM_NAME(doi.doi_checksum));
2068         }
2069 
2070         if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
2071                 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
2072                     ZDB_COMPRESS_NAME(doi.doi_compress));
2073         }
2074 
2075         (void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
2076             (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
2077             asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
2078 
2079         if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
2080                 (void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
2081                     "", "", "", "", "", bonus_size, "bonus",
2082                     ZDB_OT_NAME(doi.doi_bonus_type));
2083         }
2084 
2085         if (verbosity >= 4) {
2086                 (void) printf("\tdnode flags: %s%s%s\n",
2087                     (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
2088                     "USED_BYTES " : "",
2089                     (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
2090                     "USERUSED_ACCOUNTED " : "",
2091                     (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
2092                     "SPILL_BLKPTR" : "");
2093                 (void) printf("\tdnode maxblkid: %llu\n",
2094                     (longlong_t)dn->dn_phys->dn_maxblkid);
2095 
2096                 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
2097                     bonus, bsize);
2098                 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
2099                 *print_header = 1;
2100         }
2101 
2102         if (verbosity >= 5)
2103                 dump_indirect(dn);
2104 
2105         if (verbosity >= 5) {
2106                 /*
2107                  * Report the list of segments that comprise the object.
2108                  */
2109                 uint64_t start = 0;
2110                 uint64_t end;
2111                 uint64_t blkfill = 1;
2112                 int minlvl = 1;
2113 
2114                 if (dn->dn_type == DMU_OT_DNODE) {
2115                         minlvl = 0;
2116                         blkfill = DNODES_PER_BLOCK;
2117                 }
2118 
2119                 for (;;) {
2120                         char segsize[32];
2121                         /* make sure nicenum has enough space */
2122                         CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ);
2123                         error = dnode_next_offset(dn,
2124                             0, &start, minlvl, blkfill, 0);
2125                         if (error)
2126                                 break;
2127                         end = start;
2128                         error = dnode_next_offset(dn,
2129                             DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
2130                         zdb_nicenum(end - start, segsize, sizeof (segsize));
2131                         (void) printf("\t\tsegment [%016llx, %016llx)"
2132                             " size %5s\n", (u_longlong_t)start,
2133                             (u_longlong_t)end, segsize);
2134                         if (error)
2135                                 break;
2136                         start = end;
2137                 }
2138         }
2139 
2140         if (db != NULL)
2141                 dmu_buf_rele(db, FTAG);
2142 }
2143 
/*
 * Display names for objset types; dump_dir() indexes this with dds_type
 * after checking that the value is below DMU_OST_NUMTYPES.
 */
static const char *objset_types[DMU_OST_NUMTYPES] = {
        "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
2146 
2147 static void
2148 dump_dir(objset_t *os)
2149 {
2150         dmu_objset_stats_t dds;
2151         uint64_t object, object_count;
2152         uint64_t refdbytes, usedobjs, scratch;
2153         char numbuf[32];
2154         char blkbuf[BP_SPRINTF_LEN + 20];
2155         char osname[ZFS_MAX_DATASET_NAME_LEN];
2156         const char *type = "UNKNOWN";
2157         int verbosity = dump_opt['d'];
2158         int print_header = 1;
2159         unsigned i;
2160         int error;
2161 
2162         /* make sure nicenum has enough space */
2163         CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
2164 
2165         dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
2166         dmu_objset_fast_stat(os, &dds);
2167         dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
2168 
2169         if (dds.dds_type < DMU_OST_NUMTYPES)
2170                 type = objset_types[dds.dds_type];
2171 
2172         if (dds.dds_type == DMU_OST_META) {
2173                 dds.dds_creation_txg = TXG_INITIAL;
2174                 usedobjs = BP_GET_FILL(os->os_rootbp);
2175                 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
2176                     dd_used_bytes;
2177         } else {
2178                 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
2179         }
2180 
2181         ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
2182 
2183         zdb_nicenum(refdbytes, numbuf, sizeof (numbuf));
2184 
2185         if (verbosity >= 4) {
2186                 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
2187                 (void) snprintf_blkptr(blkbuf + strlen(blkbuf),
2188                     sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
2189         } else {
2190                 blkbuf[0] = '\0';
2191         }
2192 
2193         dmu_objset_name(os, osname);
2194 
2195         (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
2196             "%s, %llu objects%s\n",
2197             osname, type, (u_longlong_t)dmu_objset_id(os),
2198             (u_longlong_t)dds.dds_creation_txg,
2199             numbuf, (u_longlong_t)usedobjs, blkbuf);
2200 
2201         if (zopt_objects != 0) {
2202                 for (i = 0; i < zopt_objects; i++)
2203                         dump_object(os, zopt_object[i], verbosity,
2204                             &print_header);
2205                 (void) printf("\n");
2206                 return;
2207         }
2208 
2209         if (dump_opt['i'] != 0 || verbosity >= 2)
2210                 dump_intent_log(dmu_objset_zil(os));
2211 
2212         if (dmu_objset_ds(os) != NULL) {
2213                 dsl_dataset_t *ds = dmu_objset_ds(os);
2214                 dump_deadlist(&ds->ds_deadlist);
2215 
2216                 if (dsl_dataset_remap_deadlist_exists(ds)) {
2217                         (void) printf("ds_remap_deadlist:\n");
2218                         dump_deadlist(&ds->ds_remap_deadlist);
2219                 }
2220         }
2221 
2222         if (verbosity < 2)
2223                 return;
2224 
2225         if (BP_IS_HOLE(os->os_rootbp))
2226                 return;
2227 
2228         dump_object(os, 0, verbosity, &print_header);
2229         object_count = 0;
2230         if (DMU_USERUSED_DNODE(os) != NULL &&
2231             DMU_USERUSED_DNODE(os)->dn_type != 0) {
2232                 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
2233                 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
2234         }
2235 
2236         object = 0;
2237         while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
2238                 dump_object(os, object, verbosity, &print_header);
2239                 object_count++;
2240         }
2241 
2242         ASSERT3U(object_count, ==, usedobjs);
2243 
2244         (void) printf("\n");
2245 
2246         if (error != ESRCH) {
2247                 (void) fprintf(stderr, "dmu_object_next() = %d\n", error);
2248                 abort();
2249         }
2250 }
2251 
2252 static void
2253 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
2254 {
2255         time_t timestamp = ub->ub_timestamp;
2256 
2257         (void) printf("%s", header ? header : "");
2258         (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
2259         (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
2260         (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
2261         (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
2262         (void) printf("\ttimestamp = %llu UTC = %s",
2263             (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
2264         if (dump_opt['u'] >= 3) {
2265                 char blkbuf[BP_SPRINTF_LEN];
2266                 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
2267                 (void) printf("\trootbp = %s\n", blkbuf);
2268         }
2269         (void) printf("%s", footer ? footer : "");
2270 }
2271 
2272 static void
2273 dump_config(spa_t *spa)
2274 {
2275         dmu_buf_t *db;
2276         size_t nvsize = 0;
2277         int error = 0;
2278 
2279 
2280         error = dmu_bonus_hold(spa->spa_meta_objset,
2281             spa->spa_config_object, FTAG, &db);
2282 
2283         if (error == 0) {
2284                 nvsize = *(uint64_t *)db->db_data;
2285                 dmu_buf_rele(db, FTAG);
2286 
2287                 (void) printf("\nMOS Configuration:\n");
2288                 dump_packed_nvlist(spa->spa_meta_objset,
2289                     spa->spa_config_object, (void *)&nvsize, 1);
2290         } else {
2291                 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
2292                     (u_longlong_t)spa->spa_config_object, error);
2293         }
2294 }
2295 
2296 static void
2297 dump_cachefile(const char *cachefile)
2298 {
2299         int fd;
2300         struct stat64 statbuf;
2301         char *buf;
2302         nvlist_t *config;
2303 
2304         if ((fd = open64(cachefile, O_RDONLY)) < 0) {
2305                 (void) printf("cannot open '%s': %s\n", cachefile,
2306                     strerror(errno));
2307                 exit(1);
2308         }
2309 
2310         if (fstat64(fd, &statbuf) != 0) {
2311                 (void) printf("failed to stat '%s': %s\n", cachefile,
2312                     strerror(errno));
2313                 exit(1);
2314         }
2315 
2316         if ((buf = malloc(statbuf.st_size)) == NULL) {
2317                 (void) fprintf(stderr, "failed to allocate %llu bytes\n",
2318                     (u_longlong_t)statbuf.st_size);
2319                 exit(1);
2320         }
2321 
2322         if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
2323                 (void) fprintf(stderr, "failed to read %llu bytes\n",
2324                     (u_longlong_t)statbuf.st_size);
2325                 exit(1);
2326         }
2327 
2328         (void) close(fd);
2329 
2330         if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
2331                 (void) fprintf(stderr, "failed to unpack nvlist\n");
2332                 exit(1);
2333         }
2334 
2335         free(buf);
2336 
2337         dump_nvlist(config, 0);
2338 
2339         nvlist_free(config);
2340 }
2341 
2342 #define ZDB_MAX_UB_HEADER_SIZE 32
2343 
2344 static void
2345 dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
2346 {
2347         vdev_t vd;
2348         vdev_t *vdp = &vd;
2349         char header[ZDB_MAX_UB_HEADER_SIZE];
2350 
2351         vd.vdev_ashift = ashift;
2352         vdp->vdev_top = vdp;
2353 
2354         for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
2355                 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
2356                 uberblock_t *ub = (void *)((char *)lbl + uoff);
2357 
2358                 if (uberblock_verify(ub))
2359                         continue;
2360                 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
2361                     "Uberblock[%d]\n", i);
2362                 dump_uberblock(ub, header, "");
2363         }
2364 }
2365 
2366 static char curpath[PATH_MAX];
2367 
2368 /*
2369  * Iterate through the path components, recursively passing
2370  * current one's obj and remaining path until we find the obj
2371  * for the last one.
2372  */
2373 static int
2374 dump_path_impl(objset_t *os, uint64_t obj, char *name)
2375 {
2376         int err;
2377         int header = 1;
2378         uint64_t child_obj;
2379         char *s;
2380         dmu_buf_t *db;
2381         dmu_object_info_t doi;
2382 
2383         if ((s = strchr(name, '/')) != NULL)
2384                 *s = '\0';
2385         err = zap_lookup(os, obj, name, 8, 1, &child_obj);
2386 
2387         (void) strlcat(curpath, name, sizeof (curpath));
2388 
2389         if (err != 0) {
2390                 (void) fprintf(stderr, "failed to lookup %s: %s\n",
2391                     curpath, strerror(err));
2392                 return (err);
2393         }
2394 
2395         child_obj = ZFS_DIRENT_OBJ(child_obj);
2396         err = sa_buf_hold(os, child_obj, FTAG, &db);
2397         if (err != 0) {
2398                 (void) fprintf(stderr,
2399                     "failed to get SA dbuf for obj %llu: %s\n",
2400                     (u_longlong_t)child_obj, strerror(err));
2401                 return (EINVAL);
2402         }
2403         dmu_object_info_from_db(db, &doi);
2404         sa_buf_rele(db, FTAG);
2405 
2406         if (doi.doi_bonus_type != DMU_OT_SA &&
2407             doi.doi_bonus_type != DMU_OT_ZNODE) {
2408                 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n",
2409                     doi.doi_bonus_type, (u_longlong_t)child_obj);
2410                 return (EINVAL);
2411         }
2412 
2413         if (dump_opt['v'] > 6) {
2414                 (void) printf("obj=%llu %s type=%d bonustype=%d\n",
2415                     (u_longlong_t)child_obj, curpath, doi.doi_type,
2416                     doi.doi_bonus_type);
2417         }
2418 
2419         (void) strlcat(curpath, "/", sizeof (curpath));
2420 
2421         switch (doi.doi_type) {
2422         case DMU_OT_DIRECTORY_CONTENTS:
2423                 if (s != NULL && *(s + 1) != '\0')
2424                         return (dump_path_impl(os, child_obj, s + 1));
2425                 /*FALLTHROUGH*/
2426         case DMU_OT_PLAIN_FILE_CONTENTS:
2427                 dump_object(os, child_obj, dump_opt['v'], &header);
2428                 return (0);
2429         default:
2430                 (void) fprintf(stderr, "object %llu has non-file/directory "
2431                     "type %d\n", (u_longlong_t)obj, doi.doi_type);
2432                 break;
2433         }
2434 
2435         return (EINVAL);
2436 }
2437 
2438 /*
2439  * Dump the blocks for the object specified by path inside the dataset.
2440  */
2441 static int
2442 dump_path(char *ds, char *path)
2443 {
2444         int err;
2445         objset_t *os;
2446         uint64_t root_obj;
2447 
2448         err = open_objset(ds, DMU_OST_ZFS, FTAG, &os);
2449         if (err != 0)
2450                 return (err);
2451 
2452         err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
2453         if (err != 0) {
2454                 (void) fprintf(stderr, "can't lookup root znode: %s\n",
2455                     strerror(err));
2456                 dmu_objset_disown(os, FTAG);
2457                 return (EINVAL);
2458         }
2459 
2460         (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
2461 
2462         err = dump_path_impl(os, root_obj, path);
2463 
2464         close_objset(os, FTAG);
2465         return (err);
2466 }
2467 
2468 static int
2469 dump_label(const char *dev)
2470 {
2471         int fd;
2472         vdev_label_t label;
2473         char path[MAXPATHLEN];
2474         char *buf = label.vl_vdev_phys.vp_nvlist;
2475         size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
2476         struct stat64 statbuf;
2477         uint64_t psize, ashift;
2478         boolean_t label_found = B_FALSE;
2479 
2480         (void) strlcpy(path, dev, sizeof (path));
2481         if (dev[0] == '/') {
2482                 if (strncmp(dev, ZFS_DISK_ROOTD,
2483                     strlen(ZFS_DISK_ROOTD)) == 0) {
2484                         (void) snprintf(path, sizeof (path), "%s%s",
2485                             ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD));
2486                 }
2487         } else if (stat64(path, &statbuf) != 0) {
2488                 char *s;
2489 
2490                 (void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD,
2491                     dev);
2492                 if (((s = strrchr(dev, 's')) == NULL &&
2493                     (s = strchr(dev, 'p')) == NULL) ||
2494                     !isdigit(*(s + 1)))
2495                         (void) strlcat(path, "s0", sizeof (path));
2496         }
2497 
2498         if ((fd = open64(path, O_RDONLY)) < 0) {
2499                 (void) fprintf(stderr, "cannot open '%s': %s\n", path,
2500                     strerror(errno));
2501                 exit(1);
2502         }
2503 
2504         if (fstat64(fd, &statbuf) != 0) {
2505                 (void) fprintf(stderr, "failed to stat '%s': %s\n", path,
2506                     strerror(errno));
2507                 (void) close(fd);
2508                 exit(1);
2509         }
2510 
2511         if (S_ISBLK(statbuf.st_mode)) {
2512                 (void) fprintf(stderr,
2513                     "cannot use '%s': character device required\n", path);
2514                 (void) close(fd);
2515                 exit(1);
2516         }
2517 
2518         psize = statbuf.st_size;
2519         psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
2520 
2521         for (int l = 0; l < VDEV_LABELS; l++) {
2522                 nvlist_t *config = NULL;
2523 
2524                 if (!dump_opt['q']) {
2525                         (void) printf("------------------------------------\n");
2526                         (void) printf("LABEL %d\n", l);
2527                         (void) printf("------------------------------------\n");
2528                 }
2529 
2530                 if (pread64(fd, &label, sizeof (label),
2531                     vdev_label_offset(psize, l, 0)) != sizeof (label)) {
2532                         if (!dump_opt['q'])
2533                                 (void) printf("failed to read label %d\n", l);
2534                         continue;
2535                 }
2536 
2537                 if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
2538                         if (!dump_opt['q'])
2539                                 (void) printf("failed to unpack label %d\n", l);
2540                         ashift = SPA_MINBLOCKSHIFT;
2541                 } else {
2542                         nvlist_t *vdev_tree = NULL;
2543 
2544                         if (!dump_opt['q'])
2545                                 dump_nvlist(config, 4);
2546                         if ((nvlist_lookup_nvlist(config,
2547                             ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
2548                             (nvlist_lookup_uint64(vdev_tree,
2549                             ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
2550                                 ashift = SPA_MINBLOCKSHIFT;
2551                         nvlist_free(config);
2552                         label_found = B_TRUE;
2553                 }
2554                 if (dump_opt['u'])
2555                         dump_label_uberblocks(&label, ashift);
2556         }
2557 
2558         (void) close(fd);
2559 
2560         return (label_found ? 0 : 2);
2561 }
2562 
2563 static uint64_t dataset_feature_count[SPA_FEATURES];
2564 static uint64_t remap_deadlist_count = 0;
2565 
2566 /*ARGSUSED*/
2567 static int
2568 dump_one_dir(const char *dsname, void *arg)
2569 {
2570         int error;
2571         objset_t *os;
2572 
2573         error = open_objset(dsname, DMU_OST_ANY, FTAG, &os);
2574         if (error != 0)
2575                 return (0);
2576 
2577         for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
2578                 if (!dmu_objset_ds(os)->ds_feature_inuse[f])
2579                         continue;
2580                 ASSERT(spa_feature_table[f].fi_flags &
2581                     ZFEATURE_FLAG_PER_DATASET);
2582                 dataset_feature_count[f]++;
2583         }
2584 
2585         if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) {
2586                 remap_deadlist_count++;
2587         }
2588 
2589         dump_dir(os);
2590         close_objset(os, FTAG);
2591         fuid_table_destroy();
2592         return (0);
2593 }
2594 
/*
 * Block statistics.
 *
 * One zdb_blkstats_t accumulates totals for a (level, type) cell; the
 * fields are updated by zdb_count_block().
 */
/*
 * Number of zb_psize_histogram buckets.  zdb_count_block() indexes the
 * histogram by (psize >> SPA_MINBLOCKSHIFT) and clamps the index to the
 * last bucket, which therefore collects every larger block.
 */
#define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
typedef struct zdb_blkstats {
        /* sum of BP_GET_ASIZE() over the counted blocks */
        uint64_t zb_asize;
        /* sum of BP_GET_LSIZE() over the counted blocks */
        uint64_t zb_lsize;
        /* sum of BP_GET_PSIZE() over the counted blocks */
        uint64_t zb_psize;
        /* number of blocks counted */
        uint64_t zb_count;
        /* sum of BP_COUNT_GANG() over the counted blocks */
        uint64_t zb_gangs;
        /* blocks with 2 or 3 DVAs of which at least two share a vdev */
        uint64_t zb_ditto_samevdev;
        /* block count per physical-size bucket (see PSIZE_HISTO_SIZE) */
        uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
} zdb_blkstats_t;
2608 
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 *
 * These values continue on from DMU_OT_NUMTYPES so that they can share
 * the type dimension of zdb_cb_t's zcb_type[][] with the DMU object
 * types; ZDB_OT_TOTAL is the last index of that dimension.
 */
#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
#define ZDB_OT_DITTO    (DMU_OT_NUMTYPES + 1)
#define ZDB_OT_OTHER    (DMU_OT_NUMTYPES + 2)
#define ZDB_OT_TOTAL    (DMU_OT_NUMTYPES + 3)

/*
 * Display names for the extended types, listed in the same order as the
 * ZDB_OT_* definitions above (presumably indexed by
 * type - DMU_OT_NUMTYPES; confirm at the point of use).
 */
static const char *zdb_ot_extname[] = {
        "deferred free",
        "dedup ditto",
        "other",
        "Total",
};

/*
 * Level index that zdb_count_block() uses for the "all levels" row of
 * zcb_type[][].
 */
#define ZB_TOTAL        DN_MAX_LEVELS
2625 
/*
 * State shared by the block-accounting code in this file.  The notes on
 * each field describe only the uses visible in the functions nearby.
 */
typedef struct zdb_cb {
        /* stats indexed [level or ZB_TOTAL][object type or ZDB_OT_TOTAL] */
        zdb_blkstats_t  zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
        /* space claimed by zdb_claim_removing() for the vdev being removed */
        uint64_t        zcb_removing_size;
        /* asize * (refcnt - 1) summed over non-ditto DDT entries */
        uint64_t        zcb_dedup_asize;
        /* number of non-ditto DDT phys entries seen by zdb_ddt_leak_init() */
        uint64_t        zcb_dedup_blocks;
        /* embedded block count per BPE_GET_ETYPE() */
        uint64_t        zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
        /* embedded block count per [etype][BPE_GET_PSIZE()] */
        uint64_t        zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
            [BPE_PAYLOAD_SIZE];
        /* start time that the progress line measures throughput from */
        uint64_t        zcb_start;
        /* gethrtime() value recorded when the progress line was last printed */
        hrtime_t        zcb_lastprint;
        /* total asize used to estimate the time remaining */
        uint64_t        zcb_totalasize;
        /* count of failed reads, indexed by zio error number */
        uint64_t        zcb_errors[256];
        /* reset to 0 by zdb_blkptr_cb(); other uses are not shown here */
        int             zcb_readfails;
        /* set to 1 once any non-speculative read has failed */
        int             zcb_haderrors;
        /* pool being examined; set by zdb_leak_init() */
        spa_t           *zcb_spa;
        /* per top-level vdev obsolete counts; filled in for indirect vdevs */
        uint32_t        **zcb_vd_obsolete_counts;
} zdb_cb_t;
2643 
/*
 * Account for block pointer 'bp' under object type 'type'.
 *
 * zcb   - accounting state to update
 * zilog - if non-NULL, bp is first offered to zil_bp_tree_add(); a
 *         non-zero result makes this function return without counting
 *         (presumably because the block was already recorded -- confirm
 *         against zil_bp_tree_add())
 * bp    - block pointer to count
 * type  - DMU object type or one of the ZDB_OT_* extensions; must be
 *         below ZDB_OT_TOTAL
 *
 * The block is added to four zcb_type[][] cells, embedded blocks are
 * tallied separately, and -- unless -L was given -- the block is then
 * claimed with zio_claim(), which must succeed.
 */
static void
zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
    dmu_object_type_t type)
{
        uint64_t refcnt = 0;

        ASSERT(type < ZDB_OT_TOTAL);

        if (zilog && zil_bp_tree_add(zilog, bp) != 0)
                return;

        /*
         * The four iterations update, in order:
         *   [bp level][ZDB_OT_TOTAL], [bp level][type],
         *   [ZB_TOTAL][ZDB_OT_TOTAL], [ZB_TOTAL][type].
         */
        for (int i = 0; i < 4; i++) {
                int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
                int t = (i & 1) ? type : ZDB_OT_TOTAL;
                int equal;
                zdb_blkstats_t *zb = &zcb->zcb_type[l][t];

                zb->zb_asize += BP_GET_ASIZE(bp);
                zb->zb_lsize += BP_GET_LSIZE(bp);
                zb->zb_psize += BP_GET_PSIZE(bp);
                zb->zb_count++;

                /*
                 * The histogram is only big enough to record blocks up to
                 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
                 * "other", bucket.
                 */
                unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
                idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
                zb->zb_psize_histogram[idx]++;

                zb->zb_gangs += BP_COUNT_GANG(bp);

                /* Note blocks whose copies do not all live on distinct vdevs. */
                switch (BP_GET_NDVAS(bp)) {
                case 2:
                        if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
                            DVA_GET_VDEV(&bp->blk_dva[1]))
                                zb->zb_ditto_samevdev++;
                        break;
                case 3:
                        /* number of DVA pairs that share a vdev */
                        equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
                            DVA_GET_VDEV(&bp->blk_dva[1])) +
                            (DVA_GET_VDEV(&bp->blk_dva[0]) ==
                            DVA_GET_VDEV(&bp->blk_dva[2])) +
                            (DVA_GET_VDEV(&bp->blk_dva[1]) ==
                            DVA_GET_VDEV(&bp->blk_dva[2]));
                        if (equal != 0)
                                zb->zb_ditto_samevdev++;
                        break;
                }

        }

        /* Embedded blocks are only tallied; they are never claimed. */
        if (BP_IS_EMBEDDED(bp)) {
                zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
                zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
                    [BPE_GET_PSIZE(bp)]++;
                return;
        }

        /* With -L nothing is claimed. */
        if (dump_opt['L'])
                return;

        if (BP_GET_DEDUP(bp)) {
                ddt_t *ddt;
                ddt_entry_t *dde;

                ddt = ddt_select(zcb->zcb_spa, bp);
                ddt_enter(ddt);
                dde = ddt_lookup(ddt, bp, B_FALSE);

                if (dde == NULL) {
                        refcnt = 0;
                } else {
                        /*
                         * Drop one reference for this bp; refcnt is what
                         * is left on the matching phys entry.  The entry
                         * is removed once nothing references it.
                         */
                        ddt_phys_t *ddp = ddt_phys_select(dde, bp);
                        ddt_phys_decref(ddp);
                        refcnt = ddp->ddp_refcnt;
                        if (ddt_phys_total_refcnt(dde) == 0)
                                ddt_remove(ddt, dde);
                }
                ddt_exit(ddt);
        }

        /*
         * Claim with txg 0 while dedup references remain, otherwise with
         * the pool's first txg.  NOTE(review): the exact meaning of a
         * zero txg is defined by zio_claim() and is not visible here.
         */
        VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
            refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
            bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
2731 
2732 static void
2733 zdb_blkptr_done(zio_t *zio)
2734 {
2735         spa_t *spa = zio->io_spa;
2736         blkptr_t *bp = zio->io_bp;
2737         int ioerr = zio->io_error;
2738         zdb_cb_t *zcb = zio->io_private;
2739         zbookmark_phys_t *zb = &zio->io_bookmark;
2740 
2741         abd_free(zio->io_abd);
2742 
2743         mutex_enter(&spa->spa_scrub_lock);
2744         spa->spa_scrub_inflight--;
2745         cv_broadcast(&spa->spa_scrub_io_cv);
2746 
2747         if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
2748                 char blkbuf[BP_SPRINTF_LEN];
2749 
2750                 zcb->zcb_haderrors = 1;
2751                 zcb->zcb_errors[ioerr]++;
2752 
2753                 if (dump_opt['b'] >= 2)
2754                         snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2755                 else
2756                         blkbuf[0] = '\0';
2757 
2758                 (void) printf("zdb_blkptr_cb: "
2759                     "Got error %d reading "
2760                     "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2761                     ioerr,
2762                     (u_longlong_t)zb->zb_objset,
2763                     (u_longlong_t)zb->zb_object,
2764                     (u_longlong_t)zb->zb_level,
2765                     (u_longlong_t)zb->zb_blkid,
2766                     blkbuf);
2767         }
2768         mutex_exit(&spa->spa_scrub_lock);
2769 }
2770 
2771 static int
2772 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2773     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
2774 {
2775         zdb_cb_t *zcb = arg;
2776         dmu_object_type_t type;
2777         boolean_t is_metadata;
2778 
2779         if (bp == NULL)
2780                 return (0);
2781 
2782         if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
2783                 char blkbuf[BP_SPRINTF_LEN];
2784                 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2785                 (void) printf("objset %llu object %llu "
2786                     "level %lld offset 0x%llx %s\n",
2787                     (u_longlong_t)zb->zb_objset,
2788                     (u_longlong_t)zb->zb_object,
2789                     (longlong_t)zb->zb_level,
2790                     (u_longlong_t)blkid2offset(dnp, bp, zb),
2791                     blkbuf);
2792         }
2793 
2794         if (BP_IS_HOLE(bp))
2795                 return (0);
2796 
2797         type = BP_GET_TYPE(bp);
2798 
2799         zdb_count_block(zcb, zilog, bp,
2800             (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
2801 
2802         is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
2803 
2804         if (!BP_IS_EMBEDDED(bp) &&
2805             (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
2806                 size_t size = BP_GET_PSIZE(bp);
2807                 abd_t *abd = abd_alloc(size, B_FALSE);
2808                 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
2809 
2810                 /* If it's an intent log block, failure is expected. */
2811                 if (zb->zb_level == ZB_ZIL_LEVEL)
2812                         flags |= ZIO_FLAG_SPECULATIVE;
2813 
2814                 mutex_enter(&spa->spa_scrub_lock);
2815                 while (spa->spa_scrub_inflight > max_inflight)
2816                         cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
2817                 spa->spa_scrub_inflight++;
2818                 mutex_exit(&spa->spa_scrub_lock);
2819 
2820                 zio_nowait(zio_read(NULL, spa, bp, abd, size,
2821                     zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
2822         }
2823 
2824         zcb->zcb_readfails = 0;
2825 
2826         /* only call gethrtime() every 100 blocks */
2827         static int iters;
2828         if (++iters > 100)
2829                 iters = 0;
2830         else
2831                 return (0);
2832 
2833         if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
2834                 uint64_t now = gethrtime();
2835                 char buf[10];
2836                 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
2837                 int kb_per_sec =
2838                     1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
2839                 int sec_remaining =
2840                     (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
2841 
2842                 /* make sure nicenum has enough space */
2843                 CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ);
2844 
2845                 zfs_nicenum(bytes, buf, sizeof (buf));
2846                 (void) fprintf(stderr,
2847                     "\r%5s completed (%4dMB/s) "
2848                     "estimated time remaining: %uhr %02umin %02usec        ",
2849                     buf, kb_per_sec / 1024,
2850                     sec_remaining / 60 / 60,
2851                     sec_remaining / 60 % 60,
2852                     sec_remaining % 60);
2853 
2854                 zcb->zcb_lastprint = now;
2855         }
2856 
2857         return (0);
2858 }
2859 
2860 static void
2861 zdb_leak(void *arg, uint64_t start, uint64_t size)
2862 {
2863         vdev_t *vd = arg;
2864 
2865         (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
2866             (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
2867 }
2868 
/*
 * Metaslab ops with no allocation routine.  zdb_leak_init() installs
 * this on the pool's normal and log classes so that the allocator does
 * not try to use the metaslab trees while zdb repurposes them.
 */
static metaslab_ops_t zdb_metaslab_ops = {
        NULL    /* alloc */
};
2872 
2873 static void
2874 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2875 {
2876         ddt_bookmark_t ddb;
2877         ddt_entry_t dde;
2878         int error;
2879 
2880         bzero(&ddb, sizeof (ddb));
2881         while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2882                 blkptr_t blk;
2883                 ddt_phys_t *ddp = dde.dde_phys;
2884 
2885                 if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2886                         return;
2887 
2888                 ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2889 
2890                 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2891                         if (ddp->ddp_phys_birth == 0)
2892                                 continue;
2893                         ddt_bp_create(ddb.ddb_checksum,
2894                             &dde.dde_key, ddp, &blk);
2895                         if (p == DDT_PHYS_DITTO) {
2896                                 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
2897                         } else {
2898                                 zcb->zcb_dedup_asize +=
2899                                     BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2900                                 zcb->zcb_dedup_blocks++;
2901                         }
2902                 }
2903                 if (!dump_opt['L']) {
2904                         ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2905                         ddt_enter(ddt);
2906                         VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2907                         ddt_exit(ddt);
2908                 }
2909         }
2910 
2911         ASSERT(error == ENOENT);
2912 }
2913 
/*
 * Remap callback handed to vdev_op_remap() by claim_segment_cb(): claim
 * the range [offset, offset + size) on 'vd' with the pool's first txg.
 * The claim must succeed.  'inner_offset' and 'arg' are unused.
 */
/* ARGSUSED */
static void
claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
    uint64_t size, void *arg)
{
        /*
         * This callback was called through a remap from
         * a device being removed. Therefore, the vdev that
         * this callback is applied to is a concrete
         * vdev.
         */
        ASSERT(vdev_is_concrete(vd));

        VERIFY0(metaslab_claim_impl(vd, offset, size,
            spa_first_txg(vd->vdev_spa)));
}
2930 
2931 static void
2932 claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
2933 {
2934         vdev_t *vd = arg;
2935 
2936         vdev_indirect_ops.vdev_op_remap(vd, offset, size,
2937             claim_segment_impl_cb, NULL);
2938 }
2939 
2940 /*
2941  * After accounting for all allocated blocks that are directly referenced,
2942  * we might have missed a reference to a block from a partially complete
2943  * (and thus unused) indirect mapping object. We perform a secondary pass
2944  * through the metaslabs we have already mapped and claim the destination
2945  * blocks.
2946  */
2947 static void
2948 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
2949 {
2950         if (spa->spa_vdev_removal == NULL)
2951                 return;
2952 
2953         spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2954 
2955         spa_vdev_removal_t *svr = spa->spa_vdev_removal;
2956         vdev_t *vd = svr->svr_vdev;
2957         vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
2958 
2959         for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
2960                 metaslab_t *msp = vd->vdev_ms[msi];
2961 
2962                 if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim))
2963                         break;
2964 
2965                 ASSERT0(range_tree_space(svr->svr_allocd_segs));
2966 
2967                 if (msp->ms_sm != NULL) {
2968                         VERIFY0(space_map_load(msp->ms_sm,
2969                             svr->svr_allocd_segs, SM_ALLOC));
2970 
2971                         /*
2972                          * Clear everything past what has been synced,
2973                          * because we have not allocated mappings for it yet.
2974                          */
2975                         range_tree_clear(svr->svr_allocd_segs,
2976                             vdev_indirect_mapping_max_offset(vim),
2977                             msp->ms_sm->sm_start + msp->ms_sm->sm_size -
2978                             vdev_indirect_mapping_max_offset(vim));
2979                 }
2980 
2981                 zcb->zcb_removing_size +=
2982                     range_tree_space(svr->svr_allocd_segs);
2983                 range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);
2984         }
2985 
2986         spa_config_exit(spa, SCL_CONFIG, FTAG);
2987 }
2988 
2989 /*
2990  * vm_idxp is an in-out parameter which (for indirect vdevs) is the
2991  * index in vim_entries that has the first entry in this metaslab.  On
2992  * return, it will be set to the first entry after this metaslab.
2993  */
2994 static void
2995 zdb_leak_init_ms(metaslab_t *msp, uint64_t *vim_idxp)
2996 {
2997         metaslab_group_t *mg = msp->ms_group;
2998         vdev_t *vd = mg->mg_vd;
2999         vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
3000 
3001         mutex_enter(&msp->ms_lock);
3002         metaslab_unload(msp);
3003 
3004         /*
3005          * We don't want to spend the CPU manipulating the size-ordered
3006          * tree, so clear the range_tree ops.
3007          */
3008         msp->ms_tree->rt_ops = NULL;
3009 
3010         (void) fprintf(stderr,
3011             "\rloading vdev %llu of %llu, metaslab %llu of %llu ...",
3012             (longlong_t)vd->vdev_id,
3013             (longlong_t)rvd->vdev_children,
3014             (longlong_t)msp->ms_id,
3015             (longlong_t)vd->vdev_ms_count);
3016 
3017         /*
3018          * For leak detection, we overload the metaslab ms_tree to
3019          * contain allocated segments instead of free segments. As a
3020          * result, we can't use the normal metaslab_load/unload
3021          * interfaces.
3022          */
3023         if (vd->vdev_ops == &vdev_indirect_ops) {
3024                 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3025                 for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim);
3026                     (*vim_idxp)++) {
3027                         vdev_indirect_mapping_entry_phys_t *vimep =
3028                             &vim->vim_entries[*vim_idxp];
3029                         uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
3030                         uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst);
3031                         ASSERT3U(ent_offset, >=, msp->ms_start);
3032                         if (ent_offset >= msp->ms_start + msp->ms_size)
3033                                 break;
3034 
3035                         /*
3036                          * Mappings do not cross metaslab boundaries,
3037                          * because we create them by walking the metaslabs.
3038                          */
3039                         ASSERT3U(ent_offset + ent_len, <=,
3040                             msp->ms_start + msp->ms_size);
3041                         range_tree_add(msp->ms_tree, ent_offset, ent_len);
3042                 }
3043         } else if (msp->ms_sm != NULL) {
3044                 VERIFY0(space_map_load(msp->ms_sm, msp->ms_tree, SM_ALLOC));
3045         }
3046 
3047         if (!msp->ms_loaded) {
3048                 msp->ms_loaded = B_TRUE;
3049         }
3050         mutex_exit(&msp->ms_lock);
3051 }
3052 
3053 /* ARGSUSED */
3054 static int
3055 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
3056 {
3057         zdb_cb_t *zcb = arg;
3058         spa_t *spa = zcb->zcb_spa;
3059         vdev_t *vd;
3060         const dva_t *dva = &bp->blk_dva[0];
3061 
3062         ASSERT(!dump_opt['L']);
3063         ASSERT3U(BP_GET_NDVAS(bp), ==, 1);
3064 
3065         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
3066         vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva));
3067         ASSERT3P(vd, !=, NULL);
3068         spa_config_exit(spa, SCL_VDEV, FTAG);
3069 
3070         ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0);
3071         ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL);
3072 
3073         vdev_indirect_mapping_increment_obsolete_count(
3074             vd->vdev_indirect_mapping,
3075             DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva),
3076             zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
3077 
3078         return (0);
3079 }
3080 
3081 static uint32_t *
3082 zdb_load_obsolete_counts(vdev_t *vd)
3083 {
3084         vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3085         spa_t *spa = vd->vdev_spa;
3086         spa_condensing_indirect_phys_t *scip =
3087             &spa->spa_condensing_indirect_phys;
3088         uint32_t *counts;
3089 
3090         EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL);
3091         counts = vdev_indirect_mapping_load_obsolete_counts(vim);
3092         if (vd->vdev_obsolete_sm != NULL) {
3093                 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3094                     vd->vdev_obsolete_sm);
3095         }
3096         if (scip->scip_vdev == vd->vdev_id &&
3097             scip->scip_prev_obsolete_sm_object != 0) {
3098                 space_map_t *prev_obsolete_sm = NULL;
3099                 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
3100                     scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
3101                 space_map_update(prev_obsolete_sm);
3102                 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3103                     prev_obsolete_sm);
3104                 space_map_close(prev_obsolete_sm);
3105         }
3106         return (counts);
3107 }
3108 
/*
 * Set up 'zcb' and the pool for leak detection.
 *
 * Records the pool in zcb->zcb_spa.  Unless -L was given, it then:
 *  - replaces the ops of the normal and log metaslab classes with
 *    zdb_metaslab_ops;
 *  - allocates zcb_vd_obsolete_counts, one slot per top-level vdev, and
 *    fills the slots of indirect vdevs;
 *  - initializes metaslabs for indirect vdevs and loads every metaslab
 *    of every top-level vdev through zdb_leak_init_ms();
 *  - if the pool's obsolete bpobj is open, iterates it with
 *    increment_indirect_mapping_cb().
 *
 * In all cases the dedup table is processed last by zdb_ddt_leak_init(),
 * with SCL_CONFIG held as reader.
 */
static void
zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
{
        zcb->zcb_spa = spa;

        if (!dump_opt['L']) {
                dsl_pool_t *dp = spa->spa_dsl_pool;
                vdev_t *rvd = spa->spa_root_vdev;

                /*
                 * We are going to be changing the meaning of the metaslab's
                 * ms_tree.  Ensure that the allocator doesn't try to
                 * use the tree.
                 */
                spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
                spa->spa_log_class->mc_ops = &zdb_metaslab_ops;

                /* Zero-filled, so slots of non-indirect vdevs stay NULL. */
                zcb->zcb_vd_obsolete_counts =
                    umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
                    UMEM_NOFAIL);


                for (uint64_t c = 0; c < rvd->vdev_children; c++) {
                        vdev_t *vd = rvd->vdev_child[c];
                        /* cursor into the indirect mapping; see below */
                        uint64_t vim_idx = 0;

                        ASSERT3U(c, ==, vd->vdev_id);

                        /*
                         * Note: we don't check for mapping leaks on
                         * removing vdevs because their ms_tree's are
                         * used to look for leaks in allocated space.
                         */
                        if (vd->vdev_ops == &vdev_indirect_ops) {
                                zcb->zcb_vd_obsolete_counts[c] =
                                    zdb_load_obsolete_counts(vd);

                                /*
                                 * Normally, indirect vdevs don't have any
                                 * metaslabs.  We want to set them up for
                                 * zio_claim().
                                 */
                                VERIFY0(vdev_metaslab_init(vd, 0));
                        }

                        /*
                         * vim_idx is carried across the metaslabs of this
                         * vdev; zdb_leak_init_ms() advances it for
                         * indirect vdevs.
                         */
                        for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
                                zdb_leak_init_ms(vd->vdev_ms[m], &vim_idx);
                        }
                        /* Every mapping entry must have been consumed. */
                        if (vd->vdev_ops == &vdev_indirect_ops) {
                                ASSERT3U(vim_idx, ==,
                                    vdev_indirect_mapping_num_entries(
                                    vd->vdev_indirect_mapping));
                        }
                }
                /* Finish the "\rloading ..." progress line. */
                (void) fprintf(stderr, "\n");

                if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
                        ASSERT(spa_feature_is_enabled(spa,
                            SPA_FEATURE_DEVICE_REMOVAL));
                        (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
                            increment_indirect_mapping_cb, zcb, NULL);
                }
        }

        spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

        zdb_ddt_leak_init(spa, zcb);

        spa_config_exit(spa, SCL_CONFIG, FTAG);
}
3179 
3180 static boolean_t
3181 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
3182 {
3183         boolean_t leaks = B_FALSE;
3184         vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3185         uint64_t total_leaked = 0;
3186 
3187         ASSERT(vim != NULL);
3188 
3189         for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
3190                 vdev_indirect_mapping_entry_phys_t *vimep =
3191                     &vim->vim_entries[i];
3192                 uint64_t obsolete_bytes = 0;
3193                 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
3194                 metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
3195 
3196                 /*
3197                  * This is not very efficient but it's easy to
3198                  * verify correctness.
3199                  */
3200                 for (uint64_t inner_offset = 0;
3201                     inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst);
3202                     inner_offset += 1 << vd->vdev_ashift) {
3203                         if (range_tree_contains(msp->ms_tree,
3204                             offset + inner_offset, 1 << vd->vdev_ashift)) {
3205                                 obsolete_bytes += 1 << vd->vdev_ashift;
3206                         }
3207                 }
3208 
3209                 int64_t bytes_leaked = obsolete_bytes -
3210                     zcb->zcb_vd_obsolete_counts[vd->vdev_id][i];
3211                 ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=,
3212                     zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]);
3213                 if (bytes_leaked != 0 &&
3214                     (vdev_obsolete_counts_are_precise(vd) ||
3215                     dump_opt['d'] >= 5)) {
3216                         (void) printf("obsolete indirect mapping count "
3217                             "mismatch on %llu:%llx:%llx : %llx bytes leaked\n",
3218                             (u_longlong_t)vd->vdev_id,
3219                             (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
3220                             (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
3221                             (u_longlong_t)bytes_leaked);
3222                 }
3223                 total_leaked += ABS(bytes_leaked);
3224         }
3225 
3226         if (!vdev_obsolete_counts_are_precise(vd) && total_leaked > 0) {
3227                 int pct_leaked = total_leaked * 100 /
3228                     vdev_indirect_mapping_bytes_mapped(vim);
3229                 (void) printf("cannot verify obsolete indirect mapping "
3230                     "counts of vdev %llu because precise feature was not "
3231                     "enabled when it was removed: %d%% (%llx bytes) of mapping"
3232                     "unreferenced\n",
3233                     (u_longlong_t)vd->vdev_id, pct_leaked,
3234                     (u_longlong_t)total_leaked);
3235         } else if (total_leaked > 0) {
3236                 (void) printf("obsolete indirect mapping count mismatch "
3237                     "for vdev %llu -- %llx total bytes mismatched\n",
3238                     (u_longlong_t)vd->vdev_id,
3239                     (u_longlong_t)total_leaked);
3240                 leaks |= B_TRUE;
3241         }
3242 
3243         vdev_indirect_mapping_free_obsolete_counts(vim,
3244             zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
3245         zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;
3246 
3247         return (leaks);
3248 }
3249 
3250 static boolean_t
3251 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
3252 {
3253         boolean_t leaks = B_FALSE;
3254         if (!dump_opt['L']) {
3255                 vdev_t *rvd = spa->spa_root_vdev;
3256                 for (unsigned c = 0; c < rvd->vdev_children; c++) {
3257                         vdev_t *vd = rvd->vdev_child[c];
3258                         metaslab_group_t *mg = vd->vdev_mg;
3259 
3260                         if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
3261                                 leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
3262                         }
3263 
3264                         for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3265                                 metaslab_t *msp = vd->vdev_ms[m];
3266                                 ASSERT3P(mg, ==, msp->ms_group);
3267 
3268                                 /*
3269                                  * The ms_tree has been overloaded to
3270                                  * contain allocated segments. Now that we
3271                                  * finished traversing all blocks, any
3272                                  * block that remains in the ms_tree
3273                                  * represents an allocated block that we
3274                                  * did not claim during the traversal.
3275                                  * Claimed blocks would have been removed
3276                                  * from the ms_tree.  For indirect vdevs,
3277                                  * space remaining in the tree represents
3278                                  * parts of the mapping that are not
3279                                  * referenced, which is not a bug.
3280                                  */
3281                                 if (vd->vdev_ops == &vdev_indirect_ops) {
3282                                         range_tree_vacate(msp->ms_tree,
3283                                             NULL, NULL);
3284                                 } else {
3285                                         range_tree_vacate(msp->ms_tree,
3286                                             zdb_leak, vd);
3287                                 }
3288 
3289                                 if (msp->ms_loaded) {
3290                                         msp->ms_loaded = B_FALSE;
3291                                 }
3292                         }
3293                 }
3294 
3295                 umem_free(zcb->zcb_vd_obsolete_counts,
3296                     rvd->vdev_children * sizeof (uint32_t *));
3297                 zcb->zcb_vd_obsolete_counts = NULL;
3298         }
3299         return (leaks);
3300 }
3301 
3302 /* ARGSUSED */
3303 static int
3304 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
3305 {
3306         zdb_cb_t *zcb = arg;
3307 
3308         if (dump_opt['b'] >= 5) {
3309                 char blkbuf[BP_SPRINTF_LEN];
3310                 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3311                 (void) printf("[%s] %s\n",
3312                     "deferred free", blkbuf);
3313         }
3314         zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
3315         return (0);
3316 }
3317 
3318 static int
3319 dump_block_stats(spa_t *spa)
3320 {
3321         zdb_cb_t zcb;
3322         zdb_blkstats_t *zb, *tzb;
3323         uint64_t norm_alloc, norm_space, total_alloc, total_found;
3324         int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
3325         boolean_t leaks = B_FALSE;
3326 
3327         bzero(&zcb, sizeof (zcb));
3328         (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
3329             (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
3330             (dump_opt['c'] == 1) ? "metadata " : "",
3331             dump_opt['c'] ? "checksums " : "",
3332             (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
3333             !dump_opt['L'] ? "nothing leaked " : "");
3334 
3335         /*
3336          * Load all space maps as SM_ALLOC maps, then traverse the pool
3337          * claiming each block we discover.  If the pool is perfectly
3338          * consistent, the space maps will be empty when we're done.
3339          * Anything left over is a leak; any block we can't claim (because
3340          * it's not part of any space map) is a double allocation,
3341          * reference to a freed block, or an unclaimed log block.
3342          */
3343         zdb_leak_init(spa, &zcb);
3344 
3345         /*
3346          * If there's a deferred-free bplist, process that first.
3347          */
3348         (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
3349             count_block_cb, &zcb, NULL);
3350 
3351         if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
3352                 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
3353                     count_block_cb, &zcb, NULL);
3354         }
3355 
3356         zdb_claim_removing(spa, &zcb);
3357 
3358         if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
3359                 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
3360                     spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
3361                     &zcb, NULL));
3362         }
3363 
3364         if (dump_opt['c'] > 1)
3365                 flags |= TRAVERSE_PREFETCH_DATA;
3366 
3367         zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
3368         zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
3369         zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
3370 
3371         /*
3372          * If we've traversed the data blocks then we need to wait for those
3373          * I/Os to complete. We leverage "The Godfather" zio to wait on
3374          * all async I/Os to complete.
3375          */
3376         if (dump_opt['c']) {
3377                 for (int i = 0; i < max_ncpus; i++) {
3378                         (void) zio_wait(spa->spa_async_zio_root[i]);
3379                         spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
3380                             ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
3381                             ZIO_FLAG_GODFATHER);
3382                 }
3383         }
3384 
3385         if (zcb.zcb_haderrors) {
3386                 (void) printf("\nError counts:\n\n");
3387                 (void) printf("\t%5s  %s\n", "errno", "count");
3388                 for (int e = 0; e < 256; e++) {
3389                         if (zcb.zcb_errors[e] != 0) {
3390                                 (void) printf("\t%5d  %llu\n",
3391                                     e, (u_longlong_t)zcb.zcb_errors[e]);
3392                         }
3393                 }
3394         }
3395 
3396         /*
3397          * Report any leaked segments.
3398          */
3399         leaks |= zdb_leak_fini(spa, &zcb);
3400 
3401         tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
3402 
3403         norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
3404         norm_space = metaslab_class_get_space(spa_normal_class(spa));
3405 
3406         total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
3407         total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
3408             zcb.zcb_removing_size;
3409 
3410         if (total_found == total_alloc) {
3411                 if (!dump_opt['L'])
3412                         (void) printf("\n\tNo leaks (block sum matches space"
3413                             " maps exactly)\n");
3414         } else {
3415                 (void) printf("block traversal size %llu != alloc %llu "
3416                     "(%s %lld)\n",
3417                     (u_longlong_t)total_found,
3418                     (u_longlong_t)total_alloc,
3419                     (dump_opt['L']) ? "unreachable" : "leaked",
3420                     (longlong_t)(total_alloc - total_found));
3421                 leaks = B_TRUE;
3422         }
3423 
3424         if (tzb->zb_count == 0)
3425                 return (2);
3426 
3427         (void) printf("\n");
3428         (void) printf("\tbp count:      %10llu\n",
3429             (u_longlong_t)tzb->zb_count);
3430         (void) printf("\tganged count:  %10llu\n",
3431             (longlong_t)tzb->zb_gangs);
3432         (void) printf("\tbp logical:    %10llu      avg: %6llu\n",
3433             (u_longlong_t)tzb->zb_lsize,
3434             (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
3435         (void) printf("\tbp physical:   %10llu      avg:"
3436             " %6llu     compression: %6.2f\n",
3437             (u_longlong_t)tzb->zb_psize,
3438             (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
3439             (double)tzb->zb_lsize / tzb->zb_psize);
3440         (void) printf("\tbp allocated:  %10llu      avg:"
3441             " %6llu     compression: %6.2f\n",
3442             (u_longlong_t)tzb->zb_asize,
3443             (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
3444             (double)tzb->zb_lsize / tzb->zb_asize);
3445         (void) printf("\tbp deduped:    %10llu    ref>1:"
3446             " %6llu   deduplication: %6.2f\n",
3447             (u_longlong_t)zcb.zcb_dedup_asize,
3448             (u_longlong_t)zcb.zcb_dedup_blocks,
3449             (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
3450         (void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
3451             (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
3452 
3453         for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
3454                 if (zcb.zcb_embedded_blocks[i] == 0)
3455                         continue;
3456                 (void) printf("\n");
3457                 (void) printf("\tadditional, non-pointer bps of type %u: "
3458                     "%10llu\n",
3459                     i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
3460 
3461                 if (dump_opt['b'] >= 3) {
3462                         (void) printf("\t number of (compressed) bytes:  "
3463                             "number of bps\n");
3464                         dump_histogram(zcb.zcb_embedded_histogram[i],
3465                             sizeof (zcb.zcb_embedded_histogram[i]) /
3466                             sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
3467                 }
3468         }
3469 
3470         if (tzb->zb_ditto_samevdev != 0) {
3471                 (void) printf("\tDittoed blocks on same vdev: %llu\n",
3472                     (longlong_t)tzb->zb_ditto_samevdev);
3473         }
3474 
3475         for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) {
3476                 vdev_t *vd = spa->spa_root_vdev->vdev_child[v];
3477                 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3478 
3479                 if (vim == NULL) {
3480                         continue;
3481                 }
3482 
3483                 char mem[32];
3484                 zdb_nicenum(vdev_indirect_mapping_num_entries(vim),
3485                     mem, vdev_indirect_mapping_size(vim));
3486 
3487                 (void) printf("\tindirect vdev id %llu has %llu segments "
3488                     "(%s in memory)\n",
3489                     (longlong_t)vd->vdev_id,
3490                     (longlong_t)vdev_indirect_mapping_num_entries(vim), mem);
3491         }
3492 
3493         if (dump_opt['b'] >= 2) {
3494                 int l, t, level;
3495                 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
3496                     "\t  avg\t comp\t%%Total\tType\n");
3497 
3498                 for (t = 0; t <= ZDB_OT_TOTAL; t++) {
3499                         char csize[32], lsize[32], psize[32], asize[32];
3500                         char avg[32], gang[32];
3501                         const char *typename;
3502 
3503                         /* make sure nicenum has enough space */
3504                         CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ);
3505                         CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
3506                         CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ);
3507                         CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
3508                         CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ);
3509                         CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ);
3510 
3511                         if (t < DMU_OT_NUMTYPES)
3512                                 typename = dmu_ot[t].ot_name;
3513                         else
3514                                 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
3515 
3516                         if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
3517                                 (void) printf("%6s\t%5s\t%5s\t%5s"
3518                                     "\t%5s\t%5s\t%6s\t%s\n",
3519                                     "-",
3520                                     "-",
3521                                     "-",
3522                                     "-",
3523                                     "-",
3524                                     "-",
3525                                     "-",
3526                                     typename);
3527                                 continue;
3528                         }
3529 
3530                         for (l = ZB_TOTAL - 1; l >= -1; l--) {
3531                                 level = (l == -1 ? ZB_TOTAL : l);
3532                                 zb = &zcb.zcb_type[level][t];
3533 
3534                                 if (zb->zb_asize == 0)
3535                                         continue;
3536 
3537                                 if (dump_opt['b'] < 3 && level != ZB_TOTAL)
3538                                         continue;
3539 
3540                                 if (level == 0 && zb->zb_asize ==
3541                                     zcb.zcb_type[ZB_TOTAL][t].zb_asize)
3542                                         continue;
3543 
3544                                 zdb_nicenum(zb->zb_count, csize,
3545                                     sizeof (csize));
3546                                 zdb_nicenum(zb->zb_lsize, lsize,
3547                                     sizeof (lsize));
3548                                 zdb_nicenum(zb->zb_psize, psize,
3549                                     sizeof (psize));
3550                                 zdb_nicenum(zb->zb_asize, asize,
3551                                     sizeof (asize));
3552                                 zdb_nicenum(zb->zb_asize / zb->zb_count, avg,
3553                                     sizeof (avg));
3554                                 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang));
3555 
3556                                 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
3557                                     "\t%5.2f\t%6.2f\t",
3558                                     csize, lsize, psize, asize, avg,
3559                                     (double)zb->zb_lsize / zb->zb_psize,
3560                                     100.0 * zb->zb_asize / tzb->zb_asize);
3561 
3562                                 if (level == ZB_TOTAL)
3563                                         (void) printf("%s\n", typename);
3564                                 else
3565                                         (void) printf("    L%d %s\n",
3566                                             level, typename);
3567 
3568                                 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
3569                                         (void) printf("\t number of ganged "
3570                                             "blocks: %s\n", gang);
3571                                 }
3572 
3573                                 if (dump_opt['b'] >= 4) {
3574                                         (void) printf("psize "
3575                                             "(in 512-byte sectors): "
3576                                             "number of blocks\n");
3577                                         dump_histogram(zb->zb_psize_histogram,
3578                                             PSIZE_HISTO_SIZE, 0);
3579                                 }
3580                         }
3581                 }
3582         }
3583 
3584         (void) printf("\n");
3585 
3586         if (leaks)
3587                 return (2);
3588 
3589         if (zcb.zcb_haderrors)
3590                 return (3);
3591 
3592         return (0);
3593 }
3594 
/*
 * One node of the AVL tree that zdb_ddt_add_cb() builds while simulating
 * dedup (-S): the running reference totals of every traversed block that
 * maps to the same ddt key.  dump_simulated_ddt() folds these nodes into a
 * ddt histogram and frees them.
 *
 * NOTE(review): the tree is created with ddt_entry_compare as its
 * comparator, so zdde_key presumably has to stay the first member --
 * confirm against ddt_entry_compare() before reordering fields.
 */
typedef struct zdb_ddt_entry {
        /* key produced by ddt_key_fill() from the block pointer */
        ddt_key_t       zdde_key;
        /* number of block pointers seen with this key */
        uint64_t        zdde_ref_blocks;
        /* sum of BP_GET_LSIZE() over those block pointers */
        uint64_t        zdde_ref_lsize;
        /* sum of BP_GET_PSIZE() over those block pointers */
        uint64_t        zdde_ref_psize;
        /* sum of bp_get_dsize_sync() over those block pointers */
        uint64_t        zdde_ref_dsize;
        /* AVL linkage; its offset is passed to avl_create() */
        avl_node_t      zdde_node;
} zdb_ddt_entry_t;
3603 
3604 /* ARGSUSED */
3605 static int
3606 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
3607     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
3608 {
3609         avl_tree_t *t = arg;
3610         avl_index_t where;
3611         zdb_ddt_entry_t *zdde, zdde_search;
3612 
3613         if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
3614                 return (0);
3615 
3616         if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
3617                 (void) printf("traversing objset %llu, %llu objects, "
3618                     "%lu blocks so far\n",
3619                     (u_longlong_t)zb->zb_objset,
3620                     (u_longlong_t)BP_GET_FILL(bp),
3621                     avl_numnodes(t));
3622         }
3623 
3624         if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
3625             BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
3626                 return (0);
3627 
3628         ddt_key_fill(&zdde_search.zdde_key, bp);
3629 
3630         zdde = avl_find(t, &zdde_search, &where);
3631 
3632         if (zdde == NULL) {
3633                 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
3634                 zdde->zdde_key = zdde_search.zdde_key;
3635                 avl_insert(t, zdde, where);
3636         }
3637 
3638         zdde->zdde_ref_blocks += 1;
3639         zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
3640         zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
3641         zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
3642 
3643         return (0);
3644 }
3645 
3646 static void
3647 dump_simulated_ddt(spa_t *spa)
3648 {
3649         avl_tree_t t;
3650         void *cookie = NULL;
3651         zdb_ddt_entry_t *zdde;
3652         ddt_histogram_t ddh_total;
3653         ddt_stat_t dds_total;
3654 
3655         bzero(&ddh_total, sizeof (ddh_total));
3656         bzero(&dds_total, sizeof (dds_total));
3657         avl_create(&t, ddt_entry_compare,
3658             sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
3659 
3660         spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3661 
3662         (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
3663             zdb_ddt_add_cb, &t);
3664 
3665         spa_config_exit(spa, SCL_CONFIG, FTAG);
3666 
3667         while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
3668                 ddt_stat_t dds;
3669                 uint64_t refcnt = zdde->zdde_ref_blocks;
3670                 ASSERT(refcnt != 0);
3671 
3672                 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
3673                 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
3674                 dds.dds_psize = zdde->zdde_ref_psize / refcnt;
3675                 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
3676 
3677                 dds.dds_ref_blocks = zdde->zdde_ref_blocks;
3678                 dds.dds_ref_lsize = zdde->zdde_ref_lsize;
3679                 dds.dds_ref_psize = zdde->zdde_ref_psize;
3680                 dds.dds_ref_dsize = zdde->zdde_ref_dsize;
3681 
3682                 ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
3683                     &dds, 0);
3684 
3685                 umem_free(zdde, sizeof (*zdde));
3686         }
3687 
3688         avl_destroy(&t);
3689 
3690         ddt_histogram_stat(&dds_total, &ddh_total);
3691 
3692         (void) printf("Simulated DDT histogram:\n");
3693 
3694         zpool_dump_ddt(&dds_total, &ddh_total);
3695 
3696         dump_dedup_ratio(&dds_total);
3697 }
3698 
3699 static int
3700 verify_device_removal_feature_counts(spa_t *spa)
3701 {
3702         uint64_t dr_feature_refcount = 0;
3703         uint64_t oc_feature_refcount = 0;
3704         uint64_t indirect_vdev_count = 0;
3705         uint64_t precise_vdev_count = 0;
3706         uint64_t obsolete_counts_object_count = 0;
3707         uint64_t obsolete_sm_count = 0;
3708         uint64_t obsolete_counts_count = 0;
3709         uint64_t scip_count = 0;
3710         uint64_t obsolete_bpobj_count = 0;
3711         int ret = 0;
3712 
3713         spa_condensing_indirect_phys_t *scip =
3714             &spa->spa_condensing_indirect_phys;
3715         if (scip->scip_next_mapping_object != 0) {
3716                 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev];
3717                 ASSERT(scip->scip_prev_obsolete_sm_object != 0);
3718                 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
3719 
3720                 (void) printf("Condensing indirect vdev %llu: new mapping "
3721                     "object %llu, prev obsolete sm %llu\n",
3722                     (u_longlong_t)scip->scip_vdev,
3723                     (u_longlong_t)scip->scip_next_mapping_object,
3724                     (u_longlong_t)scip->scip_prev_obsolete_sm_object);
3725                 if (scip->scip_prev_obsolete_sm_object != 0) {
3726                         space_map_t *prev_obsolete_sm = NULL;
3727                         VERIFY0(space_map_open(&prev_obsolete_sm,
3728                             spa->spa_meta_objset,
3729                             scip->scip_prev_obsolete_sm_object,
3730                             0, vd->vdev_asize, 0));
3731                         space_map_update(prev_obsolete_sm);
3732                         dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
3733                         (void) printf("\n");
3734                         space_map_close(prev_obsolete_sm);
3735                 }
3736 
3737                 scip_count += 2;
3738         }
3739 
3740         for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
3741                 vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
3742                 vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
3743 
3744                 if (vic->vic_mapping_object != 0) {
3745                         ASSERT(vd->vdev_ops == &vdev_indirect_ops ||
3746                             vd->vdev_removing);
3747                         indirect_vdev_count++;
3748 
3749                         if (vd->vdev_indirect_mapping->vim_havecounts) {
3750                                 obsolete_counts_count++;
3751                         }
3752                 }
3753                 if (vdev_obsolete_counts_are_precise(vd)) {
3754                         ASSERT(vic->vic_mapping_object != 0);
3755                         precise_vdev_count++;
3756                 }
3757                 if (vdev_obsolete_sm_object(vd) != 0) {
3758                         ASSERT(vic->vic_mapping_object != 0);
3759                         obsolete_sm_count++;
3760                 }
3761         }
3762 
3763         (void) feature_get_refcount(spa,
3764             &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL],
3765             &dr_feature_refcount);
3766         (void) feature_get_refcount(spa,
3767             &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS],
3768             &oc_feature_refcount);
3769 
3770         if (dr_feature_refcount != indirect_vdev_count) {
3771                 ret = 1;
3772                 (void) printf("Number of indirect vdevs (%llu) " \
3773                     "does not match feature count (%llu)\n",
3774                     (u_longlong_t)indirect_vdev_count,
3775                     (u_longlong_t)dr_feature_refcount);
3776         } else {
3777                 (void) printf("Verified device_removal feature refcount " \
3778                     "of %llu is correct\n",
3779                     (u_longlong_t)dr_feature_refcount);
3780         }
3781 
3782         if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
3783             DMU_POOL_OBSOLETE_BPOBJ) == 0) {
3784                 obsolete_bpobj_count++;
3785         }
3786 
3787 
3788         obsolete_counts_object_count = precise_vdev_count;
3789         obsolete_counts_object_count += obsolete_sm_count;
3790         obsolete_counts_object_count += obsolete_counts_count;
3791         obsolete_counts_object_count += scip_count;
3792         obsolete_counts_object_count += obsolete_bpobj_count;
3793         obsolete_counts_object_count += remap_deadlist_count;
3794 
3795         if (oc_feature_refcount != obsolete_counts_object_count) {
3796                 ret = 1;
3797                 (void) printf("Number of obsolete counts objects (%llu) " \
3798                     "does not match feature count (%llu)\n",
3799                     (u_longlong_t)obsolete_counts_object_count,
3800                     (u_longlong_t)oc_feature_refcount);
3801                 (void) printf("pv:%llu os:%llu oc:%llu sc:%llu "
3802                     "ob:%llu rd:%llu\n",
3803                     (u_longlong_t)precise_vdev_count,
3804                     (u_longlong_t)obsolete_sm_count,
3805                     (u_longlong_t)obsolete_counts_count,
3806                     (u_longlong_t)scip_count,
3807                     (u_longlong_t)obsolete_bpobj_count,
3808                     (u_longlong_t)remap_deadlist_count);
3809         } else {
3810                 (void) printf("Verified indirect_refcount feature refcount " \
3811                     "of %llu is correct\n",
3812                     (u_longlong_t)oc_feature_refcount);
3813         }
3814         return (ret);
3815 }
3816 
3817 static void
3818 dump_zpool(spa_t *spa)
3819 {
3820         dsl_pool_t *dp = spa_get_dsl(spa);
3821         int rc = 0;
3822 
3823         if (dump_opt['S']) {
3824                 dump_simulated_ddt(spa);
3825                 return;
3826         }
3827 
3828         if (!dump_opt['e'] && dump_opt['C'] > 1) {
3829                 (void) printf("\nCached configuration:\n");
3830                 dump_nvlist(spa->spa_config, 8);
3831         }
3832 
3833         if (dump_opt['C'])
3834                 dump_config(spa);
3835 
3836         if (dump_opt['u'])
3837                 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
3838 
3839         if (dump_opt['D'])
3840                 dump_all_ddts(spa);
3841 
3842         if (dump_opt['d'] > 2 || dump_opt['m'])
3843                 dump_metaslabs(spa);
3844         if (dump_opt['M'])
3845                 dump_metaslab_groups(spa);
3846 
3847         if (dump_opt['d'] || dump_opt['i']) {
3848                 dump_dir(dp->dp_meta_objset);
3849                 if (dump_opt['d'] >= 3) {
3850                         dsl_pool_t *dp = spa->spa_dsl_pool;
3851                         dump_full_bpobj(&spa->spa_deferred_bpobj,
3852                             "Deferred frees", 0);
3853                         if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
3854                                 dump_full_bpobj(&dp->dp_free_bpobj,
3855                                     "Pool snapshot frees", 0);
3856                         }
3857                         if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
3858                                 ASSERT(spa_feature_is_enabled(spa,
3859                                     SPA_FEATURE_DEVICE_REMOVAL));
3860                                 dump_full_bpobj(&dp->dp_obsolete_bpobj,
3861                                     "Pool obsolete blocks", 0);
3862                         }
3863 
3864                         if (spa_feature_is_active(spa,
3865                             SPA_FEATURE_ASYNC_DESTROY)) {
3866                                 dump_bptree(spa->spa_meta_objset,
3867                                     dp->dp_bptree_obj,
3868                                     "Pool dataset frees");
3869                         }
3870                         dump_dtl(spa->spa_root_vdev, 0);
3871                 }
3872                 (void) dmu_objset_find(spa_name(spa), dump_one_dir,
3873                     NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
3874 
3875                 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
3876                         uint64_t refcount;
3877 
3878                         if (!(spa_feature_table[f].fi_flags &
3879                             ZFEATURE_FLAG_PER_DATASET) ||
3880                             !spa_feature_is_enabled(spa, f)) {
3881                                 ASSERT0(dataset_feature_count[f]);
3882                                 continue;
3883                         }
3884                         (void) feature_get_refcount(spa,
3885                             &spa_feature_table[f], &refcount);
3886                         if (dataset_feature_count[f] != refcount) {
3887                                 (void) printf("%s feature refcount mismatch: "
3888                                     "%lld datasets != %lld refcount\n",
3889                                     spa_feature_table[f].fi_uname,
3890                                     (longlong_t)dataset_feature_count[f],
3891                                     (longlong_t)refcount);
3892                                 rc = 2;
3893                         } else {
3894                                 (void) printf("Verified %s feature refcount "
3895                                     "of %llu is correct\n",
3896                                     spa_feature_table[f].fi_uname,
3897                                     (longlong_t)refcount);
3898                         }
3899                 }
3900 
3901                 if (rc == 0) {
3902                         rc = verify_device_removal_feature_counts(spa);
3903                 }
3904         }
3905         if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
3906                 rc = dump_block_stats(spa);
3907 
3908         if (rc == 0)
3909                 rc = verify_spacemap_refcounts(spa);
3910 
3911         if (dump_opt['s'])
3912                 show_pool_stats(spa);
3913 
3914         if (dump_opt['h'])
3915                 dump_history(spa);
3916 
3917         if (rc != 0) {
3918                 dump_debug_buffer();
3919                 exit(rc);
3920         }
3921 }
3922 
/*
 * Option bits accumulated into the 'flags' word of zdb_read_block() and
 * passed on to the zdb_dump_*() / zdb_print_blkptr() helpers.
 */
#define ZDB_FLAG_CHECKSUM       0x0001  /* takes an argument; not acted on in the code visible here */
#define ZDB_FLAG_DECOMPRESS     0x0002  /* try to decompress the data before dumping it */
#define ZDB_FLAG_BSWAP          0x0004  /* byteswap 64-bit words before printing */
#define ZDB_FLAG_GBH            0x0008  /* set the gang bit on the DVA; dump as a gang block header */
#define ZDB_FLAG_INDIRECT       0x0010  /* dump the data as an array of block pointers */
#define ZDB_FLAG_PHYS           0x0020  /* NOTE(review): not referenced by the readers below - confirm use */
#define ZDB_FLAG_RAW            0x0040  /* write the data unformatted to stdout */
#define ZDB_FLAG_PRINT_BLKPTR   0x0080  /* decode one block pointer at a given offset */

/*
 * Maps a flag character (indexed as an unsigned char) to its ZDB_FLAG_* bit.
 * A zero entry means the character is not a recognized flag.  The entries
 * are filled in by main() before block descriptors are parsed.
 */
static int flagbits[256];
3933 
3934 static void
3935 zdb_print_blkptr(blkptr_t *bp, int flags)
3936 {
3937         char blkbuf[BP_SPRINTF_LEN];
3938 
3939         if (flags & ZDB_FLAG_BSWAP)
3940                 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
3941 
3942         snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3943         (void) printf("%s\n", blkbuf);
3944 }
3945 
3946 static void
3947 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
3948 {
3949         int i;
3950 
3951         for (i = 0; i < nbps; i++)
3952                 zdb_print_blkptr(&bp[i], flags);
3953 }
3954 
3955 static void
3956 zdb_dump_gbh(void *buf, int flags)
3957 {
3958         zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
3959 }
3960 
3961 static void
3962 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
3963 {
3964         if (flags & ZDB_FLAG_BSWAP)
3965                 byteswap_uint64_array(buf, size);
3966         (void) write(1, buf, size);
3967 }
3968 
3969 static void
3970 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
3971 {
3972         uint64_t *d = (uint64_t *)buf;
3973         unsigned nwords = size / sizeof (uint64_t);
3974         int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
3975         unsigned i, j;
3976         const char *hdr;
3977         char *c;
3978 
3979 
3980         if (do_bswap)
3981                 hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
3982         else
3983                 hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
3984 
3985         (void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
3986 
3987         for (i = 0; i < nwords; i += 2) {
3988                 (void) printf("%06llx:  %016llx  %016llx  ",
3989                     (u_longlong_t)(i * sizeof (uint64_t)),
3990                     (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
3991                     (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
3992 
3993                 c = (char *)&d[i];
3994                 for (j = 0; j < 2 * sizeof (uint64_t); j++)
3995                         (void) printf("%c", isprint(c[j]) ? c[j] : '.');
3996                 (void) printf("\n");
3997         }
3998 }
3999 
4000 /*
4001  * There are two acceptable formats:
4002  *      leaf_name         - For example: c1t0d0 or /tmp/ztest.0a
4003  *      child[.child]*    - For example: 0.1.1
4004  *
4005  * The second form can be used to specify arbitrary vdevs anywhere
4006  * in the heirarchy.  For example, in a pool with a mirror of
4007  * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
4008  */
4009 static vdev_t *
4010 zdb_vdev_lookup(vdev_t *vdev, const char *path)
4011 {
4012         char *s, *p, *q;
4013         unsigned i;
4014 
4015         if (vdev == NULL)
4016                 return (NULL);
4017 
4018         /* First, assume the x.x.x.x format */
4019         i = strtoul(path, &s, 10);
4020         if (s == path || (s && *s != '.' && *s != '\0'))
4021                 goto name;
4022         if (i >= vdev->vdev_children)
4023                 return (NULL);
4024 
4025         vdev = vdev->vdev_child[i];
4026         if (*s == '\0')
4027                 return (vdev);
4028         return (zdb_vdev_lookup(vdev, s+1));
4029 
4030 name:
4031         for (i = 0; i < vdev->vdev_children; i++) {
4032                 vdev_t *vc = vdev->vdev_child[i];
4033 
4034                 if (vc->vdev_path == NULL) {
4035                         vc = zdb_vdev_lookup(vc, path);
4036                         if (vc == NULL)
4037                                 continue;
4038                         else
4039                                 return (vc);
4040                 }
4041 
4042                 p = strrchr(vc->vdev_path, '/');
4043                 p = p ? p + 1 : vc->vdev_path;
4044                 q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
4045 
4046                 if (strcmp(vc->vdev_path, path) == 0)
4047                         return (vc);
4048                 if (strcmp(p, path) == 0)
4049                         return (vc);
4050                 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
4051                         return (vc);
4052         }
4053 
4054         return (NULL);
4055 }
4056 
/*
 * Adapter that gives random_get_pseudo_bytes() the three-argument callback
 * signature expected by abd_iterate_func(): fills 'len' bytes at 'buf' and
 * returns that call's result.  The third argument is ignored.
 */
/* ARGSUSED */
static int
random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
{
        int err = random_get_pseudo_bytes(buf, len);

        return (err);
}
4063 
4064 /*
4065  * Read a block from a pool and print it out.  The syntax of the
4066  * block descriptor is:
4067  *
4068  *      pool:vdev_specifier:offset:size[:flags]
4069  *
4070  *      pool           - The name of the pool you wish to read from
4071  *      vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
4072  *      offset         - offset, in hex, in bytes
4073  *      size           - Amount of data to read, in hex, in bytes
4074  *      flags          - A string of characters specifying options
4075  *               b: Decode a blkptr at given offset within block
4076  *              *c: Calculate and display checksums
4077  *               d: Decompress data before dumping
4078  *               e: Byteswap data before dumping
4079  *               g: Display data as a gang block header
4080  *               i: Display as an indirect block
4081  *               p: Do I/O to physical offset
4082  *               r: Dump raw data to stdout
4083  *
4084  *              * = not yet implemented
4085  */
4086 static void
4087 zdb_read_block(char *thing, spa_t *spa)
4088 {
4089         blkptr_t blk, *bp = &blk;
4090         dva_t *dva = bp->blk_dva;
4091         int flags = 0;
4092         uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
4093         zio_t *zio;
4094         vdev_t *vd;
4095         abd_t *pabd;
4096         void *lbuf, *buf;
4097         const char *s, *vdev;
4098         char *p, *dup, *flagstr;
4099         int i, error;
4100 
4101         dup = strdup(thing);
4102         s = strtok(dup, ":");
4103         vdev = s ? s : "";
4104         s = strtok(NULL, ":");
4105         offset = strtoull(s ? s : "", NULL, 16);
4106         s = strtok(NULL, ":");
4107         size = strtoull(s ? s : "", NULL, 16);
4108         s = strtok(NULL, ":");
4109         if (s)
4110                 flagstr = strdup(s);
4111         else
4112                 flagstr = strdup("");
4113 
4114         s = NULL;
4115         if (size == 0)
4116                 s = "size must not be zero";
4117         if (!IS_P2ALIGNED(size, DEV_BSIZE))
4118                 s = "size must be a multiple of sector size";
4119         if (!IS_P2ALIGNED(offset, DEV_BSIZE))
4120                 s = "offset must be a multiple of sector size";
4121         if (s) {
4122                 (void) printf("Invalid block specifier: %s  - %s\n", thing, s);
4123                 free(dup);
4124                 return;
4125         }
4126 
4127         for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
4128                 for (i = 0; flagstr[i]; i++) {
4129                         int bit = flagbits[(uchar_t)flagstr[i]];
4130 
4131                         if (bit == 0) {
4132                                 (void) printf("***Invalid flag: %c\n",
4133                                     flagstr[i]);
4134                                 continue;
4135                         }
4136                         flags |= bit;
4137 
4138                         /* If it's not something with an argument, keep going */
4139                         if ((bit & (ZDB_FLAG_CHECKSUM |
4140                             ZDB_FLAG_PRINT_BLKPTR)) == 0)
4141                                 continue;
4142 
4143                         p = &flagstr[i + 1];
4144                         if (bit == ZDB_FLAG_PRINT_BLKPTR)
4145                                 blkptr_offset = strtoull(p, &p, 16);
4146                         if (*p != ':' && *p != '\0') {
4147                                 (void) printf("***Invalid flag arg: '%s'\n", s);
4148                                 free(dup);
4149                                 return;
4150                         }
4151                 }
4152         }
4153         free(flagstr);
4154 
4155         vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
4156         if (vd == NULL) {
4157                 (void) printf("***Invalid vdev: %s\n", vdev);
4158                 free(dup);
4159                 return;
4160         } else {
4161                 if (vd->vdev_path)
4162                         (void) fprintf(stderr, "Found vdev: %s\n",
4163                             vd->vdev_path);
4164                 else
4165                         (void) fprintf(stderr, "Found vdev type: %s\n",
4166                             vd->vdev_ops->vdev_op_type);
4167         }
4168 
4169         psize = size;
4170         lsize = size;
4171 
4172         pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
4173         lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
4174 
4175         BP_ZERO(bp);
4176 
4177         DVA_SET_VDEV(&dva[0], vd->vdev_id);
4178         DVA_SET_OFFSET(&dva[0], offset);
4179         DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
4180         DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
4181 
4182         BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
4183 
4184         BP_SET_LSIZE(bp, lsize);
4185         BP_SET_PSIZE(bp, psize);
4186         BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
4187         BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
4188         BP_SET_TYPE(bp, DMU_OT_NONE);
4189         BP_SET_LEVEL(bp, 0);
4190         BP_SET_DEDUP(bp, 0);
4191         BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
4192 
4193         spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
4194         zio = zio_root(spa, NULL, NULL, 0);
4195 
4196         if (vd == vd->vdev_top) {
4197                 /*
4198                  * Treat this as a normal block read.
4199                  */
4200                 zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
4201                     ZIO_PRIORITY_SYNC_READ,
4202                     ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
4203         } else {
4204                 /*
4205                  * Treat this as a vdev child I/O.
4206                  */
4207                 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
4208                     psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
4209                     ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
4210                     ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
4211                     ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
4212                     NULL, NULL));
4213         }
4214 
4215         error = zio_wait(zio);
4216         spa_config_exit(spa, SCL_STATE, FTAG);
4217 
4218         if (error) {
4219                 (void) printf("Read of %s failed, error: %d\n", thing, error);
4220                 goto out;
4221         }
4222 
4223         if (flags & ZDB_FLAG_DECOMPRESS) {
4224                 /*
4225                  * We don't know how the data was compressed, so just try
4226                  * every decompress function at every inflated blocksize.
4227                  */
4228                 enum zio_compress c;
4229                 void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
4230                 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
4231 
4232                 abd_copy_to_buf(pbuf2, pabd, psize);
4233 
4234                 VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
4235                     random_get_pseudo_bytes_cb, NULL));
4236 
4237                 VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
4238                     SPA_MAXBLOCKSIZE - psize));
4239 
4240                 for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
4241                     lsize -= SPA_MINBLOCKSIZE) {
4242                         for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
4243                                 if (zio_decompress_data(c, pabd,
4244                                     lbuf, psize, lsize) == 0 &&
4245                                     zio_decompress_data_buf(c, pbuf2,
4246                                     lbuf2, psize, lsize) == 0 &&
4247                                     bcmp(lbuf, lbuf2, lsize) == 0)
4248                                         break;
4249                         }
4250                         if (c != ZIO_COMPRESS_FUNCTIONS)
4251                                 break;
4252                         lsize -= SPA_MINBLOCKSIZE;
4253                 }
4254 
4255                 umem_free(pbuf2, SPA_MAXBLOCKSIZE);
4256                 umem_free(lbuf2, SPA_MAXBLOCKSIZE);
4257 
4258                 if (lsize <= psize) {
4259                         (void) printf("Decompress of %s failed\n", thing);
4260                         goto out;
4261                 }
4262                 buf = lbuf;
4263                 size = lsize;
4264         } else {
4265                 buf = abd_to_buf(pabd);
4266                 size = psize;
4267         }
4268 
4269         if (flags & ZDB_FLAG_PRINT_BLKPTR)
4270                 zdb_print_blkptr((blkptr_t *)(void *)
4271                     ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
4272         else if (flags & ZDB_FLAG_RAW)
4273                 zdb_dump_block_raw(buf, size, flags);
4274         else if (flags & ZDB_FLAG_INDIRECT)
4275                 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
4276                     flags);
4277         else if (flags & ZDB_FLAG_GBH)
4278                 zdb_dump_gbh(buf, flags);
4279         else
4280                 zdb_dump_block(thing, buf, size, flags);
4281 
4282 out:
4283         abd_free(pabd);
4284         umem_free(lbuf, SPA_MAXBLOCKSIZE);
4285         free(dup);
4286 }
4287 
4288 static void
4289 zdb_embedded_block(char *thing)
4290 {
4291         blkptr_t bp;
4292         unsigned long long *words = (void *)&bp;
4293         char buf[SPA_MAXBLOCKSIZE];
4294         int err;
4295 
4296         bzero(&bp, sizeof (bp));
4297         err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:"
4298             "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
4299             words + 0, words + 1, words + 2, words + 3,
4300             words + 4, words + 5, words + 6, words + 7,
4301             words + 8, words + 9, words + 10, words + 11,
4302             words + 12, words + 13, words + 14, words + 15);
4303         if (err != 16) {
4304                 (void) printf("invalid input format\n");
4305                 exit(1);
4306         }
4307         ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE);
4308         err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp));
4309         if (err != 0) {
4310                 (void) printf("decode failed: %u\n", err);
4311                 exit(1);
4312         }
4313         zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0);
4314 }
4315 
4316 static boolean_t
4317 pool_match(nvlist_t *cfg, char *tgt)
4318 {
4319         uint64_t v, guid = strtoull(tgt, NULL, 0);
4320         char *s;
4321 
4322         if (guid != 0) {
4323                 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
4324                         return (v == guid);
4325         } else {
4326                 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
4327                         return (strcmp(s, tgt) == 0);
4328         }
4329         return (B_FALSE);
4330 }
4331 
4332 static char *
4333 find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
4334 {
4335         nvlist_t *pools;
4336         nvlist_t *match = NULL;
4337         char *name = NULL;
4338         char *sepp = NULL;
4339         char sep = '\0';
4340         int count = 0;
4341         importargs_t args;
4342 
4343         bzero(&args, sizeof (args));
4344         args.paths = dirc;
4345         args.path = dirv;
4346         args.can_be_active = B_TRUE;
4347 
4348         if ((sepp = strpbrk(*target, "/@")) != NULL) {
4349                 sep = *sepp;
4350                 *sepp = '\0';
4351         }
4352 
4353         pools = zpool_search_import(g_zfs, &args);
4354 
4355         if (pools != NULL) {
4356                 nvpair_t *elem = NULL;
4357                 while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
4358                         verify(nvpair_value_nvlist(elem, configp) == 0);
4359                         if (pool_match(*configp, *target)) {
4360                                 count++;
4361                                 if (match != NULL) {
4362                                         /* print previously found config */
4363                                         if (name != NULL) {
4364                                                 (void) printf("%s\n", name);
4365                                                 dump_nvlist(match, 8);
4366                                                 name = NULL;
4367                                         }
4368                                         (void) printf("%s\n",
4369                                             nvpair_name(elem));
4370                                         dump_nvlist(*configp, 8);
4371                                 } else {
4372                                         match = *configp;
4373                                         name = nvpair_name(elem);
4374                                 }
4375                         }
4376                 }
4377         }
4378         if (count > 1)
4379                 (void) fatal("\tMatched %d pools - use pool GUID "
4380                     "instead of pool name or \n"
4381                     "\tpool name part of a dataset name to select pool", count);
4382 
4383         if (sepp)
4384                 *sepp = sep;
4385         /*
4386          * If pool GUID was specified for pool id, replace it with pool name
4387          */
4388         if (name && (strstr(*target, name) != *target)) {
4389                 int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
4390 
4391                 *target = umem_alloc(sz, UMEM_NOFAIL);
4392                 (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
4393         }
4394 
4395         *configp = name ? match : NULL;
4396 
4397         return (name);
4398 }
4399 
4400 int
4401 main(int argc, char **argv)
4402 {
4403         int c;
4404         struct rlimit rl = { 1024, 1024 };
4405         spa_t *spa = NULL;
4406         objset_t *os = NULL;
4407         int dump_all = 1;
4408         int verbose = 0;
4409         int error = 0;
4410         char **searchdirs = NULL;
4411         int nsearch = 0;
4412         char *target;
4413         nvlist_t *policy = NULL;
4414         uint64_t max_txg = UINT64_MAX;
4415         int flags = ZFS_IMPORT_MISSING_LOG;
4416         int rewind = ZPOOL_NEVER_REWIND;
4417         char *spa_config_path_env;
4418         boolean_t target_is_spa = B_TRUE;
4419 
4420         (void) setrlimit(RLIMIT_NOFILE, &rl);
4421         (void) enable_extended_FILE_stdio(-1, -1);
4422 
4423         dprintf_setup(&argc, argv);
4424 
4425         /*
4426          * If there is an environment variable SPA_CONFIG_PATH it overrides
4427          * default spa_config_path setting. If -U flag is specified it will
4428          * override this environment variable settings once again.
4429          */
4430         spa_config_path_env = getenv("SPA_CONFIG_PATH");
4431         if (spa_config_path_env != NULL)
4432                 spa_config_path = spa_config_path_env;
4433 
4434         while ((c = getopt(argc, argv,
4435             "AbcCdDeEFGhiI:lLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
4436                 switch (c) {
4437                 case 'b':
4438                 case 'c':
4439                 case 'C':
4440                 case 'd':
4441                 case 'D':
4442                 case 'E':
4443                 case 'G':
4444                 case 'h':
4445                 case 'i':
4446                 case 'l':
4447                 case 'm':
4448                 case 'M':
4449                 case 'O':
4450                 case 'R':
4451                 case 's':
4452                 case 'S':
4453                 case 'u':
4454                         dump_opt[c]++;
4455                         dump_all = 0;
4456                         break;
4457                 case 'A':
4458                 case 'e':
4459                 case 'F':
4460                 case 'L':
4461                 case 'P':
4462                 case 'q':
4463                 case 'X':
4464                         dump_opt[c]++;
4465                         break;
4466                 /* NB: Sort single match options below. */
4467                 case 'I':
4468                         max_inflight = strtoull(optarg, NULL, 0);
4469                         if (max_inflight == 0) {
4470                                 (void) fprintf(stderr, "maximum number "
4471                                     "of inflight I/Os must be greater "
4472                                     "than 0\n");
4473                                 usage();
4474                         }
4475                         break;
4476                 case 'o':
4477                         error = set_global_var(optarg);
4478                         if (error != 0)
4479                                 usage();
4480                         break;
4481                 case 'p':
4482                         if (searchdirs == NULL) {
4483                                 searchdirs = umem_alloc(sizeof (char *),
4484                                     UMEM_NOFAIL);
4485                         } else {
4486                                 char **tmp = umem_alloc((nsearch + 1) *
4487                                     sizeof (char *), UMEM_NOFAIL);
4488                                 bcopy(searchdirs, tmp, nsearch *
4489                                     sizeof (char *));
4490                                 umem_free(searchdirs,
4491                                     nsearch * sizeof (char *));
4492                                 searchdirs = tmp;
4493                         }
4494                         searchdirs[nsearch++] = optarg;
4495                         break;
4496                 case 't':
4497                         max_txg = strtoull(optarg, NULL, 0);
4498                         if (max_txg < TXG_INITIAL) {
4499                                 (void) fprintf(stderr, "incorrect txg "
4500                                     "specified: %s\n", optarg);
4501                                 usage();
4502                         }
4503                         break;
4504                 case 'U':
4505                         spa_config_path = optarg;
4506                         if (spa_config_path[0] != '/') {
4507                                 (void) fprintf(stderr,
4508                                     "cachefile must be an absolute path "
4509                                     "(i.e. start with a slash)\n");
4510                                 usage();
4511                         }
4512                         break;
4513                 case 'v':
4514                         verbose++;
4515                         break;
4516                 case 'V':
4517                         flags = ZFS_IMPORT_VERBATIM;
4518                         break;
4519                 case 'x':
4520                         vn_dumpdir = optarg;
4521                         break;
4522                 default:
4523                         usage();
4524                         break;
4525                 }
4526         }
4527 
4528         if (!dump_opt['e'] && searchdirs != NULL) {
4529                 (void) fprintf(stderr, "-p option requires use of -e\n");
4530                 usage();
4531         }
4532 
4533         /*
4534          * ZDB does not typically re-read blocks; therefore limit the ARC
4535          * to 256 MB, which can be used entirely for metadata.
4536          */
4537         zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
4538 
4539         /*
4540          * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
4541          * "zdb -b" uses traversal prefetch which uses async reads.
4542          * For good performance, let several of them be active at once.
4543          */
4544         zfs_vdev_async_read_max_active = 10;
4545 
4546         /*
4547          * Disable reference tracking for better performance.
4548          */
4549         reference_tracking_enable = B_FALSE;
4550 
4551         /*
4552          * Do not fail spa_load when spa_load_verify fails. This is needed
4553          * to load non-idle pools.
4554          */
4555         spa_load_verify_dryrun = B_TRUE;
4556 
4557         kernel_init(FREAD);
4558         g_zfs = libzfs_init();
4559         ASSERT(g_zfs != NULL);
4560 
4561         if (dump_all)
4562                 verbose = MAX(verbose, 1);
4563 
4564         for (c = 0; c < 256; c++) {
4565                 if (dump_all && strchr("AeEFlLOPRSX", c) == NULL)
4566                         dump_opt[c] = 1;
4567                 if (dump_opt[c])
4568                         dump_opt[c] += verbose;
4569         }
4570 
4571         aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
4572         zfs_recover = (dump_opt['A'] > 1);
4573 
4574         argc -= optind;
4575         argv += optind;
4576 
4577         if (argc < 2 && dump_opt['R'])
4578                 usage();
4579 
4580         if (dump_opt['E']) {
4581                 if (argc != 1)
4582                         usage();
4583                 zdb_embedded_block(argv[0]);
4584                 return (0);
4585         }
4586 
4587         if (argc < 1) {
4588                 if (!dump_opt['e'] && dump_opt['C']) {
4589                         dump_cachefile(spa_config_path);
4590                         return (0);
4591                 }
4592                 usage();
4593         }
4594 
4595         if (dump_opt['l'])
4596                 return (dump_label(argv[0]));
4597 
4598         if (dump_opt['O']) {
4599                 if (argc != 2)
4600                         usage();
4601                 dump_opt['v'] = verbose + 3;
4602                 return (dump_path(argv[0], argv[1]));
4603         }
4604 
4605         if (dump_opt['X'] || dump_opt['F'])
4606                 rewind = ZPOOL_DO_REWIND |
4607                     (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
4608 
4609         if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
4610             nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
4611             nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
4612                 fatal("internal error: %s", strerror(ENOMEM));
4613 
4614         error = 0;
4615         target = argv[0];
4616 
4617         if (dump_opt['e']) {
4618                 nvlist_t *cfg = NULL;
4619                 char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
4620 
4621                 error = ENOENT;
4622                 if (name) {
4623                         if (dump_opt['C'] > 1) {
4624                                 (void) printf("\nConfiguration for import:\n");
4625                                 dump_nvlist(cfg, 8);
4626                         }
4627                         if (nvlist_add_nvlist(cfg,
4628                             ZPOOL_REWIND_POLICY, policy) != 0) {
4629                                 fatal("can't open '%s': %s",
4630                                     target, strerror(ENOMEM));
4631                         }
4632                         error = spa_import(name, cfg, NULL, flags);
4633                 }
4634         }
4635 
4636         if (strpbrk(target, "/@") != NULL) {
4637                 size_t targetlen;
4638 
4639                 target_is_spa = B_FALSE;
4640                 /*
4641                  * Remove any trailing slash.  Later code would get confused
4642                  * by it, but we want to allow it so that "pool/" can
4643                  * indicate that we want to dump the topmost filesystem,
4644                  * rather than the whole pool.
4645                  */
4646                 targetlen = strlen(target);
4647                 if (targetlen != 0 && target[targetlen - 1] == '/')
4648                         target[targetlen - 1] = '\0';
4649         }
4650 
4651         if (error == 0) {
4652                 if (target_is_spa || dump_opt['R']) {
4653                         error = spa_open_rewind(target, &spa, FTAG, policy,
4654                             NULL);
4655                         if (error) {
4656                                 /*
4657                                  * If we're missing the log device then
4658                                  * try opening the pool after clearing the
4659                                  * log state.
4660                                  */
4661                                 mutex_enter(&spa_namespace_lock);
4662                                 if ((spa = spa_lookup(target)) != NULL &&
4663                                     spa->spa_log_state == SPA_LOG_MISSING) {
4664                                         spa->spa_log_state = SPA_LOG_CLEAR;
4665                                         error = 0;
4666                                 }
4667                                 mutex_exit(&spa_namespace_lock);
4668 
4669                                 if (!error) {
4670                                         error = spa_open_rewind(target, &spa,
4671                                             FTAG, policy, NULL);
4672                                 }
4673                         }
4674                 } else {
4675                         error = open_objset(target, DMU_OST_ANY, FTAG, &os);
4676                 }
4677         }
4678         nvlist_free(policy);
4679 
4680         if (error)
4681                 fatal("can't open '%s': %s", target, strerror(error));
4682 
4683         argv++;
4684         argc--;
4685         if (!dump_opt['R']) {
4686                 if (argc > 0) {
4687                         zopt_objects = argc;
4688                         zopt_object = calloc(zopt_objects, sizeof (uint64_t));
4689                         for (unsigned i = 0; i < zopt_objects; i++) {
4690                                 errno = 0;
4691                                 zopt_object[i] = strtoull(argv[i], NULL, 0);
4692                                 if (zopt_object[i] == 0 && errno != 0)
4693                                         fatal("bad number %s: %s",
4694                                             argv[i], strerror(errno));
4695                         }
4696                 }
4697                 if (os != NULL) {
4698                         dump_dir(os);
4699                 } else if (zopt_objects > 0 && !dump_opt['m']) {
4700                         dump_dir(spa->spa_meta_objset);
4701                 } else {
4702                         dump_zpool(spa);
4703                 }
4704         } else {
4705                 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
4706                 flagbits['c'] = ZDB_FLAG_CHECKSUM;
4707                 flagbits['d'] = ZDB_FLAG_DECOMPRESS;
4708                 flagbits['e'] = ZDB_FLAG_BSWAP;
4709                 flagbits['g'] = ZDB_FLAG_GBH;
4710                 flagbits['i'] = ZDB_FLAG_INDIRECT;
4711                 flagbits['p'] = ZDB_FLAG_PHYS;
4712                 flagbits['r'] = ZDB_FLAG_RAW;
4713 
4714                 for (int i = 0; i < argc; i++)
4715                         zdb_read_block(argv[i], spa);
4716         }
4717 
4718         if (os != NULL)
4719                 close_objset(os, FTAG);
4720         else
4721                 spa_close(spa, FTAG);
4722 
4723         fuid_table_destroy();
4724 
4725         dump_debug_buffer();
4726 
4727         libzfs_fini(g_zfs);
4728         kernel_fini();
4729 
4730         return (0);
4731 }