10592 misc. metaslab and vdev related ZoL bug fixes
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Tony Hutter <hutter2@llnl.gov>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
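
The dump_spacemap() hunk below replaces the printed word offset with a per-entry counter (entry_id): a two-word space map entry occupies two words on disk but is still one logical entry, so indexing by word offset skips numbers. The following is a minimal standalone sketch of that counting rule only; is_two_word() is a hypothetical stand-in for sm_entry_is_double_word(), and the word values are invented for illustration.

/*
 * Sketch only (not part of this webrev): shows why entries are counted
 * with a separate entry_id instead of offset / sizeof (word).
 */
#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for sm_entry_is_double_word(); tag bit is assumed. */
static int
is_two_word(uint64_t word)
{
	return ((word & 1ULL) != 0);
}

int
main(void)
{
	/* Invented word stream: 1 starts a two-word entry, 0 is one-word. */
	uint64_t words[] = { 0, 1, 0xdeadbeef, 0, 1, 0xfeedface, 0 };
	uint64_t nwords = sizeof (words) / sizeof (words[0]);
	uint64_t entry_id = 0;

	for (uint64_t i = 0; i < nwords; i++) {
		if (is_two_word(words[i])) {
			(void) printf("[%6llu] two-word entry (words %llu-%llu)\n",
			    (unsigned long long)entry_id,
			    (unsigned long long)i,
			    (unsigned long long)(i + 1));
			i++;	/* the second word belongs to the same entry */
		} else {
			(void) printf("[%6llu] one-word entry (word %llu)\n",
			    (unsigned long long)entry_id,
			    (unsigned long long)i);
		}
		entry_id++;	/* count logical entries, not words */
	}
	return (0);
}

Counting entries rather than words keeps the printed indices consecutive across one- and two-word entries, matching the [%6llu] index format already used by dump_spacemap().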
    
      
          --- old/usr/src/cmd/zdb/zdb.c
          +++ new/usr/src/cmd/zdb/zdb.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  25   25   * Copyright (c) 2014 Integros [integros.com]
  26   26   * Copyright 2017 Nexenta Systems, Inc.
  27   27   * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
  28   28   * Copyright 2017 RackTop Systems.
  29   29   */
  30   30  
  31   31  #include <stdio.h>
  32   32  #include <unistd.h>
  33   33  #include <stdio_ext.h>
  34   34  #include <stdlib.h>
  35   35  #include <ctype.h>
  36   36  #include <sys/zfs_context.h>
  37   37  #include <sys/spa.h>
  38   38  #include <sys/spa_impl.h>
  39   39  #include <sys/dmu.h>
  40   40  #include <sys/zap.h>
  41   41  #include <sys/fs/zfs.h>
  42   42  #include <sys/zfs_znode.h>
  43   43  #include <sys/zfs_sa.h>
  44   44  #include <sys/sa.h>
  45   45  #include <sys/sa_impl.h>
  46   46  #include <sys/vdev.h>
  47   47  #include <sys/vdev_impl.h>
  48   48  #include <sys/metaslab_impl.h>
  49   49  #include <sys/dmu_objset.h>
  50   50  #include <sys/dsl_dir.h>
  51   51  #include <sys/dsl_dataset.h>
  52   52  #include <sys/dsl_pool.h>
  53   53  #include <sys/dbuf.h>
  54   54  #include <sys/zil.h>
  55   55  #include <sys/zil_impl.h>
  56   56  #include <sys/stat.h>
  57   57  #include <sys/resource.h>
  58   58  #include <sys/dmu_traverse.h>
  59   59  #include <sys/zio_checksum.h>
  60   60  #include <sys/zio_compress.h>
  61   61  #include <sys/zfs_fuid.h>
  62   62  #include <sys/arc.h>
  63   63  #include <sys/ddt.h>
  64   64  #include <sys/zfeature.h>
  65   65  #include <sys/abd.h>
  66   66  #include <sys/blkptr.h>
  67   67  #include <sys/dsl_scan.h>
  68   68  #include <zfs_comutil.h>
  69   69  #include <libcmdutils.h>
  70   70  #undef verify
  71   71  #include <libzfs.h>
  72   72  
  73   73  #include "zdb.h"
  74   74  
  75   75  #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ?        \
  76   76          zio_compress_table[(idx)].ci_name : "UNKNOWN")
  77   77  #define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ?        \
  78   78          zio_checksum_table[(idx)].ci_name : "UNKNOWN")
  79   79  #define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ?     \
  80   80          dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ?  \
  81   81          dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
  82   82  #define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) :             \
  83   83          (idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ?    \
  84   84          DMU_OT_ZAP_OTHER : \
  85   85          (idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
  86   86          DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
  87   87  
  88   88  #ifndef lint
  89   89  extern int reference_tracking_enable;
  90   90  extern boolean_t zfs_recover;
  91   91  extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
  92   92  extern int zfs_vdev_async_read_max_active;
  93   93  extern int aok;
  94   94  extern boolean_t spa_load_verify_dryrun;
  95   95  #else
  96   96  int reference_tracking_enable;
  97   97  boolean_t zfs_recover;
  98   98  uint64_t zfs_arc_max, zfs_arc_meta_limit;
  99   99  int zfs_vdev_async_read_max_active;
 100  100  int aok;
 101  101  boolean_t spa_load_verify_dryrun;
 102  102  #endif
 103  103  
 104  104  static const char cmdname[] = "zdb";
 105  105  uint8_t dump_opt[256];
 106  106  
 107  107  typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
 108  108  
 109  109  uint64_t *zopt_object = NULL;
 110  110  static unsigned zopt_objects = 0;
 111  111  libzfs_handle_t *g_zfs;
 112  112  uint64_t max_inflight = 1000;
 113  113  static int leaked_objects = 0;
 114  114  
 115  115  static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
 116  116  static void mos_obj_refd(uint64_t);
 117  117  
 118  118  /*
 119  119   * These libumem hooks provide a reasonable set of defaults for the allocator's
 120  120   * debugging facilities.
 121  121   */
 122  122  const char *
 123  123  _umem_debug_init()
 124  124  {
 125  125          return ("default,verbose"); /* $UMEM_DEBUG setting */
 126  126  }
 127  127  
 128  128  const char *
 129  129  _umem_logging_init(void)
 130  130  {
 131  131          return ("fail,contents"); /* $UMEM_LOGGING setting */
 132  132  }
 133  133  
 134  134  static void
 135  135  usage(void)
 136  136  {
 137  137          (void) fprintf(stderr,
 138  138              "Usage:\t%s [-AbcdDFGhikLMPsvX] [-e [-V] [-p <path> ...]] "
 139  139              "[-I <inflight I/Os>]\n"
 140  140              "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
 141  141              "\t\t[<poolname> [<object> ...]]\n"
 142  142              "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> "
 143  143              "[<object> ...]\n"
 144  144              "\t%s -C [-A] [-U <cache>]\n"
 145  145              "\t%s -l [-Aqu] <device>\n"
 146  146              "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
 147  147              "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
 148  148              "\t%s -O <dataset> <path>\n"
 149  149              "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
 150  150              "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
 151  151              "\t%s -E [-A] word0:word1:...:word15\n"
 152  152              "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
 153  153              "<poolname>\n\n",
 154  154              cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
 155  155              cmdname, cmdname);
 156  156  
 157  157          (void) fprintf(stderr, "    Dataset name must include at least one "
 158  158              "separator character '/' or '@'\n");
 159  159          (void) fprintf(stderr, "    If dataset name is specified, only that "
 160  160              "dataset is dumped\n");
 161  161          (void) fprintf(stderr, "    If object numbers are specified, only "
 162  162              "those objects are dumped\n\n");
 163  163          (void) fprintf(stderr, "    Options to control amount of output:\n");
 164  164          (void) fprintf(stderr, "        -b block statistics\n");
 165  165          (void) fprintf(stderr, "        -c checksum all metadata (twice for "
 166  166              "all data) blocks\n");
 167  167          (void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
 168  168          (void) fprintf(stderr, "        -d dataset(s)\n");
 169  169          (void) fprintf(stderr, "        -D dedup statistics\n");
 170  170          (void) fprintf(stderr, "        -E decode and display block from an "
 171  171              "embedded block pointer\n");
 172  172          (void) fprintf(stderr, "        -h pool history\n");
 173  173          (void) fprintf(stderr, "        -i intent logs\n");
 174  174          (void) fprintf(stderr, "        -l read label contents\n");
 175  175          (void) fprintf(stderr, "        -k examine the checkpointed state "
 176  176              "of the pool\n");
 177  177          (void) fprintf(stderr, "        -L disable leak tracking (do not "
 178  178              "load spacemaps)\n");
 179  179          (void) fprintf(stderr, "        -m metaslabs\n");
 180  180          (void) fprintf(stderr, "        -M metaslab groups\n");
 181  181          (void) fprintf(stderr, "        -O perform object lookups by path\n");
 182  182          (void) fprintf(stderr, "        -R read and display block from a "
 183  183              "device\n");
 184  184          (void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
 185  185          (void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
 186  186          (void) fprintf(stderr, "        -v verbose (applies to all "
 187  187              "others)\n\n");
 188  188          (void) fprintf(stderr, "    Below options are intended for use "
 189  189              "with other options:\n");
 190  190          (void) fprintf(stderr, "        -A ignore assertions (-A), enable "
 191  191              "panic recovery (-AA) or both (-AAA)\n");
 192  192          (void) fprintf(stderr, "        -e pool is exported/destroyed/"
 193  193              "has altroot/not in a cachefile\n");
 194  194          (void) fprintf(stderr, "        -F attempt automatic rewind within "
 195  195              "safe range of transaction groups\n");
 196  196          (void) fprintf(stderr, "        -G dump zfs_dbgmsg buffer before "
 197  197              "exiting\n");
 198  198          (void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
 199  199              "specify the maximum number of "
 200  200              "checksumming I/Os [default is 200]\n");
 201  201          (void) fprintf(stderr, "        -o <variable>=<value> set global "
 202  202              "variable to an unsigned 32-bit integer value\n");
 203  203          (void) fprintf(stderr, "        -p <path> -- use one or more with "
 204  204              "-e to specify path to vdev dir\n");
 205  205          (void) fprintf(stderr, "        -P print numbers in parseable form\n");
 206  206          (void) fprintf(stderr, "        -q don't print label contents\n");
 207  207          (void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
 208  208              "searching for uberblocks\n");
 209  209          (void) fprintf(stderr, "        -u uberblock\n");
 210  210          (void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
 211  211              "cachefile\n");
 212  212          (void) fprintf(stderr, "        -V do verbatim import\n");
 213  213          (void) fprintf(stderr, "        -x <dumpdir> -- "
 214  214              "dump all read blocks into specified directory\n");
 215  215          (void) fprintf(stderr, "        -X attempt extreme rewind (does not "
 216  216              "work with dataset)\n\n");
 217  217          (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
 218  218              "to make only that option verbose\n");
 219  219          (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
 220  220          exit(1);
 221  221  }
 222  222  
 223  223  static void
 224  224  dump_debug_buffer()
 225  225  {
 226  226          if (dump_opt['G']) {
 227  227                  (void) printf("\n");
 228  228                  zfs_dbgmsg_print("zdb");
 229  229          }
 230  230  }
 231  231  
 232  232  /*
 233  233   * Called for usage errors that are discovered after a call to spa_open(),
 234  234   * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
 235  235   */
 236  236  
 237  237  static void
 238  238  fatal(const char *fmt, ...)
 239  239  {
 240  240          va_list ap;
 241  241  
 242  242          va_start(ap, fmt);
 243  243          (void) fprintf(stderr, "%s: ", cmdname);
 244  244          (void) vfprintf(stderr, fmt, ap);
 245  245          va_end(ap);
 246  246          (void) fprintf(stderr, "\n");
 247  247  
 248  248          dump_debug_buffer();
 249  249  
 250  250          exit(1);
 251  251  }
 252  252  
 253  253  /* ARGSUSED */
 254  254  static void
 255  255  dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
 256  256  {
 257  257          nvlist_t *nv;
 258  258          size_t nvsize = *(uint64_t *)data;
 259  259          char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
 260  260  
 261  261          VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
 262  262  
 263  263          VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
 264  264  
 265  265          umem_free(packed, nvsize);
 266  266  
 267  267          dump_nvlist(nv, 8);
 268  268  
 269  269          nvlist_free(nv);
 270  270  }
 271  271  
 272  272  /* ARGSUSED */
 273  273  static void
 274  274  dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
 275  275  {
 276  276          spa_history_phys_t *shp = data;
 277  277  
 278  278          if (shp == NULL)
 279  279                  return;
 280  280  
 281  281          (void) printf("\t\tpool_create_len = %llu\n",
 282  282              (u_longlong_t)shp->sh_pool_create_len);
 283  283          (void) printf("\t\tphys_max_off = %llu\n",
 284  284              (u_longlong_t)shp->sh_phys_max_off);
 285  285          (void) printf("\t\tbof = %llu\n",
 286  286              (u_longlong_t)shp->sh_bof);
 287  287          (void) printf("\t\teof = %llu\n",
 288  288              (u_longlong_t)shp->sh_eof);
 289  289          (void) printf("\t\trecords_lost = %llu\n",
 290  290              (u_longlong_t)shp->sh_records_lost);
 291  291  }
 292  292  
 293  293  static void
 294  294  zdb_nicenum(uint64_t num, char *buf, size_t buflen)
 295  295  {
 296  296          if (dump_opt['P'])
 297  297                  (void) snprintf(buf, buflen, "%llu", (longlong_t)num);
 298  298          else
 299  299                  nicenum(num, buf, buflen);
 300  300  }
 301  301  
 302  302  static const char histo_stars[] = "****************************************";
 303  303  static const uint64_t histo_width = sizeof (histo_stars) - 1;
 304  304  
 305  305  static void
 306  306  dump_histogram(const uint64_t *histo, int size, int offset)
 307  307  {
 308  308          int i;
 309  309          int minidx = size - 1;
 310  310          int maxidx = 0;
 311  311          uint64_t max = 0;
 312  312  
 313  313          for (i = 0; i < size; i++) {
 314  314                  if (histo[i] > max)
 315  315                          max = histo[i];
 316  316                  if (histo[i] > 0 && i > maxidx)
 317  317                          maxidx = i;
 318  318                  if (histo[i] > 0 && i < minidx)
 319  319                          minidx = i;
 320  320          }
 321  321  
 322  322          if (max < histo_width)
 323  323                  max = histo_width;
 324  324  
 325  325          for (i = minidx; i <= maxidx; i++) {
 326  326                  (void) printf("\t\t\t%3u: %6llu %s\n",
 327  327                      i + offset, (u_longlong_t)histo[i],
 328  328                      &histo_stars[(max - histo[i]) * histo_width / max]);
 329  329          }
 330  330  }
 331  331  
 332  332  static void
 333  333  dump_zap_stats(objset_t *os, uint64_t object)
 334  334  {
 335  335          int error;
 336  336          zap_stats_t zs;
 337  337  
 338  338          error = zap_get_stats(os, object, &zs);
 339  339          if (error)
 340  340                  return;
 341  341  
 342  342          if (zs.zs_ptrtbl_len == 0) {
 343  343                  ASSERT(zs.zs_num_blocks == 1);
 344  344                  (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
 345  345                      (u_longlong_t)zs.zs_blocksize,
 346  346                      (u_longlong_t)zs.zs_num_entries);
 347  347                  return;
 348  348          }
 349  349  
 350  350          (void) printf("\tFat ZAP stats:\n");
 351  351  
 352  352          (void) printf("\t\tPointer table:\n");
 353  353          (void) printf("\t\t\t%llu elements\n",
 354  354              (u_longlong_t)zs.zs_ptrtbl_len);
 355  355          (void) printf("\t\t\tzt_blk: %llu\n",
 356  356              (u_longlong_t)zs.zs_ptrtbl_zt_blk);
 357  357          (void) printf("\t\t\tzt_numblks: %llu\n",
 358  358              (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
 359  359          (void) printf("\t\t\tzt_shift: %llu\n",
 360  360              (u_longlong_t)zs.zs_ptrtbl_zt_shift);
 361  361          (void) printf("\t\t\tzt_blks_copied: %llu\n",
 362  362              (u_longlong_t)zs.zs_ptrtbl_blks_copied);
 363  363          (void) printf("\t\t\tzt_nextblk: %llu\n",
 364  364              (u_longlong_t)zs.zs_ptrtbl_nextblk);
 365  365  
 366  366          (void) printf("\t\tZAP entries: %llu\n",
 367  367              (u_longlong_t)zs.zs_num_entries);
 368  368          (void) printf("\t\tLeaf blocks: %llu\n",
 369  369              (u_longlong_t)zs.zs_num_leafs);
 370  370          (void) printf("\t\tTotal blocks: %llu\n",
 371  371              (u_longlong_t)zs.zs_num_blocks);
 372  372          (void) printf("\t\tzap_block_type: 0x%llx\n",
 373  373              (u_longlong_t)zs.zs_block_type);
 374  374          (void) printf("\t\tzap_magic: 0x%llx\n",
 375  375              (u_longlong_t)zs.zs_magic);
 376  376          (void) printf("\t\tzap_salt: 0x%llx\n",
 377  377              (u_longlong_t)zs.zs_salt);
 378  378  
 379  379          (void) printf("\t\tLeafs with 2^n pointers:\n");
 380  380          dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
 381  381  
 382  382          (void) printf("\t\tBlocks with n*5 entries:\n");
 383  383          dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
 384  384  
 385  385          (void) printf("\t\tBlocks n/10 full:\n");
 386  386          dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
 387  387  
 388  388          (void) printf("\t\tEntries with n chunks:\n");
 389  389          dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
 390  390  
 391  391          (void) printf("\t\tBuckets with n entries:\n");
 392  392          dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
 393  393  }
 394  394  
 395  395  /*ARGSUSED*/
 396  396  static void
 397  397  dump_none(objset_t *os, uint64_t object, void *data, size_t size)
 398  398  {
 399  399  }
 400  400  
 401  401  /*ARGSUSED*/
 402  402  static void
 403  403  dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
 404  404  {
 405  405          (void) printf("\tUNKNOWN OBJECT TYPE\n");
 406  406  }
 407  407  
 408  408  /*ARGSUSED*/
 409  409  static void
 410  410  dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
 411  411  {
 412  412  }
 413  413  
 414  414  /*ARGSUSED*/
 415  415  static void
 416  416  dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
 417  417  {
 418  418  }
 419  419  
 420  420  /*ARGSUSED*/
 421  421  static void
 422  422  dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
 423  423  {
 424  424          zap_cursor_t zc;
 425  425          zap_attribute_t attr;
 426  426          void *prop;
 427  427          unsigned i;
 428  428  
 429  429          dump_zap_stats(os, object);
 430  430          (void) printf("\n");
 431  431  
 432  432          for (zap_cursor_init(&zc, os, object);
 433  433              zap_cursor_retrieve(&zc, &attr) == 0;
 434  434              zap_cursor_advance(&zc)) {
 435  435                  (void) printf("\t\t%s = ", attr.za_name);
 436  436                  if (attr.za_num_integers == 0) {
 437  437                          (void) printf("\n");
 438  438                          continue;
 439  439                  }
 440  440                  prop = umem_zalloc(attr.za_num_integers *
 441  441                      attr.za_integer_length, UMEM_NOFAIL);
 442  442                  (void) zap_lookup(os, object, attr.za_name,
 443  443                      attr.za_integer_length, attr.za_num_integers, prop);
 444  444                  if (attr.za_integer_length == 1) {
 445  445                          (void) printf("%s", (char *)prop);
 446  446                  } else {
 447  447                          for (i = 0; i < attr.za_num_integers; i++) {
 448  448                                  switch (attr.za_integer_length) {
 449  449                                  case 2:
 450  450                                          (void) printf("%u ",
 451  451                                              ((uint16_t *)prop)[i]);
 452  452                                          break;
 453  453                                  case 4:
 454  454                                          (void) printf("%u ",
 455  455                                              ((uint32_t *)prop)[i]);
 456  456                                          break;
 457  457                                  case 8:
 458  458                                          (void) printf("%lld ",
 459  459                                              (u_longlong_t)((int64_t *)prop)[i]);
 460  460                                          break;
 461  461                                  }
 462  462                          }
 463  463                  }
 464  464                  (void) printf("\n");
 465  465                  umem_free(prop, attr.za_num_integers * attr.za_integer_length);
 466  466          }
 467  467          zap_cursor_fini(&zc);
 468  468  }
 469  469  
 470  470  static void
 471  471  dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
 472  472  {
 473  473          bpobj_phys_t *bpop = data;
 474  474          char bytes[32], comp[32], uncomp[32];
 475  475  
 476  476          /* make sure the output won't get truncated */
 477  477          CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
 478  478          CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
 479  479          CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
 480  480  
 481  481          if (bpop == NULL)
 482  482                  return;
 483  483  
 484  484          zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
 485  485          zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
 486  486          zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));
 487  487  
 488  488          (void) printf("\t\tnum_blkptrs = %llu\n",
 489  489              (u_longlong_t)bpop->bpo_num_blkptrs);
 490  490          (void) printf("\t\tbytes = %s\n", bytes);
 491  491          if (size >= BPOBJ_SIZE_V1) {
 492  492                  (void) printf("\t\tcomp = %s\n", comp);
 493  493                  (void) printf("\t\tuncomp = %s\n", uncomp);
 494  494          }
 495  495          if (size >= sizeof (*bpop)) {
 496  496                  (void) printf("\t\tsubobjs = %llu\n",
 497  497                      (u_longlong_t)bpop->bpo_subobjs);
 498  498                  (void) printf("\t\tnum_subobjs = %llu\n",
 499  499                      (u_longlong_t)bpop->bpo_num_subobjs);
 500  500          }
 501  501  
 502  502          if (dump_opt['d'] < 5)
 503  503                  return;
 504  504  
 505  505          for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) {
 506  506                  char blkbuf[BP_SPRINTF_LEN];
 507  507                  blkptr_t bp;
 508  508  
 509  509                  int err = dmu_read(os, object,
 510  510                      i * sizeof (bp), sizeof (bp), &bp, 0);
 511  511                  if (err != 0) {
 512  512                          (void) printf("got error %u from dmu_read\n", err);
 513  513                          break;
 514  514                  }
 515  515                  snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
 516  516                  (void) printf("\t%s\n", blkbuf);
 517  517          }
 518  518  }
 519  519  
 520  520  /* ARGSUSED */
 521  521  static void
 522  522  dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
 523  523  {
 524  524          dmu_object_info_t doi;
 525  525  
 526  526          VERIFY0(dmu_object_info(os, object, &doi));
 527  527          uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
 528  528  
 529  529          int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
 530  530          if (err != 0) {
 531  531                  (void) printf("got error %u from dmu_read\n", err);
 532  532                  kmem_free(subobjs, doi.doi_max_offset);
 533  533                  return;
 534  534          }
 535  535  
 536  536          int64_t last_nonzero = -1;
 537  537          for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) {
 538  538                  if (subobjs[i] != 0)
 539  539                          last_nonzero = i;
 540  540          }
 541  541  
 542  542          for (int64_t i = 0; i <= last_nonzero; i++) {
 543  543                  (void) printf("\t%llu\n", (longlong_t)subobjs[i]);
 544  544          }
 545  545          kmem_free(subobjs, doi.doi_max_offset);
 546  546  }
 547  547  
 548  548  /*ARGSUSED*/
 549  549  static void
 550  550  dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
 551  551  {
 552  552          dump_zap_stats(os, object);
 553  553          /* contents are printed elsewhere, properly decoded */
 554  554  }
 555  555  
 556  556  /*ARGSUSED*/
 557  557  static void
 558  558  dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
 559  559  {
 560  560          zap_cursor_t zc;
 561  561          zap_attribute_t attr;
 562  562  
 563  563          dump_zap_stats(os, object);
 564  564          (void) printf("\n");
 565  565  
 566  566          for (zap_cursor_init(&zc, os, object);
 567  567              zap_cursor_retrieve(&zc, &attr) == 0;
 568  568              zap_cursor_advance(&zc)) {
 569  569                  (void) printf("\t\t%s = ", attr.za_name);
 570  570                  if (attr.za_num_integers == 0) {
 571  571                          (void) printf("\n");
 572  572                          continue;
 573  573                  }
 574  574                  (void) printf(" %llx : [%d:%d:%d]\n",
 575  575                      (u_longlong_t)attr.za_first_integer,
 576  576                      (int)ATTR_LENGTH(attr.za_first_integer),
 577  577                      (int)ATTR_BSWAP(attr.za_first_integer),
 578  578                      (int)ATTR_NUM(attr.za_first_integer));
 579  579          }
 580  580          zap_cursor_fini(&zc);
 581  581  }
 582  582  
 583  583  /*ARGSUSED*/
 584  584  static void
 585  585  dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
 586  586  {
 587  587          zap_cursor_t zc;
 588  588          zap_attribute_t attr;
 589  589          uint16_t *layout_attrs;
 590  590          unsigned i;
 591  591  
 592  592          dump_zap_stats(os, object);
 593  593          (void) printf("\n");
 594  594  
 595  595          for (zap_cursor_init(&zc, os, object);
 596  596              zap_cursor_retrieve(&zc, &attr) == 0;
 597  597              zap_cursor_advance(&zc)) {
 598  598                  (void) printf("\t\t%s = [", attr.za_name);
 599  599                  if (attr.za_num_integers == 0) {
 600  600                          (void) printf("\n");
 601  601                          continue;
 602  602                  }
 603  603  
 604  604                  VERIFY(attr.za_integer_length == 2);
 605  605                  layout_attrs = umem_zalloc(attr.za_num_integers *
 606  606                      attr.za_integer_length, UMEM_NOFAIL);
 607  607  
 608  608                  VERIFY(zap_lookup(os, object, attr.za_name,
 609  609                      attr.za_integer_length,
 610  610                      attr.za_num_integers, layout_attrs) == 0);
 611  611  
 612  612                  for (i = 0; i != attr.za_num_integers; i++)
 613  613                          (void) printf(" %d ", (int)layout_attrs[i]);
 614  614                  (void) printf("]\n");
 615  615                  umem_free(layout_attrs,
 616  616                      attr.za_num_integers * attr.za_integer_length);
 617  617          }
 618  618          zap_cursor_fini(&zc);
 619  619  }
 620  620  
 621  621  /*ARGSUSED*/
 622  622  static void
 623  623  dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
 624  624  {
 625  625          zap_cursor_t zc;
 626  626          zap_attribute_t attr;
 627  627          const char *typenames[] = {
 628  628                  /* 0 */ "not specified",
 629  629                  /* 1 */ "FIFO",
 630  630                  /* 2 */ "Character Device",
 631  631                  /* 3 */ "3 (invalid)",
 632  632                  /* 4 */ "Directory",
 633  633                  /* 5 */ "5 (invalid)",
 634  634                  /* 6 */ "Block Device",
 635  635                  /* 7 */ "7 (invalid)",
 636  636                  /* 8 */ "Regular File",
 637  637                  /* 9 */ "9 (invalid)",
 638  638                  /* 10 */ "Symbolic Link",
 639  639                  /* 11 */ "11 (invalid)",
 640  640                  /* 12 */ "Socket",
 641  641                  /* 13 */ "Door",
 642  642                  /* 14 */ "Event Port",
 643  643                  /* 15 */ "15 (invalid)",
 644  644          };
 645  645  
 646  646          dump_zap_stats(os, object);
 647  647          (void) printf("\n");
 648  648  
 649  649          for (zap_cursor_init(&zc, os, object);
 650  650              zap_cursor_retrieve(&zc, &attr) == 0;
 651  651              zap_cursor_advance(&zc)) {
 652  652                  (void) printf("\t\t%s = %lld (type: %s)\n",
 653  653                      attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
 654  654                      typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
 655  655          }
 656  656          zap_cursor_fini(&zc);
 657  657  }
 658  658  
 659  659  static int
 660  660  get_dtl_refcount(vdev_t *vd)
 661  661  {
 662  662          int refcount = 0;
 663  663  
 664  664          if (vd->vdev_ops->vdev_op_leaf) {
 665  665                  space_map_t *sm = vd->vdev_dtl_sm;
 666  666  
 667  667                  if (sm != NULL &&
 668  668                      sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
 669  669                          return (1);
 670  670                  return (0);
 671  671          }
 672  672  
 673  673          for (unsigned c = 0; c < vd->vdev_children; c++)
 674  674                  refcount += get_dtl_refcount(vd->vdev_child[c]);
 675  675          return (refcount);
 676  676  }
 677  677  
 678  678  static int
 679  679  get_metaslab_refcount(vdev_t *vd)
 680  680  {
 681  681          int refcount = 0;
 682  682  
 683  683          if (vd->vdev_top == vd) {
 684  684                  for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 685  685                          space_map_t *sm = vd->vdev_ms[m]->ms_sm;
 686  686  
 687  687                          if (sm != NULL &&
 688  688                              sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
 689  689                                  refcount++;
 690  690                  }
 691  691          }
 692  692          for (unsigned c = 0; c < vd->vdev_children; c++)
 693  693                  refcount += get_metaslab_refcount(vd->vdev_child[c]);
 694  694  
 695  695          return (refcount);
 696  696  }
 697  697  
 698  698  static int
 699  699  get_obsolete_refcount(vdev_t *vd)
 700  700  {
 701  701          int refcount = 0;
 702  702  
 703  703          uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd);
 704  704          if (vd->vdev_top == vd && obsolete_sm_obj != 0) {
 705  705                  dmu_object_info_t doi;
 706  706                  VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
 707  707                      obsolete_sm_obj, &doi));
 708  708                  if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
 709  709                          refcount++;
 710  710                  }
 711  711          } else {
 712  712                  ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
 713  713                  ASSERT3U(obsolete_sm_obj, ==, 0);
 714  714          }
 715  715          for (unsigned c = 0; c < vd->vdev_children; c++) {
 716  716                  refcount += get_obsolete_refcount(vd->vdev_child[c]);
 717  717          }
 718  718  
 719  719          return (refcount);
 720  720  }
 721  721  
 722  722  static int
 723  723  get_prev_obsolete_spacemap_refcount(spa_t *spa)
 724  724  {
 725  725          uint64_t prev_obj =
 726  726              spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
 727  727          if (prev_obj != 0) {
 728  728                  dmu_object_info_t doi;
 729  729                  VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
 730  730                  if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
 731  731                          return (1);
 732  732                  }
 733  733          }
 734  734          return (0);
 735  735  }
 736  736  
 737  737  static int
 738  738  get_checkpoint_refcount(vdev_t *vd)
 739  739  {
 740  740          int refcount = 0;
 741  741  
 742  742          if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
 743  743              zap_contains(spa_meta_objset(vd->vdev_spa),
 744  744              vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
 745  745                  refcount++;
 746  746  
 747  747          for (uint64_t c = 0; c < vd->vdev_children; c++)
 748  748                  refcount += get_checkpoint_refcount(vd->vdev_child[c]);
 749  749  
 750  750          return (refcount);
 751  751  }
 752  752  
 753  753  static int
 754  754  verify_spacemap_refcounts(spa_t *spa)
 755  755  {
 756  756          uint64_t expected_refcount = 0;
 757  757          uint64_t actual_refcount;
 758  758  
 759  759          (void) feature_get_refcount(spa,
 760  760              &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
 761  761              &expected_refcount);
 762  762          actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
 763  763          actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
 764  764          actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
 765  765          actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
 766  766          actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
 767  767  
 768  768          if (expected_refcount != actual_refcount) {
 769  769                  (void) printf("space map refcount mismatch: expected %lld != "
 770  770                      "actual %lld\n",
 771  771                      (longlong_t)expected_refcount,
 772  772                      (longlong_t)actual_refcount);
 773  773                  return (2);
 774  774          }
 775  775          return (0);
 776  776  }
 777  777  
  
 778  778  static void
 779  779  dump_spacemap(objset_t *os, space_map_t *sm)
 780  780  {
 781  781          char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 782  782              "INVALID", "INVALID", "INVALID", "INVALID" };
 783  783  
 784  784          if (sm == NULL)
 785  785                  return;
 786  786  
 787  787          (void) printf("space map object %llu:\n",
 788      -            (longlong_t)sm->sm_phys->smp_object);
 789      -        (void) printf("  smp_objsize = 0x%llx\n",
 790      -            (longlong_t)sm->sm_phys->smp_objsize);
      788 +            (longlong_t)sm->sm_object);
      789 +        (void) printf("  smp_length = 0x%llx\n",
      790 +            (longlong_t)sm->sm_phys->smp_length);
 791  791          (void) printf("  smp_alloc = 0x%llx\n",
 792  792              (longlong_t)sm->sm_phys->smp_alloc);
 793  793  
      794 +        if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
      795 +                return;
      796 +
 794  797          /*
 795  798           * Print out the freelist entries in both encoded and decoded form.
 796  799           */
 797  800          uint8_t mapshift = sm->sm_shift;
 798  801          int64_t alloc = 0;
 799      -        uint64_t word;
      802 +        uint64_t word, entry_id = 0;
 800  803          for (uint64_t offset = 0; offset < space_map_length(sm);
 801  804              offset += sizeof (word)) {
 802  805  
 803  806                  VERIFY0(dmu_read(os, space_map_object(sm), offset,
 804  807                      sizeof (word), &word, DMU_READ_PREFETCH));
 805  808  
 806  809                  if (sm_entry_is_debug(word)) {
 807      -                        (void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
 808      -                            (u_longlong_t)(offset / sizeof (word)),
      810 +                        (void) printf("\t    [%6llu] %s: txg %llu pass %llu\n",
      811 +                            (u_longlong_t)entry_id,
 809  812                              ddata[SM_DEBUG_ACTION_DECODE(word)],
 810  813                              (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
 811  814                              (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
      815 +                        entry_id++;
 812  816                          continue;
 813  817                  }
 814  818  
 815  819                  uint8_t words;
 816  820                  char entry_type;
 817  821                  uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
 818  822  
 819  823                  if (sm_entry_is_single_word(word)) {
 820  824                          entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
 821  825                              'A' : 'F';
 822  826                          entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
 823  827                              sm->sm_start;
 824  828                          entry_run = SM_RUN_DECODE(word) << mapshift;
 825  829                          words = 1;
 826  830                  } else {
 827  831                          /* it is a two-word entry so we read another word */
 828  832                          ASSERT(sm_entry_is_double_word(word));
 829  833  
 830  834                          uint64_t extra_word;
 831  835                          offset += sizeof (extra_word);
 832  836                          VERIFY0(dmu_read(os, space_map_object(sm), offset,
 833  837                              sizeof (extra_word), &extra_word,
 834  838                              DMU_READ_PREFETCH));
 835  839  
 836  840                          ASSERT3U(offset, <=, space_map_length(sm));
 837  841  
 838  842                          entry_run = SM2_RUN_DECODE(word) << mapshift;
  
    | 
      ↓ open down ↓ | 
    17 lines elided | 
    
      ↑ open up ↑ | 
  
 839  843                          entry_vdev = SM2_VDEV_DECODE(word);
 840  844                          entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
 841  845                              'A' : 'F';
 842  846                          entry_off = (SM2_OFFSET_DECODE(extra_word) <<
 843  847                              mapshift) + sm->sm_start;
 844  848                          words = 2;
 845  849                  }
 846  850  
 847  851                  (void) printf("\t    [%6llu]    %c  range:"
 848  852                      " %010llx-%010llx  size: %06llx vdev: %06llu words: %u\n",
 849      -                    (u_longlong_t)(offset / sizeof (word)),
      853 +                    (u_longlong_t)entry_id,
 850  854                      entry_type, (u_longlong_t)entry_off,
 851  855                      (u_longlong_t)(entry_off + entry_run),
 852  856                      (u_longlong_t)entry_run,
 853  857                      (u_longlong_t)entry_vdev, words);
 854  858  
 855  859                  if (entry_type == 'A')
 856  860                          alloc += entry_run;
 857  861                  else
 858  862                          alloc -= entry_run;
      863 +                entry_id++;
 859  864          }
 860      -        if ((uint64_t)alloc != space_map_allocated(sm)) {
      865 +        if (alloc != space_map_allocated(sm)) {
 861  866                  (void) printf("space_map_object alloc (%lld) INCONSISTENT "
 862  867                      "with space map summary (%lld)\n",
 863  868                      (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
 864  869          }
 865  870  }
 866  871  
 867  872  static void
 868  873  dump_metaslab_stats(metaslab_t *msp)
 869  874  {
 870  875          char maxbuf[32];
 871  876          range_tree_t *rt = msp->ms_allocatable;
 872  877          avl_tree_t *t = &msp->ms_allocatable_by_size;
 873  878          int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
 874  879  
 875  880          /* make sure nicenum has enough space */
 876  881          CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
 877  882  
 878  883          zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
 879  884  
 880  885          (void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
 881  886              "segments", avl_numnodes(t), "maxsize", maxbuf,
 882  887              "freepct", free_pct);
 883  888          (void) printf("\tIn-memory histogram:\n");
 884  889          dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 885  890  }
 886  891  
 887  892  static void
 888  893  dump_metaslab(metaslab_t *msp)
 889  894  {
 890  895          vdev_t *vd = msp->ms_group->mg_vd;
 891  896          spa_t *spa = vd->vdev_spa;
 892  897          space_map_t *sm = msp->ms_sm;
 893  898          char freebuf[32];
 894  899  
 895  900          zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
 896  901              sizeof (freebuf));
 897  902  
 898  903          (void) printf(
 899  904              "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
 900  905              (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
 901  906              (u_longlong_t)space_map_object(sm), freebuf);
 902  907  
 903  908          if (dump_opt['m'] > 2 && !dump_opt['L']) {
 904  909                  mutex_enter(&msp->ms_lock);
 905  910                  VERIFY0(metaslab_load(msp));
 906  911                  range_tree_stat_verify(msp->ms_allocatable);
 907  912                  dump_metaslab_stats(msp);
 908  913                  metaslab_unload(msp);
 909  914                  mutex_exit(&msp->ms_lock);
 910  915          }
 911  916  
 912  917          if (dump_opt['m'] > 1 && sm != NULL &&
 913  918              spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
 914  919                  /*
 915  920                   * The space map histogram represents free space in chunks
 916  921                   * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
 917  922                   */
 918  923                  (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
 919  924                      (u_longlong_t)msp->ms_fragmentation);
 920  925                  dump_histogram(sm->sm_phys->smp_histogram,
 921  926                      SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
 922  927          }
 923  928  
 924      -        if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
 925      -                ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
 926      -
 927      -                dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
 928      -        }
      929 +        ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
      930 +        dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
 929  931  }
 930  932  
 931  933  static void
 932  934  print_vdev_metaslab_header(vdev_t *vd)
 933  935  {
 934  936          vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
 935  937          const char *bias_str;
 936  938  
 937  939          bias_str = (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) ?
 938  940              VDEV_ALLOC_BIAS_LOG :
 939  941              (alloc_bias == VDEV_BIAS_SPECIAL) ? VDEV_ALLOC_BIAS_SPECIAL :
 940  942              (alloc_bias == VDEV_BIAS_DEDUP) ? VDEV_ALLOC_BIAS_DEDUP :
 941  943              vd->vdev_islog ? "log" : "";
 942  944  
 943  945          (void) printf("\tvdev %10llu   %s\n"
 944  946              "\t%-10s%5llu   %-19s   %-15s   %-12s\n",
 945  947              (u_longlong_t)vd->vdev_id, bias_str,
 946  948              "metaslabs", (u_longlong_t)vd->vdev_ms_count,
 947  949              "offset", "spacemap", "free");
 948  950          (void) printf("\t%15s   %19s   %15s   %12s\n",
 949  951              "---------------", "-------------------",
 950  952              "---------------", "------------");
 951  953  }
 952  954  
 953  955  static void
 954  956  dump_metaslab_groups(spa_t *spa)
 955  957  {
 956  958          vdev_t *rvd = spa->spa_root_vdev;
 957  959          metaslab_class_t *mc = spa_normal_class(spa);
 958  960          uint64_t fragmentation;
 959  961  
 960  962          metaslab_class_histogram_verify(mc);
 961  963  
 962  964          for (unsigned c = 0; c < rvd->vdev_children; c++) {
 963  965                  vdev_t *tvd = rvd->vdev_child[c];
 964  966                  metaslab_group_t *mg = tvd->vdev_mg;
 965  967  
 966  968                  if (mg == NULL || mg->mg_class != mc)
 967  969                          continue;
 968  970  
 969  971                  metaslab_group_histogram_verify(mg);
 970  972                  mg->mg_fragmentation = metaslab_group_fragmentation(mg);
 971  973  
 972  974                  (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
 973  975                      "fragmentation",
 974  976                      (u_longlong_t)tvd->vdev_id,
 975  977                      (u_longlong_t)tvd->vdev_ms_count);
 976  978                  if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
 977  979                          (void) printf("%3s\n", "-");
 978  980                  } else {
 979  981                          (void) printf("%3llu%%\n",
 980  982                              (u_longlong_t)mg->mg_fragmentation);
 981  983                  }
 982  984                  dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 983  985          }
 984  986  
 985  987          (void) printf("\tpool %s\tfragmentation", spa_name(spa));
 986  988          fragmentation = metaslab_class_fragmentation(mc);
 987  989          if (fragmentation == ZFS_FRAG_INVALID)
 988  990                  (void) printf("\t%3s\n", "-");
 989  991          else
 990  992                  (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
 991  993          dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 992  994  }
 993  995  
 994  996  static void
 995  997  print_vdev_indirect(vdev_t *vd)
 996  998  {
 997  999          vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
 998 1000          vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 999 1001          vdev_indirect_births_t *vib = vd->vdev_indirect_births;
1000 1002  
1001 1003          if (vim == NULL) {
1002 1004                  ASSERT3P(vib, ==, NULL);
1003 1005                  return;
1004 1006          }
1005 1007  
1006 1008          ASSERT3U(vdev_indirect_mapping_object(vim), ==,
1007 1009              vic->vic_mapping_object);
1008 1010          ASSERT3U(vdev_indirect_births_object(vib), ==,
1009 1011              vic->vic_births_object);
1010 1012  
1011 1013          (void) printf("indirect births obj %llu:\n",
1012 1014              (longlong_t)vic->vic_births_object);
1013 1015          (void) printf("    vib_count = %llu\n",
1014 1016              (longlong_t)vdev_indirect_births_count(vib));
1015 1017          for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
1016 1018                  vdev_indirect_birth_entry_phys_t *cur_vibe =
1017 1019                      &vib->vib_entries[i];
1018 1020                  (void) printf("\toffset %llx -> txg %llu\n",
1019 1021                      (longlong_t)cur_vibe->vibe_offset,
1020 1022                      (longlong_t)cur_vibe->vibe_phys_birth_txg);
1021 1023          }
1022 1024          (void) printf("\n");
1023 1025  
1024 1026          (void) printf("indirect mapping obj %llu:\n",
1025 1027              (longlong_t)vic->vic_mapping_object);
1026 1028          (void) printf("    vim_max_offset = 0x%llx\n",
1027 1029              (longlong_t)vdev_indirect_mapping_max_offset(vim));
1028 1030          (void) printf("    vim_bytes_mapped = 0x%llx\n",
1029 1031              (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
1030 1032          (void) printf("    vim_count = %llu\n",
1031 1033              (longlong_t)vdev_indirect_mapping_num_entries(vim));
1032 1034  
1033 1035          if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
1034 1036                  return;
1035 1037  
1036 1038          uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);
1037 1039  
1038 1040          for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
1039 1041                  vdev_indirect_mapping_entry_phys_t *vimep =
1040 1042                      &vim->vim_entries[i];
1041 1043                  (void) printf("\t<%llx:%llx:%llx> -> "
1042 1044                      "<%llx:%llx:%llx> (%x obsolete)\n",
1043 1045                      (longlong_t)vd->vdev_id,
1044 1046                      (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
1045 1047                      (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
1046 1048                      (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
1047 1049                      (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
1048 1050                      (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
1049 1051                      counts[i]);
1050 1052          }
1051 1053          (void) printf("\n");
1052 1054  
1053 1055          uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
1054 1056          if (obsolete_sm_object != 0) {
1055 1057                  objset_t *mos = vd->vdev_spa->spa_meta_objset;
1056 1058                  (void) printf("obsolete space map object %llu:\n",
1057 1059                      (u_longlong_t)obsolete_sm_object);
1058 1060                  ASSERT(vd->vdev_obsolete_sm != NULL);
1059 1061                  ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
1060 1062                      obsolete_sm_object);
1061 1063                  dump_spacemap(mos, vd->vdev_obsolete_sm);
1062 1064                  (void) printf("\n");
1063 1065          }
1064 1066  }
1065 1067  
1066 1068  static void
1067 1069  dump_metaslabs(spa_t *spa)
1068 1070  {
1069 1071          vdev_t *vd, *rvd = spa->spa_root_vdev;
1070 1072          uint64_t m, c = 0, children = rvd->vdev_children;
1071 1073  
1072 1074          (void) printf("\nMetaslabs:\n");
1073 1075  
1074 1076          if (!dump_opt['d'] && zopt_objects > 0) {
1075 1077                  c = zopt_object[0];
1076 1078  
1077 1079                  if (c >= children)
1078 1080                          (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
1079 1081  
1080 1082                  if (zopt_objects > 1) {
1081 1083                          vd = rvd->vdev_child[c];
1082 1084                          print_vdev_metaslab_header(vd);
1083 1085  
1084 1086                          for (m = 1; m < zopt_objects; m++) {
1085 1087                                  if (zopt_object[m] < vd->vdev_ms_count)
1086 1088                                          dump_metaslab(
1087 1089                                              vd->vdev_ms[zopt_object[m]]);
1088 1090                                  else
1089 1091                                          (void) fprintf(stderr, "bad metaslab "
1090 1092                                              "number %llu\n",
1091 1093                                              (u_longlong_t)zopt_object[m]);
1092 1094                          }
1093 1095                          (void) printf("\n");
1094 1096                          return;
1095 1097                  }
1096 1098                  children = c + 1;
1097 1099          }
1098 1100          for (; c < children; c++) {
1099 1101                  vd = rvd->vdev_child[c];
1100 1102                  print_vdev_metaslab_header(vd);
1101 1103  
1102 1104                  print_vdev_indirect(vd);
1103 1105  
1104 1106                  for (m = 0; m < vd->vdev_ms_count; m++)
1105 1107                          dump_metaslab(vd->vdev_ms[m]);
1106 1108                  (void) printf("\n");
1107 1109          }
1108 1110  }
1109 1111  
1110 1112  static void
1111 1113  dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
1112 1114  {
1113 1115          const ddt_phys_t *ddp = dde->dde_phys;
1114 1116          const ddt_key_t *ddk = &dde->dde_key;
1115 1117          const char *types[4] = { "ditto", "single", "double", "triple" };
1116 1118          char blkbuf[BP_SPRINTF_LEN];
1117 1119          blkptr_t blk;
1118 1120  
1119 1121          for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1120 1122                  if (ddp->ddp_phys_birth == 0)
1121 1123                          continue;
1122 1124                  ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
1123 1125                  snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
1124 1126                  (void) printf("index %llx refcnt %llu %s %s\n",
1125 1127                      (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
1126 1128                      types[p], blkbuf);
1127 1129          }
1128 1130  }
1129 1131  
1130 1132  static void
1131 1133  dump_dedup_ratio(const ddt_stat_t *dds)
1132 1134  {
1133 1135          double rL, rP, rD, D, dedup, compress, copies;
1134 1136  
1135 1137          if (dds->dds_blocks == 0)
1136 1138                  return;
1137 1139  
1138 1140          rL = (double)dds->dds_ref_lsize;
1139 1141          rP = (double)dds->dds_ref_psize;
1140 1142          rD = (double)dds->dds_ref_dsize;
1141 1143          D = (double)dds->dds_dsize;
1142 1144  
1143 1145          dedup = rD / D;
1144 1146          compress = rL / rP;
1145 1147          copies = rD / rP;
1146 1148  
1147 1149          (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
1148 1150              "dedup * compress / copies = %.2f\n\n",
1149 1151              dedup, compress, copies, dedup * compress / copies);
1150 1152  }
1151 1153  
1152 1154  static void
1153 1155  dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
1154 1156  {
1155 1157          char name[DDT_NAMELEN];
1156 1158          ddt_entry_t dde;
1157 1159          uint64_t walk = 0;
1158 1160          dmu_object_info_t doi;
1159 1161          uint64_t count, dspace, mspace;
1160 1162          int error;
1161 1163  
1162 1164          error = ddt_object_info(ddt, type, class, &doi);
1163 1165  
1164 1166          if (error == ENOENT)
1165 1167                  return;
1166 1168          ASSERT(error == 0);
1167 1169  
1168 1170          if ((count = ddt_object_count(ddt, type, class)) == 0)
1169 1171                  return;
1170 1172  
1171 1173          dspace = doi.doi_physical_blocks_512 << 9;
1172 1174          mspace = doi.doi_fill_count * doi.doi_data_block_size;
1173 1175  
1174 1176          ddt_object_name(ddt, type, class, name);
1175 1177  
1176 1178          (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
1177 1179              name,
1178 1180              (u_longlong_t)count,
1179 1181              (u_longlong_t)(dspace / count),
1180 1182              (u_longlong_t)(mspace / count));
1181 1183  
1182 1184          if (dump_opt['D'] < 3)
1183 1185                  return;
1184 1186  
1185 1187          zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
1186 1188  
1187 1189          if (dump_opt['D'] < 4)
1188 1190                  return;
1189 1191  
1190 1192          if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
1191 1193                  return;
1192 1194  
1193 1195          (void) printf("%s contents:\n\n", name);
1194 1196  
1195 1197          while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
1196 1198                  dump_dde(ddt, &dde, walk);
1197 1199  
1198 1200          ASSERT3U(error, ==, ENOENT);
1199 1201  
1200 1202          (void) printf("\n");
1201 1203  }
1202 1204  
1203 1205  static void
1204 1206  dump_all_ddts(spa_t *spa)
1205 1207  {
1206 1208          ddt_histogram_t ddh_total;
1207 1209          ddt_stat_t dds_total;
1208 1210  
1209 1211          bzero(&ddh_total, sizeof (ddh_total));
1210 1212          bzero(&dds_total, sizeof (dds_total));
1211 1213  
1212 1214          for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
1213 1215                  ddt_t *ddt = spa->spa_ddt[c];
1214 1216                  for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
1215 1217                          for (enum ddt_class class = 0; class < DDT_CLASSES;
1216 1218                              class++) {
1217 1219                                  dump_ddt(ddt, type, class);
1218 1220                          }
1219 1221                  }
1220 1222          }
1221 1223  
1222 1224          ddt_get_dedup_stats(spa, &dds_total);
1223 1225  
1224 1226          if (dds_total.dds_blocks == 0) {
1225 1227                  (void) printf("All DDTs are empty\n");
1226 1228                  return;
1227 1229          }
1228 1230  
1229 1231          (void) printf("\n");
1230 1232  
1231 1233          if (dump_opt['D'] > 1) {
1232 1234                  (void) printf("DDT histogram (aggregated over all DDTs):\n");
1233 1235                  ddt_get_dedup_histogram(spa, &ddh_total);
1234 1236                  zpool_dump_ddt(&dds_total, &ddh_total);
1235 1237          }
1236 1238  
1237 1239          dump_dedup_ratio(&dds_total);
1238 1240  }
1239 1241  
1240 1242  static void
1241 1243  dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
1242 1244  {
1243 1245          char *prefix = arg;
1244 1246  
1245 1247          (void) printf("%s [%llu,%llu) length %llu\n",
1246 1248              prefix,
1247 1249              (u_longlong_t)start,
1248 1250              (u_longlong_t)(start + size),
1249 1251              (u_longlong_t)(size));
1250 1252  }
1251 1253  
1252 1254  static void
1253 1255  dump_dtl(vdev_t *vd, int indent)
1254 1256  {
1255 1257          spa_t *spa = vd->vdev_spa;
1256 1258          boolean_t required;
1257 1259          const char *name[DTL_TYPES] = { "missing", "partial", "scrub",
1258 1260                  "outage" };
1259 1261          char prefix[256];
1260 1262  
1261 1263          spa_vdev_state_enter(spa, SCL_NONE);
1262 1264          required = vdev_dtl_required(vd);
1263 1265          (void) spa_vdev_state_exit(spa, NULL, 0);
1264 1266  
1265 1267          if (indent == 0)
1266 1268                  (void) printf("\nDirty time logs:\n\n");
1267 1269  
1268 1270          (void) printf("\t%*s%s [%s]\n", indent, "",
1269 1271              vd->vdev_path ? vd->vdev_path :
1270 1272              vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
1271 1273              required ? "DTL-required" : "DTL-expendable");
1272 1274  
1273 1275          for (int t = 0; t < DTL_TYPES; t++) {
1274 1276                  range_tree_t *rt = vd->vdev_dtl[t];
1275 1277                  if (range_tree_space(rt) == 0)
1276 1278                          continue;
1277 1279                  (void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
1278 1280                      indent + 2, "", name[t]);
1279 1281                  range_tree_walk(rt, dump_dtl_seg, prefix);
1280 1282                  if (dump_opt['d'] > 5 && vd->vdev_children == 0)
1281 1283                          dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
1282 1284          }
1283 1285  
1284 1286          for (unsigned c = 0; c < vd->vdev_children; c++)
1285 1287                  dump_dtl(vd->vdev_child[c], indent + 4);
1286 1288  }
1287 1289  
1288 1290  static void
1289 1291  dump_history(spa_t *spa)
1290 1292  {
1291 1293          nvlist_t **events = NULL;
1292 1294          uint64_t resid, len, off = 0;
1293 1295          uint_t num = 0;
1294 1296          int error;
1295 1297          time_t tsec;
1296 1298          struct tm t;
1297 1299          char tbuf[30];
1298 1300          char internalstr[MAXPATHLEN];
1299 1301  
1300 1302          char *buf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
1301 1303          do {
1302 1304                  len = SPA_MAXBLOCKSIZE;
1303 1305  
1304 1306                  if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
1305 1307                          (void) fprintf(stderr, "Unable to read history: "
1306 1308                              "error %d\n", error);
1307 1309                          umem_free(buf, SPA_MAXBLOCKSIZE);
1308 1310                          return;
1309 1311                  }
1310 1312  
1311 1313                  if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
1312 1314                          break;
1313 1315  
1314 1316                  off -= resid;
1315 1317          } while (len != 0);
1316 1318          umem_free(buf, SPA_MAXBLOCKSIZE);
1317 1319  
1318 1320          (void) printf("\nHistory:\n");
1319 1321          for (unsigned i = 0; i < num; i++) {
1320 1322                  uint64_t time, txg, ievent;
1321 1323                  char *cmd, *intstr;
1322 1324                  boolean_t printed = B_FALSE;
1323 1325  
1324 1326                  if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
1325 1327                      &time) != 0)
1326 1328                          goto next;
1327 1329                  if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
1328 1330                      &cmd) != 0) {
1329 1331                          if (nvlist_lookup_uint64(events[i],
1330 1332                              ZPOOL_HIST_INT_EVENT, &ievent) != 0)
1331 1333                                  goto next;
1332 1334                          verify(nvlist_lookup_uint64(events[i],
1333 1335                              ZPOOL_HIST_TXG, &txg) == 0);
1334 1336                          verify(nvlist_lookup_string(events[i],
1335 1337                              ZPOOL_HIST_INT_STR, &intstr) == 0);
1336 1338                          if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
1337 1339                                  goto next;
1338 1340  
1339 1341                          (void) snprintf(internalstr,
1340 1342                              sizeof (internalstr),
1341 1343                              "[internal %s txg:%ju] %s",
1342 1344                              zfs_history_event_names[ievent], (uintmax_t)txg,
1343 1345                              intstr);
1344 1346                          cmd = internalstr;
1345 1347                  }
1346 1348                  tsec = time;
1347 1349                  (void) localtime_r(&tsec, &t);
1348 1350                  (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
1349 1351                  (void) printf("%s %s\n", tbuf, cmd);
1350 1352                  printed = B_TRUE;
1351 1353  
1352 1354  next:
1353 1355                  if (dump_opt['h'] > 1) {
1354 1356                          if (!printed)
1355 1357                                  (void) printf("unrecognized record:\n");
1356 1358                          dump_nvlist(events[i], 2);
1357 1359                  }
1358 1360          }
1359 1361  }
1360 1362  
1361 1363  /*ARGSUSED*/
1362 1364  static void
1363 1365  dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
1364 1366  {
1365 1367  }
1366 1368  
1367 1369  static uint64_t
1368 1370  blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
1369 1371      const zbookmark_phys_t *zb)
1370 1372  {
1371 1373          if (dnp == NULL) {
1372 1374                  ASSERT(zb->zb_level < 0);
1373 1375                  if (zb->zb_object == 0)
1374 1376                          return (zb->zb_blkid);
1375 1377                  return (zb->zb_blkid * BP_GET_LSIZE(bp));
1376 1378          }
1377 1379  
1378 1380          ASSERT(zb->zb_level >= 0);
1379 1381  
1380 1382          return ((zb->zb_blkid <<
1381 1383              (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
1382 1384              dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
1383 1385  }
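/*
 * Example (assuming the common defaults): with dn_indblkshift = 17 (128K
 * indirect blocks) and SPA_BLKPTRSHIFT = 7 (128-byte block pointers), each
 * indirect block holds 1 << 10 = 1024 pointers, so a level-L blkid is
 * scaled by 1024^L down to a level-0 block number and then multiplied by
 * the data block size (dn_datablkszsec sectors of 1 << SPA_MINBLOCKSHIFT =
 * 512 bytes) to get the byte offset that print_indirect() displays.
 */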
1384 1386  
1385 1387  static void
1386 1388  snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
1387 1389  {
1388 1390          const dva_t *dva = bp->blk_dva;
1389 1391          int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
1390 1392  
1391 1393          if (dump_opt['b'] >= 6) {
1392 1394                  snprintf_blkptr(blkbuf, buflen, bp);
1393 1395                  return;
1394 1396          }
1395 1397  
1396 1398          if (BP_IS_EMBEDDED(bp)) {
1397 1399                  (void) sprintf(blkbuf,
1398 1400                      "EMBEDDED et=%u %llxL/%llxP B=%llu",
1399 1401                      (int)BPE_GET_ETYPE(bp),
1400 1402                      (u_longlong_t)BPE_GET_LSIZE(bp),
1401 1403                      (u_longlong_t)BPE_GET_PSIZE(bp),
1402 1404                      (u_longlong_t)bp->blk_birth);
1403 1405                  return;
1404 1406          }
1405 1407  
1406 1408          blkbuf[0] = '\0';
1407 1409          for (int i = 0; i < ndvas; i++)
1408 1410                  (void) snprintf(blkbuf + strlen(blkbuf),
1409 1411                      buflen - strlen(blkbuf), "%llu:%llx:%llx ",
1410 1412                      (u_longlong_t)DVA_GET_VDEV(&dva[i]),
1411 1413                      (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
1412 1414                      (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
1413 1415  
1414 1416          if (BP_IS_HOLE(bp)) {
1415 1417                  (void) snprintf(blkbuf + strlen(blkbuf),
1416 1418                      buflen - strlen(blkbuf),
1417 1419                      "%llxL B=%llu",
1418 1420                      (u_longlong_t)BP_GET_LSIZE(bp),
1419 1421                      (u_longlong_t)bp->blk_birth);
1420 1422          } else {
1421 1423                  (void) snprintf(blkbuf + strlen(blkbuf),
1422 1424                      buflen - strlen(blkbuf),
1423 1425                      "%llxL/%llxP F=%llu B=%llu/%llu",
1424 1426                      (u_longlong_t)BP_GET_LSIZE(bp),
1425 1427                      (u_longlong_t)BP_GET_PSIZE(bp),
1426 1428                      (u_longlong_t)BP_GET_FILL(bp),
1427 1429                      (u_longlong_t)bp->blk_birth,
1428 1430                      (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
1429 1431          }
1430 1432  }
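/*
 * Sample compact output for an ordinary (non-hole, non-embedded) block
 * pointer, with hypothetical values:
 *
 *	0:2400022000:20000 20000L/4e00P F=1 B=348061/348061
 *
 * i.e. each DVA as vdev:offset:asize (offset and asize in hex), then
 * lsize/psize, the fill count, and the logical/physical birth txgs.
 */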
1431 1433  
1432 1434  static void
1433 1435  print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
1434 1436      const dnode_phys_t *dnp)
1435 1437  {
1436 1438          char blkbuf[BP_SPRINTF_LEN];
1437 1439          int l;
1438 1440  
1439 1441          if (!BP_IS_EMBEDDED(bp)) {
1440 1442                  ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1441 1443                  ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1442 1444          }
1443 1445  
1444 1446          (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
1445 1447  
1446 1448          ASSERT(zb->zb_level >= 0);
1447 1449  
1448 1450          for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
1449 1451                  if (l == zb->zb_level) {
1450 1452                          (void) printf("L%llx", (u_longlong_t)zb->zb_level);
1451 1453                  } else {
1452 1454                          (void) printf(" ");
1453 1455                  }
1454 1456          }
1455 1457  
1456 1458          snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1457 1459          (void) printf("%s\n", blkbuf);
1458 1460  }
1459 1461  
1460 1462  static int
1461 1463  visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
1462 1464      blkptr_t *bp, const zbookmark_phys_t *zb)
1463 1465  {
1464 1466          int err = 0;
1465 1467  
1466 1468          if (bp->blk_birth == 0)
1467 1469                  return (0);
1468 1470  
1469 1471          print_indirect(bp, zb, dnp);
1470 1472  
1471 1473          if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
1472 1474                  arc_flags_t flags = ARC_FLAG_WAIT;
1473 1475                  int i;
1474 1476                  blkptr_t *cbp;
1475 1477                  int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
1476 1478                  arc_buf_t *buf;
1477 1479                  uint64_t fill = 0;
1478 1480  
1479 1481                  err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
1480 1482                      ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
1481 1483                  if (err)
1482 1484                          return (err);
1483 1485                  ASSERT(buf->b_data);
1484 1486  
1485 1487                  /* recursively visit blocks below this */
1486 1488                  cbp = buf->b_data;
1487 1489                  for (i = 0; i < epb; i++, cbp++) {
1488 1490                          zbookmark_phys_t czb;
1489 1491  
1490 1492                          SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
1491 1493                              zb->zb_level - 1,
1492 1494                              zb->zb_blkid * epb + i);
1493 1495                          err = visit_indirect(spa, dnp, cbp, &czb);
1494 1496                          if (err)
1495 1497                                  break;
1496 1498                          fill += BP_GET_FILL(cbp);
1497 1499                  }
1498 1500                  if (!err)
1499 1501                          ASSERT3U(fill, ==, BP_GET_FILL(bp));
1500 1502                  arc_buf_destroy(buf, &buf);
1501 1503          }
1502 1504  
1503 1505          return (err);
1504 1506  }
1505 1507  
1506 1508  /*ARGSUSED*/
1507 1509  static void
1508 1510  dump_indirect(dnode_t *dn)
1509 1511  {
1510 1512          dnode_phys_t *dnp = dn->dn_phys;
1511 1513          int j;
1512 1514          zbookmark_phys_t czb;
1513 1515  
1514 1516          (void) printf("Indirect blocks:\n");
1515 1517  
1516 1518          SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1517 1519              dn->dn_object, dnp->dn_nlevels - 1, 0);
1518 1520          for (j = 0; j < dnp->dn_nblkptr; j++) {
1519 1521                  czb.zb_blkid = j;
1520 1522                  (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1521 1523                      &dnp->dn_blkptr[j], &czb);
1522 1524          }
1523 1525  
1524 1526          (void) printf("\n");
1525 1527  }
1526 1528  
1527 1529  /*ARGSUSED*/
1528 1530  static void
1529 1531  dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1530 1532  {
1531 1533          dsl_dir_phys_t *dd = data;
1532 1534          time_t crtime;
1533 1535          char nice[32];
1534 1536  
1535 1537          /* make sure nicenum has enough space */
1536 1538          CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ);
1537 1539  
1538 1540          if (dd == NULL)
1539 1541                  return;
1540 1542  
1541 1543          ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1542 1544  
1543 1545          crtime = dd->dd_creation_time;
1544 1546          (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1545 1547          (void) printf("\t\thead_dataset_obj = %llu\n",
1546 1548              (u_longlong_t)dd->dd_head_dataset_obj);
1547 1549          (void) printf("\t\tparent_dir_obj = %llu\n",
1548 1550              (u_longlong_t)dd->dd_parent_obj);
1549 1551          (void) printf("\t\torigin_obj = %llu\n",
1550 1552              (u_longlong_t)dd->dd_origin_obj);
1551 1553          (void) printf("\t\tchild_dir_zapobj = %llu\n",
1552 1554              (u_longlong_t)dd->dd_child_dir_zapobj);
1553 1555          zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
1554 1556          (void) printf("\t\tused_bytes = %s\n", nice);
1555 1557          zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
1556 1558          (void) printf("\t\tcompressed_bytes = %s\n", nice);
1557 1559          zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
1558 1560          (void) printf("\t\tuncompressed_bytes = %s\n", nice);
1559 1561          zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
1560 1562          (void) printf("\t\tquota = %s\n", nice);
1561 1563          zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
1562 1564          (void) printf("\t\treserved = %s\n", nice);
1563 1565          (void) printf("\t\tprops_zapobj = %llu\n",
1564 1566              (u_longlong_t)dd->dd_props_zapobj);
1565 1567          (void) printf("\t\tdeleg_zapobj = %llu\n",
1566 1568              (u_longlong_t)dd->dd_deleg_zapobj);
1567 1569          (void) printf("\t\tflags = %llx\n",
1568 1570              (u_longlong_t)dd->dd_flags);
1569 1571  
1570 1572  #define DO(which) \
1571 1573          zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
1572 1574              sizeof (nice)); \
1573 1575          (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1574 1576          DO(HEAD);
1575 1577          DO(SNAP);
1576 1578          DO(CHILD);
1577 1579          DO(CHILD_RSRV);
1578 1580          DO(REFRSRV);
1579 1581  #undef DO
1580 1582          (void) printf("\t\tclones = %llu\n",
1581 1583              (u_longlong_t)dd->dd_clones);
1582 1584  }
1583 1585  
1584 1586  /*ARGSUSED*/
1585 1587  static void
1586 1588  dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1587 1589  {
1588 1590          dsl_dataset_phys_t *ds = data;
1589 1591          time_t crtime;
1590 1592          char used[32], compressed[32], uncompressed[32], unique[32];
1591 1593          char blkbuf[BP_SPRINTF_LEN];
1592 1594  
1593 1595          /* make sure nicenum has enough space */
1594 1596          CTASSERT(sizeof (used) >= NN_NUMBUF_SZ);
1595 1597          CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ);
1596 1598          CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ);
1597 1599          CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ);
1598 1600  
1599 1601          if (ds == NULL)
1600 1602                  return;
1601 1603  
1602 1604          ASSERT(size == sizeof (*ds));
1603 1605          crtime = ds->ds_creation_time;
1604 1606          zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
1605 1607          zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
1606 1608          zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
1607 1609              sizeof (uncompressed));
1608 1610          zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
1609 1611          snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
1610 1612  
1611 1613          (void) printf("\t\tdir_obj = %llu\n",
1612 1614              (u_longlong_t)ds->ds_dir_obj);
1613 1615          (void) printf("\t\tprev_snap_obj = %llu\n",
1614 1616              (u_longlong_t)ds->ds_prev_snap_obj);
1615 1617          (void) printf("\t\tprev_snap_txg = %llu\n",
1616 1618              (u_longlong_t)ds->ds_prev_snap_txg);
1617 1619          (void) printf("\t\tnext_snap_obj = %llu\n",
1618 1620              (u_longlong_t)ds->ds_next_snap_obj);
1619 1621          (void) printf("\t\tsnapnames_zapobj = %llu\n",
1620 1622              (u_longlong_t)ds->ds_snapnames_zapobj);
1621 1623          (void) printf("\t\tnum_children = %llu\n",
1622 1624              (u_longlong_t)ds->ds_num_children);
1623 1625          (void) printf("\t\tuserrefs_obj = %llu\n",
1624 1626              (u_longlong_t)ds->ds_userrefs_obj);
1625 1627          (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1626 1628          (void) printf("\t\tcreation_txg = %llu\n",
1627 1629              (u_longlong_t)ds->ds_creation_txg);
1628 1630          (void) printf("\t\tdeadlist_obj = %llu\n",
1629 1631              (u_longlong_t)ds->ds_deadlist_obj);
1630 1632          (void) printf("\t\tused_bytes = %s\n", used);
1631 1633          (void) printf("\t\tcompressed_bytes = %s\n", compressed);
1632 1634          (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1633 1635          (void) printf("\t\tunique = %s\n", unique);
1634 1636          (void) printf("\t\tfsid_guid = %llu\n",
1635 1637              (u_longlong_t)ds->ds_fsid_guid);
1636 1638          (void) printf("\t\tguid = %llu\n",
1637 1639              (u_longlong_t)ds->ds_guid);
1638 1640          (void) printf("\t\tflags = %llx\n",
1639 1641              (u_longlong_t)ds->ds_flags);
1640 1642          (void) printf("\t\tnext_clones_obj = %llu\n",
1641 1643              (u_longlong_t)ds->ds_next_clones_obj);
1642 1644          (void) printf("\t\tprops_obj = %llu\n",
1643 1645              (u_longlong_t)ds->ds_props_obj);
1644 1646          (void) printf("\t\tbp = %s\n", blkbuf);
1645 1647  }
1646 1648  
1647 1649  /* ARGSUSED */
1648 1650  static int
1649 1651  dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1650 1652  {
1651 1653          char blkbuf[BP_SPRINTF_LEN];
1652 1654  
1653 1655          if (bp->blk_birth != 0) {
1654 1656                  snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
1655 1657                  (void) printf("\t%s\n", blkbuf);
1656 1658          }
1657 1659          return (0);
1658 1660  }
1659 1661  
1660 1662  static void
1661 1663  dump_bptree(objset_t *os, uint64_t obj, const char *name)
1662 1664  {
1663 1665          char bytes[32];
1664 1666          bptree_phys_t *bt;
1665 1667          dmu_buf_t *db;
1666 1668  
1667 1669          /* make sure nicenum has enough space */
1668 1670          CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1669 1671  
1670 1672          if (dump_opt['d'] < 3)
1671 1673                  return;
1672 1674  
1673 1675          VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1674 1676          bt = db->db_data;
1675 1677          zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
1676 1678          (void) printf("\n    %s: %llu datasets, %s\n",
1677 1679              name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1678 1680          dmu_buf_rele(db, FTAG);
1679 1681  
1680 1682          if (dump_opt['d'] < 5)
1681 1683                  return;
1682 1684  
1683 1685          (void) printf("\n");
1684 1686  
1685 1687          (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1686 1688  }
1687 1689  
1688 1690  /* ARGSUSED */
1689 1691  static int
1690 1692  dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1691 1693  {
1692 1694          char blkbuf[BP_SPRINTF_LEN];
1693 1695  
1694 1696          ASSERT(bp->blk_birth != 0);
1695 1697          snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1696 1698          (void) printf("\t%s\n", blkbuf);
1697 1699          return (0);
1698 1700  }
1699 1701  
1700 1702  static void
1701 1703  dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
1702 1704  {
1703 1705          char bytes[32];
1704 1706          char comp[32];
1705 1707          char uncomp[32];
1706 1708  
1707 1709          /* make sure nicenum has enough space */
1708 1710          CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1709 1711          CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
1710 1712          CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
1711 1713  
1712 1714          if (dump_opt['d'] < 3)
1713 1715                  return;
1714 1716  
1715 1717          zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes));
1716 1718          if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1717 1719                  zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
1718 1720                  zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
1719 1721                  (void) printf("    %*s: object %llu, %llu local blkptrs, "
1720 1722                      "%llu subobjs in object %llu, %s (%s/%s comp)\n",
1721 1723                      indent * 8, name,
1722 1724                      (u_longlong_t)bpo->bpo_object,
1723 1725                      (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1724 1726                      (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1725 1727                      (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
1726 1728                      bytes, comp, uncomp);
1727 1729  
1728 1730                  for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1729 1731                          uint64_t subobj;
1730 1732                          bpobj_t subbpo;
1731 1733                          int error;
1732 1734                          VERIFY0(dmu_read(bpo->bpo_os,
1733 1735                              bpo->bpo_phys->bpo_subobjs,
1734 1736                              i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1735 1737                          error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1736 1738                          if (error != 0) {
1737 1739                                  (void) printf("ERROR %u while trying to open "
1738 1740                                      "subobj id %llu\n",
1739 1741                                      error, (u_longlong_t)subobj);
1740 1742                                  continue;
1741 1743                          }
1742 1744                          dump_full_bpobj(&subbpo, "subobj", indent + 1);
1743 1745                          bpobj_close(&subbpo);
1744 1746                  }
1745 1747          } else {
1746 1748                  (void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
1747 1749                      indent * 8, name,
1748 1750                      (u_longlong_t)bpo->bpo_object,
1749 1751                      (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1750 1752                      bytes);
1751 1753          }
1752 1754  
1753 1755          if (dump_opt['d'] < 5)
1754 1756                  return;
1755 1757  
1756 1758  
1757 1759          if (indent == 0) {
1758 1760                  (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1759 1761                  (void) printf("\n");
1760 1762          }
1761 1763  }
1762 1764  
1763 1765  static void
1764 1766  bpobj_count_refd(bpobj_t *bpo)
1765 1767  {
1766 1768          mos_obj_refd(bpo->bpo_object);
1767 1769  
1768 1770          if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1769 1771                  mos_obj_refd(bpo->bpo_phys->bpo_subobjs);
1770 1772                  for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1771 1773                          uint64_t subobj;
1772 1774                          bpobj_t subbpo;
1773 1775                          int error;
1774 1776                          VERIFY0(dmu_read(bpo->bpo_os,
1775 1777                              bpo->bpo_phys->bpo_subobjs,
1776 1778                              i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1777 1779                          error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1778 1780                          if (error != 0) {
1779 1781                                  (void) printf("ERROR %u while trying to open "
1780 1782                                      "subobj id %llu\n",
1781 1783                                      error, (u_longlong_t)subobj);
1782 1784                                  continue;
1783 1785                          }
1784 1786                          bpobj_count_refd(&subbpo);
1785 1787                          bpobj_close(&subbpo);
1786 1788                  }
1787 1789          }
1788 1790  }
1789 1791  
1790 1792  static void
1791 1793  dump_deadlist(dsl_deadlist_t *dl)
1792 1794  {
1793 1795          dsl_deadlist_entry_t *dle;
1794 1796          uint64_t unused;
1795 1797          char bytes[32];
1796 1798          char comp[32];
1797 1799          char uncomp[32];
1798 1800          uint64_t empty_bpobj =
1799 1801              dmu_objset_spa(dl->dl_os)->spa_dsl_pool->dp_empty_bpobj;
1800 1802  
1801 1803          /* force the tree to be loaded */
1802 1804          dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
1803 1805  
1804 1806          if (dl->dl_oldfmt) {
1805 1807                  if (dl->dl_bpobj.bpo_object != empty_bpobj)
1806 1808                          bpobj_count_refd(&dl->dl_bpobj);
1807 1809          } else {
1808 1810                  mos_obj_refd(dl->dl_object);
1809 1811                  for (dle = avl_first(&dl->dl_tree); dle;
1810 1812                      dle = AVL_NEXT(&dl->dl_tree, dle)) {
1811 1813                          if (dle->dle_bpobj.bpo_object != empty_bpobj)
1812 1814                                  bpobj_count_refd(&dle->dle_bpobj);
1813 1815                  }
1814 1816          }
1815 1817  
1816 1818          /* make sure nicenum has enough space */
1817 1819          CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1818 1820          CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
1819 1821          CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
1820 1822  
1821 1823          if (dump_opt['d'] < 3)
1822 1824                  return;
1823 1825  
1824 1826          if (dl->dl_oldfmt) {
1825 1827                  dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
1826 1828                  return;
1827 1829          }
1828 1830  
1829 1831          zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes));
1830 1832          zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
1831 1833          zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
1832 1834          (void) printf("\n    Deadlist: %s (%s/%s comp)\n",
1833 1835              bytes, comp, uncomp);
1834 1836  
1835 1837          if (dump_opt['d'] < 4)
1836 1838                  return;
1837 1839  
1838 1840          (void) printf("\n");
1839 1841  
1840 1842          for (dle = avl_first(&dl->dl_tree); dle;
1841 1843              dle = AVL_NEXT(&dl->dl_tree, dle)) {
1842 1844                  if (dump_opt['d'] >= 5) {
1843 1845                          char buf[128];
1844 1846                          (void) snprintf(buf, sizeof (buf),
1845 1847                              "mintxg %llu -> obj %llu",
1846 1848                              (longlong_t)dle->dle_mintxg,
1847 1849                              (longlong_t)dle->dle_bpobj.bpo_object);
1848 1850  
1849 1851                          dump_full_bpobj(&dle->dle_bpobj, buf, 0);
1850 1852                  } else {
1851 1853                          (void) printf("mintxg %llu -> obj %llu\n",
1852 1854                              (longlong_t)dle->dle_mintxg,
1853 1855                              (longlong_t)dle->dle_bpobj.bpo_object);
1854 1856                  }
1855 1857          }
1856 1858  }
1857 1859  
1858 1860  static avl_tree_t idx_tree;
1859 1861  static avl_tree_t domain_tree;
1860 1862  static boolean_t fuid_table_loaded;
1861 1863  static objset_t *sa_os = NULL;
1862 1864  static sa_attr_type_t *sa_attr_table = NULL;
1863 1865  
1864 1866  static int
1865 1867  open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
1866 1868  {
1867 1869          int err;
1868 1870          uint64_t sa_attrs = 0;
1869 1871          uint64_t version = 0;
1870 1872  
1871 1873          VERIFY3P(sa_os, ==, NULL);
1872 1874          err = dmu_objset_own(path, type, B_TRUE, tag, osp);
1873 1875          if (err != 0) {
1874 1876                  (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
1875 1877                      strerror(err));
1876 1878                  return (err);
1877 1879          }
1878 1880  
1879 1881          if (dmu_objset_type(*osp) == DMU_OST_ZFS) {
1880 1882                  (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1881 1883                      8, 1, &version);
1882 1884                  if (version >= ZPL_VERSION_SA) {
1883 1885                          (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1884 1886                              8, 1, &sa_attrs);
1885 1887                  }
1886 1888                  err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
1887 1889                      &sa_attr_table);
1888 1890                  if (err != 0) {
1889 1891                          (void) fprintf(stderr, "sa_setup failed: %s\n",
1890 1892                              strerror(err));
1891 1893                          dmu_objset_disown(*osp, tag);
1892 1894                          *osp = NULL;
1893 1895                  }
1894 1896          }
1895 1897          sa_os = *osp;
1896 1898  
1897 1899          return (err);
1898 1900  }
1899 1901  
1900 1902  static void
1901 1903  close_objset(objset_t *os, void *tag)
1902 1904  {
1903 1905          VERIFY3P(os, ==, sa_os);
1904 1906          if (os->os_sa != NULL)
1905 1907                  sa_tear_down(os);
1906 1908          dmu_objset_disown(os, tag);
1907 1909          sa_attr_table = NULL;
1908 1910          sa_os = NULL;
1909 1911  }
1910 1912  
1911 1913  static void
1912 1914  fuid_table_destroy()
1913 1915  {
1914 1916          if (fuid_table_loaded) {
1915 1917                  zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1916 1918                  fuid_table_loaded = B_FALSE;
1917 1919          }
1918 1920  }
1919 1921  
1920 1922  /*
1921 1923   * print uid or gid information.
1922 1924   * For normal POSIX id just the id is printed in decimal format.
1923 1925   * For CIFS files with FUID the fuid is printed in hex followed by
1924 1926   * the domain-rid string.
1925 1927   */
1926 1928  static void
1927 1929  print_idstr(uint64_t id, const char *id_type)
1928 1930  {
1929 1931          if (FUID_INDEX(id)) {
1930 1932                  char *domain;
1931 1933  
1932 1934                  domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1933 1935                  (void) printf("\t%s     %llx [%s-%d]\n", id_type,
1934 1936                      (u_longlong_t)id, domain, (int)FUID_RID(id));
1935 1937          } else {
1936 1938                  (void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1937 1939          }
1938 1940  
1939 1941  }
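/*
 * Example output (hypothetical ids): a plain POSIX id prints in decimal,
 * e.g. "uid     1000", while an id with a non-zero FUID index prints the
 * fuid in hex followed by its domain SID and RID, e.g.
 * "uid     300000451 [S-1-5-21-...-1105]".
 */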
1940 1942  
1941 1943  static void
1942 1944  dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1943 1945  {
1944 1946          uint32_t uid_idx, gid_idx;
1945 1947  
1946 1948          uid_idx = FUID_INDEX(uid);
1947 1949          gid_idx = FUID_INDEX(gid);
1948 1950  
1949 1951          /* Load domain table, if not already loaded */
1950 1952          if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1951 1953                  uint64_t fuid_obj;
1952 1954  
1953 1955                  /* first find the fuid object.  It lives in the master node */
1954 1956                  VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1955 1957                      8, 1, &fuid_obj) == 0);
1956 1958                  zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1957 1959                  (void) zfs_fuid_table_load(os, fuid_obj,
1958 1960                      &idx_tree, &domain_tree);
1959 1961                  fuid_table_loaded = B_TRUE;
1960 1962          }
1961 1963  
1962 1964          print_idstr(uid, "uid");
1963 1965          print_idstr(gid, "gid");
1964 1966  }
1965 1967  
1966 1968  /*ARGSUSED*/
1967 1969  static void
1968 1970  dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1969 1971  {
1970 1972          char path[MAXPATHLEN * 2];      /* allow for xattr and failure prefix */
1971 1973          sa_handle_t *hdl;
1972 1974          uint64_t xattr, rdev, gen;
1973 1975          uint64_t uid, gid, mode, fsize, parent, links;
1974 1976          uint64_t pflags;
1975 1977          uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1976 1978          time_t z_crtime, z_atime, z_mtime, z_ctime;
1977 1979          sa_bulk_attr_t bulk[12];
1978 1980          int idx = 0;
1979 1981          int error;
1980 1982  
1981 1983          VERIFY3P(os, ==, sa_os);
1982 1984          if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1983 1985                  (void) printf("Failed to get handle for SA znode\n");
1984 1986                  return;
1985 1987          }
1986 1988  
1987 1989          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1988 1990          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1989 1991          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1990 1992              &links, 8);
1991 1993          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1992 1994          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1993 1995              &mode, 8);
1994 1996          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1995 1997              NULL, &parent, 8);
1996 1998          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1997 1999              &fsize, 8);
1998 2000          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1999 2001              acctm, 16);
2000 2002          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
2001 2003              modtm, 16);
2002 2004          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
2003 2005              crtm, 16);
2004 2006          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
2005 2007              chgtm, 16);
2006 2008          SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
2007 2009              &pflags, 8);
2008 2010  
2009 2011          if (sa_bulk_lookup(hdl, bulk, idx)) {
2010 2012                  (void) sa_handle_destroy(hdl);
2011 2013                  return;
2012 2014          }
2013 2015  
2014 2016          z_crtime = (time_t)crtm[0];
2015 2017          z_atime = (time_t)acctm[0];
2016 2018          z_mtime = (time_t)modtm[0];
2017 2019          z_ctime = (time_t)chgtm[0];
2018 2020  
2019 2021          if (dump_opt['d'] > 4) {
2020 2022                  error = zfs_obj_to_path(os, object, path, sizeof (path));
2021 2023                  if (error == ESTALE) {
2022 2024                          (void) snprintf(path, sizeof (path), "on delete queue");
2023 2025                  } else if (error != 0) {
2024 2026                          leaked_objects++;
2025 2027                          (void) snprintf(path, sizeof (path),
2026 2028                              "path not found, possibly leaked");
2027 2029                  }
2028 2030                  (void) printf("\tpath   %s\n", path);
2029 2031          }
2030 2032          dump_uidgid(os, uid, gid);
2031 2033          (void) printf("\tatime  %s", ctime(&z_atime));
2032 2034          (void) printf("\tmtime  %s", ctime(&z_mtime));
2033 2035          (void) printf("\tctime  %s", ctime(&z_ctime));
2034 2036          (void) printf("\tcrtime %s", ctime(&z_crtime));
2035 2037          (void) printf("\tgen    %llu\n", (u_longlong_t)gen);
2036 2038          (void) printf("\tmode   %llo\n", (u_longlong_t)mode);
2037 2039          (void) printf("\tsize   %llu\n", (u_longlong_t)fsize);
2038 2040          (void) printf("\tparent %llu\n", (u_longlong_t)parent);
2039 2041          (void) printf("\tlinks  %llu\n", (u_longlong_t)links);
2040 2042          (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
2041 2043          if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
2042 2044              sizeof (uint64_t)) == 0)
2043 2045                  (void) printf("\txattr  %llu\n", (u_longlong_t)xattr);
2044 2046          if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
2045 2047              sizeof (uint64_t)) == 0)
2046 2048                  (void) printf("\trdev   0x%016llx\n", (u_longlong_t)rdev);
2047 2049          sa_handle_destroy(hdl);
2048 2050  }
2049 2051  
2050 2052  /*ARGSUSED*/
2051 2053  static void
2052 2054  dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
2053 2055  {
2054 2056  }
2055 2057  
2056 2058  /*ARGSUSED*/
2057 2059  static void
2058 2060  dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
2059 2061  {
2060 2062  }
2061 2063  
2062 2064  static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
2063 2065          dump_none,              /* unallocated                  */
2064 2066          dump_zap,               /* object directory             */
2065 2067          dump_uint64,            /* object array                 */
2066 2068          dump_none,              /* packed nvlist                */
2067 2069          dump_packed_nvlist,     /* packed nvlist size           */
2068 2070          dump_none,              /* bpobj                        */
2069 2071          dump_bpobj,             /* bpobj header                 */
2070 2072          dump_none,              /* SPA space map header         */
2071 2073          dump_none,              /* SPA space map                */
2072 2074          dump_none,              /* ZIL intent log               */
2073 2075          dump_dnode,             /* DMU dnode                    */
2074 2076          dump_dmu_objset,        /* DMU objset                   */
2075 2077          dump_dsl_dir,           /* DSL directory                */
2076 2078          dump_zap,               /* DSL directory child map      */
2077 2079          dump_zap,               /* DSL dataset snap map         */
2078 2080          dump_zap,               /* DSL props                    */
2079 2081          dump_dsl_dataset,       /* DSL dataset                  */
2080 2082          dump_znode,             /* ZFS znode                    */
2081 2083          dump_acl,               /* ZFS V0 ACL                   */
2082 2084          dump_uint8,             /* ZFS plain file               */
2083 2085          dump_zpldir,            /* ZFS directory                */
2084 2086          dump_zap,               /* ZFS master node              */
2085 2087          dump_zap,               /* ZFS delete queue             */
2086 2088          dump_uint8,             /* zvol object                  */
2087 2089          dump_zap,               /* zvol prop                    */
2088 2090          dump_uint8,             /* other uint8[]                */
2089 2091          dump_uint64,            /* other uint64[]               */
2090 2092          dump_zap,               /* other ZAP                    */
2091 2093          dump_zap,               /* persistent error log         */
2092 2094          dump_uint8,             /* SPA history                  */
2093 2095          dump_history_offsets,   /* SPA history offsets          */
2094 2096          dump_zap,               /* Pool properties              */
2095 2097          dump_zap,               /* DSL permissions              */
2096 2098          dump_acl,               /* ZFS ACL                      */
2097 2099          dump_uint8,             /* ZFS SYSACL                   */
2098 2100          dump_none,              /* FUID nvlist                  */
2099 2101          dump_packed_nvlist,     /* FUID nvlist size             */
2100 2102          dump_zap,               /* DSL dataset next clones      */
2101 2103          dump_zap,               /* DSL scrub queue              */
2102 2104          dump_zap,               /* ZFS user/group used          */
2103 2105          dump_zap,               /* ZFS user/group quota         */
2104 2106          dump_zap,               /* snapshot refcount tags       */
2105 2107          dump_ddt_zap,           /* DDT ZAP object               */
2106 2108          dump_zap,               /* DDT statistics               */
2107 2109          dump_znode,             /* SA object                    */
2108 2110          dump_zap,               /* SA Master Node               */
2109 2111          dump_sa_attrs,          /* SA attribute registration    */
2110 2112          dump_sa_layouts,        /* SA attribute layouts         */
2111 2113          dump_zap,               /* DSL scrub translations       */
2112 2114          dump_none,              /* fake dedup BP                */
2113 2115          dump_zap,               /* deadlist                     */
2114 2116          dump_none,              /* deadlist hdr                 */
2115 2117          dump_zap,               /* dsl clones                   */
2116 2118          dump_bpobj_subobjs,     /* bpobj subobjs                */
2117 2119          dump_unknown,           /* Unknown type, must be last   */
2118 2120  };
2119 2121  
2120 2122  static void
2121 2123  dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
2122 2124      uint64_t *dnode_slots_used)
2123 2125  {
2124 2126          dmu_buf_t *db = NULL;
2125 2127          dmu_object_info_t doi;
2126 2128          dnode_t *dn;
2127 2129          void *bonus = NULL;
2128 2130          size_t bsize = 0;
2129 2131          char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32];
2130 2132          char bonus_size[32];
2131 2133          char aux[50];
2132 2134          int error;
2133 2135  
2134 2136          /* make sure nicenum has enough space */
2135 2137          CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ);
2136 2138          CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ);
2137 2139          CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
2138 2140          CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
2139 2141          CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
2140 2142  
2141 2143          if (*print_header) {
2142 2144                  (void) printf("\n%10s  %3s  %5s  %5s  %5s  %6s  %5s  %6s  %s\n",
2143 2145                      "Object", "lvl", "iblk", "dblk", "dsize", "dnsize",
2144 2146                      "lsize", "%full", "type");
2145 2147                  *print_header = 0;
2146 2148          }
2147 2149  
2148 2150          if (object == 0) {
2149 2151                  dn = DMU_META_DNODE(os);
2150 2152          } else {
2151 2153                  error = dmu_bonus_hold(os, object, FTAG, &db);
2152 2154                  if (error)
2153 2155                          fatal("dmu_bonus_hold(%llu) failed, errno %u",
2154 2156                              object, error);
2155 2157                  bonus = db->db_data;
2156 2158                  bsize = db->db_size;
2157 2159                  dn = DB_DNODE((dmu_buf_impl_t *)db);
2158 2160          }
2159 2161          dmu_object_info_from_dnode(dn, &doi);
2160 2162  
2161 2163          if (dnode_slots_used != NULL)
2162 2164                  *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE;
2163 2165  
2164 2166          zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
2165 2167          zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
2166 2168          zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
2167 2169          zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
2168 2170          zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
2169 2171          zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize));
2170 2172          (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
2171 2173              doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
2172 2174              doi.doi_max_offset);
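	/*
	 * Note: %full is the fill count times the data block size over
	 * the maximum offset; for the meta-dnode (object 0) the fill
	 * count is a number of dnodes, so it is scaled by
	 * DNODES_PER_BLOCK (32 with legacy 512-byte dnodes) to convert
	 * it back to blocks.
	 */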
2173 2175  
2174 2176          aux[0] = '\0';
2175 2177  
2176 2178          if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
2177 2179                  (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
2178 2180                      ZDB_CHECKSUM_NAME(doi.doi_checksum));
2179 2181          }
2180 2182  
2181 2183          if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
2182 2184                  (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
2183 2185                      ZDB_COMPRESS_NAME(doi.doi_compress));
2184 2186          }
2185 2187  
2186 2188          (void) printf("%10" PRIu64
2187 2189              "  %3u  %5s  %5s  %5s  %5s  %5s  %6s  %s%s\n",
2188 2190              object, doi.doi_indirection, iblk, dblk,
2189 2191              asize, dnsize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
2190 2192  
2191 2193          if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
2192 2194                  (void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %5s  %6s  %s\n",
2193 2195                      "", "", "", "", "", "", bonus_size, "bonus",
2194 2196                      ZDB_OT_NAME(doi.doi_bonus_type));
2195 2197          }
2196 2198  
2197 2199          if (verbosity >= 4) {
2198 2200                  (void) printf("\tdnode flags: %s%s%s\n",
2199 2201                      (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
2200 2202                      "USED_BYTES " : "",
2201 2203                      (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
2202 2204                      "USERUSED_ACCOUNTED " : "",
2203 2205                      (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
2204 2206                      "SPILL_BLKPTR" : "");
2205 2207                  (void) printf("\tdnode maxblkid: %llu\n",
2206 2208                      (longlong_t)dn->dn_phys->dn_maxblkid);
2207 2209  
2208 2210                  object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
2209 2211                      bonus, bsize);
2210 2212                  object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
2211 2213                  *print_header = 1;
2212 2214          }
2213 2215  
2214 2216          if (verbosity >= 5)
2215 2217                  dump_indirect(dn);
2216 2218  
2217 2219          if (verbosity >= 5) {
2218 2220                  /*
2219 2221                   * Report the list of segments that comprise the object.
2220 2222                   */
2221 2223                  uint64_t start = 0;
2222 2224                  uint64_t end;
2223 2225                  uint64_t blkfill = 1;
2224 2226                  int minlvl = 1;
2225 2227  
2226 2228                  if (dn->dn_type == DMU_OT_DNODE) {
2227 2229                          minlvl = 0;
2228 2230                          blkfill = DNODES_PER_BLOCK;
2229 2231                  }
2230 2232  
2231 2233                  for (;;) {
2232 2234                          char segsize[32];
2233 2235                          /* make sure nicenum has enough space */
2234 2236                          CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ);
2235 2237                          error = dnode_next_offset(dn,
2236 2238                              0, &start, minlvl, blkfill, 0);
2237 2239                          if (error)
2238 2240                                  break;
2239 2241                          end = start;
2240 2242                          error = dnode_next_offset(dn,
2241 2243                              DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
2242 2244                          zdb_nicenum(end - start, segsize, sizeof (segsize));
2243 2245                          (void) printf("\t\tsegment [%016llx, %016llx)"
2244 2246                              " size %5s\n", (u_longlong_t)start,
2245 2247                              (u_longlong_t)end, segsize);
2246 2248                          if (error)
2247 2249                                  break;
2248 2250                          start = end;
2249 2251                  }
2250 2252          }
2251 2253  
2252 2254          if (db != NULL)
2253 2255                  dmu_buf_rele(db, FTAG);
2254 2256  }
2255 2257  
2256 2258  static void
2257 2259  count_dir_mos_objects(dsl_dir_t *dd)
2258 2260  {
2259 2261          mos_obj_refd(dd->dd_object);
2260 2262          mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj);
2261 2263          mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj);
2262 2264          mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj);
2263 2265          mos_obj_refd(dsl_dir_phys(dd)->dd_clones);
2264 2266  }
2265 2267  
2266 2268  static void
2267 2269  count_ds_mos_objects(dsl_dataset_t *ds)
2268 2270  {
2269 2271          mos_obj_refd(ds->ds_object);
2270 2272          mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj);
2271 2273          mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj);
2272 2274          mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj);
2273 2275          mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
2274 2276  
2275 2277          if (!dsl_dataset_is_snapshot(ds)) {
2276 2278                  count_dir_mos_objects(ds->ds_dir);
2277 2279          }
2278 2280  }
2279 2281  
2280 2282  static const char *objset_types[DMU_OST_NUMTYPES] = {
2281 2283          "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
2282 2284  
2283 2285  static void
2284 2286  dump_dir(objset_t *os)
2285 2287  {
2286 2288          dmu_objset_stats_t dds;
2287 2289          uint64_t object, object_count;
2288 2290          uint64_t refdbytes, usedobjs, scratch;
2289 2291          char numbuf[32];
2290 2292          char blkbuf[BP_SPRINTF_LEN + 20];
2291 2293          char osname[ZFS_MAX_DATASET_NAME_LEN];
2292 2294          const char *type = "UNKNOWN";
2293 2295          int verbosity = dump_opt['d'];
2294 2296          int print_header = 1;
2295 2297          unsigned i;
2296 2298          int error;
2297 2299          uint64_t total_slots_used = 0;
2298 2300          uint64_t max_slot_used = 0;
2299 2301          uint64_t dnode_slots;
2300 2302  
2301 2303          /* make sure nicenum has enough space */
2302 2304          CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
2303 2305  
2304 2306          dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
2305 2307          dmu_objset_fast_stat(os, &dds);
2306 2308          dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
2307 2309  
2308 2310          if (dds.dds_type < DMU_OST_NUMTYPES)
2309 2311                  type = objset_types[dds.dds_type];
2310 2312  
2311 2313          if (dds.dds_type == DMU_OST_META) {
2312 2314                  dds.dds_creation_txg = TXG_INITIAL;
2313 2315                  usedobjs = BP_GET_FILL(os->os_rootbp);
2314 2316                  refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
2315 2317                      dd_used_bytes;
2316 2318          } else {
2317 2319                  dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
2318 2320          }
2319 2321  
2320 2322          ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
2321 2323  
2322 2324          zdb_nicenum(refdbytes, numbuf, sizeof (numbuf));
2323 2325  
2324 2326          if (verbosity >= 4) {
2325 2327                  (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
2326 2328                  (void) snprintf_blkptr(blkbuf + strlen(blkbuf),
2327 2329                      sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
2328 2330          } else {
2329 2331                  blkbuf[0] = '\0';
2330 2332          }
2331 2333  
2332 2334          dmu_objset_name(os, osname);
2333 2335  
2334 2336          (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
2335 2337              "%s, %llu objects%s%s\n",
2336 2338              osname, type, (u_longlong_t)dmu_objset_id(os),
2337 2339              (u_longlong_t)dds.dds_creation_txg,
2338 2340              numbuf, (u_longlong_t)usedobjs, blkbuf,
2339 2341              (dds.dds_inconsistent) ? " (inconsistent)" : "");
2340 2342  
2341 2343          if (zopt_objects != 0) {
2342 2344                  for (i = 0; i < zopt_objects; i++)
2343 2345                          dump_object(os, zopt_object[i], verbosity,
2344 2346                              &print_header, NULL);
2345 2347                  (void) printf("\n");
2346 2348                  return;
2347 2349          }
2348 2350  
2349 2351          if (dump_opt['i'] != 0 || verbosity >= 2)
2350 2352                  dump_intent_log(dmu_objset_zil(os));
2351 2353  
2352 2354          if (dmu_objset_ds(os) != NULL) {
2353 2355                  dsl_dataset_t *ds = dmu_objset_ds(os);
2354 2356                  dump_deadlist(&ds->ds_deadlist);
2355 2357  
2356 2358                  if (dsl_dataset_remap_deadlist_exists(ds)) {
2357 2359                          (void) printf("ds_remap_deadlist:\n");
2358 2360                          dump_deadlist(&ds->ds_remap_deadlist);
2359 2361                  }
2360 2362                  count_ds_mos_objects(ds);
2361 2363          }
2362 2364  
2363 2365          if (verbosity < 2)
2364 2366                  return;
2365 2367  
2366 2368          if (BP_IS_HOLE(os->os_rootbp))
2367 2369                  return;
2368 2370  
2369 2371          dump_object(os, 0, verbosity, &print_header, NULL);
2370 2372          object_count = 0;
2371 2373          if (DMU_USERUSED_DNODE(os) != NULL &&
2372 2374              DMU_USERUSED_DNODE(os)->dn_type != 0) {
2373 2375                  dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header,
2374 2376                      NULL);
2375 2377                  dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header,
2376 2378                      NULL);
2377 2379          }
2378 2380  
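        /*
         * Walk every object in the objset, accumulating per-dnode slot
         * usage for the summary printed below.  A dnode occupies one or
         * more 512-byte slots; e.g. a 1K dnode consumes two slots.
         */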
2379 2381          object = 0;
2380 2382          while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
2381 2383                  dump_object(os, object, verbosity, &print_header, &dnode_slots);
2382 2384                  object_count++;
2383 2385                  total_slots_used += dnode_slots;
2384 2386                  max_slot_used = object + dnode_slots - 1;
2385 2387          }
2386 2388  
2387 2389          ASSERT3U(object_count, ==, usedobjs);
2388 2390  
2389 2391          (void) printf("\n");
2390 2392  
2391 2393          (void) printf("    Dnode slots:\n");
2392 2394          (void) printf("\tTotal used:    %10llu\n",
2393 2395              (u_longlong_t)total_slots_used);
2394 2396          (void) printf("\tMax used:      %10llu\n",
2395 2397              (u_longlong_t)max_slot_used);
2396 2398          (void) printf("\tPercent empty: %10lf\n",
2397 2399              (double)(max_slot_used - total_slots_used)*100 /
2398 2400              (double)max_slot_used);
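        /*
         * Illustrative arithmetic for the figure above (assumed values):
         * total_slots_used = 800 and max_slot_used = 1000 gives
         * (1000 - 800) * 100 / 1000 = 20.0, i.e. roughly 20% of the slot
         * range up to the highest used slot is unoccupied.
         */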
2399 2401  
2400 2402          (void) printf("\n");
2401 2403  
2402 2404          if (error != ESRCH) {
2403 2405                  (void) fprintf(stderr, "dmu_object_next() = %d\n", error);
2404 2406                  abort();
2405 2407          }
2406 2408          if (leaked_objects != 0) {
2407 2409                  (void) printf("%d potentially leaked objects detected\n",
2408 2410                      leaked_objects);
2409 2411                  leaked_objects = 0;
2410 2412          }
2411 2413  }
2412 2414  
2413 2415  static void
2414 2416  dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
2415 2417  {
2416 2418          time_t timestamp = ub->ub_timestamp;
2417 2419  
2418 2420          (void) printf("%s", header ? header : "");
2419 2421          (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
2420 2422          (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
2421 2423          (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
2422 2424          (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
2423 2425          (void) printf("\ttimestamp = %llu UTC = %s",
2424 2426              (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
2425 2427  
2426 2428          (void) printf("\tmmp_magic = %016llx\n",
2427 2429              (u_longlong_t)ub->ub_mmp_magic);
2428 2430          if (ub->ub_mmp_magic == MMP_MAGIC)
2429 2431                  (void) printf("\tmmp_delay = %0llu\n",
2430 2432                      (u_longlong_t)ub->ub_mmp_delay);
2431 2433  
2432 2434          if (dump_opt['u'] >= 3) {
2433 2435                  char blkbuf[BP_SPRINTF_LEN];
2434 2436                  snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
2435 2437                  (void) printf("\trootbp = %s\n", blkbuf);
2436 2438          }
2437 2439          (void) printf("\tcheckpoint_txg = %llu\n",
2438 2440              (u_longlong_t)ub->ub_checkpoint_txg);
2439 2441          (void) printf("%s", footer ? footer : "");
2440 2442  }
2441 2443  
2442 2444  static void
2443 2445  dump_config(spa_t *spa)
2444 2446  {
2445 2447          dmu_buf_t *db;
2446 2448          size_t nvsize = 0;
2447 2449          int error = 0;
2448 2450  
2449 2451  
2450 2452          error = dmu_bonus_hold(spa->spa_meta_objset,
2451 2453              spa->spa_config_object, FTAG, &db);
2452 2454  
2453 2455          if (error == 0) {
2454 2456                  nvsize = *(uint64_t *)db->db_data;
2455 2457                  dmu_buf_rele(db, FTAG);
2456 2458  
2457 2459                  (void) printf("\nMOS Configuration:\n");
2458 2460                  dump_packed_nvlist(spa->spa_meta_objset,
2459 2461                      spa->spa_config_object, (void *)&nvsize, 1);
2460 2462          } else {
2461 2463                  (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
2462 2464                      (u_longlong_t)spa->spa_config_object, error);
2463 2465          }
2464 2466  }
2465 2467  
2466 2468  static void
2467 2469  dump_cachefile(const char *cachefile)
2468 2470  {
2469 2471          int fd;
2470 2472          struct stat64 statbuf;
2471 2473          char *buf;
2472 2474          nvlist_t *config;
2473 2475  
2474 2476          if ((fd = open64(cachefile, O_RDONLY)) < 0) {
2475 2477                  (void) printf("cannot open '%s': %s\n", cachefile,
2476 2478                      strerror(errno));
2477 2479                  exit(1);
2478 2480          }
2479 2481  
2480 2482          if (fstat64(fd, &statbuf) != 0) {
2481 2483                  (void) printf("failed to stat '%s': %s\n", cachefile,
2482 2484                      strerror(errno));
2483 2485                  exit(1);
2484 2486          }
2485 2487  
2486 2488          if ((buf = malloc(statbuf.st_size)) == NULL) {
2487 2489                  (void) fprintf(stderr, "failed to allocate %llu bytes\n",
2488 2490                      (u_longlong_t)statbuf.st_size);
2489 2491                  exit(1);
2490 2492          }
2491 2493  
2492 2494          if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
2493 2495                  (void) fprintf(stderr, "failed to read %llu bytes\n",
2494 2496                      (u_longlong_t)statbuf.st_size);
2495 2497                  exit(1);
2496 2498          }
2497 2499  
2498 2500          (void) close(fd);
2499 2501  
2500 2502          if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
2501 2503                  (void) fprintf(stderr, "failed to unpack nvlist\n");
2502 2504                  exit(1);
2503 2505          }
2504 2506  
2505 2507          free(buf);
2506 2508  
2507 2509          dump_nvlist(config, 0);
2508 2510  
2509 2511          nvlist_free(config);
2510 2512  }
2511 2513  
2512 2514  #define ZDB_MAX_UB_HEADER_SIZE 32
2513 2515  
2514 2516  static void
2515 2517  dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
2516 2518  {
2517 2519          vdev_t vd;
2518 2520          vdev_t *vdp = &vd;
2519 2521          char header[ZDB_MAX_UB_HEADER_SIZE];
2520 2522  
2521 2523          vd.vdev_ashift = ashift;
2522 2524          vdp->vdev_top = vdp;
2523 2525  
2524 2526          for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
2525 2527                  uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
2526 2528                  uberblock_t *ub = (void *)((char *)lbl + uoff);
2527 2529  
2528 2530                  if (uberblock_verify(ub))
2529 2531                          continue;
2530 2532  
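                /*
                 * Unless -uuuu was given, skip entries with a nonzero
                 * mmp_delay that sit in the last MMP_BLOCKS_PER_LABEL
                 * slots of the ring; with multihost active these are
                 * typically rewritten frequently and mostly add noise here.
                 */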
2531 2533                  if ((dump_opt['u'] < 4) &&
2532 2534                      (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay &&
2533 2535                      (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL))
2534 2536                          continue;
2535 2537  
2536 2538                  (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
2537 2539                      "Uberblock[%d]\n", i);
2538 2540                  dump_uberblock(ub, header, "");
2539 2541          }
2540 2542  }
2541 2543  
2542 2544  static char curpath[PATH_MAX];
2543 2545  
2544 2546  /*
2545 2547   * Iterate through the path components, recursively passing
2546 2548   * current one's obj and remaining path until we find the obj
2547 2549   * for the last one.
2548 2550   */
2549 2551  static int
2550 2552  dump_path_impl(objset_t *os, uint64_t obj, char *name)
2551 2553  {
2552 2554          int err;
2553 2555          int header = 1;
2554 2556          uint64_t child_obj;
2555 2557          char *s;
2556 2558          dmu_buf_t *db;
2557 2559          dmu_object_info_t doi;
2558 2560  
2559 2561          if ((s = strchr(name, '/')) != NULL)
2560 2562                  *s = '\0';
2561 2563          err = zap_lookup(os, obj, name, 8, 1, &child_obj);
2562 2564  
2563 2565          (void) strlcat(curpath, name, sizeof (curpath));
2564 2566  
2565 2567          if (err != 0) {
2566 2568                  (void) fprintf(stderr, "failed to lookup %s: %s\n",
2567 2569                      curpath, strerror(err));
2568 2570                  return (err);
2569 2571          }
2570 2572  
2571 2573          child_obj = ZFS_DIRENT_OBJ(child_obj);
2572 2574          err = sa_buf_hold(os, child_obj, FTAG, &db);
2573 2575          if (err != 0) {
2574 2576                  (void) fprintf(stderr,
2575 2577                      "failed to get SA dbuf for obj %llu: %s\n",
2576 2578                      (u_longlong_t)child_obj, strerror(err));
2577 2579                  return (EINVAL);
2578 2580          }
2579 2581          dmu_object_info_from_db(db, &doi);
2580 2582          sa_buf_rele(db, FTAG);
2581 2583  
2582 2584          if (doi.doi_bonus_type != DMU_OT_SA &&
2583 2585              doi.doi_bonus_type != DMU_OT_ZNODE) {
2584 2586                  (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n",
2585 2587                      doi.doi_bonus_type, (u_longlong_t)child_obj);
2586 2588                  return (EINVAL);
2587 2589          }
2588 2590  
2589 2591          if (dump_opt['v'] > 6) {
2590 2592                  (void) printf("obj=%llu %s type=%d bonustype=%d\n",
2591 2593                      (u_longlong_t)child_obj, curpath, doi.doi_type,
2592 2594                      doi.doi_bonus_type);
2593 2595          }
2594 2596  
2595 2597          (void) strlcat(curpath, "/", sizeof (curpath));
2596 2598  
2597 2599          switch (doi.doi_type) {
2598 2600          case DMU_OT_DIRECTORY_CONTENTS:
2599 2601                  if (s != NULL && *(s + 1) != '\0')
2600 2602                          return (dump_path_impl(os, child_obj, s + 1));
2601 2603                  /*FALLTHROUGH*/
2602 2604          case DMU_OT_PLAIN_FILE_CONTENTS:
2603 2605                  dump_object(os, child_obj, dump_opt['v'], &header, NULL);
2604 2606                  return (0);
2605 2607          default:
2606 2608                  (void) fprintf(stderr, "object %llu has non-file/directory "
2607 2609                      "type %d\n", (u_longlong_t)obj, doi.doi_type);
2608 2610                  break;
2609 2611          }
2610 2612  
2611 2613          return (EINVAL);
2612 2614  }
2613 2615  
2614 2616  /*
2615 2617   * Dump the blocks for the object specified by path inside the dataset.
2616 2618   */
2617 2619  static int
2618 2620  dump_path(char *ds, char *path)
2619 2621  {
2620 2622          int err;
2621 2623          objset_t *os;
2622 2624          uint64_t root_obj;
2623 2625  
2624 2626          err = open_objset(ds, DMU_OST_ZFS, FTAG, &os);
2625 2627          if (err != 0)
2626 2628                  return (err);
2627 2629  
2628 2630          err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
2629 2631          if (err != 0) {
2630 2632                  (void) fprintf(stderr, "can't lookup root znode: %s\n",
2631 2633                      strerror(err));
2632 2634                  dmu_objset_disown(os, FTAG);
2633 2635                  return (EINVAL);
2634 2636          }
2635 2637  
2636 2638          (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
2637 2639  
2638 2640          err = dump_path_impl(os, root_obj, path);
2639 2641  
2640 2642          close_objset(os, FTAG);
2641 2643          return (err);
2642 2644  }
2643 2645  
2644 2646  static int
2645 2647  dump_label(const char *dev)
2646 2648  {
2647 2649          int fd;
2648 2650          vdev_label_t label;
2649 2651          char path[MAXPATHLEN];
2650 2652          char *buf = label.vl_vdev_phys.vp_nvlist;
2651 2653          size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
2652 2654          struct stat64 statbuf;
2653 2655          uint64_t psize, ashift;
2654 2656          boolean_t label_found = B_FALSE;
2655 2657  
2656 2658          (void) strlcpy(path, dev, sizeof (path));
2657 2659          if (dev[0] == '/') {
2658 2660                  if (strncmp(dev, ZFS_DISK_ROOTD,
2659 2661                      strlen(ZFS_DISK_ROOTD)) == 0) {
2660 2662                          (void) snprintf(path, sizeof (path), "%s%s",
2661 2663                              ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD));
2662 2664                  }
2663 2665          } else if (stat64(path, &statbuf) != 0) {
2664 2666                  char *s;
2665 2667  
2666 2668                  (void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD,
2667 2669                      dev);
2668 2670                  if (((s = strrchr(dev, 's')) == NULL &&
2669 2671                      (s = strchr(dev, 'p')) == NULL) ||
2670 2672                      !isdigit(*(s + 1)))
2671 2673                          (void) strlcat(path, "s0", sizeof (path));
2672 2674          }
2673 2675  
2674 2676          if ((fd = open64(path, O_RDONLY)) < 0) {
2675 2677                  (void) fprintf(stderr, "cannot open '%s': %s\n", path,
2676 2678                      strerror(errno));
2677 2679                  exit(1);
2678 2680          }
2679 2681  
2680 2682          if (fstat64(fd, &statbuf) != 0) {
2681 2683                  (void) fprintf(stderr, "failed to stat '%s': %s\n", path,
2682 2684                      strerror(errno));
2683 2685                  (void) close(fd);
2684 2686                  exit(1);
2685 2687          }
2686 2688  
2687 2689          if (S_ISBLK(statbuf.st_mode)) {
2688 2690                  (void) fprintf(stderr,
2689 2691                      "cannot use '%s': character device required\n", path);
2690 2692                  (void) close(fd);
2691 2693                  exit(1);
2692 2694          }
2693 2695  
2694 2696          psize = statbuf.st_size;
2695 2697          psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
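        /*
         * P2ALIGN() rounds the device size down to a whole number of
         * labels so that vdev_label_offset() below can address the two
         * labels at the end of the device as well as the two in front.
         */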
2696 2698  
2697 2699          for (int l = 0; l < VDEV_LABELS; l++) {
2698 2700                  nvlist_t *config = NULL;
2699 2701  
2700 2702                  if (!dump_opt['q']) {
2701 2703                          (void) printf("------------------------------------\n");
2702 2704                          (void) printf("LABEL %d\n", l);
2703 2705                          (void) printf("------------------------------------\n");
2704 2706                  }
2705 2707  
2706 2708                  if (pread64(fd, &label, sizeof (label),
2707 2709                      vdev_label_offset(psize, l, 0)) != sizeof (label)) {
2708 2710                          if (!dump_opt['q'])
2709 2711                                  (void) printf("failed to read label %d\n", l);
2710 2712                          continue;
2711 2713                  }
2712 2714  
2713 2715                  if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
2714 2716                          if (!dump_opt['q'])
2715 2717                                  (void) printf("failed to unpack label %d\n", l);
2716 2718                          ashift = SPA_MINBLOCKSHIFT;
2717 2719                  } else {
2718 2720                          nvlist_t *vdev_tree = NULL;
2719 2721  
2720 2722                          if (!dump_opt['q'])
2721 2723                                  dump_nvlist(config, 4);
2722 2724                          if ((nvlist_lookup_nvlist(config,
2723 2725                              ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
2724 2726                              (nvlist_lookup_uint64(vdev_tree,
2725 2727                              ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
2726 2728                                  ashift = SPA_MINBLOCKSHIFT;
2727 2729                          nvlist_free(config);
2728 2730                          label_found = B_TRUE;
2729 2731                  }
2730 2732                  if (dump_opt['u'])
2731 2733                          dump_label_uberblocks(&label, ashift);
2732 2734          }
2733 2735  
2734 2736          (void) close(fd);
2735 2737  
2736 2738          return (label_found ? 0 : 2);
2737 2739  }
2738 2740  
2739 2741  static uint64_t dataset_feature_count[SPA_FEATURES];
2740 2742  static uint64_t remap_deadlist_count = 0;
2741 2743  
2742 2744  /*ARGSUSED*/
2743 2745  static int
2744 2746  dump_one_dir(const char *dsname, void *arg)
2745 2747  {
2746 2748          int error;
2747 2749          objset_t *os;
2748 2750  
2749 2751          error = open_objset(dsname, DMU_OST_ANY, FTAG, &os);
2750 2752          if (error != 0)
2751 2753                  return (0);
2752 2754  
2753 2755          for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
2754 2756                  if (!dmu_objset_ds(os)->ds_feature_inuse[f])
2755 2757                          continue;
2756 2758                  ASSERT(spa_feature_table[f].fi_flags &
2757 2759                      ZFEATURE_FLAG_PER_DATASET);
2758 2760                  dataset_feature_count[f]++;
2759 2761          }
2760 2762  
2761 2763          if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) {
2762 2764                  remap_deadlist_count++;
2763 2765          }
2764 2766  
2765 2767          dump_dir(os);
2766 2768          close_objset(os, FTAG);
2767 2769          fuid_table_destroy();
2768 2770          return (0);
2769 2771  }
2770 2772  
2771 2773  /*
2772 2774   * Block statistics.
2773 2775   */
2774 2776  #define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
2775 2777  typedef struct zdb_blkstats {
2776 2778          uint64_t zb_asize;
2777 2779          uint64_t zb_lsize;
2778 2780          uint64_t zb_psize;
2779 2781          uint64_t zb_count;
2780 2782          uint64_t zb_gangs;
2781 2783          uint64_t zb_ditto_samevdev;
2782 2784          uint64_t zb_ditto_same_ms;
2783 2785          uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
2784 2786  } zdb_blkstats_t;
2785 2787  
2786 2788  /*
2787 2789   * Extended object types to report deferred frees and dedup auto-ditto blocks.
2788 2790   */
2789 2791  #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
2790 2792  #define ZDB_OT_DITTO    (DMU_OT_NUMTYPES + 1)
2791 2793  #define ZDB_OT_OTHER    (DMU_OT_NUMTYPES + 2)
2792 2794  #define ZDB_OT_TOTAL    (DMU_OT_NUMTYPES + 3)
2793 2795  
2794 2796  static const char *zdb_ot_extname[] = {
2795 2797          "deferred free",
2796 2798          "dedup ditto",
2797 2799          "other",
2798 2800          "Total",
2799 2801  };
2800 2802  
2801 2803  #define ZB_TOTAL        DN_MAX_LEVELS
2802 2804  
2803 2805  typedef struct zdb_cb {
2804 2806          zdb_blkstats_t  zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
2805 2807          uint64_t        zcb_removing_size;
2806 2808          uint64_t        zcb_checkpoint_size;
2807 2809          uint64_t        zcb_dedup_asize;
2808 2810          uint64_t        zcb_dedup_blocks;
2809 2811          uint64_t        zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
2810 2812          uint64_t        zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
2811 2813              [BPE_PAYLOAD_SIZE];
2812 2814          uint64_t        zcb_start;
2813 2815          hrtime_t        zcb_lastprint;
2814 2816          uint64_t        zcb_totalasize;
2815 2817          uint64_t        zcb_errors[256];
2816 2818          int             zcb_readfails;
2817 2819          int             zcb_haderrors;
2818 2820          spa_t           *zcb_spa;
2819 2821          uint32_t        **zcb_vd_obsolete_counts;
2820 2822  } zdb_cb_t;
2821 2823  
2822 2824  /* test if two DVA offsets from same vdev are within the same metaslab */
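/*
 * Illustrative example (assumed shift): with vdev_ms_shift == 29 (512MB
 * metaslabs), offsets 0x10000000 and 0x1fffffff both map to metaslab 0,
 * while 0x20000000 maps to metaslab 1.
 */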
2823 2825  static boolean_t
2824 2826  same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2)
2825 2827  {
2826 2828          vdev_t *vd = vdev_lookup_top(spa, vdev);
2827 2829          uint64_t ms_shift = vd->vdev_ms_shift;
2828 2830  
2829 2831          return ((off1 >> ms_shift) == (off2 >> ms_shift));
2830 2832  }
2831 2833  
2832 2834  static void
2833 2835  zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
2834 2836      dmu_object_type_t type)
2835 2837  {
2836 2838          uint64_t refcnt = 0;
2837 2839  
2838 2840          ASSERT(type < ZDB_OT_TOTAL);
2839 2841  
2840 2842          if (zilog && zil_bp_tree_add(zilog, bp) != 0)
2841 2843                  return;
2842 2844  
2843 2845          spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
2844 2846  
2845 2847          for (int i = 0; i < 4; i++) {
2846 2848                  int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
2847 2849                  int t = (i & 1) ? type : ZDB_OT_TOTAL;
2848 2850                  int equal;
2849 2851                  zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
2850 2852  
2851 2853                  zb->zb_asize += BP_GET_ASIZE(bp);
2852 2854                  zb->zb_lsize += BP_GET_LSIZE(bp);
2853 2855                  zb->zb_psize += BP_GET_PSIZE(bp);
2854 2856                  zb->zb_count++;
2855 2857  
2856 2858                  /*
2857 2859                   * The histogram is only big enough to record blocks up to
2858 2860                   * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
2859 2861                   * "other", bucket.
2860 2862                   */
2861 2863                  unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
2862 2864                  idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
2863 2865                  zb->zb_psize_histogram[idx]++;
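                /*
                 * E.g. with a 512-byte SPA_MINBLOCKSIZE, a 4K psize lands
                 * in bucket 8 (4096 >> 9), while anything larger than
                 * SPA_OLD_MAXBLOCKSIZE (128K) is clamped into the final
                 * "other" bucket.
                 */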
2864 2866  
2865 2867                  zb->zb_gangs += BP_COUNT_GANG(bp);
2866 2868  
2867 2869                  switch (BP_GET_NDVAS(bp)) {
2868 2870                  case 2:
2869 2871                          if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2870 2872                              DVA_GET_VDEV(&bp->blk_dva[1])) {
2871 2873                                  zb->zb_ditto_samevdev++;
2872 2874  
2873 2875                                  if (same_metaslab(zcb->zcb_spa,
2874 2876                                      DVA_GET_VDEV(&bp->blk_dva[0]),
2875 2877                                      DVA_GET_OFFSET(&bp->blk_dva[0]),
2876 2878                                      DVA_GET_OFFSET(&bp->blk_dva[1])))
2877 2879                                          zb->zb_ditto_same_ms++;
2878 2880                          }
2879 2881                          break;
2880 2882                  case 3:
2881 2883                          equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2882 2884                              DVA_GET_VDEV(&bp->blk_dva[1])) +
2883 2885                              (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2884 2886                              DVA_GET_VDEV(&bp->blk_dva[2])) +
2885 2887                              (DVA_GET_VDEV(&bp->blk_dva[1]) ==
2886 2888                              DVA_GET_VDEV(&bp->blk_dva[2]));
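                        /*
                         * equal counts matching vdev pairs among the three
                         * DVAs: 0 when all differ, 1 when exactly two share
                         * a vdev, 3 when all three do (2 is impossible).
                         */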
2887 2889                          if (equal != 0) {
2888 2890                                  zb->zb_ditto_samevdev++;
2889 2891  
2890 2892                                  if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2891 2893                                      DVA_GET_VDEV(&bp->blk_dva[1]) &&
2892 2894                                      same_metaslab(zcb->zcb_spa,
2893 2895                                      DVA_GET_VDEV(&bp->blk_dva[0]),
2894 2896                                      DVA_GET_OFFSET(&bp->blk_dva[0]),
2895 2897                                      DVA_GET_OFFSET(&bp->blk_dva[1])))
2896 2898                                          zb->zb_ditto_same_ms++;
2897 2899                                  else if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2898 2900                                      DVA_GET_VDEV(&bp->blk_dva[2]) &&
2899 2901                                      same_metaslab(zcb->zcb_spa,
2900 2902                                      DVA_GET_VDEV(&bp->blk_dva[0]),
2901 2903                                      DVA_GET_OFFSET(&bp->blk_dva[0]),
2902 2904                                      DVA_GET_OFFSET(&bp->blk_dva[2])))
2903 2905                                          zb->zb_ditto_same_ms++;
2904 2906                                  else if (DVA_GET_VDEV(&bp->blk_dva[1]) ==
2905 2907                                      DVA_GET_VDEV(&bp->blk_dva[2]) &&
2906 2908                                      same_metaslab(zcb->zcb_spa,
2907 2909                                      DVA_GET_VDEV(&bp->blk_dva[1]),
2908 2910                                      DVA_GET_OFFSET(&bp->blk_dva[1]),
2909 2911                                      DVA_GET_OFFSET(&bp->blk_dva[2])))
2910 2912                                          zb->zb_ditto_same_ms++;
2911 2913                          }
2912 2914                          break;
2913 2915                  }
2914 2916          }
2915 2917  
2916 2918          spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG);
2917 2919  
2918 2920          if (BP_IS_EMBEDDED(bp)) {
2919 2921                  zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
2920 2922                  zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
2921 2923                      [BPE_GET_PSIZE(bp)]++;
2922 2924                  return;
2923 2925          }
2924 2926  
2925 2927          if (dump_opt['L'])
2926 2928                  return;
2927 2929  
2928 2930          if (BP_GET_DEDUP(bp)) {
2929 2931                  ddt_t *ddt;
2930 2932                  ddt_entry_t *dde;
2931 2933  
2932 2934                  ddt = ddt_select(zcb->zcb_spa, bp);
2933 2935                  ddt_enter(ddt);
2934 2936                  dde = ddt_lookup(ddt, bp, B_FALSE);
2935 2937  
2936 2938                  if (dde == NULL) {
2937 2939                          refcnt = 0;
2938 2940                  } else {
2939 2941                          ddt_phys_t *ddp = ddt_phys_select(dde, bp);
2940 2942                          ddt_phys_decref(ddp);
2941 2943                          refcnt = ddp->ddp_refcnt;
2942 2944                          if (ddt_phys_total_refcnt(dde) == 0)
2943 2945                                  ddt_remove(ddt, dde);
2944 2946                  }
2945 2947                  ddt_exit(ddt);
2946 2948          }
2947 2949  
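        /*
         * Claiming the block removes its segments from the ms_allocatable
         * trees that zdb_leak_init() loaded with allocated space; whatever
         * remains after traversal is reported as leaked (see zdb_leak()).
         * A txg of 0 (a dedup block that still has live references) makes
         * the claim a verification-only dry run.
         */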
2948 2950          VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
2949 2951              refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa),
2950 2952              bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
2951 2953  }
2952 2954  
2953 2955  static void
2954 2956  zdb_blkptr_done(zio_t *zio)
2955 2957  {
2956 2958          spa_t *spa = zio->io_spa;
2957 2959          blkptr_t *bp = zio->io_bp;
2958 2960          int ioerr = zio->io_error;
2959 2961          zdb_cb_t *zcb = zio->io_private;
2960 2962          zbookmark_phys_t *zb = &zio->io_bookmark;
2961 2963  
2962 2964          abd_free(zio->io_abd);
2963 2965  
2964 2966          mutex_enter(&spa->spa_scrub_lock);
2965 2967          spa->spa_scrub_inflight--;
2966 2968          cv_broadcast(&spa->spa_scrub_io_cv);
2967 2969  
2968 2970          if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
2969 2971                  char blkbuf[BP_SPRINTF_LEN];
2970 2972  
2971 2973                  zcb->zcb_haderrors = 1;
2972 2974                  zcb->zcb_errors[ioerr]++;
2973 2975  
2974 2976                  if (dump_opt['b'] >= 2)
2975 2977                          snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2976 2978                  else
2977 2979                          blkbuf[0] = '\0';
2978 2980  
2979 2981                  (void) printf("zdb_blkptr_cb: "
2980 2982                      "Got error %d reading "
2981 2983                      "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2982 2984                      ioerr,
2983 2985                      (u_longlong_t)zb->zb_objset,
2984 2986                      (u_longlong_t)zb->zb_object,
2985 2987                      (u_longlong_t)zb->zb_level,
2986 2988                      (u_longlong_t)zb->zb_blkid,
2987 2989                      blkbuf);
2988 2990          }
2989 2991          mutex_exit(&spa->spa_scrub_lock);
2990 2992  }
2991 2993  
2992 2994  static int
2993 2995  zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2994 2996      const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
2995 2997  {
2996 2998          zdb_cb_t *zcb = arg;
2997 2999          dmu_object_type_t type;
2998 3000          boolean_t is_metadata;
2999 3001  
3000 3002          if (bp == NULL)
3001 3003                  return (0);
3002 3004  
3003 3005          if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
3004 3006                  char blkbuf[BP_SPRINTF_LEN];
3005 3007                  snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3006 3008                  (void) printf("objset %llu object %llu "
3007 3009                      "level %lld offset 0x%llx %s\n",
3008 3010                      (u_longlong_t)zb->zb_objset,
3009 3011                      (u_longlong_t)zb->zb_object,
3010 3012                      (longlong_t)zb->zb_level,
3011 3013                      (u_longlong_t)blkid2offset(dnp, bp, zb),
3012 3014                      blkbuf);
3013 3015          }
3014 3016  
3015 3017          if (BP_IS_HOLE(bp))
3016 3018                  return (0);
3017 3019  
3018 3020          type = BP_GET_TYPE(bp);
3019 3021  
3020 3022          zdb_count_block(zcb, zilog, bp,
3021 3023              (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
3022 3024  
3023 3025          is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
3024 3026  
3025 3027          if (!BP_IS_EMBEDDED(bp) &&
3026 3028              (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
3027 3029                  size_t size = BP_GET_PSIZE(bp);
3028 3030                  abd_t *abd = abd_alloc(size, B_FALSE);
3029 3031                  int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
3030 3032  
3031 3033                  /* If it's an intent log block, failure is expected. */
3032 3034                  if (zb->zb_level == ZB_ZIL_LEVEL)
3033 3035                          flags |= ZIO_FLAG_SPECULATIVE;
3034 3036  
3035 3037                  mutex_enter(&spa->spa_scrub_lock);
3036 3038                  while (spa->spa_scrub_inflight > max_inflight)
3037 3039                          cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
3038 3040                  spa->spa_scrub_inflight++;
3039 3041                  mutex_exit(&spa->spa_scrub_lock);
3040 3042  
3041 3043                  zio_nowait(zio_read(NULL, spa, bp, abd, size,
3042 3044                      zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
3043 3045          }
3044 3046  
3045 3047          zcb->zcb_readfails = 0;
3046 3048  
3047 3049          /* only call gethrtime() every 100 blocks */
3048 3050          static int iters;
3049 3051          if (++iters > 100)
3050 3052                  iters = 0;
3051 3053          else
3052 3054                  return (0);
3053 3055  
3054 3056          if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
3055 3057                  uint64_t now = gethrtime();
3056 3058                  char buf[10];
3057 3059                  uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
3058 3060                  int kb_per_sec =
3059 3061                      1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
3060 3062                  int sec_remaining =
3061 3063                      (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
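                /*
                 * gethrtime() is in nanoseconds, so the divisor above is
                 * elapsed milliseconds; bytes per millisecond roughly
                 * equals KB/s, hence the remaining bytes are divided by
                 * 1024 and then by kb_per_sec to estimate seconds left.
                 */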
3062 3064  
3063 3065                  /* make sure nicenum has enough space */
3064 3066                  CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ);
3065 3067  
3066 3068                  zfs_nicenum(bytes, buf, sizeof (buf));
3067 3069                  (void) fprintf(stderr,
3068 3070                      "\r%5s completed (%4dMB/s) "
3069 3071                      "estimated time remaining: %uhr %02umin %02usec        ",
3070 3072                      buf, kb_per_sec / 1024,
3071 3073                      sec_remaining / 60 / 60,
3072 3074                      sec_remaining / 60 % 60,
3073 3075                      sec_remaining % 60);
3074 3076  
3075 3077                  zcb->zcb_lastprint = now;
3076 3078          }
3077 3079  
3078 3080          return (0);
3079 3081  }
3080 3082  
3081 3083  static void
3082 3084  zdb_leak(void *arg, uint64_t start, uint64_t size)
3083 3085  {
3084 3086          vdev_t *vd = arg;
3085 3087  
3086 3088          (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
3087 3089              (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
3088 3090  }
3089 3091  
3090 3092  static metaslab_ops_t zdb_metaslab_ops = {
3091 3093          NULL    /* alloc */
3092 3094  };
3093 3095  
3094 3096  static void
3095 3097  zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
3096 3098  {
3097 3099          ddt_bookmark_t ddb;
3098 3100          ddt_entry_t dde;
3099 3101          int error;
3100 3102  
     3103 +        ASSERT(!dump_opt['L']);
     3104 +
3101 3105          bzero(&ddb, sizeof (ddb));
3102 3106          while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
3103 3107                  blkptr_t blk;
3104 3108                  ddt_phys_t *ddp = dde.dde_phys;
3105 3109  
3106 3110                  if (ddb.ddb_class == DDT_CLASS_UNIQUE)
3107 3111                          return;
3108 3112  
3109 3113                  ASSERT(ddt_phys_total_refcnt(&dde) > 1);
3110 3114  
3111 3115                  for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
3112 3116                          if (ddp->ddp_phys_birth == 0)
3113 3117                                  continue;
3114 3118                          ddt_bp_create(ddb.ddb_checksum,
3115 3119                              &dde.dde_key, ddp, &blk);
3116 3120                          if (p == DDT_PHYS_DITTO) {
3117 3121                                  zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
3118 3122                          } else {
3119 3123                                  zcb->zcb_dedup_asize +=
3120 3124                                      BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
3121 3125                                  zcb->zcb_dedup_blocks++;
3122 3126                          }
3123 3127                  }
3124      -                if (!dump_opt['L']) {
3125      -                        ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
3126      -                        ddt_enter(ddt);
3127      -                        VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
3128      -                        ddt_exit(ddt);
3129      -                }
     3128 +                ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
     3129 +                ddt_enter(ddt);
     3130 +                VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
     3131 +                ddt_exit(ddt);
3130 3132          }
3131 3133  
3132 3134          ASSERT(error == ENOENT);
3133 3135  }
3134 3136  
3135 3137  /* ARGSUSED */
3136 3138  static void
3137 3139  claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
3138 3140      uint64_t size, void *arg)
3139 3141  {
3140 3142          /*
3141 3143           * This callback was called through a remap from
3142 3144           * a device being removed. Therefore, the vdev that
3143 3145           * this callback is applied to is a concrete
3144 3146           * vdev.
3145 3147           */
3146 3148          ASSERT(vdev_is_concrete(vd));
3147 3149  
3148 3150          VERIFY0(metaslab_claim_impl(vd, offset, size,
3149 3151              spa_min_claim_txg(vd->vdev_spa)));
3150 3152  }
3151 3153  
3152 3154  static void
3153 3155  claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
3154 3156  {
3155 3157          vdev_t *vd = arg;
3156 3158  
3157 3159          vdev_indirect_ops.vdev_op_remap(vd, offset, size,
3158 3160              claim_segment_impl_cb, NULL);
3159 3161  }
3160 3162  
3161 3163  /*
3162 3164   * After accounting for all allocated blocks that are directly referenced,
3163 3165   * we might have missed a reference to a block from a partially complete
3164 3166   * (and thus unused) indirect mapping object. We perform a secondary pass
3165 3167   * through the metaslabs we have already mapped and claim the destination
3166 3168   * blocks.
3167 3169   */
3168 3170  static void
3169 3171  zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
3170 3172  {
     3173 +        if (dump_opt['L'])
     3174 +                return;
     3175 +
3171 3176          if (spa->spa_vdev_removal == NULL)
3172 3177                  return;
3173 3178  
3174 3179          spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3175 3180  
3176 3181          spa_vdev_removal_t *svr = spa->spa_vdev_removal;
3177 3182          vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
3178 3183          vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3179 3184  
3180 3185          for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
3181 3186                  metaslab_t *msp = vd->vdev_ms[msi];
3182 3187  
3183 3188                  if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim))
3184 3189                          break;
3185 3190  
3186 3191                  ASSERT0(range_tree_space(svr->svr_allocd_segs));
3187 3192  
3188 3193                  if (msp->ms_sm != NULL) {
3189 3194                          VERIFY0(space_map_load(msp->ms_sm,
3190 3195                              svr->svr_allocd_segs, SM_ALLOC));
3191 3196  
3192 3197                          /*
3193 3198                           * Clear everything past what has been synced unless
3194 3199                           * it's past the spacemap, because we have not allocated
3195 3200                           * mappings for it yet.
3196 3201                           */
3197 3202                          uint64_t vim_max_offset =
3198 3203                              vdev_indirect_mapping_max_offset(vim);
3199 3204                          uint64_t sm_end = msp->ms_sm->sm_start +
3200 3205                              msp->ms_sm->sm_size;
3201 3206                          if (sm_end > vim_max_offset)
3202 3207                                  range_tree_clear(svr->svr_allocd_segs,
3203 3208                                      vim_max_offset, sm_end - vim_max_offset);
3204 3209                  }
3205 3210  
3206 3211                  zcb->zcb_removing_size +=
3207 3212                      range_tree_space(svr->svr_allocd_segs);
3208 3213                  range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);
3209 3214          }
3210 3215  
3211 3216          spa_config_exit(spa, SCL_CONFIG, FTAG);
3212 3217  }
3213 3218  
3214 3219  /* ARGSUSED */
3215 3220  static int
3216 3221  increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
3217 3222  {
3218 3223          zdb_cb_t *zcb = arg;
3219 3224          spa_t *spa = zcb->zcb_spa;
3220 3225          vdev_t *vd;
3221 3226          const dva_t *dva = &bp->blk_dva[0];
3222 3227  
3223 3228          ASSERT(!dump_opt['L']);
3224 3229          ASSERT3U(BP_GET_NDVAS(bp), ==, 1);
3225 3230  
3226 3231          spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
3227 3232          vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva));
3228 3233          ASSERT3P(vd, !=, NULL);
3229 3234          spa_config_exit(spa, SCL_VDEV, FTAG);
3230 3235  
3231 3236          ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0);
3232 3237          ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL);
3233 3238  
3234 3239          vdev_indirect_mapping_increment_obsolete_count(
3235 3240              vd->vdev_indirect_mapping,
3236 3241              DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva),
3237 3242              zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
3238 3243  
3239 3244          return (0);
3240 3245  }
3241 3246  
3242 3247  static uint32_t *
3243 3248  zdb_load_obsolete_counts(vdev_t *vd)
3244 3249  {
3245 3250          vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3246 3251          spa_t *spa = vd->vdev_spa;
3247 3252          spa_condensing_indirect_phys_t *scip =
3248 3253              &spa->spa_condensing_indirect_phys;
3249 3254          uint32_t *counts;
3250 3255  
3251 3256          EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL);
3252 3257          counts = vdev_indirect_mapping_load_obsolete_counts(vim);
3253 3258          if (vd->vdev_obsolete_sm != NULL) {
3254 3259                  vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3255 3260                      vd->vdev_obsolete_sm);
3256 3261          }
3257 3262          if (scip->scip_vdev == vd->vdev_id &&
3258 3263              scip->scip_prev_obsolete_sm_object != 0) {
3259 3264                  space_map_t *prev_obsolete_sm = NULL;
3260 3265                  VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
3261 3266                      scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
3262      -                space_map_update(prev_obsolete_sm);
3263 3267                  vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3264 3268                      prev_obsolete_sm);
3265 3269                  space_map_close(prev_obsolete_sm);
3266 3270          }
3267 3271          return (counts);
3268 3272  }
3269 3273  
3270 3274  typedef struct checkpoint_sm_exclude_entry_arg {
3271 3275          vdev_t *cseea_vd;
3272 3276          uint64_t cseea_checkpoint_size;
3273 3277  } checkpoint_sm_exclude_entry_arg_t;
3274 3278  
3275 3279  static int
3276 3280  checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
3277 3281  {
3278 3282          checkpoint_sm_exclude_entry_arg_t *cseea = arg;
3279 3283          vdev_t *vd = cseea->cseea_vd;
3280 3284          metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
3281 3285          uint64_t end = sme->sme_offset + sme->sme_run;
3282 3286  
3283 3287          ASSERT(sme->sme_type == SM_FREE);
3284 3288  
3285 3289          /*
3286 3290           * Since the vdev_checkpoint_sm exists in the vdev level
3287 3291           * and the ms_sm space maps exist in the metaslab level,
3288 3292           * an entry in the checkpoint space map could theoretically
3289 3293           * cross the boundaries of the metaslab to which it belongs.
3290 3294           *
3291 3295           * In reality, because of the way that we populate and
3292 3296           * manipulate the checkpoint's space maps currently,
3293 3297           * there shouldn't be any entries that cross metaslabs.
3294 3298           * Hence the assertion below.
3295 3299           *
3296 3300           * That said, there is no fundamental requirement that
3297 3301           * the checkpoint's space map entries should not cross
3298 3302           * metaslab boundaries. So if needed we could add code
3299 3303           * that handles metaslab-crossing segments in the future.
3300 3304           */
3301 3305          VERIFY3U(sme->sme_offset, >=, ms->ms_start);
3302 3306          VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
3303 3307  
3304 3308          /*
3305 3309           * By removing the entry from the allocated segments we
3306 3310           * also verify that the entry is there to begin with.
3307 3311           */
3308 3312          mutex_enter(&ms->ms_lock);
3309 3313          range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
3310 3314          mutex_exit(&ms->ms_lock);
3311 3315  
3312 3316          cseea->cseea_checkpoint_size += sme->sme_run;
3313 3317          return (0);
3314 3318  }
3315 3319  
3316 3320  static void
3317 3321  zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
3318 3322  {
3319 3323          spa_t *spa = vd->vdev_spa;
3320 3324          space_map_t *checkpoint_sm = NULL;
3321 3325          uint64_t checkpoint_sm_obj;
3322 3326  
3323 3327          /*
3324 3328           * If there is no vdev_top_zap, we are in a pool whose
3325 3329           * version predates the pool checkpoint feature.
3326 3330           */
3327 3331          if (vd->vdev_top_zap == 0)
3328 3332                  return;
3329 3333  
3330 3334          /*
3331 3335           * If there is no reference of the vdev_checkpoint_sm in
3332 3336           * the vdev_top_zap, then one of the following scenarios
3333 3337           * is true:
3334 3338           *
3335 3339           * 1] There is no checkpoint
3336 3340           * 2] There is a checkpoint, but no checkpointed blocks
3337 3341           *    have been freed yet
3338 3342           * 3] The current vdev is indirect
3339 3343           *
3340 3344           * In these cases we return immediately.
3341 3345           */
3342 3346          if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
3343 3347              VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
3344 3348                  return;
3345 3349                  return;
3346 3350          VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
3347 3351              VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1,
3348 3352              &checkpoint_sm_obj));
3349 3353  
3350 3354          checkpoint_sm_exclude_entry_arg_t cseea;
3351 3355          cseea.cseea_vd = vd;
3352 3356          cseea.cseea_checkpoint_size = 0;
3353 3357  
3354 3358          VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
3355 3359              checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
3356      -        space_map_update(checkpoint_sm);
3357 3360  
3358 3361          VERIFY0(space_map_iterate(checkpoint_sm,
     3362 +            space_map_length(checkpoint_sm),
3359 3363              checkpoint_sm_exclude_entry_cb, &cseea));
3360 3364          space_map_close(checkpoint_sm);
3361 3365  
3362 3366          zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size;
3363 3367  }
3364 3368  
3365 3369  static void
3366 3370  zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
3367 3371  {
     3372 +        ASSERT(!dump_opt['L']);
     3373 +
3368 3374          vdev_t *rvd = spa->spa_root_vdev;
3369 3375          for (uint64_t c = 0; c < rvd->vdev_children; c++) {
3370 3376                  ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
3371 3377                  zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb);
3372 3378          }
3373 3379  }
3374 3380  
3375 3381  static void
3376 3382  load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype)
3377 3383  {
3378 3384          vdev_t *rvd = spa->spa_root_vdev;
3379 3385          for (uint64_t i = 0; i < rvd->vdev_children; i++) {
3380 3386                  vdev_t *vd = rvd->vdev_child[i];
3381 3387  
3382 3388                  ASSERT3U(i, ==, vd->vdev_id);
3383 3389  
3384 3390                  if (vd->vdev_ops == &vdev_indirect_ops)
3385 3391                          continue;
3386 3392  
3387 3393                  for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3388 3394                          metaslab_t *msp = vd->vdev_ms[m];
3389 3395  
3390 3396                          (void) fprintf(stderr,
3391 3397                              "\rloading concrete vdev %llu, "
3392 3398                              "metaslab %llu of %llu ...",
3393 3399                              (longlong_t)vd->vdev_id,
3394 3400                              (longlong_t)msp->ms_id,
3395 3401                              (longlong_t)vd->vdev_ms_count);
3396 3402  
3397 3403                          mutex_enter(&msp->ms_lock);
3398 3404                          metaslab_unload(msp);
3399 3405  
3400 3406                          /*
3401 3407                           * We don't want to spend the CPU manipulating the
3402 3408                           * size-ordered tree, so clear the range_tree ops.
3403 3409                           */
3404 3410                          msp->ms_allocatable->rt_ops = NULL;
3405 3411  
3406 3412                          if (msp->ms_sm != NULL) {
3407 3413                                  VERIFY0(space_map_load(msp->ms_sm,
3408 3414                                      msp->ms_allocatable, maptype));
3409 3415                          }
3410 3416                          if (!msp->ms_loaded)
3411 3417                                  msp->ms_loaded = B_TRUE;
3412 3418                          mutex_exit(&msp->ms_lock);
3413 3419                  }
3414 3420          }
3415 3421  }
3416 3422  
3417 3423  /*
3418 3424   * vm_idxp is an in-out parameter which (for indirect vdevs) is the
3419 3425   * index in vim_entries that has the first entry in this metaslab.
3420 3426   * On return, it will be set to the first entry after this metaslab.
3421 3427   */
3422 3428  static void
3423 3429  load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp,
3424 3430      uint64_t *vim_idxp)
3425 3431  {
3426 3432          vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3427 3433  
3428 3434          mutex_enter(&msp->ms_lock);
3429 3435          metaslab_unload(msp);
3430 3436  
3431 3437          /*
3432 3438           * We don't want to spend the CPU manipulating the
3433 3439           * size-ordered tree, so clear the range_tree ops.
3434 3440           */
3435 3441          msp->ms_allocatable->rt_ops = NULL;
3436 3442  
3437 3443          for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim);
3438 3444              (*vim_idxp)++) {
3439 3445                  vdev_indirect_mapping_entry_phys_t *vimep =
3440 3446                      &vim->vim_entries[*vim_idxp];
3441 3447                  uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
3442 3448                  uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst);
3443 3449                  ASSERT3U(ent_offset, >=, msp->ms_start);
3444 3450                  if (ent_offset >= msp->ms_start + msp->ms_size)
3445 3451                          break;
3446 3452  
3447 3453                  /*
3448 3454                   * Mappings do not cross metaslab boundaries,
3449 3455                   * because we create them by walking the metaslabs.
3450 3456                   */
3451 3457                  ASSERT3U(ent_offset + ent_len, <=,
3452 3458                      msp->ms_start + msp->ms_size);
3453 3459                  range_tree_add(msp->ms_allocatable, ent_offset, ent_len);
3454 3460          }
3455 3461  
3456 3462          if (!msp->ms_loaded)
3457 3463                  msp->ms_loaded = B_TRUE;
3458 3464          mutex_exit(&msp->ms_lock);
3459 3465  }
3460 3466  
3461 3467  static void
3462 3468  zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
3463 3469  {
     3470 +        ASSERT(!dump_opt['L']);
     3471 +
3464 3472          vdev_t *rvd = spa->spa_root_vdev;
3465 3473          for (uint64_t c = 0; c < rvd->vdev_children; c++) {
3466 3474                  vdev_t *vd = rvd->vdev_child[c];
3467 3475  
3468 3476                  ASSERT3U(c, ==, vd->vdev_id);
3469 3477  
3470 3478                  if (vd->vdev_ops != &vdev_indirect_ops)
3471 3479                          continue;
3472 3480  
3473 3481                  /*
3474 3482                   * Note: we don't check for mapping leaks on
3475 3483                   * removing vdevs because their ms_allocatable's
3476 3484                   * are used to look for leaks in allocated space.
3477 3485                   */
3478 3486                  zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd);
3479 3487  
3480 3488                  /*
3481 3489                   * Normally, indirect vdevs don't have any
3482 3490                   * metaslabs.  We want to set them up for
3483 3491                   * zio_claim().
3484 3492                   */
3485 3493                  VERIFY0(vdev_metaslab_init(vd, 0));
3486 3494  
3487 3495                  vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3488 3496                  uint64_t vim_idx = 0;
3489 3497                  for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3490 3498  
3491 3499                          (void) fprintf(stderr,
3492 3500                              "\rloading indirect vdev %llu, "
3493 3501                              "metaslab %llu of %llu ...",
3494 3502                              (longlong_t)vd->vdev_id,
3495 3503                              (longlong_t)vd->vdev_ms[m]->ms_id,
3496 3504                              (longlong_t)vd->vdev_ms_count);
3497 3505  
3498 3506                          load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m],
3499 3507                              &vim_idx);
3500 3508                  }
3501 3509                  ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim));
3502 3510          }
3503 3511  }
3504 3512  
3505 3513  static void
3506 3514  zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
3507 3515  {
3508 3516          zcb->zcb_spa = spa;
3509 3517  
3510      -        if (!dump_opt['L']) {
3511      -                dsl_pool_t *dp = spa->spa_dsl_pool;
3512      -                vdev_t *rvd = spa->spa_root_vdev;
     3518 +        if (dump_opt['L'])
     3519 +                return;
3513 3520  
3514      -                /*
3515      -                 * We are going to be changing the meaning of the metaslab's
3516      -                 * ms_allocatable.  Ensure that the allocator doesn't try to
3517      -                 * use the tree.
3518      -                 */
3519      -                spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
3520      -                spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
     3521 +        dsl_pool_t *dp = spa->spa_dsl_pool;
     3522 +        vdev_t *rvd = spa->spa_root_vdev;
3521 3523  
3522      -                zcb->zcb_vd_obsolete_counts =
3523      -                    umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
3524      -                    UMEM_NOFAIL);
     3524 +        /*
     3525 +         * We are going to be changing the meaning of the metaslab's
     3526 +         * ms_allocatable.  Ensure that the allocator doesn't try to
     3527 +         * use the tree.
     3528 +         */
     3529 +        spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
     3530 +        spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
3525 3531  
3526      -                /*
3527      -                 * For leak detection, we overload the ms_allocatable trees
3528      -                 * to contain allocated segments instead of free segments.
3529      -                 * As a result, we can't use the normal metaslab_load/unload
3530      -                 * interfaces.
3531      -                 */
3532      -                zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
3533      -                load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
     3532 +        zcb->zcb_vd_obsolete_counts =
     3533 +            umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
     3534 +            UMEM_NOFAIL);
3534 3535  
3535      -                /*
3536      -                 * On load_concrete_ms_allocatable_trees() we loaded all the
3537      -                 * allocated entries from the ms_sm to the ms_allocatable for
3538      -                 * each metaslab. If the pool has a checkpoint or is in the
3539      -                 * middle of discarding a checkpoint, some of these blocks
3540      -                 * may have been freed but their ms_sm may not have been
3541      -                 * updated because they are referenced by the checkpoint. In
3542      -                 * order to avoid false-positives during leak-detection, we
3543      -                 * go through the vdev's checkpoint space map and exclude all
3544      -                 * its entries from their relevant ms_allocatable.
3545      -                 *
3546      -                 * We also aggregate the space held by the checkpoint and add
3547      -                 * it to zcb_checkpoint_size.
3548      -                 *
3549      -                 * Note that at this point we are also verifying that all the
3550      -                 * entries on the checkpoint_sm are marked as allocated in
3551      -                 * the ms_sm of their relevant metaslab.
3552      -                 * [see comment in checkpoint_sm_exclude_entry_cb()]
3553      -                 */
3554      -                zdb_leak_init_exclude_checkpoint(spa, zcb);
     3536 +        /*
     3537 +         * For leak detection, we overload the ms_allocatable trees
     3538 +         * to contain allocated segments instead of free segments.
     3539 +         * As a result, we can't use the normal metaslab_load/unload
     3540 +         * interfaces.
     3541 +         */
     3542 +        zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
     3543 +        load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
3555 3544  
3556      -                /* for cleaner progress output */
3557      -                (void) fprintf(stderr, "\n");
     3545 +        /*
     3546 +         * On load_concrete_ms_allocatable_trees() we loaded all the
     3547 +         * allocated entries from the ms_sm to the ms_allocatable for
     3548 +         * each metaslab. If the pool has a checkpoint or is in the
     3549 +         * middle of discarding a checkpoint, some of these blocks
     3550 +         * may have been freed but their ms_sm may not have been
     3551 +         * updated because they are referenced by the checkpoint. In
     3552 +         * order to avoid false-positives during leak-detection, we
     3553 +         * go through the vdev's checkpoint space map and exclude all
     3554 +         * its entries from their relevant ms_allocatable.
     3555 +         *
     3556 +         * We also aggregate the space held by the checkpoint and add
     3557 +         * it to zcb_checkpoint_size.
     3558 +         *
     3559 +         * Note that at this point we are also verifying that all the
     3560 +         * entries on the checkpoint_sm are marked as allocated in
     3561 +         * the ms_sm of their relevant metaslab.
     3562 +         * [see comment in checkpoint_sm_exclude_entry_cb()]
     3563 +         */
     3564 +        zdb_leak_init_exclude_checkpoint(spa, zcb);
     3565 +        ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));
3558 3566  
3559      -                if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
3560      -                        ASSERT(spa_feature_is_enabled(spa,
3561      -                            SPA_FEATURE_DEVICE_REMOVAL));
3562      -                        (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
3563      -                            increment_indirect_mapping_cb, zcb, NULL);
3564      -                }
3565      -        } else {
3566      -                /*
3567      -                 * If leak tracing is disabled, we still need to consider
3568      -                 * any checkpointed space in our space verification.
3569      -                 */
3570      -                zcb->zcb_checkpoint_size += spa_get_checkpoint_space(spa);
     3567 +        /* for cleaner progress output */
     3568 +        (void) fprintf(stderr, "\n");
     3569 +
     3570 +        if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
     3571 +                ASSERT(spa_feature_is_enabled(spa,
     3572 +                    SPA_FEATURE_DEVICE_REMOVAL));
     3573 +                (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
     3574 +                    increment_indirect_mapping_cb, zcb, NULL);
3571 3575          }
3572 3576  
3573 3577          spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3574 3578          zdb_ddt_leak_init(spa, zcb);
3575 3579          spa_config_exit(spa, SCL_CONFIG, FTAG);
3576 3580  }
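
Note on the zdb_leak_init() hunk above: the block comments describe loading each
metaslab's ms_allocatable tree with allocated (SM_ALLOC) segments and then
excluding checkpointed space up front so it is not flagged as leaked later. The
stand-alone sketch below illustrates that bookkeeping with a toy bitmap; all
toy_* names, block numbers and sizes are made up, and this is not zdb code.

/*
 * Minimal sketch of the bookkeeping zdb_leak_init() sets up: the per-metaslab
 * "allocatable" structure is repurposed to hold allocated space, checkpointed
 * space is excluded up front, and anything still present after the traversal
 * claims its blocks is a leak.
 */
#include <stdio.h>

#define TOY_NBLOCKS     16
#define TOY_BLKSIZE     512

static unsigned char toy_alloc[TOY_NBLOCKS];    /* stands in for ms_allocatable */

int
main(void)
{
        /* "SM_ALLOC" load: blocks 2-5 and 9 are allocated on disk. */
        int sm_alloc[] = { 2, 3, 4, 5, 9 };
        /* Block 9 is only held by the checkpoint. */
        int checkpoint[] = { 9 };
        /* The traversal claims blocks 2, 3 and 4; block 5 is never seen. */
        int claimed[] = { 2, 3, 4 };
        unsigned long checkpoint_size = 0;

        for (size_t i = 0; i < sizeof (sm_alloc) / sizeof (int); i++)
                toy_alloc[sm_alloc[i]] = 1;

        /* Exclude checkpointed blocks so they are not reported as leaks. */
        for (size_t i = 0; i < sizeof (checkpoint) / sizeof (int); i++) {
                toy_alloc[checkpoint[i]] = 0;
                checkpoint_size += TOY_BLKSIZE;
        }

        /* Claiming a block removes it from the "allocated" set. */
        for (size_t i = 0; i < sizeof (claimed) / sizeof (int); i++)
                toy_alloc[claimed[i]] = 0;

        /* Whatever is left was allocated but never referenced: a leak. */
        for (int b = 0; b < TOY_NBLOCKS; b++)
                if (toy_alloc[b])
                        printf("leaked block %d (%d bytes)\n", b, TOY_BLKSIZE);
        printf("checkpoint space: %lu bytes\n", checkpoint_size);
        return (0);
}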
3577 3581  
3578 3582  static boolean_t
3579 3583  zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
3580 3584  {
3581 3585          boolean_t leaks = B_FALSE;
3582 3586          vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3583 3587          uint64_t total_leaked = 0;
3584 3588  
3585 3589          ASSERT(vim != NULL);
3586 3590  
3587 3591          for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
3588 3592                  vdev_indirect_mapping_entry_phys_t *vimep =
3589 3593                      &vim->vim_entries[i];
3590 3594                  uint64_t obsolete_bytes = 0;
3591 3595                  uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
3592 3596                  metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
3593 3597  
3594 3598                  /*
3595 3599                   * This is not very efficient but it's easy to
3596 3600                   * verify correctness.
3597 3601                   */
3598 3602                  for (uint64_t inner_offset = 0;
3599 3603                      inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst);
3600 3604                      inner_offset += 1 << vd->vdev_ashift) {
3601 3605                          if (range_tree_contains(msp->ms_allocatable,
3602 3606                              offset + inner_offset, 1 << vd->vdev_ashift)) {
3603 3607                                  obsolete_bytes += 1 << vd->vdev_ashift;
3604 3608                          }
3605 3609                  }
3606 3610  
3607 3611                  int64_t bytes_leaked = obsolete_bytes -
3608 3612                      zcb->zcb_vd_obsolete_counts[vd->vdev_id][i];
3609 3613                  ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=,
3610 3614                      zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]);
3611 3615                  if (bytes_leaked != 0 &&
3612 3616                      (vdev_obsolete_counts_are_precise(vd) ||
3613 3617                      dump_opt['d'] >= 5)) {
3614 3618                          (void) printf("obsolete indirect mapping count "
3615 3619                              "mismatch on %llu:%llx:%llx : %llx bytes leaked\n",
3616 3620                              (u_longlong_t)vd->vdev_id,
3617 3621                              (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
3618 3622                              (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
3619 3623                              (u_longlong_t)bytes_leaked);
3620 3624                  }
3621 3625                  total_leaked += ABS(bytes_leaked);
3622 3626          }
3623 3627  
3624 3628          if (!vdev_obsolete_counts_are_precise(vd) && total_leaked > 0) {
3625 3629                  int pct_leaked = total_leaked * 100 /
3626 3630                      vdev_indirect_mapping_bytes_mapped(vim);
3627 3631                  (void) printf("cannot verify obsolete indirect mapping "
3628 3632                      "counts of vdev %llu because precise feature was not "
3629 3633                      "enabled when it was removed: %d%% (%llx bytes) of mapping "
3630 3634                      "unreferenced\n",
3631 3635                      (u_longlong_t)vd->vdev_id, pct_leaked,
3632 3636                      (u_longlong_t)total_leaked);
3633 3637          } else if (total_leaked > 0) {
3634 3638                  (void) printf("obsolete indirect mapping count mismatch "
3635 3639                      "for vdev %llu -- %llx total bytes mismatched\n",
3636 3640                      (u_longlong_t)vd->vdev_id,
3637 3641                      (u_longlong_t)total_leaked);
3638 3642                  leaks |= B_TRUE;
3639 3643          }
3640 3644  
  
    [ 60 lines elided ]
3641 3645          vdev_indirect_mapping_free_obsolete_counts(vim,
3642 3646              zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
3643 3647          zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;
3644 3648  
3645 3649          return (leaks);
3646 3650  }
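
For readers of zdb_check_for_obsolete_leaks() above: the inner loop walks each
indirect-mapping entry in 1 << ashift steps, counts how much of the mapped range
is still marked allocated, and compares that with the obsolete count recorded
for the entry. A minimal stand-alone model of that per-entry check follows; the
toy_* names, offsets and counts are hypothetical.

/*
 * Toy version of the per-entry check: walk a mapped range at ashift
 * granularity, count the portion still present in the "allocatable" set,
 * and compare it with the recorded obsolete count.
 */
#include <stdio.h>

/* Stand-in for range_tree_contains() on ms_allocatable. */
static int
toy_is_allocated(unsigned long long off)
{
        return (off >= 0x4000 && off < 0x6000);         /* one allocated range */
}

int
main(void)
{
        unsigned long long src_offset = 0x4000;         /* mapping source offset */
        unsigned long long asize = 0x3000;              /* mapped allocated size */
        unsigned long long recorded_obsolete = 0x800;   /* on-disk obsolete count */
        int ashift = 12;                                /* 4K blocks */
        unsigned long long obsolete_bytes = 0;

        for (unsigned long long inner = 0; inner < asize; inner += 1ULL << ashift)
                if (toy_is_allocated(src_offset + inner))
                        obsolete_bytes += 1ULL << ashift;

        long long leaked = (long long)(obsolete_bytes - recorded_obsolete);
        if (leaked != 0)
                printf("count mismatch: %lld bytes leaked\n", leaked);
        else
                printf("obsolete counts match\n");
        return (0);
}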
3647 3651  
3648 3652  static boolean_t
3649 3653  zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
3650 3654  {
     3655 +        if (dump_opt['L'])
     3656 +                return (B_FALSE);
     3657 +
3651 3658          boolean_t leaks = B_FALSE;
3652      -        if (!dump_opt['L']) {
3653      -                vdev_t *rvd = spa->spa_root_vdev;
3654      -                for (unsigned c = 0; c < rvd->vdev_children; c++) {
3655      -                        vdev_t *vd = rvd->vdev_child[c];
3656      -                        metaslab_group_t *mg = vd->vdev_mg;
3657 3659  
3658      -                        if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
3659      -                                leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
3660      -                        }
     3660 +        vdev_t *rvd = spa->spa_root_vdev;
     3661 +        for (unsigned c = 0; c < rvd->vdev_children; c++) {
     3662 +                vdev_t *vd = rvd->vdev_child[c];
     3663 +#if DEBUG
     3664 +                metaslab_group_t *mg = vd->vdev_mg;
     3665 +#endif
3661 3666  
3662      -                        for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3663      -                                metaslab_t *msp = vd->vdev_ms[m];
3664      -                                ASSERT3P(mg, ==, msp->ms_group);
     3667 +                if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
     3668 +                        leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
     3669 +                }
3665 3670  
3666      -                                /*
3667      -                                 * ms_allocatable has been overloaded
3668      -                                 * to contain allocated segments. Now that
3669      -                                 * we finished traversing all blocks, any
3670      -                                 * block that remains in the ms_allocatable
3671      -                                 * represents an allocated block that we
3672      -                                 * did not claim during the traversal.
3673      -                                 * Claimed blocks would have been removed
3674      -                                 * from the ms_allocatable.  For indirect
3675      -                                 * vdevs, space remaining in the tree
3676      -                                 * represents parts of the mapping that are
3677      -                                 * not referenced, which is not a bug.
3678      -                                 */
3679      -                                if (vd->vdev_ops == &vdev_indirect_ops) {
3680      -                                        range_tree_vacate(msp->ms_allocatable,
3681      -                                            NULL, NULL);
3682      -                                } else {
3683      -                                        range_tree_vacate(msp->ms_allocatable,
3684      -                                            zdb_leak, vd);
3685      -                                }
     3671 +                for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
     3672 +                        metaslab_t *msp = vd->vdev_ms[m];
     3673 +                        ASSERT3P(mg, ==, msp->ms_group);
3686 3674  
3687      -                                if (msp->ms_loaded) {
3688      -                                        msp->ms_loaded = B_FALSE;
3689      -                                }
     3675 +                        /*
     3676 +                         * ms_allocatable has been overloaded
     3677 +                         * to contain allocated segments. Now that
     3678 +                         * we finished traversing all blocks, any
     3679 +                         * block that remains in the ms_allocatable
     3680 +                         * represents an allocated block that we
     3681 +                         * did not claim during the traversal.
     3682 +                         * Claimed blocks would have been removed
     3683 +                         * from the ms_allocatable.  For indirect
     3684 +                         * vdevs, space remaining in the tree
     3685 +                         * represents parts of the mapping that are
     3686 +                         * not referenced, which is not a bug.
     3687 +                         */
     3688 +                        if (vd->vdev_ops == &vdev_indirect_ops) {
     3689 +                                range_tree_vacate(msp->ms_allocatable,
     3690 +                                    NULL, NULL);
     3691 +                        } else {
     3692 +                                range_tree_vacate(msp->ms_allocatable,
     3693 +                                    zdb_leak, vd);
3690 3694                          }
     3695 +
     3696 +                        if (msp->ms_loaded) {
     3697 +                                msp->ms_loaded = B_FALSE;
     3698 +                        }
3691 3699                  }
3692 3700  
3693      -                umem_free(zcb->zcb_vd_obsolete_counts,
3694      -                    rvd->vdev_children * sizeof (uint32_t *));
3695      -                zcb->zcb_vd_obsolete_counts = NULL;
3696 3701          }
     3702 +
     3703 +        umem_free(zcb->zcb_vd_obsolete_counts,
     3704 +            rvd->vdev_children * sizeof (uint32_t *));
     3705 +        zcb->zcb_vd_obsolete_counts = NULL;
     3706 +
3697 3707          return (leaks);
3698 3708  }
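
zdb_leak_fini() relies on range_tree_vacate() handing every remaining segment to
a callback (zdb_leak in the real code) before emptying the tree; for indirect
vdevs the callback is NULL, so leftovers are discarded silently. A rough sketch
of that vacate-with-callback pattern, using made-up toy_* types instead of the
real range tree:

/*
 * Sketch of the pattern: report every segment still present, then empty
 * the container.
 */
#include <stdio.h>

typedef struct toy_seg {
        unsigned long long ts_start;
        unsigned long long ts_size;
} toy_seg_t;

typedef void toy_seg_func_t(void *arg, unsigned long long start,
    unsigned long long size);

/* Stand-in for range_tree_vacate(): call func on each segment, then clear. */
static void
toy_vacate(toy_seg_t *segs, int *nsegs, toy_seg_func_t *func, void *arg)
{
        for (int i = 0; i < *nsegs; i++)
                if (func != NULL)
                        func(arg, segs[i].ts_start, segs[i].ts_size);
        *nsegs = 0;
}

static void
toy_report_leak(void *arg, unsigned long long start, unsigned long long size)
{
        const char *vdev = arg;

        printf("leaked space: vdev %s, offset 0x%llx, size 0x%llx\n",
            vdev, start, size);
}

int
main(void)
{
        toy_seg_t leftover[] = { { 0x10000, 0x2000 }, { 0x40000, 0x1000 } };
        int nsegs = 2;

        /* Concrete vdev: report and clear.  Indirect vdevs would pass NULL. */
        toy_vacate(leftover, &nsegs, toy_report_leak, "tank-0");
        return (0);
}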
3699 3709  
3700 3710  /* ARGSUSED */
3701 3711  static int
3702 3712  count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
3703 3713  {
3704 3714          zdb_cb_t *zcb = arg;
3705 3715  
3706 3716          if (dump_opt['b'] >= 5) {
3707 3717                  char blkbuf[BP_SPRINTF_LEN];
3708 3718                  snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3709 3719                  (void) printf("[%s] %s\n",
3710 3720                      "deferred free", blkbuf);
3711 3721          }
3712 3722          zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
3713 3723          return (0);
3714 3724  }
3715 3725  
3716 3726  static int
3717 3727  dump_block_stats(spa_t *spa)
3718 3728  {
3719 3729          zdb_cb_t zcb;
3720 3730          zdb_blkstats_t *zb, *tzb;
3721 3731          uint64_t norm_alloc, norm_space, total_alloc, total_found;
3722 3732          int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
3723 3733          boolean_t leaks = B_FALSE;
3724 3734          int err;
  
    [ 18 lines elided ]
3725 3735  
3726 3736          bzero(&zcb, sizeof (zcb));
3727 3737          (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
3728 3738              (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
3729 3739              (dump_opt['c'] == 1) ? "metadata " : "",
3730 3740              dump_opt['c'] ? "checksums " : "",
3731 3741              (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
3732 3742              !dump_opt['L'] ? "nothing leaked " : "");
3733 3743  
3734 3744          /*
3735      -         * Load all space maps as SM_ALLOC maps, then traverse the pool
3736      -         * claiming each block we discover.  If the pool is perfectly
3737      -         * consistent, the space maps will be empty when we're done.
3738      -         * Anything left over is a leak; any block we can't claim (because
3739      -         * it's not part of any space map) is a double allocation,
3740      -         * reference to a freed block, or an unclaimed log block.
     3745 +         * When leak detection is enabled we load all space maps as SM_ALLOC
     3746 +         * maps, then traverse the pool claiming each block we discover. If
     3747 +         * the pool is perfectly consistent, the segment trees will be empty
     3748 +         * when we're done. Anything left over is a leak; any block we can't
     3749 +         * claim (because it's not part of any space map) is a double
     3750 +         * allocation, reference to a freed block, or an unclaimed log block.
     3751 +         *
     3752 +         * When leak detection is disabled (-L option) we still traverse the
     3753 +         * pool claiming each block we discover, but we skip opening any space
     3754 +         * maps.
3741 3755           */
     3756 +        bzero(&zcb, sizeof (zdb_cb_t));
3742 3757          zdb_leak_init(spa, &zcb);
3743 3758  
3744 3759          /*
3745 3760           * If there's a deferred-free bplist, process that first.
3746 3761           */
3747 3762          (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
3748 3763              count_block_cb, &zcb, NULL);
3749 3764  
3750 3765          if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
3751 3766                  (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
3752 3767                      count_block_cb, &zcb, NULL);
3753 3768          }
3754 3769  
3755 3770          zdb_claim_removing(spa, &zcb);
3756 3771  
3757 3772          if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
3758 3773                  VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
3759 3774                      spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
3760 3775                      &zcb, NULL));
3761 3776          }
3762 3777  
3763 3778          if (dump_opt['c'] > 1)
3764 3779                  flags |= TRAVERSE_PREFETCH_DATA;
3765 3780  
3766 3781          zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
3767 3782          zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
3768 3783          zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
3769 3784          zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
3770 3785          err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
3771 3786  
3772 3787          /*
3773 3788           * If we've traversed the data blocks then we need to wait for those
3774 3789           * I/Os to complete. We leverage "The Godfather" zio to wait on
3775 3790           * all async I/Os to complete.
3776 3791           */
3777 3792          if (dump_opt['c']) {
3778 3793                  for (int i = 0; i < max_ncpus; i++) {
3779 3794                          (void) zio_wait(spa->spa_async_zio_root[i]);
3780 3795                          spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
3781 3796                              ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
3782 3797                              ZIO_FLAG_GODFATHER);
3783 3798                  }
3784 3799          }
3785 3800  
3786 3801          /*
3787 3802           * Done after zio_wait() since zcb_haderrors is modified in
3788 3803           * zdb_blkptr_done()
3789 3804           */
3790 3805          zcb.zcb_haderrors |= err;
3791 3806  
3792 3807          if (zcb.zcb_haderrors) {
3793 3808                  (void) printf("\nError counts:\n\n");
3794 3809                  (void) printf("\t%5s  %s\n", "errno", "count");
3795 3810                  for (int e = 0; e < 256; e++) {
3796 3811                          if (zcb.zcb_errors[e] != 0) {
3797 3812                                  (void) printf("\t%5d  %llu\n",
3798 3813                                      e, (u_longlong_t)zcb.zcb_errors[e]);
3799 3814                          }
3800 3815                  }
3801 3816          }
3802 3817  
3803 3818          /*
3804 3819           * Report any leaked segments.
3805 3820           */
3806 3821          leaks |= zdb_leak_fini(spa, &zcb);
3807 3822  
3808 3823          tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
3809 3824  
  
    [ 58 lines elided ]
3810 3825          norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
3811 3826          norm_space = metaslab_class_get_space(spa_normal_class(spa));
3812 3827  
3813 3828          total_alloc = norm_alloc +
3814 3829              metaslab_class_get_alloc(spa_log_class(spa)) +
3815 3830              metaslab_class_get_alloc(spa_special_class(spa)) +
3816 3831              metaslab_class_get_alloc(spa_dedup_class(spa));
3817 3832          total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
3818 3833              zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
3819 3834  
3820      -        if (total_found == total_alloc) {
3821      -                if (!dump_opt['L'])
3822      -                        (void) printf("\n\tNo leaks (block sum matches space"
3823      -                            " maps exactly)\n");
3824      -        } else {
     3835 +        if (total_found == total_alloc && !dump_opt['L']) {
     3836 +                (void) printf("\n\tNo leaks (block sum matches space"
     3837 +                    " maps exactly)\n");
     3838 +        } else if (!dump_opt['L']) {
3825 3839                  (void) printf("block traversal size %llu != alloc %llu "
3826 3840                      "(%s %lld)\n",
3827 3841                      (u_longlong_t)total_found,
3828 3842                      (u_longlong_t)total_alloc,
3829 3843                      (dump_opt['L']) ? "unreachable" : "leaked",
3830 3844                      (longlong_t)(total_alloc - total_found));
3831 3845                  leaks = B_TRUE;
3832 3846          }
3833 3847  
3834 3848          if (tzb->zb_count == 0)
3835 3849                  return (2);
3836 3850  
3837 3851          (void) printf("\n");
3838 3852          (void) printf("\t%-16s %14llu\n", "bp count:",
3839 3853              (u_longlong_t)tzb->zb_count);
3840 3854          (void) printf("\t%-16s %14llu\n", "ganged count:",
3841 3855              (longlong_t)tzb->zb_gangs);
3842 3856          (void) printf("\t%-16s %14llu      avg: %6llu\n", "bp logical:",
3843 3857              (u_longlong_t)tzb->zb_lsize,
3844 3858              (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
3845 3859          (void) printf("\t%-16s %14llu      avg: %6llu     compression: %6.2f\n",
3846 3860              "bp physical:", (u_longlong_t)tzb->zb_psize,
3847 3861              (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
3848 3862              (double)tzb->zb_lsize / tzb->zb_psize);
3849 3863          (void) printf("\t%-16s %14llu      avg: %6llu     compression: %6.2f\n",
3850 3864              "bp allocated:", (u_longlong_t)tzb->zb_asize,
3851 3865              (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
3852 3866              (double)tzb->zb_lsize / tzb->zb_asize);
3853 3867          (void) printf("\t%-16s %14llu    ref>1: %6llu   deduplication: %6.2f\n",
3854 3868              "bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize,
3855 3869              (u_longlong_t)zcb.zcb_dedup_blocks,
3856 3870              (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
3857 3871          (void) printf("\t%-16s %14llu     used: %5.2f%%\n", "Normal class:",
3858 3872              (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
3859 3873  
3860 3874          if (spa_special_class(spa)->mc_rotor != NULL) {
3861 3875                  uint64_t alloc = metaslab_class_get_alloc(
3862 3876                      spa_special_class(spa));
3863 3877                  uint64_t space = metaslab_class_get_space(
3864 3878                      spa_special_class(spa));
3865 3879  
3866 3880                  (void) printf("\t%-16s %14llu     used: %5.2f%%\n",
3867 3881                      "Special class", (u_longlong_t)alloc,
3868 3882                      100.0 * alloc / space);
3869 3883          }
3870 3884  
3871 3885          if (spa_dedup_class(spa)->mc_rotor != NULL) {
3872 3886                  uint64_t alloc = metaslab_class_get_alloc(
3873 3887                      spa_dedup_class(spa));
3874 3888                  uint64_t space = metaslab_class_get_space(
3875 3889                      spa_dedup_class(spa));
3876 3890  
3877 3891                  (void) printf("\t%-16s %14llu     used: %5.2f%%\n",
3878 3892                      "Dedup class", (u_longlong_t)alloc,
3879 3893                      100.0 * alloc / space);
3880 3894          }
3881 3895  
3882 3896          for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
3883 3897                  if (zcb.zcb_embedded_blocks[i] == 0)
3884 3898                          continue;
3885 3899                  (void) printf("\n");
3886 3900                  (void) printf("\tadditional, non-pointer bps of type %u: "
3887 3901                      "%10llu\n",
3888 3902                      i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
3889 3903  
3890 3904                  if (dump_opt['b'] >= 3) {
3891 3905                          (void) printf("\t number of (compressed) bytes:  "
3892 3906                              "number of bps\n");
3893 3907                          dump_histogram(zcb.zcb_embedded_histogram[i],
3894 3908                              sizeof (zcb.zcb_embedded_histogram[i]) /
3895 3909                              sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
3896 3910                  }
3897 3911          }
3898 3912  
3899 3913          if (tzb->zb_ditto_samevdev != 0) {
3900 3914                  (void) printf("\tDittoed blocks on same vdev: %llu\n",
3901 3915                      (longlong_t)tzb->zb_ditto_samevdev);
3902 3916          }
3903 3917          if (tzb->zb_ditto_same_ms != 0) {
3904 3918                  (void) printf("\tDittoed blocks in same metaslab: %llu\n",
3905 3919                      (longlong_t)tzb->zb_ditto_same_ms);
3906 3920          }
3907 3921  
3908 3922          for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) {
3909 3923                  vdev_t *vd = spa->spa_root_vdev->vdev_child[v];
3910 3924                  vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3911 3925  
3912 3926                  if (vim == NULL) {
3913 3927                          continue;
3914 3928                  }
3915 3929  
3916 3930                  char mem[32];
3917 3931                  zdb_nicenum(vdev_indirect_mapping_num_entries(vim),
3918 3932                      mem, vdev_indirect_mapping_size(vim));
3919 3933  
3920 3934                  (void) printf("\tindirect vdev id %llu has %llu segments "
3921 3935                      "(%s in memory)\n",
3922 3936                      (longlong_t)vd->vdev_id,
3923 3937                      (longlong_t)vdev_indirect_mapping_num_entries(vim), mem);
3924 3938          }
3925 3939  
3926 3940          if (dump_opt['b'] >= 2) {
3927 3941                  int l, t, level;
3928 3942                  (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
3929 3943                      "\t  avg\t comp\t%%Total\tType\n");
3930 3944  
3931 3945                  for (t = 0; t <= ZDB_OT_TOTAL; t++) {
3932 3946                          char csize[32], lsize[32], psize[32], asize[32];
3933 3947                          char avg[32], gang[32];
3934 3948                          const char *typename;
3935 3949  
3936 3950                          /* make sure nicenum has enough space */
3937 3951                          CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ);
3938 3952                          CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
3939 3953                          CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ);
3940 3954                          CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
3941 3955                          CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ);
3942 3956                          CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ);
3943 3957  
3944 3958                          if (t < DMU_OT_NUMTYPES)
3945 3959                                  typename = dmu_ot[t].ot_name;
3946 3960                          else
3947 3961                                  typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
3948 3962  
3949 3963                          if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
3950 3964                                  (void) printf("%6s\t%5s\t%5s\t%5s"
3951 3965                                      "\t%5s\t%5s\t%6s\t%s\n",
3952 3966                                      "-",
3953 3967                                      "-",
3954 3968                                      "-",
3955 3969                                      "-",
3956 3970                                      "-",
3957 3971                                      "-",
3958 3972                                      "-",
3959 3973                                      typename);
3960 3974                                  continue;
3961 3975                          }
3962 3976  
3963 3977                          for (l = ZB_TOTAL - 1; l >= -1; l--) {
3964 3978                                  level = (l == -1 ? ZB_TOTAL : l);
3965 3979                                  zb = &zcb.zcb_type[level][t];
3966 3980  
3967 3981                                  if (zb->zb_asize == 0)
3968 3982                                          continue;
3969 3983  
3970 3984                                  if (dump_opt['b'] < 3 && level != ZB_TOTAL)
3971 3985                                          continue;
3972 3986  
3973 3987                                  if (level == 0 && zb->zb_asize ==
3974 3988                                      zcb.zcb_type[ZB_TOTAL][t].zb_asize)
3975 3989                                          continue;
3976 3990  
3977 3991                                  zdb_nicenum(zb->zb_count, csize,
3978 3992                                      sizeof (csize));
3979 3993                                  zdb_nicenum(zb->zb_lsize, lsize,
3980 3994                                      sizeof (lsize));
3981 3995                                  zdb_nicenum(zb->zb_psize, psize,
3982 3996                                      sizeof (psize));
3983 3997                                  zdb_nicenum(zb->zb_asize, asize,
3984 3998                                      sizeof (asize));
3985 3999                                  zdb_nicenum(zb->zb_asize / zb->zb_count, avg,
3986 4000                                      sizeof (avg));
3987 4001                                  zdb_nicenum(zb->zb_gangs, gang, sizeof (gang));
3988 4002  
3989 4003                                  (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
3990 4004                                      "\t%5.2f\t%6.2f\t",
3991 4005                                      csize, lsize, psize, asize, avg,
3992 4006                                      (double)zb->zb_lsize / zb->zb_psize,
3993 4007                                      100.0 * zb->zb_asize / tzb->zb_asize);
3994 4008  
3995 4009                                  if (level == ZB_TOTAL)
3996 4010                                          (void) printf("%s\n", typename);
3997 4011                                  else
3998 4012                                          (void) printf("    L%d %s\n",
3999 4013                                              level, typename);
4000 4014  
4001 4015                                  if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
4002 4016                                          (void) printf("\t number of ganged "
4003 4017                                              "blocks: %s\n", gang);
4004 4018                                  }
4005 4019  
4006 4020                                  if (dump_opt['b'] >= 4) {
4007 4021                                          (void) printf("psize "
4008 4022                                              "(in 512-byte sectors): "
4009 4023                                              "number of blocks\n");
4010 4024                                          dump_histogram(zb->zb_psize_histogram,
4011 4025                                              PSIZE_HISTO_SIZE, 0);
4012 4026                                  }
4013 4027                          }
4014 4028                  }
4015 4029          }
4016 4030  
4017 4031          (void) printf("\n");
4018 4032  
4019 4033          if (leaks)
4020 4034                  return (2);
4021 4035  
4022 4036          if (zcb.zcb_haderrors)
4023 4037                  return (3);
4024 4038  
4025 4039          return (0);
4026 4040  }
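
The reconciliation in dump_block_stats() above compares what the metaslab
classes report as allocated with what the traversal found, after adjusting for
dedup, device removal and the checkpoint. A small worked example with made-up
numbers (not taken from any real pool) shows how the two totals line up:

/*
 * total_alloc sums the allocated space of all classes; total_found starts
 * from the traversed asize, removes double-counted dedup space, and adds
 * back space held by device removal and the checkpoint.
 */
#include <stdio.h>

int
main(void)
{
        unsigned long long norm_alloc = 1000000, log_alloc = 2048;
        unsigned long long special_alloc = 4096, dedup_alloc = 8192;
        unsigned long long traversed_asize = 1012288;   /* like tzb->zb_asize */
        unsigned long long dedup_asize = 1024;          /* counted once per ref */
        unsigned long long removing_size = 2048, checkpoint_size = 1024;

        unsigned long long total_alloc = norm_alloc + log_alloc +
            special_alloc + dedup_alloc;
        unsigned long long total_found = traversed_asize - dedup_asize +
            removing_size + checkpoint_size;

        if (total_found == total_alloc)
                printf("no leaks: %llu bytes accounted for\n", total_found);
        else
                printf("%lld bytes leaked\n",
                    (long long)(total_alloc - total_found));
        return (0);
}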
4027 4041  
4028 4042  typedef struct zdb_ddt_entry {
4029 4043          ddt_key_t       zdde_key;
4030 4044          uint64_t        zdde_ref_blocks;
4031 4045          uint64_t        zdde_ref_lsize;
4032 4046          uint64_t        zdde_ref_psize;
4033 4047          uint64_t        zdde_ref_dsize;
4034 4048          avl_node_t      zdde_node;
4035 4049  } zdb_ddt_entry_t;
4036 4050  
4037 4051  /* ARGSUSED */
4038 4052  static int
4039 4053  zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
4040 4054      const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
4041 4055  {
4042 4056          avl_tree_t *t = arg;
4043 4057          avl_index_t where;
4044 4058          zdb_ddt_entry_t *zdde, zdde_search;
4045 4059  
4046 4060          if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
4047 4061                  return (0);
4048 4062  
4049 4063          if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
4050 4064                  (void) printf("traversing objset %llu, %llu objects, "
4051 4065                      "%lu blocks so far\n",
4052 4066                      (u_longlong_t)zb->zb_objset,
4053 4067                      (u_longlong_t)BP_GET_FILL(bp),
4054 4068                      avl_numnodes(t));
4055 4069          }
4056 4070  
4057 4071          if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
4058 4072              BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
4059 4073                  return (0);
4060 4074  
4061 4075          ddt_key_fill(&zdde_search.zdde_key, bp);
4062 4076  
4063 4077          zdde = avl_find(t, &zdde_search, &where);
4064 4078  
4065 4079          if (zdde == NULL) {
4066 4080                  zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
4067 4081                  zdde->zdde_key = zdde_search.zdde_key;
4068 4082                  avl_insert(t, zdde, where);
4069 4083          }
4070 4084  
4071 4085          zdde->zdde_ref_blocks += 1;
4072 4086          zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
4073 4087          zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
4074 4088          zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
4075 4089  
4076 4090          return (0);
4077 4091  }
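
zdb_ddt_add_cb() above keys an AVL tree by the block's DDT key and accumulates
reference counts and sizes per unique block. The sketch below models the same
accumulation with a flat array and linear search purely for illustration; the
toy_* names are invented and the real code of course filters holes, embedded
and metadata blocks first.

/*
 * Simplified model of building a simulated DDT: one entry per unique
 * checksum, accumulating reference counts and logical sizes.
 */
#include <stdio.h>

typedef struct toy_ddt_entry {
        unsigned long long te_cksum;            /* stands in for ddt_key_t */
        unsigned long long te_ref_blocks;
        unsigned long long te_ref_lsize;
} toy_ddt_entry_t;

static toy_ddt_entry_t toy_table[64];
static int toy_nentries;

static void
toy_ddt_add(unsigned long long cksum, unsigned long long lsize)
{
        for (int i = 0; i < toy_nentries; i++) {
                if (toy_table[i].te_cksum == cksum) {
                        toy_table[i].te_ref_blocks++;
                        toy_table[i].te_ref_lsize += lsize;
                        return;
                }
        }
        toy_table[toy_nentries].te_cksum = cksum;
        toy_table[toy_nentries].te_ref_blocks = 1;
        toy_table[toy_nentries].te_ref_lsize = lsize;
        toy_nentries++;
}

int
main(void)
{
        /* Three blocks share one checksum, one block is unique. */
        toy_ddt_add(0xabc, 131072);
        toy_ddt_add(0xabc, 131072);
        toy_ddt_add(0xabc, 131072);
        toy_ddt_add(0xdef, 4096);

        for (int i = 0; i < toy_nentries; i++)
                printf("cksum 0x%llx: %llu refs, %llu bytes logical\n",
                    toy_table[i].te_cksum, toy_table[i].te_ref_blocks,
                    toy_table[i].te_ref_lsize);
        return (0);
}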
4078 4092  
4079 4093  static void
4080 4094  dump_simulated_ddt(spa_t *spa)
4081 4095  {
4082 4096          avl_tree_t t;
4083 4097          void *cookie = NULL;
4084 4098          zdb_ddt_entry_t *zdde;
4085 4099          ddt_histogram_t ddh_total;
4086 4100          ddt_stat_t dds_total;
4087 4101  
4088 4102          bzero(&ddh_total, sizeof (ddh_total));
4089 4103          bzero(&dds_total, sizeof (dds_total));
4090 4104          avl_create(&t, ddt_entry_compare,
4091 4105              sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
4092 4106  
4093 4107          spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
4094 4108  
4095 4109          (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
4096 4110              zdb_ddt_add_cb, &t);
4097 4111  
4098 4112          spa_config_exit(spa, SCL_CONFIG, FTAG);
4099 4113  
4100 4114          while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
4101 4115                  ddt_stat_t dds;
4102 4116                  uint64_t refcnt = zdde->zdde_ref_blocks;
4103 4117                  ASSERT(refcnt != 0);
4104 4118  
4105 4119                  dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
4106 4120                  dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
4107 4121                  dds.dds_psize = zdde->zdde_ref_psize / refcnt;
4108 4122                  dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
4109 4123  
4110 4124                  dds.dds_ref_blocks = zdde->zdde_ref_blocks;
4111 4125                  dds.dds_ref_lsize = zdde->zdde_ref_lsize;
4112 4126                  dds.dds_ref_psize = zdde->zdde_ref_psize;
4113 4127                  dds.dds_ref_dsize = zdde->zdde_ref_dsize;
4114 4128  
4115 4129                  ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
4116 4130                      &dds, 0);
4117 4131  
4118 4132                  umem_free(zdde, sizeof (*zdde));
4119 4133          }
4120 4134  
4121 4135          avl_destroy(&t);
4122 4136  
4123 4137          ddt_histogram_stat(&dds_total, &ddh_total);
4124 4138  
4125 4139          (void) printf("Simulated DDT histogram:\n");
4126 4140  
4127 4141          zpool_dump_ddt(&dds_total, &ddh_total);
4128 4142  
4129 4143          dump_dedup_ratio(&dds_total);
4130 4144  }
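
dump_simulated_ddt() buckets each entry by highbit64(refcnt) - 1, so reference
counts are grouped by power of two (1, 2-3, 4-7, 8-15, ...). A tiny stand-alone
illustration of that bucketing; toy_highbit64() is a naive stand-in, not the
real highbit64().

/* Show which histogram bucket a few example reference counts land in. */
#include <stdio.h>

static int
toy_highbit64(unsigned long long v)
{
        int h = 0;

        while (v != 0) {
                h++;
                v >>= 1;
        }
        return (h);     /* position of the highest set bit, 1-based */
}

int
main(void)
{
        unsigned long long refcnts[] = { 1, 2, 3, 5, 9, 100 };

        for (size_t i = 0; i < sizeof (refcnts) / sizeof (refcnts[0]); i++)
                printf("refcnt %llu -> histogram bucket %d\n",
                    refcnts[i], toy_highbit64(refcnts[i]) - 1);
        return (0);
}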
4131 4145  
4132 4146  static int
4133 4147  verify_device_removal_feature_counts(spa_t *spa)
4134 4148  {
4135 4149          uint64_t dr_feature_refcount = 0;
4136 4150          uint64_t oc_feature_refcount = 0;
4137 4151          uint64_t indirect_vdev_count = 0;
4138 4152          uint64_t precise_vdev_count = 0;
4139 4153          uint64_t obsolete_counts_object_count = 0;
4140 4154          uint64_t obsolete_sm_count = 0;
4141 4155          uint64_t obsolete_counts_count = 0;
4142 4156          uint64_t scip_count = 0;
4143 4157          uint64_t obsolete_bpobj_count = 0;
4144 4158          int ret = 0;
4145 4159  
4146 4160          spa_condensing_indirect_phys_t *scip =
4147 4161              &spa->spa_condensing_indirect_phys;
4148 4162          if (scip->scip_next_mapping_object != 0) {
4149 4163                  vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev];
4150 4164                  ASSERT(scip->scip_prev_obsolete_sm_object != 0);
4151 4165                  ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
4152 4166  
4153 4167                  (void) printf("Condensing indirect vdev %llu: new mapping "
  
    [ 319 lines elided ]
4154 4168                      "object %llu, prev obsolete sm %llu\n",
4155 4169                      (u_longlong_t)scip->scip_vdev,
4156 4170                      (u_longlong_t)scip->scip_next_mapping_object,
4157 4171                      (u_longlong_t)scip->scip_prev_obsolete_sm_object);
4158 4172                  if (scip->scip_prev_obsolete_sm_object != 0) {
4159 4173                          space_map_t *prev_obsolete_sm = NULL;
4160 4174                          VERIFY0(space_map_open(&prev_obsolete_sm,
4161 4175                              spa->spa_meta_objset,
4162 4176                              scip->scip_prev_obsolete_sm_object,
4163 4177                              0, vd->vdev_asize, 0));
4164      -                        space_map_update(prev_obsolete_sm);
4165 4178                          dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
4166 4179                          (void) printf("\n");
4167 4180                          space_map_close(prev_obsolete_sm);
4168 4181                  }
4169 4182  
4170 4183                  scip_count += 2;
4171 4184          }
4172 4185  
4173 4186          for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
4174 4187                  vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
4175 4188                  vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
4176 4189  
4177 4190                  if (vic->vic_mapping_object != 0) {
4178 4191                          ASSERT(vd->vdev_ops == &vdev_indirect_ops ||
4179 4192                              vd->vdev_removing);
4180 4193                          indirect_vdev_count++;
4181 4194  
4182 4195                          if (vd->vdev_indirect_mapping->vim_havecounts) {
4183 4196                                  obsolete_counts_count++;
4184 4197                          }
4185 4198                  }
4186 4199                  if (vdev_obsolete_counts_are_precise(vd)) {
4187 4200                          ASSERT(vic->vic_mapping_object != 0);
4188 4201                          precise_vdev_count++;
4189 4202                  }
4190 4203                  if (vdev_obsolete_sm_object(vd) != 0) {
4191 4204                          ASSERT(vic->vic_mapping_object != 0);
4192 4205                          obsolete_sm_count++;
4193 4206                  }
4194 4207          }
4195 4208  
4196 4209          (void) feature_get_refcount(spa,
4197 4210              &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL],
4198 4211              &dr_feature_refcount);
4199 4212          (void) feature_get_refcount(spa,
4200 4213              &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS],
4201 4214              &oc_feature_refcount);
4202 4215  
4203 4216          if (dr_feature_refcount != indirect_vdev_count) {
4204 4217                  ret = 1;
4205 4218                  (void) printf("Number of indirect vdevs (%llu) " \
4206 4219                      "does not match feature count (%llu)\n",
4207 4220                      (u_longlong_t)indirect_vdev_count,
4208 4221                      (u_longlong_t)dr_feature_refcount);
4209 4222          } else {
4210 4223                  (void) printf("Verified device_removal feature refcount " \
4211 4224                      "of %llu is correct\n",
4212 4225                      (u_longlong_t)dr_feature_refcount);
4213 4226          }
4214 4227  
4215 4228          if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
4216 4229              DMU_POOL_OBSOLETE_BPOBJ) == 0) {
4217 4230                  obsolete_bpobj_count++;
4218 4231          }
4219 4232  
4220 4233  
4221 4234          obsolete_counts_object_count = precise_vdev_count;
4222 4235          obsolete_counts_object_count += obsolete_sm_count;
4223 4236          obsolete_counts_object_count += obsolete_counts_count;
4224 4237          obsolete_counts_object_count += scip_count;
4225 4238          obsolete_counts_object_count += obsolete_bpobj_count;
4226 4239          obsolete_counts_object_count += remap_deadlist_count;
4227 4240  
4228 4241          if (oc_feature_refcount != obsolete_counts_object_count) {
4229 4242                  ret = 1;
4230 4243                  (void) printf("Number of obsolete counts objects (%llu) " \
4231 4244                      "does not match feature count (%llu)\n",
4232 4245                      (u_longlong_t)obsolete_counts_object_count,
4233 4246                      (u_longlong_t)oc_feature_refcount);
4234 4247                  (void) printf("pv:%llu os:%llu oc:%llu sc:%llu "
4235 4248                      "ob:%llu rd:%llu\n",
4236 4249                      (u_longlong_t)precise_vdev_count,
4237 4250                      (u_longlong_t)obsolete_sm_count,
4238 4251                      (u_longlong_t)obsolete_counts_count,
4239 4252                      (u_longlong_t)scip_count,
4240 4253                      (u_longlong_t)obsolete_bpobj_count,
4241 4254                      (u_longlong_t)remap_deadlist_count);
4242 4255          } else {
4243 4256                  (void) printf("Verified indirect_refcount feature refcount " \
4244 4257                      "of %llu is correct\n",
4245 4258                      (u_longlong_t)oc_feature_refcount);
4246 4259          }
4247 4260          return (ret);
4248 4261  }
4249 4262  
4250 4263  static void
4251 4264  zdb_set_skip_mmp(char *target)
4252 4265  {
4253 4266          spa_t *spa;
4254 4267  
4255 4268          /*
4256 4269           * Disable the activity check to allow examination of
4257 4270           * active pools.
4258 4271           */
4259 4272          mutex_enter(&spa_namespace_lock);
4260 4273          if ((spa = spa_lookup(target)) != NULL) {
4261 4274                  spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP;
4262 4275          }
4263 4276          mutex_exit(&spa_namespace_lock);
4264 4277  }
4265 4278  
4266 4279  #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE"
4267 4280  /*
4268 4281   * Import the checkpointed state of the pool specified by the target
4269 4282   * parameter as readonly. The function also accepts a pool config
4270 4283   * as an optional parameter, else it attempts to infer the config by
4271 4284   * the name of the target pool.
4272 4285   *
4273 4286   * Note that the checkpointed state's pool name will be the name of
4274 4287   * the original pool with the above suffix appended to it. In addition,
4275 4288   * if the target is not a pool name (e.g. a path to a dataset) then
4276 4289   * the new_path parameter is populated with the updated path to
4277 4290   * reflect the fact that we are looking into the checkpointed state.
4278 4291   *
4279 4292   * The function returns a newly-allocated copy of the name of the
4280 4293   * pool containing the checkpointed state. When this copy is no
4281 4294   * longer needed it should be freed with free(3C). Same thing
4282 4295   * applies to the new_path parameter if allocated.
4283 4296   */
4284 4297  static char *
4285 4298  import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
4286 4299  {
4287 4300          int error = 0;
4288 4301          char *poolname, *bogus_name;
4289 4302  
4290 4303          /* If the target is not a pool, then extract the pool name */
4291 4304          char *path_start = strchr(target, '/');
4292 4305          if (path_start != NULL) {
4293 4306                  size_t poolname_len = path_start - target;
4294 4307                  poolname = strndup(target, poolname_len);
4295 4308          } else {
4296 4309                  poolname = target;
4297 4310          }
4298 4311  
4299 4312          if (cfg == NULL) {
4300 4313                  zdb_set_skip_mmp(poolname);
4301 4314                  error = spa_get_stats(poolname, &cfg, NULL, 0);
4302 4315                  if (error != 0) {
4303 4316                          fatal("Tried to read config of pool \"%s\" but "
4304 4317                              "spa_get_stats() failed with error %d\n",
4305 4318                              poolname, error);
4306 4319                  }
4307 4320          }
4308 4321  
4309 4322          (void) asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX);
4310 4323          fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name);
4311 4324  
4312 4325          error = spa_import(bogus_name, cfg, NULL,
4313 4326              ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT |
4314 4327              ZFS_IMPORT_SKIP_MMP);
4315 4328          if (error != 0) {
4316 4329                  fatal("Tried to import pool \"%s\" but spa_import() failed "
4317 4330                      "with error %d\n", bogus_name, error);
4318 4331          }
4319 4332  
4320 4333          if (new_path != NULL && path_start != NULL)
4321 4334                  (void) asprintf(new_path, "%s%s", bogus_name, path_start);
4322 4335  
4323 4336          if (target != poolname)
4324 4337                  free(poolname);
4325 4338  
4326 4339          return (bogus_name);
4327 4340  }
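
import_checkpointed_state() builds the "bogus" pool name by appending
BOGUS_SUFFIX and, when the target is a dataset path, rewrites the path against
that name. The self-contained sketch below covers just that string handling; it
assumes asprintf() and strndup() are available, as they are in the code above
(glibc-based systems may need _GNU_SOURCE for the asprintf() declaration).

#define _GNU_SOURCE     /* for asprintf() on glibc; harmless elsewhere */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TOY_SUFFIX "_CHECKPOINTED_UNIVERSE"

int
main(void)
{
        const char *target = "tank/home/user";
        char *bogus_name, *new_path = NULL;
        char *poolname;

        /* If the target is a dataset path, keep only the pool component. */
        const char *path_start = strchr(target, '/');
        if (path_start != NULL)
                poolname = strndup(target, (size_t)(path_start - target));
        else
                poolname = strdup(target);

        /* The checkpointed state is imported under the suffixed name. */
        (void) asprintf(&bogus_name, "%s%s", poolname, TOY_SUFFIX);
        if (path_start != NULL)
                (void) asprintf(&new_path, "%s%s", bogus_name, path_start);

        printf("import as: %s\n", bogus_name);
        if (new_path != NULL)
                printf("rewritten target: %s\n", new_path);

        free(poolname);
        free(bogus_name);
        free(new_path);
        return (0);
}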
4328 4341  
4329 4342  typedef struct verify_checkpoint_sm_entry_cb_arg {
4330 4343          vdev_t *vcsec_vd;
4331 4344  
4332 4345          /* the following fields are only used for printing progress */
4333 4346          uint64_t vcsec_entryid;
4334 4347          uint64_t vcsec_num_entries;
4335 4348  } verify_checkpoint_sm_entry_cb_arg_t;
4336 4349  
4337 4350  #define ENTRIES_PER_PROGRESS_UPDATE 10000
4338 4351  
4339 4352  static int
4340 4353  verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg)
4341 4354  {
4342 4355          verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg;
4343 4356          vdev_t *vd = vcsec->vcsec_vd;
4344 4357          metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
4345 4358          uint64_t end = sme->sme_offset + sme->sme_run;
4346 4359  
4347 4360          ASSERT(sme->sme_type == SM_FREE);
4348 4361  
4349 4362          if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) {
4350 4363                  (void) fprintf(stderr,
4351 4364                      "\rverifying vdev %llu, space map entry %llu of %llu ...",
4352 4365                      (longlong_t)vd->vdev_id,
4353 4366                      (longlong_t)vcsec->vcsec_entryid,
4354 4367                      (longlong_t)vcsec->vcsec_num_entries);
4355 4368          }
4356 4369          vcsec->vcsec_entryid++;
4357 4370  
4358 4371          /*
4359 4372           * See comment in checkpoint_sm_exclude_entry_cb()
  
    [ 185 lines elided ]
4360 4373           */
4361 4374          VERIFY3U(sme->sme_offset, >=, ms->ms_start);
4362 4375          VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
4363 4376  
4364 4377          /*
4365 4378           * The entries in the vdev_checkpoint_sm should be marked as
4366 4379           * allocated in the checkpointed state of the pool, therefore
4367 4380           * their respective ms_allocatable trees should not contain them.
4368 4381           */
4369 4382          mutex_enter(&ms->ms_lock);
4370      -        range_tree_verify(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
     4383 +        range_tree_verify_not_present(ms->ms_allocatable,
     4384 +            sme->sme_offset, sme->sme_run);
4371 4385          mutex_exit(&ms->ms_lock);
4372 4386  
4373 4387          return (0);
4374 4388  }
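
verify_checkpoint_sm_entry_cb() above asserts that each checkpoint space-map
entry stays within its metaslab and does not appear in that metaslab's
ms_allocatable tree (loaded with free space at this point). A toy version of
those two checks, with hypothetical toy_* names and example ranges:

/* Check an entry against metaslab bounds and a set of free ranges. */
#include <assert.h>
#include <stdio.h>

typedef struct toy_range {
        unsigned long long start;
        unsigned long long size;
} toy_range_t;

/* Stand-in for range_tree_verify_not_present(). */
static void
toy_verify_not_present(const toy_range_t *free_ranges, int n,
    unsigned long long off, unsigned long long run)
{
        for (int i = 0; i < n; i++) {
                unsigned long long fs = free_ranges[i].start;
                unsigned long long fe = fs + free_ranges[i].size;

                assert(off + run <= fs || off >= fe);   /* no overlap allowed */
        }
}

int
main(void)
{
        unsigned long long ms_start = 0x100000, ms_size = 0x100000;
        toy_range_t ms_free[] = { { 0x110000, 0x8000 } };
        unsigned long long sme_offset = 0x120000, sme_run = 0x4000;

        /* The entry must fall entirely inside the metaslab... */
        assert(sme_offset >= ms_start);
        assert(sme_offset + sme_run <= ms_start + ms_size);

        /* ...and must not intersect any free segment. */
        toy_verify_not_present(ms_free, 1, sme_offset, sme_run);

        printf("checkpoint entry [0x%llx, 0x%llx) verified\n",
            sme_offset, sme_offset + sme_run);
        return (0);
}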
4375 4389  
4376 4390  /*
4377 4391   * Verify that all segments in the vdev_checkpoint_sm are allocated
4378 4392   * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's
4379 4393   * ms_allocatable).
4380 4394   *
4381 4395   * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of
4382 4396   * each vdev in the current state of the pool to the metaslab space maps
4383 4397   * (ms_sm) of the checkpointed state of the pool.
4384 4398   *
4385 4399   * Note that the function changes the state of the ms_allocatable
4386 4400   * trees of the current spa_t. The entries of these ms_allocatable
4387 4401   * trees are cleared out and then repopulated with the free
4388 4402   * entries of their respective ms_sm space maps.
4389 4403   */
4390 4404  static void
4391 4405  verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
4392 4406  {
4393 4407          vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
4394 4408          vdev_t *current_rvd = current->spa_root_vdev;
4395 4409  
4396 4410          load_concrete_ms_allocatable_trees(checkpoint, SM_FREE);
4397 4411  
4398 4412          for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) {
4399 4413                  vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c];
4400 4414                  vdev_t *current_vd = current_rvd->vdev_child[c];
4401 4415  
4402 4416                  space_map_t *checkpoint_sm = NULL;
4403 4417                  uint64_t checkpoint_sm_obj;
4404 4418  
4405 4419                  if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
4406 4420                          /*
4407 4421                           * Since we don't allow device removal in a pool
4408 4422                           * that has a checkpoint, we expect that all removed
4409 4423                           * vdevs were removed from the pool before the
4410 4424                           * checkpoint.
4411 4425                           */
4412 4426                          ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
4413 4427                          continue;
4414 4428                  }
4415 4429  
4416 4430                  /*
4417 4431                   * If the checkpoint space map doesn't exist, then nothing
4418 4432                   * here is checkpointed so there's nothing to verify.
4419 4433                   */
4420 4434                  if (current_vd->vdev_top_zap == 0 ||
4421 4435                      zap_contains(spa_meta_objset(current),
4422 4436                      current_vd->vdev_top_zap,
  
    [ 42 lines elided ]
4423 4437                      VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
4424 4438                          continue;
4425 4439  
4426 4440                  VERIFY0(zap_lookup(spa_meta_objset(current),
4427 4441                      current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
4428 4442                      sizeof (uint64_t), 1, &checkpoint_sm_obj));
4429 4443  
4430 4444                  VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
4431 4445                      checkpoint_sm_obj, 0, current_vd->vdev_asize,
4432 4446                      current_vd->vdev_ashift));
4433      -                space_map_update(checkpoint_sm);
4434 4447  
4435 4448                  verify_checkpoint_sm_entry_cb_arg_t vcsec;
4436 4449                  vcsec.vcsec_vd = ckpoint_vd;
4437 4450                  vcsec.vcsec_entryid = 0;
4438 4451                  vcsec.vcsec_num_entries =
4439 4452                      space_map_length(checkpoint_sm) / sizeof (uint64_t);
4440 4453                  VERIFY0(space_map_iterate(checkpoint_sm,
     4454 +                    space_map_length(checkpoint_sm),
4441 4455                      verify_checkpoint_sm_entry_cb, &vcsec));
4442 4456                  dump_spacemap(current->spa_meta_objset, checkpoint_sm);
4443 4457                  space_map_close(checkpoint_sm);
4444 4458          }
4445 4459  
4446 4460          /*
4447 4461           * If we've added vdevs since we took the checkpoint, ensure
4448 4462           * that their checkpoint space maps are empty.
4449 4463           */
4450 4464          if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) {
4451 4465                  for (uint64_t c = ckpoint_rvd->vdev_children;
4452 4466                      c < current_rvd->vdev_children; c++) {
4453 4467                          vdev_t *current_vd = current_rvd->vdev_child[c];
4454 4468                          ASSERT3P(current_vd->vdev_checkpoint_sm, ==, NULL);
4455 4469                  }
4456 4470          }
4457 4471  
4458 4472          /* for cleaner progress output */
4459 4473          (void) fprintf(stderr, "\n");
4460 4474  }
4461 4475  
4462 4476  /*
4463 4477   * Verifies that all space that's allocated in the checkpoint is
4464 4478   * still allocated in the current version, by checking that everything
4465 4479   * in checkpoint's ms_allocatable (which is actually allocated, not
4466 4480   * allocatable/free) is not present in current's ms_allocatable.
4467 4481   *
4468 4482   * Note that the function changes the state of the ms_allocatable
4469 4483   * trees of both spas when called. The entries of all ms_allocatable
4470 4484   * trees are cleared out and then repopulated from their respective
4471 4485   * ms_sm space maps. In the checkpointed state we load the allocated
4472 4486   * entries, and in the current state we load the free entries.
4473 4487   */
4474 4488  static void
4475 4489  verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
4476 4490  {
4477 4491          vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
4478 4492          vdev_t *current_rvd = current->spa_root_vdev;
4479 4493  
4480 4494          load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC);
4481 4495          load_concrete_ms_allocatable_trees(current, SM_FREE);
4482 4496  
4483 4497          for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) {
4484 4498                  vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i];
4485 4499                  vdev_t *current_vd = current_rvd->vdev_child[i];
4486 4500  
4487 4501                  if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
4488 4502                          /*
4489 4503                           * See comment in verify_checkpoint_vdev_spacemaps()
4490 4504                           */
4491 4505                          ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
4492 4506                          continue;
4493 4507                  }
4494 4508  
4495 4509                  for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) {
4496 4510                          metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m];
4497 4511                          metaslab_t *current_msp = current_vd->vdev_ms[m];
4498 4512  
4499 4513                          (void) fprintf(stderr,
4500 4514                              "\rverifying vdev %llu of %llu, "
4501 4515                              "metaslab %llu of %llu ...",
4502 4516                              (longlong_t)current_vd->vdev_id,
4503 4517                              (longlong_t)current_rvd->vdev_children,
4504 4518                              (longlong_t)current_vd->vdev_ms[m]->ms_id,
4505 4519                              (longlong_t)current_vd->vdev_ms_count);
4506 4520  
4507 4521                          /*
4508 4522                           * We walk through the ms_allocatable trees that
4509 4523                           * are loaded with the allocated blocks from the
  
    [ 59 lines elided ]
4510 4524                           * ms_sm spacemaps of the checkpoint. For each
4511 4525                           * one of these ranges we ensure that none of them
4512 4526                           * exists in the ms_allocatable trees of the
4513 4527                           * current state which are loaded with the ranges
4514 4528                           * that are currently free.
4515 4529                           *
4516 4530                           * This way we ensure that none of the blocks that
4517 4531                           * are part of the checkpoint were freed by mistake.
4518 4532                           */
4519 4533                          range_tree_walk(ckpoint_msp->ms_allocatable,
4520      -                            (range_tree_func_t *)range_tree_verify,
     4534 +                            (range_tree_func_t *)range_tree_verify_not_present,
4521 4535                              current_msp->ms_allocatable);
4522 4536                  }
4523 4537          }
4524 4538  
4525 4539          /* for cleaner progress output */
4526 4540          (void) fprintf(stderr, "\n");
4527 4541  }
4528 4542  
4529 4543  static void
4530 4544  verify_checkpoint_blocks(spa_t *spa)
4531 4545  {
     4546 +        ASSERT(!dump_opt['L']);
     4547 +
4532 4548          spa_t *checkpoint_spa;
4533 4549          char *checkpoint_pool;
4534 4550          nvlist_t *config = NULL;
4535 4551          int error = 0;
4536 4552  
4537 4553          /*
4538 4554           * We import the checkpointed state of the pool (under a different
4539 4555           * name) so we can do verification on it against the current state
4540 4556           * of the pool.
4541 4557           */
4542 4558          checkpoint_pool = import_checkpointed_state(spa->spa_name, config,
4543 4559              NULL);
4544 4560          ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
4545 4561  
4546 4562          error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG);
4547 4563          if (error != 0) {
4548 4564                  fatal("Tried to open pool \"%s\" but spa_open() failed with "
4549 4565                      "error %d\n", checkpoint_pool, error);
4550 4566          }
4551 4567  
4552 4568          /*
4553 4569           * Ensure that ranges in the checkpoint space maps of each vdev
4554 4570           * are allocated according to the checkpointed state's metaslab
4555 4571           * space maps.
4556 4572           */
4557 4573          verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa);
4558 4574  
4559 4575          /*
4560 4576           * Ensure that allocated ranges in the checkpoint's metaslab
4561 4577           * space maps remain allocated in the metaslab space maps of
4562 4578           * the current state.
4563 4579           */
4564 4580          verify_checkpoint_ms_spacemaps(checkpoint_spa, spa);
4565 4581  
4566 4582          /*
4567 4583           * Once we are done, we get rid of the checkpointed state.
4568 4584           */
4569 4585          spa_close(checkpoint_spa, FTAG);
4570 4586          free(checkpoint_pool);
4571 4587  }
4572 4588  
4573 4589  static void
4574 4590  dump_leftover_checkpoint_blocks(spa_t *spa)
4575 4591  {
4576 4592          vdev_t *rvd = spa->spa_root_vdev;
4577 4593  
4578 4594          for (uint64_t i = 0; i < rvd->vdev_children; i++) {
4579 4595                  vdev_t *vd = rvd->vdev_child[i];
4580 4596  
4581 4597                  space_map_t *checkpoint_sm = NULL;
4582 4598                  uint64_t checkpoint_sm_obj;
4583 4599  
4584 4600                  if (vd->vdev_top_zap == 0)
4585 4601                          continue;
4586 4602  
  
4587 4603                  if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
4588 4604                      VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
4589 4605                          continue;
4590 4606  
4591 4607                  VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
4592 4608                      VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
4593 4609                      sizeof (uint64_t), 1, &checkpoint_sm_obj));
4594 4610  
4595 4611                  VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
4596 4612                      checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
4597      -                space_map_update(checkpoint_sm);
4598 4613                  dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
4599 4614                  space_map_close(checkpoint_sm);
4600 4615          }
4601 4616  }
4602 4617  
4603 4618  static int
4604 4619  verify_checkpoint(spa_t *spa)
4605 4620  {
4606 4621          uberblock_t checkpoint;
4607 4622          int error;
4608 4623  
4609 4624          if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
4610 4625                  return (0);
4611 4626  
4612 4627          error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
4613 4628              DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
4614 4629              sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
4615 4630  
4616 4631          if (error == ENOENT && !dump_opt['L']) {
4617 4632                  /*
4618 4633                   * If the feature is active but the uberblock is missing
4619 4634                   * then we must be in the middle of discarding the
4620 4635                   * checkpoint.
4621 4636                   */
4622 4637                  (void) printf("\nPartially discarded checkpoint "
4623 4638                      "state found:\n");
4624 4639                  dump_leftover_checkpoint_blocks(spa);
4625 4640                  return (0);
4626 4641          } else if (error != 0) {
4627 4642                  (void) printf("lookup error %d when looking for "
4628 4643                      "checkpointed uberblock in MOS\n", error);
4629 4644                  return (error);
4630 4645          }
4631 4646          dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n");
4632 4647  
4633 4648          if (checkpoint.ub_checkpoint_txg == 0) {
4634 4649                  (void) printf("\nub_checkpoint_txg not set in checkpointed "
4635 4650                      "uberblock\n");
4636 4651                  error = 3;
4637 4652          }
4638 4653  
4639 4654          if (error == 0 && !dump_opt['L'])
4640 4655                  verify_checkpoint_blocks(spa);
4641 4656  
4642 4657          return (error);
4643 4658  }
4644 4659  
4645 4660  /* ARGSUSED */
4646 4661  static void
4647 4662  mos_leaks_cb(void *arg, uint64_t start, uint64_t size)
4648 4663  {
4649 4664          for (uint64_t i = start; i < size; i++) {
4650 4665                  (void) printf("MOS object %llu referenced but not allocated\n",
4651 4666                      (u_longlong_t)i);
4652 4667          }
4653 4668  }
4654 4669  
4655 4670  static range_tree_t *mos_refd_objs;
4656 4671  
4657 4672  static void
4658 4673  mos_obj_refd(uint64_t obj)
4659 4674  {
4660 4675          if (obj != 0 && mos_refd_objs != NULL)
4661 4676                  range_tree_add(mos_refd_objs, obj, 1);
4662 4677  }
4663 4678  
4664 4679  static void
4665 4680  mos_leak_vdev(vdev_t *vd)
4666 4681  {
4667 4682          mos_obj_refd(vd->vdev_dtl_object);
4668 4683          mos_obj_refd(vd->vdev_ms_array);
4669 4684          mos_obj_refd(vd->vdev_top_zap);
4670 4685          mos_obj_refd(vd->vdev_indirect_config.vic_births_object);
4671 4686          mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object);
4672 4687          mos_obj_refd(vd->vdev_leaf_zap);
4673 4688          if (vd->vdev_checkpoint_sm != NULL)
4674 4689                  mos_obj_refd(vd->vdev_checkpoint_sm->sm_object);
4675 4690          if (vd->vdev_indirect_mapping != NULL) {
4676 4691                  mos_obj_refd(vd->vdev_indirect_mapping->
4677 4692                      vim_phys->vimp_counts_object);
4678 4693          }
4679 4694          if (vd->vdev_obsolete_sm != NULL)
4680 4695                  mos_obj_refd(vd->vdev_obsolete_sm->sm_object);
4681 4696  
4682 4697          for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
4683 4698                  metaslab_t *ms = vd->vdev_ms[m];
4684 4699                  mos_obj_refd(space_map_object(ms->ms_sm));
4685 4700          }
4686 4701  
4687 4702          for (uint64_t c = 0; c < vd->vdev_children; c++) {
4688 4703                  mos_leak_vdev(vd->vdev_child[c]);
4689 4704          }
4690 4705  }
4691 4706  
4692 4707  static int
4693 4708  dump_mos_leaks(spa_t *spa)
4694 4709  {
4695 4710          int rv = 0;
4696 4711          objset_t *mos = spa->spa_meta_objset;
4697 4712          dsl_pool_t *dp = spa->spa_dsl_pool;
4698 4713  
4699 4714          /* Visit and mark all referenced objects in the MOS */
4700 4715  
4701 4716          mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT);
4702 4717          mos_obj_refd(spa->spa_pool_props_object);
4703 4718          mos_obj_refd(spa->spa_config_object);
4704 4719          mos_obj_refd(spa->spa_ddt_stat_object);
4705 4720          mos_obj_refd(spa->spa_feat_desc_obj);
4706 4721          mos_obj_refd(spa->spa_feat_enabled_txg_obj);
4707 4722          mos_obj_refd(spa->spa_feat_for_read_obj);
4708 4723          mos_obj_refd(spa->spa_feat_for_write_obj);
4709 4724          mos_obj_refd(spa->spa_history);
4710 4725          mos_obj_refd(spa->spa_errlog_last);
4711 4726          mos_obj_refd(spa->spa_errlog_scrub);
4712 4727          mos_obj_refd(spa->spa_all_vdev_zaps);
4713 4728          mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj);
4714 4729          mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj);
4715 4730          mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj);
4716 4731          bpobj_count_refd(&spa->spa_deferred_bpobj);
4717 4732          mos_obj_refd(dp->dp_empty_bpobj);
4718 4733          bpobj_count_refd(&dp->dp_obsolete_bpobj);
4719 4734          bpobj_count_refd(&dp->dp_free_bpobj);
4720 4735          mos_obj_refd(spa->spa_l2cache.sav_object);
4721 4736          mos_obj_refd(spa->spa_spares.sav_object);
4722 4737  
4723 4738          mos_obj_refd(spa->spa_condensing_indirect_phys.
4724 4739              scip_next_mapping_object);
4725 4740          mos_obj_refd(spa->spa_condensing_indirect_phys.
4726 4741              scip_prev_obsolete_sm_object);
4727 4742          if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) {
4728 4743                  vdev_indirect_mapping_t *vim =
4729 4744                      vdev_indirect_mapping_open(mos,
4730 4745                      spa->spa_condensing_indirect_phys.scip_next_mapping_object);
4731 4746                  mos_obj_refd(vim->vim_phys->vimp_counts_object);
4732 4747                  vdev_indirect_mapping_close(vim);
4733 4748          }
4734 4749  
4735 4750          if (dp->dp_origin_snap != NULL) {
4736 4751                  dsl_dataset_t *ds;
4737 4752  
4738 4753                  dsl_pool_config_enter(dp, FTAG);
4739 4754                  VERIFY0(dsl_dataset_hold_obj(dp,
4740 4755                      dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj,
4741 4756                      FTAG, &ds));
4742 4757                  count_ds_mos_objects(ds);
4743 4758                  dump_deadlist(&ds->ds_deadlist);
4744 4759                  dsl_dataset_rele(ds, FTAG);
4745 4760                  dsl_pool_config_exit(dp, FTAG);
4746 4761  
4747 4762                  count_ds_mos_objects(dp->dp_origin_snap);
4748 4763                  dump_deadlist(&dp->dp_origin_snap->ds_deadlist);
4749 4764          }
4750 4765          count_dir_mos_objects(dp->dp_mos_dir);
4751 4766          if (dp->dp_free_dir != NULL)
4752 4767                  count_dir_mos_objects(dp->dp_free_dir);
4753 4768          if (dp->dp_leak_dir != NULL)
4754 4769                  count_dir_mos_objects(dp->dp_leak_dir);
4755 4770  
4756 4771          mos_leak_vdev(spa->spa_root_vdev);
4757 4772  
4758 4773          for (uint64_t class = 0; class < DDT_CLASSES; class++) {
4759 4774                  for (uint64_t type = 0; type < DDT_TYPES; type++) {
4760 4775                          for (uint64_t cksum = 0;
4761 4776                              cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
4762 4777                                  ddt_t *ddt = spa->spa_ddt[cksum];
4763 4778                                  mos_obj_refd(ddt->ddt_object[type][class]);
4764 4779                          }
4765 4780                  }
4766 4781          }
4767 4782  
4768 4783          /*
4769 4784           * Visit all allocated objects and make sure they are referenced.
4770 4785           */
4771 4786          uint64_t object = 0;
4772 4787          while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) {
4773 4788                  if (range_tree_contains(mos_refd_objs, object, 1)) {
4774 4789                          range_tree_remove(mos_refd_objs, object, 1);
4775 4790                  } else {
4776 4791                          dmu_object_info_t doi;
4777 4792                          const char *name;
4778 4793                          dmu_object_info(mos, object, &doi);
4779 4794                          if (doi.doi_type & DMU_OT_NEWTYPE) {
4780 4795                                  dmu_object_byteswap_t bswap =
4781 4796                                      DMU_OT_BYTESWAP(doi.doi_type);
4782 4797                                  name = dmu_ot_byteswap[bswap].ob_name;
4783 4798                          } else {
4784 4799                                  name = dmu_ot[doi.doi_type].ot_name;
4785 4800                          }
4786 4801  
4787 4802                          (void) printf("MOS object %llu (%s) leaked\n",
4788 4803                              (u_longlong_t)object, name);
4789 4804                          rv = 2;
4790 4805                  }
4791 4806          }
4792 4807          (void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL);
4793 4808          if (!range_tree_is_empty(mos_refd_objs))
4794 4809                  rv = 2;
4795 4810          range_tree_vacate(mos_refd_objs, NULL, NULL);
4796 4811          range_tree_destroy(mos_refd_objs);
4797 4812          return (rv);
4798 4813  }
4799 4814  
4800 4815  static void
4801 4816  dump_zpool(spa_t *spa)
4802 4817  {
4803 4818          dsl_pool_t *dp = spa_get_dsl(spa);
4804 4819          int rc = 0;
4805 4820  
4806 4821          if (dump_opt['S']) {
4807 4822                  dump_simulated_ddt(spa);
4808 4823                  return;
4809 4824          }
4810 4825  
4811 4826          if (!dump_opt['e'] && dump_opt['C'] > 1) {
4812 4827                  (void) printf("\nCached configuration:\n");
4813 4828                  dump_nvlist(spa->spa_config, 8);
4814 4829          }
4815 4830  
4816 4831          if (dump_opt['C'])
4817 4832                  dump_config(spa);
4818 4833  
4819 4834          if (dump_opt['u'])
4820 4835                  dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
4821 4836  
4822 4837          if (dump_opt['D'])
4823 4838                  dump_all_ddts(spa);
4824 4839  
4825 4840          if (dump_opt['d'] > 2 || dump_opt['m'])
4826 4841                  dump_metaslabs(spa);
4827 4842          if (dump_opt['M'])
4828 4843                  dump_metaslab_groups(spa);
4829 4844  
4830 4845          if (dump_opt['d'] || dump_opt['i']) {
4831 4846                  mos_refd_objs = range_tree_create(NULL, NULL);
4832 4847                  dump_dir(dp->dp_meta_objset);
4833 4848  
4834 4849                  if (dump_opt['d'] >= 3) {
4835 4850                          dsl_pool_t *dp = spa->spa_dsl_pool;
4836 4851                          dump_full_bpobj(&spa->spa_deferred_bpobj,
4837 4852                              "Deferred frees", 0);
4838 4853                          if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
4839 4854                                  dump_full_bpobj(&dp->dp_free_bpobj,
4840 4855                                      "Pool snapshot frees", 0);
4841 4856                          }
4842 4857                          if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
4843 4858                                  ASSERT(spa_feature_is_enabled(spa,
4844 4859                                      SPA_FEATURE_DEVICE_REMOVAL));
4845 4860                                  dump_full_bpobj(&dp->dp_obsolete_bpobj,
4846 4861                                      "Pool obsolete blocks", 0);
4847 4862                          }
4848 4863  
4849 4864                          if (spa_feature_is_active(spa,
4850 4865                              SPA_FEATURE_ASYNC_DESTROY)) {
4851 4866                                  dump_bptree(spa->spa_meta_objset,
4852 4867                                      dp->dp_bptree_obj,
4853 4868                                      "Pool dataset frees");
4854 4869                          }
4855 4870                          dump_dtl(spa->spa_root_vdev, 0);
4856 4871                  }
4857 4872                  (void) dmu_objset_find(spa_name(spa), dump_one_dir,
4858 4873                      NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
4859 4874  
4860 4875                  if (rc == 0 && !dump_opt['L'])
4861 4876                          rc = dump_mos_leaks(spa);
4862 4877  
4863 4878                  for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
4864 4879                          uint64_t refcount;
4865 4880  
4866 4881                          if (!(spa_feature_table[f].fi_flags &
4867 4882                              ZFEATURE_FLAG_PER_DATASET) ||
4868 4883                              !spa_feature_is_enabled(spa, f)) {
4869 4884                                  ASSERT0(dataset_feature_count[f]);
4870 4885                                  continue;
4871 4886                          }
4872 4887                          (void) feature_get_refcount(spa,
4873 4888                              &spa_feature_table[f], &refcount);
4874 4889                          if (dataset_feature_count[f] != refcount) {
4875 4890                                  (void) printf("%s feature refcount mismatch: "
4876 4891                                      "%lld datasets != %lld refcount\n",
4877 4892                                      spa_feature_table[f].fi_uname,
4878 4893                                      (longlong_t)dataset_feature_count[f],
4879 4894                                      (longlong_t)refcount);
4880 4895                                  rc = 2;
4881 4896                          } else {
4882 4897                                  (void) printf("Verified %s feature refcount "
4883 4898                                      "of %llu is correct\n",
4884 4899                                      spa_feature_table[f].fi_uname,
4885 4900                                      (longlong_t)refcount);
4886 4901                          }
4887 4902                  }
4888 4903  
4889 4904                  if (rc == 0) {
4890 4905                          rc = verify_device_removal_feature_counts(spa);
4891 4906                  }
4892 4907          }
4893 4908  
4894 4909          if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
4895 4910                  rc = dump_block_stats(spa);
4896 4911  
4897 4912          if (rc == 0)
4898 4913                  rc = verify_spacemap_refcounts(spa);
4899 4914  
4900 4915          if (dump_opt['s'])
4901 4916                  show_pool_stats(spa);
4902 4917  
4903 4918          if (dump_opt['h'])
4904 4919                  dump_history(spa);
4905 4920  
4906 4921          if (rc == 0)
4907 4922                  rc = verify_checkpoint(spa);
4908 4923  
4909 4924          if (rc != 0) {
4910 4925                  dump_debug_buffer();
4911 4926                  exit(rc);
4912 4927          }
4913 4928  }
4914 4929  
4915 4930  #define ZDB_FLAG_CHECKSUM       0x0001
4916 4931  #define ZDB_FLAG_DECOMPRESS     0x0002
4917 4932  #define ZDB_FLAG_BSWAP          0x0004
4918 4933  #define ZDB_FLAG_GBH            0x0008
4919 4934  #define ZDB_FLAG_INDIRECT       0x0010
4920 4935  #define ZDB_FLAG_PHYS           0x0020
4921 4936  #define ZDB_FLAG_RAW            0x0040
4922 4937  #define ZDB_FLAG_PRINT_BLKPTR   0x0080
4923 4938  
4924 4939  static int flagbits[256];
4925 4940  
4926 4941  static void
4927 4942  zdb_print_blkptr(blkptr_t *bp, int flags)
4928 4943  {
4929 4944          char blkbuf[BP_SPRINTF_LEN];
4930 4945  
4931 4946          if (flags & ZDB_FLAG_BSWAP)
4932 4947                  byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
4933 4948  
4934 4949          snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
4935 4950          (void) printf("%s\n", blkbuf);
4936 4951  }
4937 4952  
4938 4953  static void
4939 4954  zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
4940 4955  {
4941 4956          int i;
4942 4957  
4943 4958          for (i = 0; i < nbps; i++)
4944 4959                  zdb_print_blkptr(&bp[i], flags);
4945 4960  }
4946 4961  
4947 4962  static void
4948 4963  zdb_dump_gbh(void *buf, int flags)
4949 4964  {
4950 4965          zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
4951 4966  }
4952 4967  
4953 4968  static void
4954 4969  zdb_dump_block_raw(void *buf, uint64_t size, int flags)
4955 4970  {
4956 4971          if (flags & ZDB_FLAG_BSWAP)
4957 4972                  byteswap_uint64_array(buf, size);
4958 4973          (void) write(1, buf, size);
4959 4974  }
4960 4975  
4961 4976  static void
4962 4977  zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
4963 4978  {
4964 4979          uint64_t *d = (uint64_t *)buf;
4965 4980          unsigned nwords = size / sizeof (uint64_t);
4966 4981          int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
4967 4982          unsigned i, j;
4968 4983          const char *hdr;
4969 4984          char *c;
4970 4985  
4971 4986  
4972 4987          if (do_bswap)
4973 4988                  hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
4974 4989          else
4975 4990                  hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
4976 4991  
4977 4992          (void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
4978 4993  
4979 4994          for (i = 0; i < nwords; i += 2) {
4980 4995                  (void) printf("%06llx:  %016llx  %016llx  ",
4981 4996                      (u_longlong_t)(i * sizeof (uint64_t)),
4982 4997                      (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
4983 4998                      (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
4984 4999  
4985 5000                  c = (char *)&d[i];
4986 5001                  for (j = 0; j < 2 * sizeof (uint64_t); j++)
4987 5002                          (void) printf("%c", isprint(c[j]) ? c[j] : '.');
4988 5003                  (void) printf("\n");
4989 5004          }
4990 5005  }
4991 5006  
4992 5007  /*
4993 5008   * There are two acceptable formats:
4994 5009   *      leaf_name         - For example: c1t0d0 or /tmp/ztest.0a
4995 5010   *      child[.child]*    - For example: 0.1.1
4996 5011   *
4997 5012   * The second form can be used to specify arbitrary vdevs anywhere
4998 5013   * in the hierarchy.  For example, in a pool with a mirror of
4999 5014   * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
5000 5015   */
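           /*
            * Note, derived from the comparison logic below: when a leaf is
            * given by name, a trailing "s0" slice suffix in vdev_path is
            * ignored, so a vdev whose path ends in "c1t0d0s0" also matches
            * the specifier "c1t0d0".
            */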
5001 5016  static vdev_t *
5002 5017  zdb_vdev_lookup(vdev_t *vdev, const char *path)
5003 5018  {
5004 5019          char *s, *p, *q;
5005 5020          unsigned i;
5006 5021  
5007 5022          if (vdev == NULL)
5008 5023                  return (NULL);
5009 5024  
5010 5025          /* First, assume the x.x.x.x format */
5011 5026          i = strtoul(path, &s, 10);
5012 5027          if (s == path || (s && *s != '.' && *s != '\0'))
5013 5028                  goto name;
5014 5029          if (i >= vdev->vdev_children)
5015 5030                  return (NULL);
5016 5031  
5017 5032          vdev = vdev->vdev_child[i];
5018 5033          if (*s == '\0')
5019 5034                  return (vdev);
5020 5035          return (zdb_vdev_lookup(vdev, s+1));
5021 5036  
5022 5037  name:
5023 5038          for (i = 0; i < vdev->vdev_children; i++) {
5024 5039                  vdev_t *vc = vdev->vdev_child[i];
5025 5040  
5026 5041                  if (vc->vdev_path == NULL) {
5027 5042                          vc = zdb_vdev_lookup(vc, path);
5028 5043                          if (vc == NULL)
5029 5044                                  continue;
5030 5045                          else
5031 5046                                  return (vc);
5032 5047                  }
5033 5048  
5034 5049                  p = strrchr(vc->vdev_path, '/');
5035 5050                  p = p ? p + 1 : vc->vdev_path;
5036 5051                  q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
5037 5052  
5038 5053                  if (strcmp(vc->vdev_path, path) == 0)
5039 5054                          return (vc);
5040 5055                  if (strcmp(p, path) == 0)
5041 5056                          return (vc);
5042 5057                  if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
5043 5058                          return (vc);
5044 5059          }
5045 5060  
5046 5061          return (NULL);
5047 5062  }
5048 5063  
5049 5064  /* ARGSUSED */
5050 5065  static int
5051 5066  random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
5052 5067  {
5053 5068          return (random_get_pseudo_bytes(buf, len));
5054 5069  }
5055 5070  
5056 5071  /*
5057 5072   * Read a block from a pool and print it out.  The syntax of the
5058 5073   * block descriptor is:
5059 5074   *
5060 5075   *      pool:vdev_specifier:offset:size[:flags]
5061 5076   *
5062 5077   *      pool           - The name of the pool you wish to read from
5063 5078   *      vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
5064 5079   *      offset         - offset, in hex, in bytes
5065 5080   *      size           - Amount of data to read, in hex, in bytes
5066 5081   *      flags          - A string of characters specifying options
5067 5082   *               b: Decode a blkptr at given offset within block
5068 5083   *              *c: Calculate and display checksums
5069 5084   *               d: Decompress data before dumping
5070 5085   *               e: Byteswap data before dumping
5071 5086   *               g: Display data as a gang block header
5072 5087   *               i: Display as an indirect block
5073 5088   *               p: Do I/O to physical offset
5074 5089   *               r: Dump raw data to stdout
5075 5090   *
5076 5091   *              * = not yet implemented
5077 5092   */
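           /*
            * Example invocation (hypothetical pool name and offsets): dump
            * 0x20000 bytes of raw data starting at byte offset 0x400000 of
            * top-level vdev 0 in pool "tank":
            *
            *      zdb -R tank:0:400000:20000:r
            */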
5078 5093  static void
5079 5094  zdb_read_block(char *thing, spa_t *spa)
5080 5095  {
5081 5096          blkptr_t blk, *bp = &blk;
5082 5097          dva_t *dva = bp->blk_dva;
5083 5098          int flags = 0;
5084 5099          uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
5085 5100          zio_t *zio;
5086 5101          vdev_t *vd;
5087 5102          abd_t *pabd;
5088 5103          void *lbuf, *buf;
5089 5104          const char *s, *vdev;
5090 5105          char *p, *dup, *flagstr;
5091 5106          int i, error;
5092 5107  
5093 5108          dup = strdup(thing);
5094 5109          s = strtok(dup, ":");
5095 5110          vdev = s ? s : "";
5096 5111          s = strtok(NULL, ":");
5097 5112          offset = strtoull(s ? s : "", NULL, 16);
5098 5113          s = strtok(NULL, ":");
5099 5114          size = strtoull(s ? s : "", NULL, 16);
5100 5115          s = strtok(NULL, ":");
5101 5116          if (s)
5102 5117                  flagstr = strdup(s);
5103 5118          else
5104 5119                  flagstr = strdup("");
5105 5120  
5106 5121          s = NULL;
5107 5122          if (size == 0)
5108 5123                  s = "size must not be zero";
5109 5124          if (!IS_P2ALIGNED(size, DEV_BSIZE))
5110 5125                  s = "size must be a multiple of sector size";
5111 5126          if (!IS_P2ALIGNED(offset, DEV_BSIZE))
5112 5127                  s = "offset must be a multiple of sector size";
5113 5128          if (s) {
5114 5129                  (void) printf("Invalid block specifier: %s  - %s\n", thing, s);
5115 5130                  free(dup);
5116 5131                  return;
5117 5132          }
5118 5133  
5119 5134          for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
5120 5135                  for (i = 0; flagstr[i]; i++) {
5121 5136                          int bit = flagbits[(uchar_t)flagstr[i]];
5122 5137  
5123 5138                          if (bit == 0) {
5124 5139                                  (void) printf("***Invalid flag: %c\n",
5125 5140                                      flagstr[i]);
5126 5141                                  continue;
5127 5142                          }
5128 5143                          flags |= bit;
5129 5144  
5130 5145                          /* If it's not something with an argument, keep going */
5131 5146                          if ((bit & (ZDB_FLAG_CHECKSUM |
5132 5147                              ZDB_FLAG_PRINT_BLKPTR)) == 0)
5133 5148                                  continue;
5134 5149  
5135 5150                          p = &flagstr[i + 1];
5136 5151                          if (bit == ZDB_FLAG_PRINT_BLKPTR)
5137 5152                                  blkptr_offset = strtoull(p, &p, 16);
5138 5153                          if (*p != ':' && *p != '\0') {
5139 5154                                  (void) printf("***Invalid flag arg: '%s'\n", s);
5140 5155                                  free(dup);
5141 5156                                  return;
5142 5157                          }
5143 5158                  }
5144 5159          }
5145 5160          free(flagstr);
5146 5161  
5147 5162          vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
5148 5163          if (vd == NULL) {
5149 5164                  (void) printf("***Invalid vdev: %s\n", vdev);
5150 5165                  free(dup);
5151 5166                  return;
5152 5167          } else {
5153 5168                  if (vd->vdev_path)
5154 5169                          (void) fprintf(stderr, "Found vdev: %s\n",
5155 5170                              vd->vdev_path);
5156 5171                  else
5157 5172                          (void) fprintf(stderr, "Found vdev type: %s\n",
5158 5173                              vd->vdev_ops->vdev_op_type);
5159 5174          }
5160 5175  
5161 5176          psize = size;
5162 5177          lsize = size;
5163 5178  
5164 5179          pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
5165 5180          lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
5166 5181  
5167 5182          BP_ZERO(bp);
5168 5183  
5169 5184          DVA_SET_VDEV(&dva[0], vd->vdev_id);
5170 5185          DVA_SET_OFFSET(&dva[0], offset);
5171 5186          DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
5172 5187          DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
5173 5188  
5174 5189          BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
5175 5190  
5176 5191          BP_SET_LSIZE(bp, lsize);
5177 5192          BP_SET_PSIZE(bp, psize);
5178 5193          BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
5179 5194          BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
5180 5195          BP_SET_TYPE(bp, DMU_OT_NONE);
5181 5196          BP_SET_LEVEL(bp, 0);
5182 5197          BP_SET_DEDUP(bp, 0);
5183 5198          BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
5184 5199  
5185 5200          spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
5186 5201          zio = zio_root(spa, NULL, NULL, 0);
5187 5202  
5188 5203          if (vd == vd->vdev_top) {
5189 5204                  /*
5190 5205                   * Treat this as a normal block read.
5191 5206                   */
5192 5207                  zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
5193 5208                      ZIO_PRIORITY_SYNC_READ,
5194 5209                      ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
5195 5210          } else {
5196 5211                  /*
5197 5212                   * Treat this as a vdev child I/O.
5198 5213                   */
5199 5214                  zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
5200 5215                      psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
5201 5216                      ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
5202 5217                      ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
5203 5218                      ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
5204 5219                      NULL, NULL));
5205 5220          }
5206 5221  
5207 5222          error = zio_wait(zio);
5208 5223          spa_config_exit(spa, SCL_STATE, FTAG);
5209 5224  
5210 5225          if (error) {
5211 5226                  (void) printf("Read of %s failed, error: %d\n", thing, error);
5212 5227                  goto out;
5213 5228          }
5214 5229  
5215 5230          if (flags & ZDB_FLAG_DECOMPRESS) {
5216 5231                  /*
5217 5232                   * We don't know how the data was compressed, so just try
5218 5233                   * every decompress function at every inflated blocksize.
5219 5234                   */
5220 5235                  enum zio_compress c;
5221 5236                  void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
5222 5237                  void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
5223 5238  
5224 5239                  abd_copy_to_buf(pbuf2, pabd, psize);
5225 5240  
5226 5241                  VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
5227 5242                      random_get_pseudo_bytes_cb, NULL));
5228 5243  
5229 5244                  VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
5230 5245                      SPA_MAXBLOCKSIZE - psize));
5231 5246  
5232 5247                  for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
5233 5248                      lsize -= SPA_MINBLOCKSIZE) {
5234 5249                          for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
5235 5250                                  if (zio_decompress_data(c, pabd,
5236 5251                                      lbuf, psize, lsize) == 0 &&
5237 5252                                      zio_decompress_data_buf(c, pbuf2,
5238 5253                                      lbuf2, psize, lsize) == 0 &&
5239 5254                                      bcmp(lbuf, lbuf2, lsize) == 0)
5240 5255                                          break;
5241 5256                          }
5242 5257                          if (c != ZIO_COMPRESS_FUNCTIONS)
5243 5258                                  break;
5245 5260                  }
5246 5261  
5247 5262                  umem_free(pbuf2, SPA_MAXBLOCKSIZE);
5248 5263                  umem_free(lbuf2, SPA_MAXBLOCKSIZE);
5249 5264  
5250 5265                  if (lsize <= psize) {
5251 5266                          (void) printf("Decompress of %s failed\n", thing);
5252 5267                          goto out;
5253 5268                  }
5254 5269                  buf = lbuf;
5255 5270                  size = lsize;
5256 5271          } else {
5257 5272                  buf = abd_to_buf(pabd);
5258 5273                  size = psize;
5259 5274          }
5260 5275  
5261 5276          if (flags & ZDB_FLAG_PRINT_BLKPTR)
5262 5277                  zdb_print_blkptr((blkptr_t *)(void *)
5263 5278                      ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
5264 5279          else if (flags & ZDB_FLAG_RAW)
5265 5280                  zdb_dump_block_raw(buf, size, flags);
5266 5281          else if (flags & ZDB_FLAG_INDIRECT)
5267 5282                  zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
5268 5283                      flags);
5269 5284          else if (flags & ZDB_FLAG_GBH)
5270 5285                  zdb_dump_gbh(buf, flags);
5271 5286          else
5272 5287                  zdb_dump_block(thing, buf, size, flags);
5273 5288  
5274 5289  out:
5275 5290          abd_free(pabd);
5276 5291          umem_free(lbuf, SPA_MAXBLOCKSIZE);
5277 5292          free(dup);
5278 5293  }
5279 5294  
5280 5295  static void
5281 5296  zdb_embedded_block(char *thing)
5282 5297  {
5283 5298          blkptr_t bp;
5284 5299          unsigned long long *words = (void *)&bp;
5285 5300          char *buf;
5286 5301          int err;
5287 5302  
5288 5303          bzero(&bp, sizeof (bp));
5289 5304          err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:"
5290 5305              "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
5291 5306              words + 0, words + 1, words + 2, words + 3,
5292 5307              words + 4, words + 5, words + 6, words + 7,
5293 5308              words + 8, words + 9, words + 10, words + 11,
5294 5309              words + 12, words + 13, words + 14, words + 15);
5295 5310          if (err != 16) {
5296 5311                  (void) fprintf(stderr, "invalid input format\n");
5297 5312                  exit(1);
5298 5313          }
5299 5314          ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE);
5300 5315          buf = malloc(SPA_MAXBLOCKSIZE);
5301 5316          if (buf == NULL) {
5302 5317                  (void) fprintf(stderr, "out of memory\n");
5303 5318                  exit(1);
5304 5319          }
5305 5320          err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp));
5306 5321          if (err != 0) {
5307 5322                  (void) fprintf(stderr, "decode failed: %u\n", err);
5308 5323                  exit(1);
5309 5324          }
5310 5325          zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0);
5311 5326          free(buf);
5312 5327  }
5313 5328  
5314 5329  int
5315 5330  main(int argc, char **argv)
5316 5331  {
5317 5332          int c;
5318 5333          struct rlimit rl = { 1024, 1024 };
5319 5334          spa_t *spa = NULL;
5320 5335          objset_t *os = NULL;
5321 5336          int dump_all = 1;
5322 5337          int verbose = 0;
5323 5338          int error = 0;
5324 5339          char **searchdirs = NULL;
5325 5340          int nsearch = 0;
5326 5341          char *target, *target_pool;
5327 5342          nvlist_t *policy = NULL;
5328 5343          uint64_t max_txg = UINT64_MAX;
5329 5344          int flags = ZFS_IMPORT_MISSING_LOG;
5330 5345          int rewind = ZPOOL_NEVER_REWIND;
5331 5346          char *spa_config_path_env;
5332 5347          boolean_t target_is_spa = B_TRUE;
5333 5348          nvlist_t *cfg = NULL;
5334 5349  
5335 5350          (void) setrlimit(RLIMIT_NOFILE, &rl);
5336 5351          (void) enable_extended_FILE_stdio(-1, -1);
5337 5352  
5338 5353          dprintf_setup(&argc, argv);
5339 5354  
5340 5355          /*
5341 5356           * If the SPA_CONFIG_PATH environment variable is set, it overrides
5342 5357           * the default spa_config_path setting. If the -U flag is specified,
5343 5358           * it overrides the environment variable setting once again.
5344 5359           */
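                   /*
                    * For example (hypothetical cachefile paths):
                    *
                    *      SPA_CONFIG_PATH=/tmp/alt.cache zdb tank
                    *              reads the pool configuration from /tmp/alt.cache
                    *      SPA_CONFIG_PATH=/tmp/alt.cache zdb -U /etc/zfs/other.cache tank
                    *              -U takes precedence, so /etc/zfs/other.cache is used
                    */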
5345 5360          spa_config_path_env = getenv("SPA_CONFIG_PATH");
5346 5361          if (spa_config_path_env != NULL)
5347 5362                  spa_config_path = spa_config_path_env;
5348 5363  
5349 5364          while ((c = getopt(argc, argv,
5350 5365              "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
5351 5366                  switch (c) {
5352 5367                  case 'b':
5353 5368                  case 'c':
5354 5369                  case 'C':
5355 5370                  case 'd':
5356 5371                  case 'D':
5357 5372                  case 'E':
5358 5373                  case 'G':
5359 5374                  case 'h':
5360 5375                  case 'i':
5361 5376                  case 'l':
5362 5377                  case 'm':
5363 5378                  case 'M':
5364 5379                  case 'O':
5365 5380                  case 'R':
5366 5381                  case 's':
5367 5382                  case 'S':
5368 5383                  case 'u':
5369 5384                          dump_opt[c]++;
5370 5385                          dump_all = 0;
5371 5386                          break;
5372 5387                  case 'A':
5373 5388                  case 'e':
5374 5389                  case 'F':
5375 5390                  case 'k':
5376 5391                  case 'L':
5377 5392                  case 'P':
5378 5393                  case 'q':
5379 5394                  case 'X':
5380 5395                          dump_opt[c]++;
5381 5396                          break;
5382 5397                  /* NB: Sort single match options below. */
5383 5398                  case 'I':
5384 5399                          max_inflight = strtoull(optarg, NULL, 0);
5385 5400                          if (max_inflight == 0) {
5386 5401                                  (void) fprintf(stderr, "maximum number "
5387 5402                                      "of inflight I/Os must be greater "
5388 5403                                      "than 0\n");
5389 5404                                  usage();
5390 5405                          }
5391 5406                          break;
5392 5407                  case 'o':
5393 5408                          error = set_global_var(optarg);
5394 5409                          if (error != 0)
5395 5410                                  usage();
5396 5411                          break;
5397 5412                  case 'p':
5398 5413                          if (searchdirs == NULL) {
5399 5414                                  searchdirs = umem_alloc(sizeof (char *),
5400 5415                                      UMEM_NOFAIL);
5401 5416                          } else {
5402 5417                                  char **tmp = umem_alloc((nsearch + 1) *
5403 5418                                      sizeof (char *), UMEM_NOFAIL);
5404 5419                                  bcopy(searchdirs, tmp, nsearch *
5405 5420                                      sizeof (char *));
5406 5421                                  umem_free(searchdirs,
5407 5422                                      nsearch * sizeof (char *));
5408 5423                                  searchdirs = tmp;
5409 5424                          }
5410 5425                          searchdirs[nsearch++] = optarg;
5411 5426                          break;
5412 5427                  case 't':
5413 5428                          max_txg = strtoull(optarg, NULL, 0);
5414 5429                          if (max_txg < TXG_INITIAL) {
5415 5430                                  (void) fprintf(stderr, "incorrect txg "
5416 5431                                      "specified: %s\n", optarg);
5417 5432                                  usage();
5418 5433                          }
5419 5434                          break;
5420 5435                  case 'U':
5421 5436                          spa_config_path = optarg;
5422 5437                          if (spa_config_path[0] != '/') {
5423 5438                                  (void) fprintf(stderr,
5424 5439                                      "cachefile must be an absolute path "
5425 5440                                      "(i.e. start with a slash)\n");
5426 5441                                  usage();
5427 5442                          }
5428 5443                          break;
5429 5444                  case 'v':
5430 5445                          verbose++;
5431 5446                          break;
5432 5447                  case 'V':
5433 5448                          flags = ZFS_IMPORT_VERBATIM;
5434 5449                          break;
5435 5450                  case 'x':
5436 5451                          vn_dumpdir = optarg;
5437 5452                          break;
5438 5453                  default:
5439 5454                          usage();
5440 5455                          break;
5441 5456                  }
5442 5457          }
5443 5458  
5444 5459          if (!dump_opt['e'] && searchdirs != NULL) {
5445 5460                  (void) fprintf(stderr, "-p option requires use of -e\n");
5446 5461                  usage();
5447 5462          }
5448 5463  
5449 5464          /*
5450 5465           * ZDB does not typically re-read blocks; therefore limit the ARC
5451 5466           * to 256 MB, which can be used entirely for metadata.
5452 5467           */
5453 5468          zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
5454 5469  
5455 5470          /*
5456 5471           * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
5457 5472           * "zdb -b" uses traversal prefetch which uses async reads.
5458 5473           * For good performance, let several of them be active at once.
5459 5474           */
5460 5475          zfs_vdev_async_read_max_active = 10;
5461 5476  
5462 5477          /*
5463 5478           * Disable reference tracking for better performance.
5464 5479           */
5465 5480          reference_tracking_enable = B_FALSE;
5466 5481  
5467 5482          /*
5468 5483           * Do not fail spa_load when spa_load_verify fails. This is needed
5469 5484           * to load non-idle pools.
5470 5485           */
5471 5486          spa_load_verify_dryrun = B_TRUE;
5472 5487  
5473 5488          kernel_init(FREAD);
5474 5489          g_zfs = libzfs_init();
5475 5490          ASSERT(g_zfs != NULL);
5476 5491  
5477 5492          if (dump_all)
5478 5493                  verbose = MAX(verbose, 1);
5479 5494  
5480 5495          for (c = 0; c < 256; c++) {
5481 5496                  if (dump_all && strchr("AeEFklLOPRSX", c) == NULL)
5482 5497                          dump_opt[c] = 1;
5483 5498                  if (dump_opt[c])
5484 5499                          dump_opt[c] += verbose;
5485 5500          }
5486 5501  
5487 5502          aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
5488 5503          zfs_recover = (dump_opt['A'] > 1);
5489 5504  
5490 5505          argc -= optind;
5491 5506          argv += optind;
5492 5507  
5493 5508          if (argc < 2 && dump_opt['R'])
5494 5509                  usage();
5495 5510  
5496 5511          if (dump_opt['E']) {
5497 5512                  if (argc != 1)
5498 5513                          usage();
5499 5514                  zdb_embedded_block(argv[0]);
5500 5515                  return (0);
5501 5516          }
5502 5517  
5503 5518          if (argc < 1) {
5504 5519                  if (!dump_opt['e'] && dump_opt['C']) {
5505 5520                          dump_cachefile(spa_config_path);
5506 5521                          return (0);
5507 5522                  }
5508 5523                  usage();
5509 5524          }
5510 5525  
5511 5526          if (dump_opt['l'])
5512 5527                  return (dump_label(argv[0]));
5513 5528  
5514 5529          if (dump_opt['O']) {
5515 5530                  if (argc != 2)
5516 5531                          usage();
5517 5532                  dump_opt['v'] = verbose + 3;
5518 5533                  return (dump_path(argv[0], argv[1]));
5519 5534          }
5520 5535  
5521 5536          if (dump_opt['X'] || dump_opt['F'])
5522 5537                  rewind = ZPOOL_DO_REWIND |
5523 5538                      (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
5524 5539  
5525 5540          if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
5526 5541              nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 ||
5527 5542              nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0)
5528 5543                  fatal("internal error: %s", strerror(ENOMEM));
5529 5544  
5530 5545          error = 0;
5531 5546          target = argv[0];
5532 5547  
5533 5548          if (strpbrk(target, "/@") != NULL) {
5534 5549                  size_t targetlen;
5535 5550  
5536 5551                  target_pool = strdup(target);
5537 5552                  *strpbrk(target_pool, "/@") = '\0';
5538 5553  
5539 5554                  target_is_spa = B_FALSE;
5540 5555                  targetlen = strlen(target);
5541 5556                  if (targetlen && target[targetlen - 1] == '/')
5542 5557                          target[targetlen - 1] = '\0';
5543 5558          } else {
5544 5559                  target_pool = target;
5545 5560          }
5546 5561  
5547 5562          if (dump_opt['e']) {
5548 5563                  importargs_t args = { 0 };
5549 5564  
5550 5565                  args.paths = nsearch;
5551 5566                  args.path = searchdirs;
5552 5567                  args.can_be_active = B_TRUE;
5553 5568  
5554 5569                  error = zpool_tryimport(g_zfs, target_pool, &cfg, &args);
5555 5570  
5556 5571                  if (error == 0) {
5557 5572  
5558 5573                          if (nvlist_add_nvlist(cfg,
5559 5574                              ZPOOL_LOAD_POLICY, policy) != 0) {
5560 5575                                  fatal("can't open '%s': %s",
5561 5576                                      target, strerror(ENOMEM));
5562 5577                          }
5563 5578  
5564 5579                          if (dump_opt['C'] > 1) {
5565 5580                                  (void) printf("\nConfiguration for import:\n");
5566 5581                                  dump_nvlist(cfg, 8);
5567 5582                          }
5568 5583  
5569 5584                          /*
5570 5585                           * Disable the activity check to allow examination of
5571 5586                           * active pools.
5572 5587                           */
5573 5588                          error = spa_import(target_pool, cfg, NULL,
5574 5589                              flags | ZFS_IMPORT_SKIP_MMP);
5575 5590                  }
5576 5591          }
5577 5592  
5578 5593          char *checkpoint_pool = NULL;
5579 5594          char *checkpoint_target = NULL;
5580 5595          if (dump_opt['k']) {
5581 5596                  checkpoint_pool = import_checkpointed_state(target, cfg,
5582 5597                      &checkpoint_target);
5583 5598  
5584 5599                  if (checkpoint_target != NULL)
5585 5600                          target = checkpoint_target;
5586 5601  
5587 5602          }
5588 5603  
5589 5604          if (error == 0) {
5590 5605                  if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
5591 5606                          ASSERT(checkpoint_pool != NULL);
5592 5607                          ASSERT(checkpoint_target == NULL);
5593 5608  
5594 5609                          error = spa_open(checkpoint_pool, &spa, FTAG);
5595 5610                          if (error != 0) {
5596 5611                                  fatal("Tried to open pool \"%s\" but "
5597 5612                                      "spa_open() failed with error %d\n",
5598 5613                                      checkpoint_pool, error);
5599 5614                          }
5600 5615  
5601 5616                  } else if (target_is_spa || dump_opt['R']) {
5602 5617                          zdb_set_skip_mmp(target);
5603 5618                          error = spa_open_rewind(target, &spa, FTAG, policy,
5604 5619                              NULL);
5605 5620                          if (error) {
5606 5621                                  /*
5607 5622                                   * If we're missing the log device then
5608 5623                                   * try opening the pool after clearing the
5609 5624                                   * log state.
5610 5625                                   */
5611 5626                                  mutex_enter(&spa_namespace_lock);
5612 5627                                  if ((spa = spa_lookup(target)) != NULL &&
5613 5628                                      spa->spa_log_state == SPA_LOG_MISSING) {
5614 5629                                          spa->spa_log_state = SPA_LOG_CLEAR;
5615 5630                                          error = 0;
5616 5631                                  }
5617 5632                                  mutex_exit(&spa_namespace_lock);
5618 5633  
5619 5634                                  if (!error) {
5620 5635                                          error = spa_open_rewind(target, &spa,
5621 5636                                              FTAG, policy, NULL);
5622 5637                                  }
5623 5638                          }
5624 5639                  } else {
5625 5640                          zdb_set_skip_mmp(target);
5626 5641                          error = open_objset(target, DMU_OST_ANY, FTAG, &os);
5627 5642                  }
5628 5643          }
5629 5644          nvlist_free(policy);
5630 5645  
5631 5646          if (error)
5632 5647                  fatal("can't open '%s': %s", target, strerror(error));
5633 5648  
5634 5649          argv++;
5635 5650          argc--;
5636 5651          if (!dump_opt['R']) {
5637 5652                  if (argc > 0) {
5638 5653                          zopt_objects = argc;
5639 5654                          zopt_object = calloc(zopt_objects, sizeof (uint64_t));
5640 5655                          for (unsigned i = 0; i < zopt_objects; i++) {
5641 5656                                  errno = 0;
5642 5657                                  zopt_object[i] = strtoull(argv[i], NULL, 0);
5643 5658                                  if (zopt_object[i] == 0 && errno != 0)
5644 5659                                          fatal("bad number %s: %s",
5645 5660                                              argv[i], strerror(errno));
5646 5661                          }
5647 5662                  }
5648 5663                  if (os != NULL) {
5649 5664                          dump_dir(os);
5650 5665                  } else if (zopt_objects > 0 && !dump_opt['m']) {
5651 5666                          dump_dir(spa->spa_meta_objset);
5652 5667                  } else {
5653 5668                          dump_zpool(spa);
5654 5669                  }
5655 5670          } else {
5656 5671                  flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
5657 5672                  flagbits['c'] = ZDB_FLAG_CHECKSUM;
5658 5673                  flagbits['d'] = ZDB_FLAG_DECOMPRESS;
5659 5674                  flagbits['e'] = ZDB_FLAG_BSWAP;
5660 5675                  flagbits['g'] = ZDB_FLAG_GBH;
5661 5676                  flagbits['i'] = ZDB_FLAG_INDIRECT;
5662 5677                  flagbits['p'] = ZDB_FLAG_PHYS;
5663 5678                  flagbits['r'] = ZDB_FLAG_RAW;
5664 5679  
5665 5680                  for (int i = 0; i < argc; i++)
5666 5681                          zdb_read_block(argv[i], spa);
5667 5682          }
5668 5683  
5669 5684          if (dump_opt['k']) {
5670 5685                  free(checkpoint_pool);
5671 5686                  if (!target_is_spa)
5672 5687                          free(checkpoint_target);
5673 5688          }
5674 5689  
5675 5690          if (os != NULL)
5676 5691                  close_objset(os, FTAG);
5677 5692          else
5678 5693                  spa_close(spa, FTAG);
5679 5694  
5680 5695          fuid_table_destroy();
5681 5696  
5682 5697          dump_debug_buffer();
5683 5698  
5684 5699          libzfs_fini(g_zfs);
5685 5700          kernel_fini();
5686 5701  
5687 5702          return (error);
5688 5703  }
  