10592 misc. metaslab and vdev related ZoL bug fixes
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Giuseppe Di Natale <guss80@gmail.com>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Tony Hutter <hutter2@llnl.gov>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
--- old/usr/src/cmd/zdb/zdb.c
+++ new/usr/src/cmd/zdb/zdb.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
25 25 * Copyright (c) 2014 Integros [integros.com]
26 26 * Copyright 2017 Nexenta Systems, Inc.
27 27 * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
28 28 * Copyright 2017 RackTop Systems.
29 29 */
30 30
31 31 #include <stdio.h>
32 32 #include <unistd.h>
33 33 #include <stdio_ext.h>
34 34 #include <stdlib.h>
35 35 #include <ctype.h>
36 36 #include <sys/zfs_context.h>
37 37 #include <sys/spa.h>
38 38 #include <sys/spa_impl.h>
39 39 #include <sys/dmu.h>
40 40 #include <sys/zap.h>
41 41 #include <sys/fs/zfs.h>
42 42 #include <sys/zfs_znode.h>
43 43 #include <sys/zfs_sa.h>
44 44 #include <sys/sa.h>
45 45 #include <sys/sa_impl.h>
46 46 #include <sys/vdev.h>
47 47 #include <sys/vdev_impl.h>
48 48 #include <sys/metaslab_impl.h>
49 49 #include <sys/dmu_objset.h>
50 50 #include <sys/dsl_dir.h>
51 51 #include <sys/dsl_dataset.h>
52 52 #include <sys/dsl_pool.h>
53 53 #include <sys/dbuf.h>
54 54 #include <sys/zil.h>
55 55 #include <sys/zil_impl.h>
56 56 #include <sys/stat.h>
57 57 #include <sys/resource.h>
58 58 #include <sys/dmu_traverse.h>
59 59 #include <sys/zio_checksum.h>
60 60 #include <sys/zio_compress.h>
61 61 #include <sys/zfs_fuid.h>
62 62 #include <sys/arc.h>
63 63 #include <sys/ddt.h>
64 64 #include <sys/zfeature.h>
65 65 #include <sys/abd.h>
66 66 #include <sys/blkptr.h>
67 67 #include <sys/dsl_scan.h>
68 68 #include <zfs_comutil.h>
69 69 #include <libcmdutils.h>
70 70 #undef verify
71 71 #include <libzfs.h>
72 72
73 73 #include "zdb.h"
74 74
75 75 #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
76 76 zio_compress_table[(idx)].ci_name : "UNKNOWN")
77 77 #define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
78 78 zio_checksum_table[(idx)].ci_name : "UNKNOWN")
79 79 #define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
80 80 dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? \
81 81 dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
82 82 #define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \
83 83 (idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \
84 84 DMU_OT_ZAP_OTHER : \
85 85 (idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
86 86 DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
87 87
88 88 #ifndef lint
89 89 extern int reference_tracking_enable;
90 90 extern boolean_t zfs_recover;
91 91 extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
92 92 extern int zfs_vdev_async_read_max_active;
93 93 extern int aok;
94 94 extern boolean_t spa_load_verify_dryrun;
95 95 #else
96 96 int reference_tracking_enable;
97 97 boolean_t zfs_recover;
98 98 uint64_t zfs_arc_max, zfs_arc_meta_limit;
99 99 int zfs_vdev_async_read_max_active;
100 100 int aok;
101 101 boolean_t spa_load_verify_dryrun;
102 102 #endif
103 103
104 104 static const char cmdname[] = "zdb";
105 105 uint8_t dump_opt[256];
106 106
107 107 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
108 108
109 109 uint64_t *zopt_object = NULL;
110 110 static unsigned zopt_objects = 0;
111 111 libzfs_handle_t *g_zfs;
112 112 uint64_t max_inflight = 1000;
113 113 static int leaked_objects = 0;
114 114
115 115 static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
116 116 static void mos_obj_refd(uint64_t);
117 117
118 118 /*
119 119 * These libumem hooks provide a reasonable set of defaults for the allocator's
120 120 * debugging facilities.
121 121 */
122 122 const char *
 123  123 _umem_debug_init(void)
124 124 {
125 125 return ("default,verbose"); /* $UMEM_DEBUG setting */
126 126 }
127 127
128 128 const char *
129 129 _umem_logging_init(void)
130 130 {
131 131 return ("fail,contents"); /* $UMEM_LOGGING setting */
132 132 }
133 133
134 134 static void
135 135 usage(void)
136 136 {
137 137 (void) fprintf(stderr,
138 138 "Usage:\t%s [-AbcdDFGhikLMPsvX] [-e [-V] [-p <path> ...]] "
139 139 "[-I <inflight I/Os>]\n"
140 140 "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
141 141 "\t\t[<poolname> [<object> ...]]\n"
142 142 "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> "
143 143 "[<object> ...]\n"
144 144 "\t%s -C [-A] [-U <cache>]\n"
145 145 "\t%s -l [-Aqu] <device>\n"
146 146 "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
147 147 "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
148 148 "\t%s -O <dataset> <path>\n"
149 149 "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
150 150 "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
151 151 "\t%s -E [-A] word0:word1:...:word15\n"
152 152 "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
153 153 "<poolname>\n\n",
154 154 cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
155 155 cmdname, cmdname);
156 156
157 157 (void) fprintf(stderr, " Dataset name must include at least one "
158 158 "separator character '/' or '@'\n");
159 159 (void) fprintf(stderr, " If dataset name is specified, only that "
160 160 "dataset is dumped\n");
161 161 (void) fprintf(stderr, " If object numbers are specified, only "
162 162 "those objects are dumped\n\n");
163 163 (void) fprintf(stderr, " Options to control amount of output:\n");
164 164 (void) fprintf(stderr, " -b block statistics\n");
165 165 (void) fprintf(stderr, " -c checksum all metadata (twice for "
166 166 "all data) blocks\n");
167 167 (void) fprintf(stderr, " -C config (or cachefile if alone)\n");
168 168 (void) fprintf(stderr, " -d dataset(s)\n");
169 169 (void) fprintf(stderr, " -D dedup statistics\n");
170 170 (void) fprintf(stderr, " -E decode and display block from an "
171 171 "embedded block pointer\n");
172 172 (void) fprintf(stderr, " -h pool history\n");
173 173 (void) fprintf(stderr, " -i intent logs\n");
174 174 (void) fprintf(stderr, " -l read label contents\n");
175 175 (void) fprintf(stderr, " -k examine the checkpointed state "
176 176 "of the pool\n");
177 177 (void) fprintf(stderr, " -L disable leak tracking (do not "
178 178 "load spacemaps)\n");
179 179 (void) fprintf(stderr, " -m metaslabs\n");
180 180 (void) fprintf(stderr, " -M metaslab groups\n");
181 181 (void) fprintf(stderr, " -O perform object lookups by path\n");
182 182 (void) fprintf(stderr, " -R read and display block from a "
183 183 "device\n");
184 184 (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
185 185 (void) fprintf(stderr, " -S simulate dedup to measure effect\n");
186 186 (void) fprintf(stderr, " -v verbose (applies to all "
187 187 "others)\n\n");
188 188 (void) fprintf(stderr, " Below options are intended for use "
189 189 "with other options:\n");
190 190 (void) fprintf(stderr, " -A ignore assertions (-A), enable "
191 191 "panic recovery (-AA) or both (-AAA)\n");
192 192 (void) fprintf(stderr, " -e pool is exported/destroyed/"
193 193 "has altroot/not in a cachefile\n");
194 194 (void) fprintf(stderr, " -F attempt automatic rewind within "
195 195 "safe range of transaction groups\n");
196 196 (void) fprintf(stderr, " -G dump zfs_dbgmsg buffer before "
197 197 "exiting\n");
198 198 (void) fprintf(stderr, " -I <number of inflight I/Os> -- "
199 199 "specify the maximum number of "
 200  200 	    "checksumming I/Os [default is 1000]\n");
201 201 (void) fprintf(stderr, " -o <variable>=<value> set global "
202 202 "variable to an unsigned 32-bit integer value\n");
203 203 (void) fprintf(stderr, " -p <path> -- use one or more with "
204 204 "-e to specify path to vdev dir\n");
205 205 (void) fprintf(stderr, " -P print numbers in parseable form\n");
206 206 (void) fprintf(stderr, " -q don't print label contents\n");
207 207 (void) fprintf(stderr, " -t <txg> -- highest txg to use when "
208 208 "searching for uberblocks\n");
209 209 (void) fprintf(stderr, " -u uberblock\n");
210 210 (void) fprintf(stderr, " -U <cachefile_path> -- use alternate "
211 211 "cachefile\n");
212 212 (void) fprintf(stderr, " -V do verbatim import\n");
213 213 (void) fprintf(stderr, " -x <dumpdir> -- "
214 214 "dump all read blocks into specified directory\n");
215 215 (void) fprintf(stderr, " -X attempt extreme rewind (does not "
216 216 "work with dataset)\n\n");
217 217 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
218 218 "to make only that option verbose\n");
219 219 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
220 220 exit(1);
221 221 }
222 222
223 223 static void
224 224 dump_debug_buffer()
225 225 {
226 226 if (dump_opt['G']) {
227 227 (void) printf("\n");
228 228 zfs_dbgmsg_print("zdb");
229 229 }
230 230 }
231 231
232 232 /*
233 233 * Called for usage errors that are discovered after a call to spa_open(),
234 234 * dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
235 235 */
236 236
237 237 static void
238 238 fatal(const char *fmt, ...)
239 239 {
240 240 va_list ap;
241 241
242 242 va_start(ap, fmt);
243 243 (void) fprintf(stderr, "%s: ", cmdname);
244 244 (void) vfprintf(stderr, fmt, ap);
245 245 va_end(ap);
246 246 (void) fprintf(stderr, "\n");
247 247
248 248 dump_debug_buffer();
249 249
250 250 exit(1);
251 251 }
252 252
253 253 /* ARGSUSED */
254 254 static void
255 255 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
256 256 {
257 257 nvlist_t *nv;
258 258 size_t nvsize = *(uint64_t *)data;
259 259 char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
260 260
261 261 VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
262 262
263 263 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
264 264
265 265 umem_free(packed, nvsize);
266 266
267 267 dump_nvlist(nv, 8);
268 268
269 269 nvlist_free(nv);
270 270 }
271 271
272 272 /* ARGSUSED */
273 273 static void
274 274 dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
275 275 {
276 276 spa_history_phys_t *shp = data;
277 277
278 278 if (shp == NULL)
279 279 return;
280 280
281 281 (void) printf("\t\tpool_create_len = %llu\n",
282 282 (u_longlong_t)shp->sh_pool_create_len);
283 283 (void) printf("\t\tphys_max_off = %llu\n",
284 284 (u_longlong_t)shp->sh_phys_max_off);
285 285 (void) printf("\t\tbof = %llu\n",
286 286 (u_longlong_t)shp->sh_bof);
287 287 (void) printf("\t\teof = %llu\n",
288 288 (u_longlong_t)shp->sh_eof);
289 289 (void) printf("\t\trecords_lost = %llu\n",
290 290 (u_longlong_t)shp->sh_records_lost);
291 291 }
292 292
293 293 static void
294 294 zdb_nicenum(uint64_t num, char *buf, size_t buflen)
295 295 {
296 296 if (dump_opt['P'])
297 297 (void) snprintf(buf, buflen, "%llu", (longlong_t)num);
298 298 else
 299  299 		nicenum(num, buf, buflen);
300 300 }
301 301
302 302 static const char histo_stars[] = "****************************************";
303 303 static const uint64_t histo_width = sizeof (histo_stars) - 1;
304 304
305 305 static void
306 306 dump_histogram(const uint64_t *histo, int size, int offset)
307 307 {
308 308 int i;
309 309 int minidx = size - 1;
310 310 int maxidx = 0;
311 311 uint64_t max = 0;
312 312
313 313 for (i = 0; i < size; i++) {
314 314 if (histo[i] > max)
315 315 max = histo[i];
316 316 if (histo[i] > 0 && i > maxidx)
317 317 maxidx = i;
318 318 if (histo[i] > 0 && i < minidx)
319 319 minidx = i;
320 320 }
321 321
322 322 if (max < histo_width)
323 323 max = histo_width;
324 324
325 325 for (i = minidx; i <= maxidx; i++) {
326 326 (void) printf("\t\t\t%3u: %6llu %s\n",
327 327 i + offset, (u_longlong_t)histo[i],
328 328 &histo_stars[(max - histo[i]) * histo_width / max]);
329 329 }
330 330 }
331 331
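The bar above is drawn by indexing into histo_stars from the right: bucket i prints roughly histo[i]/max of the 40-column bar. A quick worked example (illustrative numbers, not from a real pool): with max = 200 and histo[i] = 50, the offset into histo_stars is (200 - 50) * 40 / 200 = 30, so the last 10 of the 40 stars are printed -- a quarter-height bar. When max is below the bar width it is clamped up to histo_width, so small counts print one star per unit instead of being stretched to the full bar.
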
332 332 static void
333 333 dump_zap_stats(objset_t *os, uint64_t object)
334 334 {
335 335 int error;
336 336 zap_stats_t zs;
337 337
338 338 error = zap_get_stats(os, object, &zs);
339 339 if (error)
340 340 return;
341 341
342 342 if (zs.zs_ptrtbl_len == 0) {
343 343 ASSERT(zs.zs_num_blocks == 1);
344 344 (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
345 345 (u_longlong_t)zs.zs_blocksize,
346 346 (u_longlong_t)zs.zs_num_entries);
347 347 return;
348 348 }
349 349
350 350 (void) printf("\tFat ZAP stats:\n");
351 351
352 352 (void) printf("\t\tPointer table:\n");
353 353 (void) printf("\t\t\t%llu elements\n",
354 354 (u_longlong_t)zs.zs_ptrtbl_len);
355 355 (void) printf("\t\t\tzt_blk: %llu\n",
356 356 (u_longlong_t)zs.zs_ptrtbl_zt_blk);
357 357 (void) printf("\t\t\tzt_numblks: %llu\n",
358 358 (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
359 359 (void) printf("\t\t\tzt_shift: %llu\n",
360 360 (u_longlong_t)zs.zs_ptrtbl_zt_shift);
361 361 (void) printf("\t\t\tzt_blks_copied: %llu\n",
362 362 (u_longlong_t)zs.zs_ptrtbl_blks_copied);
363 363 (void) printf("\t\t\tzt_nextblk: %llu\n",
364 364 (u_longlong_t)zs.zs_ptrtbl_nextblk);
365 365
366 366 (void) printf("\t\tZAP entries: %llu\n",
367 367 (u_longlong_t)zs.zs_num_entries);
368 368 (void) printf("\t\tLeaf blocks: %llu\n",
369 369 (u_longlong_t)zs.zs_num_leafs);
370 370 (void) printf("\t\tTotal blocks: %llu\n",
371 371 (u_longlong_t)zs.zs_num_blocks);
372 372 (void) printf("\t\tzap_block_type: 0x%llx\n",
373 373 (u_longlong_t)zs.zs_block_type);
374 374 (void) printf("\t\tzap_magic: 0x%llx\n",
375 375 (u_longlong_t)zs.zs_magic);
376 376 (void) printf("\t\tzap_salt: 0x%llx\n",
377 377 (u_longlong_t)zs.zs_salt);
378 378
379 379 (void) printf("\t\tLeafs with 2^n pointers:\n");
380 380 dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
381 381
382 382 (void) printf("\t\tBlocks with n*5 entries:\n");
383 383 dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
384 384
385 385 (void) printf("\t\tBlocks n/10 full:\n");
386 386 dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
387 387
388 388 (void) printf("\t\tEntries with n chunks:\n");
389 389 dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
390 390
391 391 (void) printf("\t\tBuckets with n entries:\n");
392 392 dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
393 393 }
394 394
395 395 /*ARGSUSED*/
396 396 static void
397 397 dump_none(objset_t *os, uint64_t object, void *data, size_t size)
398 398 {
399 399 }
400 400
401 401 /*ARGSUSED*/
402 402 static void
403 403 dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
404 404 {
405 405 (void) printf("\tUNKNOWN OBJECT TYPE\n");
406 406 }
407 407
408 408 /*ARGSUSED*/
409 409 static void
410 410 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
411 411 {
412 412 }
413 413
414 414 /*ARGSUSED*/
415 415 static void
416 416 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
417 417 {
418 418 }
419 419
420 420 /*ARGSUSED*/
421 421 static void
422 422 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
423 423 {
424 424 zap_cursor_t zc;
425 425 zap_attribute_t attr;
426 426 void *prop;
427 427 unsigned i;
428 428
429 429 dump_zap_stats(os, object);
430 430 (void) printf("\n");
431 431
432 432 for (zap_cursor_init(&zc, os, object);
433 433 zap_cursor_retrieve(&zc, &attr) == 0;
434 434 zap_cursor_advance(&zc)) {
435 435 (void) printf("\t\t%s = ", attr.za_name);
436 436 if (attr.za_num_integers == 0) {
437 437 (void) printf("\n");
438 438 continue;
439 439 }
440 440 prop = umem_zalloc(attr.za_num_integers *
441 441 attr.za_integer_length, UMEM_NOFAIL);
442 442 (void) zap_lookup(os, object, attr.za_name,
443 443 attr.za_integer_length, attr.za_num_integers, prop);
444 444 if (attr.za_integer_length == 1) {
445 445 (void) printf("%s", (char *)prop);
446 446 } else {
447 447 for (i = 0; i < attr.za_num_integers; i++) {
448 448 switch (attr.za_integer_length) {
449 449 case 2:
450 450 (void) printf("%u ",
451 451 ((uint16_t *)prop)[i]);
452 452 break;
453 453 case 4:
454 454 (void) printf("%u ",
455 455 ((uint32_t *)prop)[i]);
456 456 break;
457 457 case 8:
458 458 (void) printf("%lld ",
459 459 (u_longlong_t)((int64_t *)prop)[i]);
460 460 break;
461 461 }
462 462 }
463 463 }
464 464 (void) printf("\n");
465 465 umem_free(prop, attr.za_num_integers * attr.za_integer_length);
466 466 }
467 467 zap_cursor_fini(&zc);
468 468 }
469 469
470 470 static void
471 471 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
472 472 {
473 473 bpobj_phys_t *bpop = data;
474 474 char bytes[32], comp[32], uncomp[32];
475 475
476 476 /* make sure the output won't get truncated */
477 477 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
478 478 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
479 479 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
480 480
481 481 if (bpop == NULL)
482 482 return;
483 483
484 484 zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
485 485 zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
486 486 zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));
487 487
488 488 (void) printf("\t\tnum_blkptrs = %llu\n",
489 489 (u_longlong_t)bpop->bpo_num_blkptrs);
490 490 (void) printf("\t\tbytes = %s\n", bytes);
491 491 if (size >= BPOBJ_SIZE_V1) {
492 492 (void) printf("\t\tcomp = %s\n", comp);
493 493 (void) printf("\t\tuncomp = %s\n", uncomp);
494 494 }
495 495 if (size >= sizeof (*bpop)) {
496 496 (void) printf("\t\tsubobjs = %llu\n",
497 497 (u_longlong_t)bpop->bpo_subobjs);
498 498 (void) printf("\t\tnum_subobjs = %llu\n",
499 499 (u_longlong_t)bpop->bpo_num_subobjs);
500 500 }
501 501
502 502 if (dump_opt['d'] < 5)
503 503 return;
504 504
505 505 for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) {
506 506 char blkbuf[BP_SPRINTF_LEN];
507 507 blkptr_t bp;
508 508
509 509 int err = dmu_read(os, object,
510 510 i * sizeof (bp), sizeof (bp), &bp, 0);
511 511 if (err != 0) {
512 512 (void) printf("got error %u from dmu_read\n", err);
513 513 break;
514 514 }
515 515 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
516 516 (void) printf("\t%s\n", blkbuf);
517 517 }
518 518 }
519 519
520 520 /* ARGSUSED */
521 521 static void
522 522 dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
523 523 {
524 524 dmu_object_info_t doi;
525 525
526 526 VERIFY0(dmu_object_info(os, object, &doi));
527 527 uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
528 528
529 529 int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
530 530 if (err != 0) {
531 531 (void) printf("got error %u from dmu_read\n", err);
532 532 kmem_free(subobjs, doi.doi_max_offset);
533 533 return;
534 534 }
535 535
536 536 int64_t last_nonzero = -1;
537 537 for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) {
538 538 if (subobjs[i] != 0)
539 539 last_nonzero = i;
540 540 }
541 541
542 542 for (int64_t i = 0; i <= last_nonzero; i++) {
543 543 (void) printf("\t%llu\n", (longlong_t)subobjs[i]);
544 544 }
545 545 kmem_free(subobjs, doi.doi_max_offset);
546 546 }
547 547
548 548 /*ARGSUSED*/
549 549 static void
550 550 dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
551 551 {
552 552 dump_zap_stats(os, object);
553 553 /* contents are printed elsewhere, properly decoded */
554 554 }
555 555
556 556 /*ARGSUSED*/
557 557 static void
558 558 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
559 559 {
560 560 zap_cursor_t zc;
561 561 zap_attribute_t attr;
562 562
563 563 dump_zap_stats(os, object);
564 564 (void) printf("\n");
565 565
566 566 for (zap_cursor_init(&zc, os, object);
567 567 zap_cursor_retrieve(&zc, &attr) == 0;
568 568 zap_cursor_advance(&zc)) {
569 569 (void) printf("\t\t%s = ", attr.za_name);
570 570 if (attr.za_num_integers == 0) {
571 571 (void) printf("\n");
572 572 continue;
573 573 }
574 574 (void) printf(" %llx : [%d:%d:%d]\n",
575 575 (u_longlong_t)attr.za_first_integer,
576 576 (int)ATTR_LENGTH(attr.za_first_integer),
577 577 (int)ATTR_BSWAP(attr.za_first_integer),
578 578 (int)ATTR_NUM(attr.za_first_integer));
579 579 }
580 580 zap_cursor_fini(&zc);
581 581 }
582 582
583 583 /*ARGSUSED*/
584 584 static void
585 585 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
586 586 {
587 587 zap_cursor_t zc;
588 588 zap_attribute_t attr;
589 589 uint16_t *layout_attrs;
590 590 unsigned i;
591 591
592 592 dump_zap_stats(os, object);
593 593 (void) printf("\n");
594 594
595 595 for (zap_cursor_init(&zc, os, object);
596 596 zap_cursor_retrieve(&zc, &attr) == 0;
597 597 zap_cursor_advance(&zc)) {
598 598 (void) printf("\t\t%s = [", attr.za_name);
599 599 if (attr.za_num_integers == 0) {
600 600 (void) printf("\n");
601 601 continue;
602 602 }
603 603
604 604 VERIFY(attr.za_integer_length == 2);
605 605 layout_attrs = umem_zalloc(attr.za_num_integers *
606 606 attr.za_integer_length, UMEM_NOFAIL);
607 607
608 608 VERIFY(zap_lookup(os, object, attr.za_name,
609 609 attr.za_integer_length,
610 610 attr.za_num_integers, layout_attrs) == 0);
611 611
612 612 for (i = 0; i != attr.za_num_integers; i++)
613 613 (void) printf(" %d ", (int)layout_attrs[i]);
614 614 (void) printf("]\n");
615 615 umem_free(layout_attrs,
616 616 attr.za_num_integers * attr.za_integer_length);
617 617 }
618 618 zap_cursor_fini(&zc);
619 619 }
620 620
621 621 /*ARGSUSED*/
622 622 static void
623 623 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
624 624 {
625 625 zap_cursor_t zc;
626 626 zap_attribute_t attr;
627 627 const char *typenames[] = {
628 628 /* 0 */ "not specified",
629 629 /* 1 */ "FIFO",
630 630 /* 2 */ "Character Device",
631 631 /* 3 */ "3 (invalid)",
632 632 /* 4 */ "Directory",
633 633 /* 5 */ "5 (invalid)",
634 634 /* 6 */ "Block Device",
635 635 /* 7 */ "7 (invalid)",
636 636 /* 8 */ "Regular File",
637 637 /* 9 */ "9 (invalid)",
638 638 /* 10 */ "Symbolic Link",
639 639 /* 11 */ "11 (invalid)",
640 640 /* 12 */ "Socket",
641 641 /* 13 */ "Door",
642 642 /* 14 */ "Event Port",
643 643 /* 15 */ "15 (invalid)",
644 644 };
645 645
646 646 dump_zap_stats(os, object);
647 647 (void) printf("\n");
648 648
649 649 for (zap_cursor_init(&zc, os, object);
650 650 zap_cursor_retrieve(&zc, &attr) == 0;
651 651 zap_cursor_advance(&zc)) {
652 652 (void) printf("\t\t%s = %lld (type: %s)\n",
653 653 attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
654 654 typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
655 655 }
656 656 zap_cursor_fini(&zc);
657 657 }
658 658
659 659 static int
660 660 get_dtl_refcount(vdev_t *vd)
661 661 {
662 662 int refcount = 0;
663 663
664 664 if (vd->vdev_ops->vdev_op_leaf) {
665 665 space_map_t *sm = vd->vdev_dtl_sm;
666 666
667 667 if (sm != NULL &&
668 668 sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
669 669 return (1);
670 670 return (0);
671 671 }
672 672
673 673 for (unsigned c = 0; c < vd->vdev_children; c++)
674 674 refcount += get_dtl_refcount(vd->vdev_child[c]);
675 675 return (refcount);
676 676 }
677 677
678 678 static int
679 679 get_metaslab_refcount(vdev_t *vd)
680 680 {
681 681 int refcount = 0;
682 682
683 683 if (vd->vdev_top == vd) {
684 684 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
685 685 space_map_t *sm = vd->vdev_ms[m]->ms_sm;
686 686
687 687 if (sm != NULL &&
688 688 sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
689 689 refcount++;
690 690 }
691 691 }
692 692 for (unsigned c = 0; c < vd->vdev_children; c++)
693 693 refcount += get_metaslab_refcount(vd->vdev_child[c]);
694 694
695 695 return (refcount);
696 696 }
697 697
698 698 static int
699 699 get_obsolete_refcount(vdev_t *vd)
700 700 {
701 701 int refcount = 0;
702 702
703 703 uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd);
704 704 if (vd->vdev_top == vd && obsolete_sm_obj != 0) {
705 705 dmu_object_info_t doi;
706 706 VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
707 707 obsolete_sm_obj, &doi));
708 708 if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
709 709 refcount++;
710 710 }
711 711 } else {
712 712 ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
713 713 ASSERT3U(obsolete_sm_obj, ==, 0);
714 714 }
715 715 for (unsigned c = 0; c < vd->vdev_children; c++) {
716 716 refcount += get_obsolete_refcount(vd->vdev_child[c]);
717 717 }
718 718
719 719 return (refcount);
720 720 }
721 721
722 722 static int
723 723 get_prev_obsolete_spacemap_refcount(spa_t *spa)
724 724 {
725 725 uint64_t prev_obj =
726 726 spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
727 727 if (prev_obj != 0) {
728 728 dmu_object_info_t doi;
729 729 VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
730 730 if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
731 731 return (1);
732 732 }
733 733 }
734 734 return (0);
735 735 }
736 736
737 737 static int
738 738 get_checkpoint_refcount(vdev_t *vd)
739 739 {
740 740 int refcount = 0;
741 741
742 742 if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
743 743 zap_contains(spa_meta_objset(vd->vdev_spa),
744 744 vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
745 745 refcount++;
746 746
747 747 for (uint64_t c = 0; c < vd->vdev_children; c++)
748 748 refcount += get_checkpoint_refcount(vd->vdev_child[c]);
749 749
750 750 return (refcount);
751 751 }
752 752
753 753 static int
754 754 verify_spacemap_refcounts(spa_t *spa)
755 755 {
756 756 uint64_t expected_refcount = 0;
757 757 uint64_t actual_refcount;
758 758
759 759 (void) feature_get_refcount(spa,
760 760 &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
761 761 &expected_refcount);
762 762 actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
763 763 actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
764 764 actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
765 765 actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
766 766 actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
767 767
768 768 if (expected_refcount != actual_refcount) {
769 769 (void) printf("space map refcount mismatch: expected %lld != "
770 770 "actual %lld\n",
771 771 (longlong_t)expected_refcount,
772 772 (longlong_t)actual_refcount);
773 773 return (2);
774 774 }
775 775 return (0);
776 776 }
777 777
778 778 static void
779 779 dump_spacemap(objset_t *os, space_map_t *sm)
780 780 {
781 781 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
782 782 "INVALID", "INVALID", "INVALID", "INVALID" };
783 783
784 784 if (sm == NULL)
785 785 return;
786 786
787 787 (void) printf("space map object %llu:\n",
788 - (longlong_t)sm->sm_phys->smp_object);
789 - (void) printf(" smp_objsize = 0x%llx\n",
790 - (longlong_t)sm->sm_phys->smp_objsize);
788 + (longlong_t)sm->sm_object);
789 + (void) printf(" smp_length = 0x%llx\n",
790 + (longlong_t)sm->sm_phys->smp_length);
791 791 (void) printf(" smp_alloc = 0x%llx\n",
792 792 (longlong_t)sm->sm_phys->smp_alloc);
793 793
794 + if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
795 + return;
796 +
794 797 /*
795 798 * Print out the freelist entries in both encoded and decoded form.
796 799 */
797 800 uint8_t mapshift = sm->sm_shift;
798 801 int64_t alloc = 0;
799 - uint64_t word;
802 + uint64_t word, entry_id = 0;
800 803 for (uint64_t offset = 0; offset < space_map_length(sm);
801 804 offset += sizeof (word)) {
802 805
803 806 VERIFY0(dmu_read(os, space_map_object(sm), offset,
804 807 sizeof (word), &word, DMU_READ_PREFETCH));
805 808
806 809 if (sm_entry_is_debug(word)) {
807 - (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
808 - (u_longlong_t)(offset / sizeof (word)),
810 + (void) printf("\t [%6llu] %s: txg %llu pass %llu\n",
811 + (u_longlong_t)entry_id,
809 812 ddata[SM_DEBUG_ACTION_DECODE(word)],
810 813 (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
811 814 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
815 + entry_id++;
812 816 continue;
813 817 }
814 818
815 819 uint8_t words;
816 820 char entry_type;
817 821 uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
818 822
819 823 if (sm_entry_is_single_word(word)) {
820 824 entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
821 825 'A' : 'F';
822 826 entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
823 827 sm->sm_start;
824 828 entry_run = SM_RUN_DECODE(word) << mapshift;
825 829 words = 1;
826 830 } else {
827 831 /* it is a two-word entry so we read another word */
828 832 ASSERT(sm_entry_is_double_word(word));
829 833
830 834 uint64_t extra_word;
831 835 offset += sizeof (extra_word);
832 836 VERIFY0(dmu_read(os, space_map_object(sm), offset,
833 837 sizeof (extra_word), &extra_word,
834 838 DMU_READ_PREFETCH));
835 839
836 840 ASSERT3U(offset, <=, space_map_length(sm));
837 841
838 842 entry_run = SM2_RUN_DECODE(word) << mapshift;
839 843 entry_vdev = SM2_VDEV_DECODE(word);
840 844 entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
841 845 'A' : 'F';
842 846 entry_off = (SM2_OFFSET_DECODE(extra_word) <<
843 847 mapshift) + sm->sm_start;
844 848 words = 2;
845 849 }
846 850
847 851 (void) printf("\t [%6llu] %c range:"
848 852 " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
849 - (u_longlong_t)(offset / sizeof (word)),
853 + (u_longlong_t)entry_id,
850 854 entry_type, (u_longlong_t)entry_off,
851 855 (u_longlong_t)(entry_off + entry_run),
852 856 (u_longlong_t)entry_run,
853 857 (u_longlong_t)entry_vdev, words);
854 858
855 859 if (entry_type == 'A')
856 860 alloc += entry_run;
857 861 else
858 862 alloc -= entry_run;
863 + entry_id++;
859 864 }
860 - if ((uint64_t)alloc != space_map_allocated(sm)) {
865 + if (alloc != space_map_allocated(sm)) {
861 866 (void) printf("space_map_object alloc (%lld) INCONSISTENT "
862 867 "with space map summary (%lld)\n",
863 868 (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
864 869 }
865 870 }
866 871
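The substantive change in this function is the printed index: the old code used offset / sizeof (word), which is a word index, not an entry index -- after every two-word (SM2) entry it skips a number. The new entry_id counter increments once per logical entry regardless of width. A minimal standalone sketch of the counting pattern (hypothetical one-bit tag; the real encoding lives in sys/space_map.h):

	#include <stdio.h>
	#include <stdint.h>

	/* assume: low bit set marks a two-word entry (illustrative only) */
	static int
	is_double_word(uint64_t w)
	{
		return ((w & 1ULL) != 0);
	}

	int
	main(void)
	{
		uint64_t words[] = { 0, 1, 0, 0, 1, 0 };	/* 1 = two-word head */
		uint64_t entry_id = 0;

		for (size_t i = 0; i < sizeof (words) / sizeof (words[0]); i++) {
			int dbl = is_double_word(words[i]);

			(void) printf("[%llu] %s entry at word %zu\n",
			    (unsigned long long)entry_id,
			    dbl ? "two-word" : "one-word", i);
			if (dbl)
				i++;		/* consume the payload word */
			entry_id++;		/* count entries, not words */
		}
		return (0);
	}

Run over these six words it prints entry IDs 0 through 3; the old word-index scheme would have labeled the same four entries 0, 1, 3, 4.
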
867 872 static void
868 873 dump_metaslab_stats(metaslab_t *msp)
869 874 {
870 875 char maxbuf[32];
871 876 range_tree_t *rt = msp->ms_allocatable;
872 877 avl_tree_t *t = &msp->ms_allocatable_by_size;
873 878 int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
874 879
 875  880 	/* make sure nicenum has enough space */
876 881 CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
877 882
878 883 zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
879 884
880 885 (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
881 886 "segments", avl_numnodes(t), "maxsize", maxbuf,
882 887 "freepct", free_pct);
883 888 (void) printf("\tIn-memory histogram:\n");
884 889 dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
885 890 }
886 891
887 892 static void
888 893 dump_metaslab(metaslab_t *msp)
889 894 {
890 895 vdev_t *vd = msp->ms_group->mg_vd;
891 896 spa_t *spa = vd->vdev_spa;
892 897 space_map_t *sm = msp->ms_sm;
893 898 char freebuf[32];
894 899
895 900 zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
896 901 sizeof (freebuf));
897 902
898 903 (void) printf(
899 904 "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n",
900 905 (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
901 906 (u_longlong_t)space_map_object(sm), freebuf);
902 907
903 908 if (dump_opt['m'] > 2 && !dump_opt['L']) {
904 909 mutex_enter(&msp->ms_lock);
905 910 VERIFY0(metaslab_load(msp));
906 911 range_tree_stat_verify(msp->ms_allocatable);
907 912 dump_metaslab_stats(msp);
908 913 metaslab_unload(msp);
909 914 mutex_exit(&msp->ms_lock);
910 915 }
911 916
912 917 if (dump_opt['m'] > 1 && sm != NULL &&
913 918 spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
914 919 /*
915 920 * The space map histogram represents free space in chunks
916 921 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
917 922 */
918 923 (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
919 924 (u_longlong_t)msp->ms_fragmentation);
920 925 dump_histogram(sm->sm_phys->smp_histogram,
921 926 SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
922 927 }
923 928
924 - if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
925 - ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
926 -
927 - dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
928 - }
929 + ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
930 + dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
929 931 }
930 932
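The verbosity check that used to wrap this dump_spacemap() call (old lines 924-928) has not disappeared: dump_spacemap() itself now returns early unless dump_opt['d'] >= 6 or dump_opt['m'] >= 4, so the same gate is enforced once, in one place, and the ms_size assertion above now runs at every verbosity level instead of only at -dddddd/-mmmm.
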
931 933 static void
932 934 print_vdev_metaslab_header(vdev_t *vd)
933 935 {
934 936 vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
935 937 const char *bias_str;
936 938
937 939 bias_str = (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) ?
938 940 VDEV_ALLOC_BIAS_LOG :
939 941 (alloc_bias == VDEV_BIAS_SPECIAL) ? VDEV_ALLOC_BIAS_SPECIAL :
940 942 (alloc_bias == VDEV_BIAS_DEDUP) ? VDEV_ALLOC_BIAS_DEDUP :
941 943 vd->vdev_islog ? "log" : "";
942 944
943 945 (void) printf("\tvdev %10llu %s\n"
944 946 "\t%-10s%5llu %-19s %-15s %-12s\n",
945 947 (u_longlong_t)vd->vdev_id, bias_str,
946 948 "metaslabs", (u_longlong_t)vd->vdev_ms_count,
947 949 "offset", "spacemap", "free");
948 950 (void) printf("\t%15s %19s %15s %12s\n",
949 951 "---------------", "-------------------",
950 952 "---------------", "------------");
951 953 }
952 954
953 955 static void
954 956 dump_metaslab_groups(spa_t *spa)
955 957 {
956 958 vdev_t *rvd = spa->spa_root_vdev;
957 959 metaslab_class_t *mc = spa_normal_class(spa);
958 960 uint64_t fragmentation;
959 961
960 962 metaslab_class_histogram_verify(mc);
961 963
962 964 for (unsigned c = 0; c < rvd->vdev_children; c++) {
963 965 vdev_t *tvd = rvd->vdev_child[c];
964 966 metaslab_group_t *mg = tvd->vdev_mg;
965 967
966 968 if (mg == NULL || mg->mg_class != mc)
967 969 continue;
968 970
969 971 metaslab_group_histogram_verify(mg);
970 972 mg->mg_fragmentation = metaslab_group_fragmentation(mg);
971 973
972 974 (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
973 975 "fragmentation",
974 976 (u_longlong_t)tvd->vdev_id,
975 977 (u_longlong_t)tvd->vdev_ms_count);
976 978 if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
977 979 (void) printf("%3s\n", "-");
978 980 } else {
979 981 (void) printf("%3llu%%\n",
980 982 (u_longlong_t)mg->mg_fragmentation);
981 983 }
982 984 dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
983 985 }
984 986
985 987 (void) printf("\tpool %s\tfragmentation", spa_name(spa));
986 988 fragmentation = metaslab_class_fragmentation(mc);
987 989 if (fragmentation == ZFS_FRAG_INVALID)
988 990 (void) printf("\t%3s\n", "-");
989 991 else
990 992 (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
991 993 dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
992 994 }
993 995
994 996 static void
995 997 print_vdev_indirect(vdev_t *vd)
996 998 {
997 999 vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
998 1000 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
999 1001 vdev_indirect_births_t *vib = vd->vdev_indirect_births;
1000 1002
1001 1003 if (vim == NULL) {
1002 1004 ASSERT3P(vib, ==, NULL);
1003 1005 return;
1004 1006 }
1005 1007
1006 1008 ASSERT3U(vdev_indirect_mapping_object(vim), ==,
1007 1009 vic->vic_mapping_object);
1008 1010 ASSERT3U(vdev_indirect_births_object(vib), ==,
1009 1011 vic->vic_births_object);
1010 1012
1011 1013 (void) printf("indirect births obj %llu:\n",
1012 1014 (longlong_t)vic->vic_births_object);
1013 1015 (void) printf(" vib_count = %llu\n",
1014 1016 (longlong_t)vdev_indirect_births_count(vib));
1015 1017 for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
1016 1018 vdev_indirect_birth_entry_phys_t *cur_vibe =
1017 1019 &vib->vib_entries[i];
1018 1020 (void) printf("\toffset %llx -> txg %llu\n",
1019 1021 (longlong_t)cur_vibe->vibe_offset,
1020 1022 (longlong_t)cur_vibe->vibe_phys_birth_txg);
1021 1023 }
1022 1024 (void) printf("\n");
1023 1025
1024 1026 (void) printf("indirect mapping obj %llu:\n",
1025 1027 (longlong_t)vic->vic_mapping_object);
1026 1028 (void) printf(" vim_max_offset = 0x%llx\n",
1027 1029 (longlong_t)vdev_indirect_mapping_max_offset(vim));
1028 1030 (void) printf(" vim_bytes_mapped = 0x%llx\n",
1029 1031 (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
1030 1032 (void) printf(" vim_count = %llu\n",
1031 1033 (longlong_t)vdev_indirect_mapping_num_entries(vim));
1032 1034
1033 1035 if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
1034 1036 return;
1035 1037
1036 1038 uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);
1037 1039
1038 1040 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
1039 1041 vdev_indirect_mapping_entry_phys_t *vimep =
1040 1042 &vim->vim_entries[i];
1041 1043 (void) printf("\t<%llx:%llx:%llx> -> "
1042 1044 "<%llx:%llx:%llx> (%x obsolete)\n",
1043 1045 (longlong_t)vd->vdev_id,
1044 1046 (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
1045 1047 (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
1046 1048 (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
1047 1049 (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
1048 1050 (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
1049 1051 counts[i]);
1050 1052 }
1051 1053 (void) printf("\n");
1052 1054
1053 1055 uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
1054 1056 if (obsolete_sm_object != 0) {
1055 1057 objset_t *mos = vd->vdev_spa->spa_meta_objset;
1056 1058 (void) printf("obsolete space map object %llu:\n",
1057 1059 (u_longlong_t)obsolete_sm_object);
1058 1060 ASSERT(vd->vdev_obsolete_sm != NULL);
1059 1061 ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
1060 1062 obsolete_sm_object);
1061 1063 dump_spacemap(mos, vd->vdev_obsolete_sm);
1062 1064 (void) printf("\n");
1063 1065 }
1064 1066 }
1065 1067
1066 1068 static void
1067 1069 dump_metaslabs(spa_t *spa)
1068 1070 {
1069 1071 vdev_t *vd, *rvd = spa->spa_root_vdev;
1070 1072 uint64_t m, c = 0, children = rvd->vdev_children;
1071 1073
1072 1074 (void) printf("\nMetaslabs:\n");
1073 1075
1074 1076 if (!dump_opt['d'] && zopt_objects > 0) {
1075 1077 c = zopt_object[0];
1076 1078
1077 1079 if (c >= children)
1078 1080 (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
1079 1081
1080 1082 if (zopt_objects > 1) {
1081 1083 vd = rvd->vdev_child[c];
1082 1084 print_vdev_metaslab_header(vd);
1083 1085
1084 1086 for (m = 1; m < zopt_objects; m++) {
1085 1087 if (zopt_object[m] < vd->vdev_ms_count)
1086 1088 dump_metaslab(
1087 1089 vd->vdev_ms[zopt_object[m]]);
1088 1090 else
1089 1091 (void) fprintf(stderr, "bad metaslab "
1090 1092 "number %llu\n",
1091 1093 (u_longlong_t)zopt_object[m]);
1092 1094 }
1093 1095 (void) printf("\n");
1094 1096 return;
1095 1097 }
1096 1098 children = c + 1;
1097 1099 }
1098 1100 for (; c < children; c++) {
1099 1101 vd = rvd->vdev_child[c];
1100 1102 print_vdev_metaslab_header(vd);
1101 1103
1102 1104 print_vdev_indirect(vd);
1103 1105
1104 1106 for (m = 0; m < vd->vdev_ms_count; m++)
1105 1107 dump_metaslab(vd->vdev_ms[m]);
1106 1108 (void) printf("\n");
1107 1109 }
1108 1110 }
1109 1111
1110 1112 static void
1111 1113 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
1112 1114 {
1113 1115 const ddt_phys_t *ddp = dde->dde_phys;
1114 1116 const ddt_key_t *ddk = &dde->dde_key;
1115 1117 const char *types[4] = { "ditto", "single", "double", "triple" };
1116 1118 char blkbuf[BP_SPRINTF_LEN];
1117 1119 blkptr_t blk;
1118 1120
1119 1121 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1120 1122 if (ddp->ddp_phys_birth == 0)
1121 1123 continue;
1122 1124 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
1123 1125 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
1124 1126 (void) printf("index %llx refcnt %llu %s %s\n",
1125 1127 (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
1126 1128 types[p], blkbuf);
1127 1129 }
1128 1130 }
1129 1131
1130 1132 static void
1131 1133 dump_dedup_ratio(const ddt_stat_t *dds)
1132 1134 {
1133 1135 double rL, rP, rD, D, dedup, compress, copies;
1134 1136
1135 1137 if (dds->dds_blocks == 0)
1136 1138 return;
1137 1139
1138 1140 rL = (double)dds->dds_ref_lsize;
1139 1141 rP = (double)dds->dds_ref_psize;
1140 1142 rD = (double)dds->dds_ref_dsize;
1141 1143 D = (double)dds->dds_dsize;
1142 1144
1143 1145 dedup = rD / D;
1144 1146 compress = rL / rP;
1145 1147 copies = rD / rP;
1146 1148
1147 1149 (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
1148 1150 "dedup * compress / copies = %.2f\n\n",
1149 1151 dedup, compress, copies, dedup * compress / copies);
1150 1152 }
1151 1153
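A quick check on the combined figure: since copies = rD/rP, dedup * compress / copies = (rD/D) * (rL/rP) * (rP/rD) = rL/D -- the ratio of logical bytes referenced to physical bytes actually allocated, i.e. the net space saving once dedup and compression are both accounted for and the cost of extra DVA copies is factored back out.
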
1152 1154 static void
1153 1155 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
1154 1156 {
1155 1157 char name[DDT_NAMELEN];
1156 1158 ddt_entry_t dde;
1157 1159 uint64_t walk = 0;
1158 1160 dmu_object_info_t doi;
1159 1161 uint64_t count, dspace, mspace;
1160 1162 int error;
1161 1163
1162 1164 error = ddt_object_info(ddt, type, class, &doi);
1163 1165
1164 1166 if (error == ENOENT)
1165 1167 return;
1166 1168 ASSERT(error == 0);
1167 1169
1168 1170 if ((count = ddt_object_count(ddt, type, class)) == 0)
1169 1171 return;
1170 1172
1171 1173 dspace = doi.doi_physical_blocks_512 << 9;
1172 1174 mspace = doi.doi_fill_count * doi.doi_data_block_size;
1173 1175
1174 1176 ddt_object_name(ddt, type, class, name);
1175 1177
1176 1178 (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
1177 1179 name,
1178 1180 (u_longlong_t)count,
1179 1181 (u_longlong_t)(dspace / count),
1180 1182 (u_longlong_t)(mspace / count));
1181 1183
1182 1184 if (dump_opt['D'] < 3)
1183 1185 return;
1184 1186
1185 1187 zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
1186 1188
1187 1189 if (dump_opt['D'] < 4)
1188 1190 return;
1189 1191
1190 1192 if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
1191 1193 return;
1192 1194
1193 1195 (void) printf("%s contents:\n\n", name);
1194 1196
1195 1197 while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
1196 1198 dump_dde(ddt, &dde, walk);
1197 1199
1198 1200 ASSERT3U(error, ==, ENOENT);
1199 1201
1200 1202 (void) printf("\n");
1201 1203 }
1202 1204
1203 1205 static void
1204 1206 dump_all_ddts(spa_t *spa)
1205 1207 {
1206 1208 ddt_histogram_t ddh_total;
1207 1209 ddt_stat_t dds_total;
1208 1210
1209 1211 bzero(&ddh_total, sizeof (ddh_total));
1210 1212 bzero(&dds_total, sizeof (dds_total));
1211 1213
1212 1214 for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
1213 1215 ddt_t *ddt = spa->spa_ddt[c];
1214 1216 for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
1215 1217 for (enum ddt_class class = 0; class < DDT_CLASSES;
1216 1218 class++) {
1217 1219 dump_ddt(ddt, type, class);
1218 1220 }
1219 1221 }
1220 1222 }
1221 1223
1222 1224 ddt_get_dedup_stats(spa, &dds_total);
1223 1225
1224 1226 if (dds_total.dds_blocks == 0) {
1225 1227 (void) printf("All DDTs are empty\n");
1226 1228 return;
1227 1229 }
1228 1230
1229 1231 (void) printf("\n");
1230 1232
1231 1233 if (dump_opt['D'] > 1) {
1232 1234 (void) printf("DDT histogram (aggregated over all DDTs):\n");
1233 1235 ddt_get_dedup_histogram(spa, &ddh_total);
1234 1236 zpool_dump_ddt(&dds_total, &ddh_total);
1235 1237 }
1236 1238
1237 1239 dump_dedup_ratio(&dds_total);
1238 1240 }
1239 1241
1240 1242 static void
1241 1243 dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
1242 1244 {
1243 1245 char *prefix = arg;
1244 1246
1245 1247 (void) printf("%s [%llu,%llu) length %llu\n",
1246 1248 prefix,
1247 1249 (u_longlong_t)start,
1248 1250 (u_longlong_t)(start + size),
1249 1251 (u_longlong_t)(size));
1250 1252 }
1251 1253
1252 1254 static void
1253 1255 dump_dtl(vdev_t *vd, int indent)
1254 1256 {
1255 1257 spa_t *spa = vd->vdev_spa;
1256 1258 boolean_t required;
1257 1259 const char *name[DTL_TYPES] = { "missing", "partial", "scrub",
1258 1260 "outage" };
1259 1261 char prefix[256];
1260 1262
1261 1263 spa_vdev_state_enter(spa, SCL_NONE);
1262 1264 required = vdev_dtl_required(vd);
1263 1265 (void) spa_vdev_state_exit(spa, NULL, 0);
1264 1266
1265 1267 if (indent == 0)
1266 1268 (void) printf("\nDirty time logs:\n\n");
1267 1269
1268 1270 (void) printf("\t%*s%s [%s]\n", indent, "",
1269 1271 vd->vdev_path ? vd->vdev_path :
1270 1272 vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
1271 1273 required ? "DTL-required" : "DTL-expendable");
1272 1274
1273 1275 for (int t = 0; t < DTL_TYPES; t++) {
1274 1276 range_tree_t *rt = vd->vdev_dtl[t];
1275 1277 if (range_tree_space(rt) == 0)
1276 1278 continue;
1277 1279 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
1278 1280 indent + 2, "", name[t]);
1279 1281 range_tree_walk(rt, dump_dtl_seg, prefix);
1280 1282 if (dump_opt['d'] > 5 && vd->vdev_children == 0)
1281 1283 dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
1282 1284 }
1283 1285
1284 1286 for (unsigned c = 0; c < vd->vdev_children; c++)
1285 1287 dump_dtl(vd->vdev_child[c], indent + 4);
1286 1288 }
1287 1289
1288 1290 static void
1289 1291 dump_history(spa_t *spa)
1290 1292 {
1291 1293 nvlist_t **events = NULL;
1292 1294 uint64_t resid, len, off = 0;
1293 1295 uint_t num = 0;
1294 1296 int error;
1295 1297 time_t tsec;
1296 1298 struct tm t;
1297 1299 char tbuf[30];
1298 1300 char internalstr[MAXPATHLEN];
1299 1301
1300 1302 char *buf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
1301 1303 do {
1302 1304 len = SPA_MAXBLOCKSIZE;
1303 1305
1304 1306 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
1305 1307 (void) fprintf(stderr, "Unable to read history: "
1306 1308 "error %d\n", error);
1307 1309 umem_free(buf, SPA_MAXBLOCKSIZE);
1308 1310 return;
1309 1311 }
1310 1312
1311 1313 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
1312 1314 break;
1313 1315
1314 1316 off -= resid;
1315 1317 } while (len != 0);
1316 1318 umem_free(buf, SPA_MAXBLOCKSIZE);
1317 1319
1318 1320 (void) printf("\nHistory:\n");
1319 1321 for (unsigned i = 0; i < num; i++) {
1320 1322 uint64_t time, txg, ievent;
1321 1323 char *cmd, *intstr;
1322 1324 boolean_t printed = B_FALSE;
1323 1325
1324 1326 if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
1325 1327 &time) != 0)
1326 1328 goto next;
1327 1329 if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
1328 1330 &cmd) != 0) {
1329 1331 if (nvlist_lookup_uint64(events[i],
1330 1332 ZPOOL_HIST_INT_EVENT, &ievent) != 0)
1331 1333 goto next;
1332 1334 verify(nvlist_lookup_uint64(events[i],
1333 1335 ZPOOL_HIST_TXG, &txg) == 0);
1334 1336 verify(nvlist_lookup_string(events[i],
1335 1337 ZPOOL_HIST_INT_STR, &intstr) == 0);
1336 1338 if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
1337 1339 goto next;
1338 1340
1339 1341 (void) snprintf(internalstr,
1340 1342 sizeof (internalstr),
1341 1343 "[internal %s txg:%ju] %s",
1342 1344 zfs_history_event_names[ievent], (uintmax_t)txg,
1343 1345 intstr);
1344 1346 cmd = internalstr;
1345 1347 }
1346 1348 tsec = time;
1347 1349 (void) localtime_r(&tsec, &t);
1348 1350 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
1349 1351 (void) printf("%s %s\n", tbuf, cmd);
1350 1352 printed = B_TRUE;
1351 1353
1352 1354 next:
1353 1355 if (dump_opt['h'] > 1) {
1354 1356 if (!printed)
1355 1357 (void) printf("unrecognized record:\n");
1356 1358 dump_nvlist(events[i], 2);
1357 1359 }
1358 1360 }
1359 1361 }
1360 1362
1361 1363 /*ARGSUSED*/
1362 1364 static void
1363 1365 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
1364 1366 {
1365 1367 }
1366 1368
1367 1369 static uint64_t
1368 1370 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
1369 1371 const zbookmark_phys_t *zb)
1370 1372 {
1371 1373 if (dnp == NULL) {
1372 1374 ASSERT(zb->zb_level < 0);
1373 1375 if (zb->zb_object == 0)
1374 1376 return (zb->zb_blkid);
1375 1377 return (zb->zb_blkid * BP_GET_LSIZE(bp));
1376 1378 }
1377 1379
1378 1380 ASSERT(zb->zb_level >= 0);
1379 1381
1380 1382 return ((zb->zb_blkid <<
1381 1383 (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
1382 1384 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
1383 1385 }
1384 1386
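A worked example of the offset arithmetic (assumed geometry, chosen for round numbers): with 128K data blocks (dn_datablkszsec = 256 sectors), 128K indirect blocks (dn_indblkshift = 17) and 128-byte block pointers (SPA_BLKPTRSHIFT = 7), each L1 indirect block covers 2^(17 - 7) = 1024 data blocks. For zb_level = 1 and zb_blkid = 2:

	(2 << (1 * (17 - 7))) * 256 << 9 = 2048 * 256 * 512 = 0x10000000

so the third L1 indirect block maps the 256MB of file data starting at offset 256MB.
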
1385 1387 static void
1386 1388 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
1387 1389 {
1388 1390 const dva_t *dva = bp->blk_dva;
1389 1391 int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
1390 1392
1391 1393 if (dump_opt['b'] >= 6) {
1392 1394 snprintf_blkptr(blkbuf, buflen, bp);
1393 1395 return;
1394 1396 }
1395 1397
1396 1398 if (BP_IS_EMBEDDED(bp)) {
1397 1399 (void) sprintf(blkbuf,
1398 1400 "EMBEDDED et=%u %llxL/%llxP B=%llu",
1399 1401 (int)BPE_GET_ETYPE(bp),
1400 1402 (u_longlong_t)BPE_GET_LSIZE(bp),
1401 1403 (u_longlong_t)BPE_GET_PSIZE(bp),
1402 1404 (u_longlong_t)bp->blk_birth);
1403 1405 return;
1404 1406 }
1405 1407
1406 1408 blkbuf[0] = '\0';
1407 1409 for (int i = 0; i < ndvas; i++)
1408 1410 (void) snprintf(blkbuf + strlen(blkbuf),
1409 1411 buflen - strlen(blkbuf), "%llu:%llx:%llx ",
1410 1412 (u_longlong_t)DVA_GET_VDEV(&dva[i]),
1411 1413 (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
1412 1414 (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
1413 1415
1414 1416 if (BP_IS_HOLE(bp)) {
1415 1417 (void) snprintf(blkbuf + strlen(blkbuf),
1416 1418 buflen - strlen(blkbuf),
1417 1419 "%llxL B=%llu",
1418 1420 (u_longlong_t)BP_GET_LSIZE(bp),
1419 1421 (u_longlong_t)bp->blk_birth);
1420 1422 } else {
1421 1423 (void) snprintf(blkbuf + strlen(blkbuf),
1422 1424 buflen - strlen(blkbuf),
1423 1425 "%llxL/%llxP F=%llu B=%llu/%llu",
1424 1426 (u_longlong_t)BP_GET_LSIZE(bp),
1425 1427 (u_longlong_t)BP_GET_PSIZE(bp),
1426 1428 (u_longlong_t)BP_GET_FILL(bp),
1427 1429 (u_longlong_t)bp->blk_birth,
1428 1430 (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
1429 1431 }
1430 1432 }
1431 1433
1432 1434 static void
1433 1435 print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
1434 1436 const dnode_phys_t *dnp)
1435 1437 {
1436 1438 char blkbuf[BP_SPRINTF_LEN];
1437 1439 int l;
1438 1440
1439 1441 if (!BP_IS_EMBEDDED(bp)) {
1440 1442 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1441 1443 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1442 1444 }
1443 1445
1444 1446 (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
1445 1447
1446 1448 ASSERT(zb->zb_level >= 0);
1447 1449
1448 1450 for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
1449 1451 if (l == zb->zb_level) {
1450 1452 (void) printf("L%llx", (u_longlong_t)zb->zb_level);
1451 1453 } else {
1452 1454 (void) printf(" ");
1453 1455 }
1454 1456 }
1455 1457
1456 1458 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1457 1459 (void) printf("%s\n", blkbuf);
1458 1460 }
1459 1461
1460 1462 static int
1461 1463 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
1462 1464 blkptr_t *bp, const zbookmark_phys_t *zb)
1463 1465 {
1464 1466 int err = 0;
1465 1467
1466 1468 if (bp->blk_birth == 0)
1467 1469 return (0);
1468 1470
1469 1471 print_indirect(bp, zb, dnp);
1470 1472
1471 1473 if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
1472 1474 arc_flags_t flags = ARC_FLAG_WAIT;
1473 1475 int i;
1474 1476 blkptr_t *cbp;
1475 1477 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
1476 1478 arc_buf_t *buf;
1477 1479 uint64_t fill = 0;
1478 1480
1479 1481 err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
1480 1482 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
1481 1483 if (err)
1482 1484 return (err);
1483 1485 ASSERT(buf->b_data);
1484 1486
1485 1487 /* recursively visit blocks below this */
1486 1488 cbp = buf->b_data;
1487 1489 for (i = 0; i < epb; i++, cbp++) {
1488 1490 zbookmark_phys_t czb;
1489 1491
1490 1492 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
1491 1493 zb->zb_level - 1,
1492 1494 zb->zb_blkid * epb + i);
1493 1495 err = visit_indirect(spa, dnp, cbp, &czb);
1494 1496 if (err)
1495 1497 break;
1496 1498 fill += BP_GET_FILL(cbp);
1497 1499 }
1498 1500 if (!err)
1499 1501 ASSERT3U(fill, ==, BP_GET_FILL(bp));
1500 1502 arc_buf_destroy(buf, &buf);
1501 1503 }
1502 1504
1503 1505 return (err);
1504 1506 }
1505 1507
1506 1508 /*ARGSUSED*/
1507 1509 static void
1508 1510 dump_indirect(dnode_t *dn)
1509 1511 {
1510 1512 dnode_phys_t *dnp = dn->dn_phys;
1511 1513 int j;
1512 1514 zbookmark_phys_t czb;
1513 1515
1514 1516 (void) printf("Indirect blocks:\n");
1515 1517
1516 1518 SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1517 1519 dn->dn_object, dnp->dn_nlevels - 1, 0);
1518 1520 for (j = 0; j < dnp->dn_nblkptr; j++) {
1519 1521 czb.zb_blkid = j;
1520 1522 (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1521 1523 &dnp->dn_blkptr[j], &czb);
1522 1524 }
1523 1525
1524 1526 (void) printf("\n");
1525 1527 }
1526 1528
1527 1529 /*ARGSUSED*/
1528 1530 static void
1529 1531 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1530 1532 {
1531 1533 dsl_dir_phys_t *dd = data;
1532 1534 time_t crtime;
1533 1535 char nice[32];
1534 1536
1535 1537 /* make sure nicenum has enough space */
1536 1538 CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ);
1537 1539
1538 1540 if (dd == NULL)
1539 1541 return;
1540 1542
1541 1543 ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1542 1544
1543 1545 crtime = dd->dd_creation_time;
1544 1546 (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1545 1547 (void) printf("\t\thead_dataset_obj = %llu\n",
1546 1548 (u_longlong_t)dd->dd_head_dataset_obj);
1547 1549 (void) printf("\t\tparent_dir_obj = %llu\n",
1548 1550 (u_longlong_t)dd->dd_parent_obj);
1549 1551 (void) printf("\t\torigin_obj = %llu\n",
1550 1552 (u_longlong_t)dd->dd_origin_obj);
1551 1553 (void) printf("\t\tchild_dir_zapobj = %llu\n",
1552 1554 (u_longlong_t)dd->dd_child_dir_zapobj);
1553 1555 zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
1554 1556 (void) printf("\t\tused_bytes = %s\n", nice);
1555 1557 zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
1556 1558 (void) printf("\t\tcompressed_bytes = %s\n", nice);
1557 1559 zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
1558 1560 (void) printf("\t\tuncompressed_bytes = %s\n", nice);
1559 1561 zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
1560 1562 (void) printf("\t\tquota = %s\n", nice);
1561 1563 zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
1562 1564 (void) printf("\t\treserved = %s\n", nice);
1563 1565 (void) printf("\t\tprops_zapobj = %llu\n",
1564 1566 (u_longlong_t)dd->dd_props_zapobj);
1565 1567 (void) printf("\t\tdeleg_zapobj = %llu\n",
1566 1568 (u_longlong_t)dd->dd_deleg_zapobj);
1567 1569 (void) printf("\t\tflags = %llx\n",
1568 1570 (u_longlong_t)dd->dd_flags);
1569 1571
1570 1572 #define DO(which) \
1571 1573 zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
1572 1574 sizeof (nice)); \
1573 1575 (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1574 1576 DO(HEAD);
1575 1577 DO(SNAP);
1576 1578 DO(CHILD);
1577 1579 DO(CHILD_RSRV);
1578 1580 DO(REFRSRV);
1579 1581 #undef DO
1580 1582 (void) printf("\t\tclones = %llu\n",
1581 1583 (u_longlong_t)dd->dd_clones);
1582 1584 }
1583 1585
1584 1586 /*ARGSUSED*/
1585 1587 static void
1586 1588 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1587 1589 {
1588 1590 dsl_dataset_phys_t *ds = data;
1589 1591 time_t crtime;
1590 1592 char used[32], compressed[32], uncompressed[32], unique[32];
1591 1593 char blkbuf[BP_SPRINTF_LEN];
1592 1594
1593 1595 /* make sure nicenum has enough space */
1594 1596 CTASSERT(sizeof (used) >= NN_NUMBUF_SZ);
1595 1597 CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ);
1596 1598 CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ);
1597 1599 CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ);
1598 1600
1599 1601 if (ds == NULL)
1600 1602 return;
1601 1603
1602 1604 ASSERT(size == sizeof (*ds));
1603 1605 crtime = ds->ds_creation_time;
1604 1606 zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
1605 1607 zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
1606 1608 zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
1607 1609 sizeof (uncompressed));
1608 1610 zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
1609 1611 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
1610 1612
1611 1613 (void) printf("\t\tdir_obj = %llu\n",
1612 1614 (u_longlong_t)ds->ds_dir_obj);
1613 1615 (void) printf("\t\tprev_snap_obj = %llu\n",
1614 1616 (u_longlong_t)ds->ds_prev_snap_obj);
1615 1617 (void) printf("\t\tprev_snap_txg = %llu\n",
1616 1618 (u_longlong_t)ds->ds_prev_snap_txg);
1617 1619 (void) printf("\t\tnext_snap_obj = %llu\n",
1618 1620 (u_longlong_t)ds->ds_next_snap_obj);
1619 1621 (void) printf("\t\tsnapnames_zapobj = %llu\n",
1620 1622 (u_longlong_t)ds->ds_snapnames_zapobj);
1621 1623 (void) printf("\t\tnum_children = %llu\n",
1622 1624 (u_longlong_t)ds->ds_num_children);
1623 1625 (void) printf("\t\tuserrefs_obj = %llu\n",
1624 1626 (u_longlong_t)ds->ds_userrefs_obj);
1625 1627 (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1626 1628 (void) printf("\t\tcreation_txg = %llu\n",
1627 1629 (u_longlong_t)ds->ds_creation_txg);
1628 1630 (void) printf("\t\tdeadlist_obj = %llu\n",
1629 1631 (u_longlong_t)ds->ds_deadlist_obj);
1630 1632 (void) printf("\t\tused_bytes = %s\n", used);
1631 1633 (void) printf("\t\tcompressed_bytes = %s\n", compressed);
1632 1634 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1633 1635 (void) printf("\t\tunique = %s\n", unique);
1634 1636 (void) printf("\t\tfsid_guid = %llu\n",
1635 1637 (u_longlong_t)ds->ds_fsid_guid);
1636 1638 (void) printf("\t\tguid = %llu\n",
1637 1639 (u_longlong_t)ds->ds_guid);
1638 1640 (void) printf("\t\tflags = %llx\n",
1639 1641 (u_longlong_t)ds->ds_flags);
1640 1642 (void) printf("\t\tnext_clones_obj = %llu\n",
1641 1643 (u_longlong_t)ds->ds_next_clones_obj);
1642 1644 (void) printf("\t\tprops_obj = %llu\n",
1643 1645 (u_longlong_t)ds->ds_props_obj);
1644 1646 (void) printf("\t\tbp = %s\n", blkbuf);
1645 1647 }
1646 1648
1647 1649 /* ARGSUSED */
1648 1650 static int
1649 1651 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1650 1652 {
1651 1653 char blkbuf[BP_SPRINTF_LEN];
1652 1654
1653 1655 if (bp->blk_birth != 0) {
1654 1656 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
1655 1657 (void) printf("\t%s\n", blkbuf);
1656 1658 }
1657 1659 return (0);
1658 1660 }
1659 1661
1660 1662 static void
1661 1663 dump_bptree(objset_t *os, uint64_t obj, const char *name)
1662 1664 {
1663 1665 char bytes[32];
1664 1666 bptree_phys_t *bt;
1665 1667 dmu_buf_t *db;
1666 1668
1667 1669 /* make sure nicenum has enough space */
1668 1670 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1669 1671
1670 1672 if (dump_opt['d'] < 3)
1671 1673 return;
1672 1674
1673 1675 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1674 1676 bt = db->db_data;
1675 1677 zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
1676 1678 (void) printf("\n %s: %llu datasets, %s\n",
1677 1679 name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1678 1680 dmu_buf_rele(db, FTAG);
1679 1681
1680 1682 if (dump_opt['d'] < 5)
1681 1683 return;
1682 1684
1683 1685 (void) printf("\n");
1684 1686
1685 1687 (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1686 1688 }
1687 1689
1688 1690 /* ARGSUSED */
1689 1691 static int
1690 1692 dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1691 1693 {
1692 1694 char blkbuf[BP_SPRINTF_LEN];
1693 1695
1694 1696 ASSERT(bp->blk_birth != 0);
1695 1697 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1696 1698 (void) printf("\t%s\n", blkbuf);
1697 1699 return (0);
1698 1700 }
1699 1701
1700 1702 static void
1701 1703 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
1702 1704 {
1703 1705 char bytes[32];
1704 1706 char comp[32];
1705 1707 char uncomp[32];
1706 1708
1707 1709 /* make sure nicenum has enough space */
1708 1710 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1709 1711 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
1710 1712 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
1711 1713
1712 1714 if (dump_opt['d'] < 3)
1713 1715 return;
1714 1716
1715 1717 zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes));
1716 1718 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1717 1719 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
1718 1720 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
1719 1721 (void) printf(" %*s: object %llu, %llu local blkptrs, "
1720 1722 "%llu subobjs in object %llu, %s (%s/%s comp)\n",
1721 1723 indent * 8, name,
1722 1724 (u_longlong_t)bpo->bpo_object,
1723 1725 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1724 1726 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1725 1727 (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
1726 1728 bytes, comp, uncomp);
1727 1729
1728 1730 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1729 1731 uint64_t subobj;
1730 1732 bpobj_t subbpo;
1731 1733 int error;
1732 1734 VERIFY0(dmu_read(bpo->bpo_os,
1733 1735 bpo->bpo_phys->bpo_subobjs,
1734 1736 i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1735 1737 error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1736 1738 if (error != 0) {
1737 1739 (void) printf("ERROR %u while trying to open "
1738 1740 "subobj id %llu\n",
1739 1741 error, (u_longlong_t)subobj);
1740 1742 continue;
1741 1743 }
1742 1744 dump_full_bpobj(&subbpo, "subobj", indent + 1);
1743 1745 bpobj_close(&subbpo);
1744 1746 }
1745 1747 } else {
1746 1748 (void) printf(" %*s: object %llu, %llu blkptrs, %s\n",
1747 1749 indent * 8, name,
1748 1750 (u_longlong_t)bpo->bpo_object,
1749 1751 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1750 1752 bytes);
1751 1753 }
1752 1754
1753 1755 if (dump_opt['d'] < 5)
1754 1756 return;
1755 1757
1756 1758
1757 1759 if (indent == 0) {
1758 1760 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1759 1761 (void) printf("\n");
1760 1762 }
1761 1763 }
1762 1764
1763 1765 static void
1764 1766 bpobj_count_refd(bpobj_t *bpo)
1765 1767 {
1766 1768 mos_obj_refd(bpo->bpo_object);
1767 1769
1768 1770 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1769 1771 mos_obj_refd(bpo->bpo_phys->bpo_subobjs);
1770 1772 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1771 1773 uint64_t subobj;
1772 1774 bpobj_t subbpo;
1773 1775 int error;
1774 1776 VERIFY0(dmu_read(bpo->bpo_os,
1775 1777 bpo->bpo_phys->bpo_subobjs,
1776 1778 i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1777 1779 error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1778 1780 if (error != 0) {
1779 1781 (void) printf("ERROR %u while trying to open "
1780 1782 "subobj id %llu\n",
1781 1783 error, (u_longlong_t)subobj);
1782 1784 continue;
1783 1785 }
1784 1786 bpobj_count_refd(&subbpo);
1785 1787 bpobj_close(&subbpo);
1786 1788 }
1787 1789 }
1788 1790 }
1789 1791
1790 1792 static void
1791 1793 dump_deadlist(dsl_deadlist_t *dl)
1792 1794 {
1793 1795 dsl_deadlist_entry_t *dle;
1794 1796 uint64_t unused;
1795 1797 char bytes[32];
1796 1798 char comp[32];
1797 1799 char uncomp[32];
1798 1800 uint64_t empty_bpobj =
1799 1801 dmu_objset_spa(dl->dl_os)->spa_dsl_pool->dp_empty_bpobj;
1800 1802
1801 1803 /* force the tree to be loaded */
1802 1804 dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
1803 1805
1804 1806 if (dl->dl_oldfmt) {
1805 1807 if (dl->dl_bpobj.bpo_object != empty_bpobj)
1806 1808 bpobj_count_refd(&dl->dl_bpobj);
1807 1809 } else {
1808 1810 mos_obj_refd(dl->dl_object);
1809 1811 for (dle = avl_first(&dl->dl_tree); dle;
1810 1812 dle = AVL_NEXT(&dl->dl_tree, dle)) {
1811 1813 if (dle->dle_bpobj.bpo_object != empty_bpobj)
1812 1814 bpobj_count_refd(&dle->dle_bpobj);
1813 1815 }
1814 1816 }
1815 1817
1816 1818 /* make sure nicenum has enough space */
1817 1819 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1818 1820 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
1819 1821 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
1820 1822
1821 1823 if (dump_opt['d'] < 3)
1822 1824 return;
1823 1825
1824 1826 if (dl->dl_oldfmt) {
1825 1827 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
1826 1828 return;
1827 1829 }
1828 1830
1829 1831 zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes));
1830 1832 zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
1831 1833 zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
1832 1834 (void) printf("\n Deadlist: %s (%s/%s comp)\n",
1833 1835 bytes, comp, uncomp);
1834 1836
1835 1837 if (dump_opt['d'] < 4)
1836 1838 return;
1837 1839
1838 1840 (void) printf("\n");
1839 1841
1840 1842 for (dle = avl_first(&dl->dl_tree); dle;
1841 1843 dle = AVL_NEXT(&dl->dl_tree, dle)) {
1842 1844 if (dump_opt['d'] >= 5) {
1843 1845 char buf[128];
1844 1846 (void) snprintf(buf, sizeof (buf),
1845 1847 "mintxg %llu -> obj %llu",
1846 1848 (longlong_t)dle->dle_mintxg,
1847 1849 (longlong_t)dle->dle_bpobj.bpo_object);
1848 1850
1849 1851 dump_full_bpobj(&dle->dle_bpobj, buf, 0);
1850 1852 } else {
1851 1853 (void) printf("mintxg %llu -> obj %llu\n",
1852 1854 (longlong_t)dle->dle_mintxg,
1853 1855 (longlong_t)dle->dle_bpobj.bpo_object);
1854 1856 }
1855 1857 }
1856 1858 }
1857 1859
1858 1860 static avl_tree_t idx_tree;
1859 1861 static avl_tree_t domain_tree;
1860 1862 static boolean_t fuid_table_loaded;
1861 1863 static objset_t *sa_os = NULL;
1862 1864 static sa_attr_type_t *sa_attr_table = NULL;
1863 1865
1864 1866 static int
1865 1867 open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
1866 1868 {
1867 1869 int err;
1868 1870 uint64_t sa_attrs = 0;
1869 1871 uint64_t version = 0;
1870 1872
1871 1873 VERIFY3P(sa_os, ==, NULL);
1872 1874 err = dmu_objset_own(path, type, B_TRUE, tag, osp);
1873 1875 if (err != 0) {
1874 1876 (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
1875 1877 strerror(err));
1876 1878 return (err);
1877 1879 }
1878 1880
1879 1881 if (dmu_objset_type(*osp) == DMU_OST_ZFS) {
1880 1882 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1881 1883 8, 1, &version);
1882 1884 if (version >= ZPL_VERSION_SA) {
1883 1885 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1884 1886 8, 1, &sa_attrs);
1885 1887 }
1886 1888 err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
1887 1889 &sa_attr_table);
1888 1890 if (err != 0) {
1889 1891 (void) fprintf(stderr, "sa_setup failed: %s\n",
1890 1892 strerror(err));
1891 1893 dmu_objset_disown(*osp, tag);
1892 1894 *osp = NULL;
1893 1895 }
1894 1896 }
1895 1897 sa_os = *osp;
1896 1898
1897 1899 	return (err);
1898 1900 }
1899 1901
1900 1902 static void
1901 1903 close_objset(objset_t *os, void *tag)
1902 1904 {
1903 1905 VERIFY3P(os, ==, sa_os);
1904 1906 if (os->os_sa != NULL)
1905 1907 sa_tear_down(os);
1906 1908 dmu_objset_disown(os, tag);
1907 1909 sa_attr_table = NULL;
1908 1910 sa_os = NULL;
1909 1911 }
1910 1912
1911 1913 static void
1912 1914 fuid_table_destroy(void)
1913 1915 {
1914 1916 if (fuid_table_loaded) {
1915 1917 zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1916 1918 fuid_table_loaded = B_FALSE;
1917 1919 }
1918 1920 }
1919 1921
1920 1922 /*
1921 1923  * Print uid or gid information.
1922 1924  * For a normal POSIX id, just the id is printed in decimal format.
1923 1925  * For CIFS files with a FUID, the fuid is printed in hex followed by
1924 1926  * the domain-rid string.
1925 1927 */
1926 1928 static void
1927 1929 print_idstr(uint64_t id, const char *id_type)
1928 1930 {
1929 1931 if (FUID_INDEX(id)) {
1930 1932 char *domain;
1931 1933
1932 1934 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1933 1935 (void) printf("\t%s %llx [%s-%d]\n", id_type,
1934 1936 (u_longlong_t)id, domain, (int)FUID_RID(id));
1935 1937 } else {
1936 1938 (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id);
1937 1939 }
1938 1940
1939 1941 }
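FUID_INDEX() is what splits the two cases above: a ZFS FUID packs a domain-table index into the upper 32 bits and the Windows RID into the lower 32 bits, so a zero index means a plain POSIX id. A minimal sketch with made-up values:

	uint64_t fuid = ((uint64_t)3 << 32) | 1105;	/* index 3, RID 1105 */
	ASSERT3U(FUID_INDEX(fuid), ==, 3);
	ASSERT3U(FUID_RID(fuid), ==, 1105);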
1940 1942
1941 1943 static void
1942 1944 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1943 1945 {
1944 1946 uint32_t uid_idx, gid_idx;
1945 1947
1946 1948 uid_idx = FUID_INDEX(uid);
1947 1949 gid_idx = FUID_INDEX(gid);
1948 1950
1949 1951 /* Load domain table, if not already loaded */
1950 1952 if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1951 1953 uint64_t fuid_obj;
1952 1954
1953 1955 /* first find the fuid object. It lives in the master node */
1954 1956 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1955 1957 8, 1, &fuid_obj) == 0);
1956 1958 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1957 1959 (void) zfs_fuid_table_load(os, fuid_obj,
1958 1960 &idx_tree, &domain_tree);
1959 1961 fuid_table_loaded = B_TRUE;
1960 1962 }
1961 1963
1962 1964 print_idstr(uid, "uid");
1963 1965 print_idstr(gid, "gid");
1964 1966 }
1965 1967
1966 1968 /*ARGSUSED*/
1967 1969 static void
1968 1970 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1969 1971 {
1970 1972 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */
1971 1973 sa_handle_t *hdl;
1972 1974 uint64_t xattr, rdev, gen;
1973 1975 uint64_t uid, gid, mode, fsize, parent, links;
1974 1976 uint64_t pflags;
1975 1977 uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1976 1978 time_t z_crtime, z_atime, z_mtime, z_ctime;
1977 1979 sa_bulk_attr_t bulk[12];
1978 1980 int idx = 0;
1979 1981 int error;
1980 1982
1981 1983 VERIFY3P(os, ==, sa_os);
1982 1984 if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1983 1985 (void) printf("Failed to get handle for SA znode\n");
1984 1986 return;
1985 1987 }
1986 1988
1987 1989 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1988 1990 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1989 1991 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1990 1992 &links, 8);
1991 1993 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1992 1994 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1993 1995 &mode, 8);
1994 1996 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1995 1997 NULL, &parent, 8);
1996 1998 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1997 1999 &fsize, 8);
1998 2000 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1999 2001 acctm, 16);
2000 2002 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
2001 2003 modtm, 16);
2002 2004 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
2003 2005 crtm, 16);
2004 2006 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
2005 2007 chgtm, 16);
2006 2008 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
2007 2009 &pflags, 8);
2008 2010
2009 2011 if (sa_bulk_lookup(hdl, bulk, idx)) {
2010 2012 (void) sa_handle_destroy(hdl);
2011 2013 return;
2012 2014 }
2013 2015
2014 2016 z_crtime = (time_t)crtm[0];
2015 2017 z_atime = (time_t)acctm[0];
2016 2018 z_mtime = (time_t)modtm[0];
2017 2019 z_ctime = (time_t)chgtm[0];
2018 2020
2019 2021 if (dump_opt['d'] > 4) {
2020 2022 error = zfs_obj_to_path(os, object, path, sizeof (path));
2021 2023 if (error == ESTALE) {
2022 2024 (void) snprintf(path, sizeof (path), "on delete queue");
2023 2025 } else if (error != 0) {
2024 2026 leaked_objects++;
2025 2027 (void) snprintf(path, sizeof (path),
2026 2028 "path not found, possibly leaked");
2027 2029 }
2028 2030 (void) printf("\tpath %s\n", path);
2029 2031 }
2030 2032 dump_uidgid(os, uid, gid);
2031 2033 (void) printf("\tatime %s", ctime(&z_atime));
2032 2034 (void) printf("\tmtime %s", ctime(&z_mtime));
2033 2035 (void) printf("\tctime %s", ctime(&z_ctime));
2034 2036 (void) printf("\tcrtime %s", ctime(&z_crtime));
2035 2037 (void) printf("\tgen %llu\n", (u_longlong_t)gen);
2036 2038 (void) printf("\tmode %llo\n", (u_longlong_t)mode);
2037 2039 (void) printf("\tsize %llu\n", (u_longlong_t)fsize);
2038 2040 (void) printf("\tparent %llu\n", (u_longlong_t)parent);
2039 2041 (void) printf("\tlinks %llu\n", (u_longlong_t)links);
2040 2042 (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
2041 2043 if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
2042 2044 sizeof (uint64_t)) == 0)
2043 2045 (void) printf("\txattr %llu\n", (u_longlong_t)xattr);
2044 2046 if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
2045 2047 sizeof (uint64_t)) == 0)
2046 2048 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev);
2047 2049 sa_handle_destroy(hdl);
2048 2050 }
2049 2051
2050 2052 /*ARGSUSED*/
2051 2053 static void
2052 2054 dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
2053 2055 {
2054 2056 }
2055 2057
2056 2058 /*ARGSUSED*/
2057 2059 static void
2058 2060 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
2059 2061 {
2060 2062 }
2061 2063
2062 2064 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
2063 2065 dump_none, /* unallocated */
2064 2066 dump_zap, /* object directory */
2065 2067 dump_uint64, /* object array */
2066 2068 dump_none, /* packed nvlist */
2067 2069 dump_packed_nvlist, /* packed nvlist size */
2068 2070 dump_none, /* bpobj */
2069 2071 dump_bpobj, /* bpobj header */
2070 2072 dump_none, /* SPA space map header */
2071 2073 dump_none, /* SPA space map */
2072 2074 dump_none, /* ZIL intent log */
2073 2075 dump_dnode, /* DMU dnode */
2074 2076 dump_dmu_objset, /* DMU objset */
2075 2077 dump_dsl_dir, /* DSL directory */
2076 2078 dump_zap, /* DSL directory child map */
2077 2079 dump_zap, /* DSL dataset snap map */
2078 2080 dump_zap, /* DSL props */
2079 2081 dump_dsl_dataset, /* DSL dataset */
2080 2082 dump_znode, /* ZFS znode */
2081 2083 dump_acl, /* ZFS V0 ACL */
2082 2084 dump_uint8, /* ZFS plain file */
2083 2085 dump_zpldir, /* ZFS directory */
2084 2086 dump_zap, /* ZFS master node */
2085 2087 dump_zap, /* ZFS delete queue */
2086 2088 dump_uint8, /* zvol object */
2087 2089 dump_zap, /* zvol prop */
2088 2090 dump_uint8, /* other uint8[] */
2089 2091 dump_uint64, /* other uint64[] */
2090 2092 dump_zap, /* other ZAP */
2091 2093 dump_zap, /* persistent error log */
2092 2094 dump_uint8, /* SPA history */
2093 2095 dump_history_offsets, /* SPA history offsets */
2094 2096 dump_zap, /* Pool properties */
2095 2097 dump_zap, /* DSL permissions */
2096 2098 dump_acl, /* ZFS ACL */
2097 2099 dump_uint8, /* ZFS SYSACL */
2098 2100 dump_none, /* FUID nvlist */
2099 2101 dump_packed_nvlist, /* FUID nvlist size */
2100 2102 dump_zap, /* DSL dataset next clones */
2101 2103 dump_zap, /* DSL scrub queue */
2102 2104 dump_zap, /* ZFS user/group used */
2103 2105 dump_zap, /* ZFS user/group quota */
2104 2106 dump_zap, /* snapshot refcount tags */
2105 2107 dump_ddt_zap, /* DDT ZAP object */
2106 2108 dump_zap, /* DDT statistics */
2107 2109 dump_znode, /* SA object */
2108 2110 dump_zap, /* SA Master Node */
2109 2111 dump_sa_attrs, /* SA attribute registration */
2110 2112 dump_sa_layouts, /* SA attribute layouts */
2111 2113 dump_zap, /* DSL scrub translations */
2112 2114 dump_none, /* fake dedup BP */
2113 2115 dump_zap, /* deadlist */
2114 2116 dump_none, /* deadlist hdr */
2115 2117 dump_zap, /* dsl clones */
2116 2118 dump_bpobj_subobjs, /* bpobj subobjs */
2117 2119 dump_unknown, /* Unknown type, must be last */
2118 2120 };
2119 2121
2120 2122 static void
2121 2123 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
2122 2124 uint64_t *dnode_slots_used)
2123 2125 {
2124 2126 dmu_buf_t *db = NULL;
2125 2127 dmu_object_info_t doi;
2126 2128 dnode_t *dn;
2127 2129 void *bonus = NULL;
2128 2130 size_t bsize = 0;
2129 2131 char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32];
2130 2132 char bonus_size[32];
2131 2133 char aux[50];
2132 2134 int error;
2133 2135
2134 2136 /* make sure nicenum has enough space */
2135 2137 CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ);
2136 2138 CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ);
2137 2139 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
2138 2140 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
2139 2141 CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
2140 2142
2141 2143 if (*print_header) {
2142 2144 (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n",
2143 2145 "Object", "lvl", "iblk", "dblk", "dsize", "dnsize",
2144 2146 "lsize", "%full", "type");
2145 2147 *print_header = 0;
2146 2148 }
2147 2149
2148 2150 if (object == 0) {
2149 2151 dn = DMU_META_DNODE(os);
2150 2152 } else {
2151 2153 error = dmu_bonus_hold(os, object, FTAG, &db);
2152 2154 if (error)
2153 2155 fatal("dmu_bonus_hold(%llu) failed, errno %u",
2154 2156 object, error);
2155 2157 bonus = db->db_data;
2156 2158 bsize = db->db_size;
2157 2159 dn = DB_DNODE((dmu_buf_impl_t *)db);
2158 2160 }
2159 2161 dmu_object_info_from_dnode(dn, &doi);
2160 2162
2161 2163 if (dnode_slots_used != NULL)
2162 2164 *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE;
2163 2165
2164 2166 zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
2165 2167 zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
2166 2168 zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
2167 2169 zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
2168 2170 zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
2169 2171 zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize));
2170 2172 (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
2171 2173 doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
2172 2174 doi.doi_max_offset);
2173 2175
2174 2176 aux[0] = '\0';
2175 2177
2176 2178 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
2177 2179 		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), " (K=%s)",
2178 2180 ZDB_CHECKSUM_NAME(doi.doi_checksum));
2179 2181 }
2180 2182
2181 2183 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
2182 2184 		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), " (Z=%s)",
2183 2185 ZDB_COMPRESS_NAME(doi.doi_compress));
2184 2186 }
2185 2187
2186 2188 (void) printf("%10" PRIu64
2187 2189 " %3u %5s %5s %5s %5s %5s %6s %s%s\n",
2188 2190 object, doi.doi_indirection, iblk, dblk,
2189 2191 asize, dnsize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
2190 2192
2191 2193 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
2192 2194 (void) printf("%10s %3s %5s %5s %5s %5s %5s %6s %s\n",
2193 2195 "", "", "", "", "", "", bonus_size, "bonus",
2194 2196 ZDB_OT_NAME(doi.doi_bonus_type));
2195 2197 }
2196 2198
2197 2199 if (verbosity >= 4) {
2198 2200 (void) printf("\tdnode flags: %s%s%s\n",
2199 2201 (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
2200 2202 "USED_BYTES " : "",
2201 2203 (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
2202 2204 "USERUSED_ACCOUNTED " : "",
2203 2205 (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
2204 2206 "SPILL_BLKPTR" : "");
2205 2207 (void) printf("\tdnode maxblkid: %llu\n",
2206 2208 (longlong_t)dn->dn_phys->dn_maxblkid);
2207 2209
2208 2210 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
2209 2211 bonus, bsize);
2210 2212 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
2211 2213 *print_header = 1;
2212 2214 }
2213 2215
2214 2216 if (verbosity >= 5)
2215 2217 dump_indirect(dn);
2216 2218
2217 2219 if (verbosity >= 5) {
2218 2220 /*
2219 2221 * Report the list of segments that comprise the object.
2220 2222 */
2221 2223 uint64_t start = 0;
2222 2224 uint64_t end;
2223 2225 uint64_t blkfill = 1;
2224 2226 int minlvl = 1;
2225 2227
2226 2228 if (dn->dn_type == DMU_OT_DNODE) {
2227 2229 minlvl = 0;
2228 2230 blkfill = DNODES_PER_BLOCK;
2229 2231 }
2230 2232
2231 2233 for (;;) {
2232 2234 char segsize[32];
2233 2235 /* make sure nicenum has enough space */
2234 2236 CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ);
2235 2237 error = dnode_next_offset(dn,
2236 2238 0, &start, minlvl, blkfill, 0);
2237 2239 if (error)
2238 2240 break;
2239 2241 end = start;
2240 2242 error = dnode_next_offset(dn,
2241 2243 DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
2242 2244 zdb_nicenum(end - start, segsize, sizeof (segsize));
2243 2245 (void) printf("\t\tsegment [%016llx, %016llx)"
2244 2246 " size %5s\n", (u_longlong_t)start,
2245 2247 (u_longlong_t)end, segsize);
2246 2248 if (error)
2247 2249 break;
2248 2250 start = end;
2249 2251 }
2250 2252 }
2251 2253
2252 2254 if (db != NULL)
2253 2255 dmu_buf_rele(db, FTAG);
2254 2256 }
2255 2257
2256 2258 static void
2257 2259 count_dir_mos_objects(dsl_dir_t *dd)
2258 2260 {
2259 2261 mos_obj_refd(dd->dd_object);
2260 2262 mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj);
2261 2263 mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj);
2262 2264 mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj);
2263 2265 mos_obj_refd(dsl_dir_phys(dd)->dd_clones);
2264 2266 }
2265 2267
2266 2268 static void
2267 2269 count_ds_mos_objects(dsl_dataset_t *ds)
2268 2270 {
2269 2271 mos_obj_refd(ds->ds_object);
2270 2272 mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj);
2271 2273 mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj);
2272 2274 mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj);
2273 2275 mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
2274 2276
2275 2277 if (!dsl_dataset_is_snapshot(ds)) {
2276 2278 count_dir_mos_objects(ds->ds_dir);
2277 2279 }
2278 2280 }
2279 2281
2280 2282 static const char *objset_types[DMU_OST_NUMTYPES] = {
2281 2283 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
2282 2284
2283 2285 static void
2284 2286 dump_dir(objset_t *os)
2285 2287 {
2286 2288 dmu_objset_stats_t dds;
2287 2289 uint64_t object, object_count;
2288 2290 uint64_t refdbytes, usedobjs, scratch;
2289 2291 char numbuf[32];
2290 2292 char blkbuf[BP_SPRINTF_LEN + 20];
2291 2293 char osname[ZFS_MAX_DATASET_NAME_LEN];
2292 2294 const char *type = "UNKNOWN";
2293 2295 int verbosity = dump_opt['d'];
2294 2296 int print_header = 1;
2295 2297 unsigned i;
2296 2298 int error;
2297 2299 uint64_t total_slots_used = 0;
2298 2300 uint64_t max_slot_used = 0;
2299 2301 uint64_t dnode_slots;
2300 2302
2301 2303 /* make sure nicenum has enough space */
2302 2304 CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
2303 2305
2304 2306 dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
2305 2307 dmu_objset_fast_stat(os, &dds);
2306 2308 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
2307 2309
2308 2310 if (dds.dds_type < DMU_OST_NUMTYPES)
2309 2311 type = objset_types[dds.dds_type];
2310 2312
2311 2313 if (dds.dds_type == DMU_OST_META) {
2312 2314 dds.dds_creation_txg = TXG_INITIAL;
2313 2315 usedobjs = BP_GET_FILL(os->os_rootbp);
2314 2316 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
2315 2317 dd_used_bytes;
2316 2318 } else {
2317 2319 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
2318 2320 }
2319 2321
2320 2322 ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
2321 2323
2322 2324 zdb_nicenum(refdbytes, numbuf, sizeof (numbuf));
2323 2325
2324 2326 if (verbosity >= 4) {
2325 2327 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
2326 2328 (void) snprintf_blkptr(blkbuf + strlen(blkbuf),
2327 2329 sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
2328 2330 } else {
2329 2331 blkbuf[0] = '\0';
2330 2332 }
2331 2333
2332 2334 dmu_objset_name(os, osname);
2333 2335
2334 2336 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
2335 2337 "%s, %llu objects%s%s\n",
2336 2338 osname, type, (u_longlong_t)dmu_objset_id(os),
2337 2339 (u_longlong_t)dds.dds_creation_txg,
2338 2340 numbuf, (u_longlong_t)usedobjs, blkbuf,
2339 2341 (dds.dds_inconsistent) ? " (inconsistent)" : "");
2340 2342
2341 2343 if (zopt_objects != 0) {
2342 2344 for (i = 0; i < zopt_objects; i++)
2343 2345 dump_object(os, zopt_object[i], verbosity,
2344 2346 &print_header, NULL);
2345 2347 (void) printf("\n");
2346 2348 return;
2347 2349 }
2348 2350
2349 2351 if (dump_opt['i'] != 0 || verbosity >= 2)
2350 2352 dump_intent_log(dmu_objset_zil(os));
2351 2353
2352 2354 if (dmu_objset_ds(os) != NULL) {
2353 2355 dsl_dataset_t *ds = dmu_objset_ds(os);
2354 2356 dump_deadlist(&ds->ds_deadlist);
2355 2357
2356 2358 if (dsl_dataset_remap_deadlist_exists(ds)) {
2357 2359 (void) printf("ds_remap_deadlist:\n");
2358 2360 dump_deadlist(&ds->ds_remap_deadlist);
2359 2361 }
2360 2362 count_ds_mos_objects(ds);
2361 2363 }
2362 2364
2363 2365 if (verbosity < 2)
2364 2366 return;
2365 2367
2366 2368 if (BP_IS_HOLE(os->os_rootbp))
2367 2369 return;
2368 2370
2369 2371 dump_object(os, 0, verbosity, &print_header, NULL);
2370 2372 object_count = 0;
2371 2373 if (DMU_USERUSED_DNODE(os) != NULL &&
2372 2374 DMU_USERUSED_DNODE(os)->dn_type != 0) {
2373 2375 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header,
2374 2376 NULL);
2375 2377 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header,
2376 2378 NULL);
2377 2379 }
2378 2380
2379 2381 object = 0;
2380 2382 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
2381 2383 dump_object(os, object, verbosity, &print_header, &dnode_slots);
2382 2384 object_count++;
2383 2385 total_slots_used += dnode_slots;
2384 2386 max_slot_used = object + dnode_slots - 1;
2385 2387 }
2386 2388
2387 2389 ASSERT3U(object_count, ==, usedobjs);
2388 2390
2389 2391 (void) printf("\n");
2390 2392
2391 2393 (void) printf(" Dnode slots:\n");
2392 2394 (void) printf("\tTotal used: %10llu\n",
2393 2395 (u_longlong_t)total_slots_used);
2394 2396 (void) printf("\tMax used: %10llu\n",
2395 2397 (u_longlong_t)max_slot_used);
2396 2398 (void) printf("\tPercent empty: %10lf\n",
2397 2399 (double)(max_slot_used - total_slots_used)*100 /
2398 2400 (double)max_slot_used);
2399 2401
2400 2402 (void) printf("\n");
2401 2403
2402 2404 if (error != ESRCH) {
2403 2405 (void) fprintf(stderr, "dmu_object_next() = %d\n", error);
2404 2406 abort();
2405 2407 }
2406 2408 if (leaked_objects != 0) {
2407 2409 (void) printf("%d potentially leaked objects detected\n",
2408 2410 leaked_objects);
2409 2411 leaked_objects = 0;
2410 2412 }
2411 2413 }
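The "Percent empty" figure printed above is the share of dnode slots below the highest used slot that no object occupies. A worked example with made-up counts:

	uint64_t total_slots_used = 1000, max_slot_used = 1024;
	double pct = (double)(max_slot_used - total_slots_used) * 100 /
	    (double)max_slot_used;	/* 24 * 100 / 1024 ~= 2.34 */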
2412 2414
2413 2415 static void
2414 2416 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
2415 2417 {
2416 2418 time_t timestamp = ub->ub_timestamp;
2417 2419
2418 2420 (void) printf("%s", header ? header : "");
2419 2421 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
2420 2422 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
2421 2423 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
2422 2424 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
2423 2425 (void) printf("\ttimestamp = %llu UTC = %s",
2424 2426 	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
2425 2427
2426 2428 (void) printf("\tmmp_magic = %016llx\n",
2427 2429 (u_longlong_t)ub->ub_mmp_magic);
2428 2430 if (ub->ub_mmp_magic == MMP_MAGIC)
2429 2431 (void) printf("\tmmp_delay = %0llu\n",
2430 2432 (u_longlong_t)ub->ub_mmp_delay);
2431 2433
2432 2434 if (dump_opt['u'] >= 3) {
2433 2435 char blkbuf[BP_SPRINTF_LEN];
2434 2436 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
2435 2437 (void) printf("\trootbp = %s\n", blkbuf);
2436 2438 }
2437 2439 (void) printf("\tcheckpoint_txg = %llu\n",
2438 2440 (u_longlong_t)ub->ub_checkpoint_txg);
2439 2441 (void) printf("%s", footer ? footer : "");
2440 2442 }
2441 2443
2442 2444 static void
2443 2445 dump_config(spa_t *spa)
2444 2446 {
2445 2447 dmu_buf_t *db;
2446 2448 size_t nvsize = 0;
2447 2449 int error = 0;
2448 2450
2449 2451
2450 2452 error = dmu_bonus_hold(spa->spa_meta_objset,
2451 2453 spa->spa_config_object, FTAG, &db);
2452 2454
2453 2455 if (error == 0) {
2454 2456 nvsize = *(uint64_t *)db->db_data;
2455 2457 dmu_buf_rele(db, FTAG);
2456 2458
2457 2459 (void) printf("\nMOS Configuration:\n");
2458 2460 dump_packed_nvlist(spa->spa_meta_objset,
2459 2461 spa->spa_config_object, (void *)&nvsize, 1);
2460 2462 } else {
2461 2463 		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d\n",
2462 2464 (u_longlong_t)spa->spa_config_object, error);
2463 2465 }
2464 2466 }
2465 2467
2466 2468 static void
2467 2469 dump_cachefile(const char *cachefile)
2468 2470 {
2469 2471 int fd;
2470 2472 struct stat64 statbuf;
2471 2473 char *buf;
2472 2474 nvlist_t *config;
2473 2475
2474 2476 if ((fd = open64(cachefile, O_RDONLY)) < 0) {
2475 2477 (void) printf("cannot open '%s': %s\n", cachefile,
2476 2478 strerror(errno));
2477 2479 exit(1);
2478 2480 }
2479 2481
2480 2482 if (fstat64(fd, &statbuf) != 0) {
2481 2483 (void) printf("failed to stat '%s': %s\n", cachefile,
2482 2484 strerror(errno));
2483 2485 exit(1);
2484 2486 }
2485 2487
2486 2488 if ((buf = malloc(statbuf.st_size)) == NULL) {
2487 2489 (void) fprintf(stderr, "failed to allocate %llu bytes\n",
2488 2490 (u_longlong_t)statbuf.st_size);
2489 2491 exit(1);
2490 2492 }
2491 2493
2492 2494 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
2493 2495 (void) fprintf(stderr, "failed to read %llu bytes\n",
2494 2496 (u_longlong_t)statbuf.st_size);
2495 2497 exit(1);
2496 2498 }
2497 2499
2498 2500 (void) close(fd);
2499 2501
2500 2502 if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
2501 2503 (void) fprintf(stderr, "failed to unpack nvlist\n");
2502 2504 exit(1);
2503 2505 }
2504 2506
2505 2507 free(buf);
2506 2508
2507 2509 dump_nvlist(config, 0);
2508 2510
2509 2511 nvlist_free(config);
2510 2512 }
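dump_cachefile() is self-contained (open, stat, read, nvlist_unpack, dump), so any file holding a packed nvlist can be fed to it. A usage sketch, assuming the conventional cachefile path:

	dump_cachefile("/etc/zfs/zpool.cache");

Note that on any failure it exits rather than returning an error to the caller.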
2511 2513
2512 2514 #define ZDB_MAX_UB_HEADER_SIZE 32
2513 2515
2514 2516 static void
2515 2517 dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
2516 2518 {
2517 2519 vdev_t vd;
2518 2520 vdev_t *vdp = &vd;
2519 2521 char header[ZDB_MAX_UB_HEADER_SIZE];
2520 2522
2521 2523 vd.vdev_ashift = ashift;
2522 2524 vdp->vdev_top = vdp;
2523 2525
2524 2526 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
2525 2527 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
2526 2528 uberblock_t *ub = (void *)((char *)lbl + uoff);
2527 2529
2528 2530 if (uberblock_verify(ub))
2529 2531 continue;
2530 2532
2531 2533 if ((dump_opt['u'] < 4) &&
2532 2534 (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay &&
2533 2535 (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL))
2534 2536 continue;
2535 2537
2536 2538 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
2537 2539 "Uberblock[%d]\n", i);
2538 2540 dump_uberblock(ub, header, "");
2539 2541 }
2540 2542 }
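VDEV_UBERBLOCK_COUNT() above divides the 128K uberblock ring in the label by the per-slot size, 1 << MAX(ashift, UBERBLOCK_SHIFT). A sketch of the arithmetic (not a quote of the macros):

	uint64_t slot = 1ULL << MAX(ashift, 10);	/* UBERBLOCK_SHIFT == 10 */
	uint64_t count = (128ULL << 10) / slot;
	/* ashift <= 10: 128 uberblocks; ashift == 12 (4K sectors): 32 */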
2541 2543
2542 2544 static char curpath[PATH_MAX];
2543 2545
2544 2546 /*
2545 2547  * Iterate through the path components, recursively passing the
2546 2548  * current component's object and the remaining path until we find
2547 2549  * the object for the last component.
2548 2550 */
2549 2551 static int
2550 2552 dump_path_impl(objset_t *os, uint64_t obj, char *name)
2551 2553 {
2552 2554 int err;
2553 2555 int header = 1;
2554 2556 uint64_t child_obj;
2555 2557 char *s;
2556 2558 dmu_buf_t *db;
2557 2559 dmu_object_info_t doi;
2558 2560
2559 2561 if ((s = strchr(name, '/')) != NULL)
2560 2562 *s = '\0';
2561 2563 err = zap_lookup(os, obj, name, 8, 1, &child_obj);
2562 2564
2563 2565 (void) strlcat(curpath, name, sizeof (curpath));
2564 2566
2565 2567 if (err != 0) {
2566 2568 (void) fprintf(stderr, "failed to lookup %s: %s\n",
2567 2569 curpath, strerror(err));
2568 2570 return (err);
2569 2571 }
2570 2572
2571 2573 child_obj = ZFS_DIRENT_OBJ(child_obj);
2572 2574 err = sa_buf_hold(os, child_obj, FTAG, &db);
2573 2575 if (err != 0) {
2574 2576 (void) fprintf(stderr,
2575 2577 "failed to get SA dbuf for obj %llu: %s\n",
2576 2578 (u_longlong_t)child_obj, strerror(err));
2577 2579 return (EINVAL);
2578 2580 }
2579 2581 dmu_object_info_from_db(db, &doi);
2580 2582 sa_buf_rele(db, FTAG);
2581 2583
2582 2584 if (doi.doi_bonus_type != DMU_OT_SA &&
2583 2585 doi.doi_bonus_type != DMU_OT_ZNODE) {
2584 2586 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n",
2585 2587 doi.doi_bonus_type, (u_longlong_t)child_obj);
2586 2588 return (EINVAL);
2587 2589 }
2588 2590
2589 2591 if (dump_opt['v'] > 6) {
2590 2592 (void) printf("obj=%llu %s type=%d bonustype=%d\n",
2591 2593 (u_longlong_t)child_obj, curpath, doi.doi_type,
2592 2594 doi.doi_bonus_type);
2593 2595 }
2594 2596
2595 2597 (void) strlcat(curpath, "/", sizeof (curpath));
2596 2598
2597 2599 switch (doi.doi_type) {
2598 2600 case DMU_OT_DIRECTORY_CONTENTS:
2599 2601 if (s != NULL && *(s + 1) != '\0')
2600 2602 return (dump_path_impl(os, child_obj, s + 1));
2601 2603 /*FALLTHROUGH*/
2602 2604 case DMU_OT_PLAIN_FILE_CONTENTS:
2603 2605 dump_object(os, child_obj, dump_opt['v'], &header, NULL);
2604 2606 return (0);
2605 2607 default:
2606 2608 (void) fprintf(stderr, "object %llu has non-file/directory "
2607 2609 "type %d\n", (u_longlong_t)obj, doi.doi_type);
2608 2610 break;
2609 2611 }
2610 2612
2611 2613 return (EINVAL);
2612 2614 }
2613 2615
2614 2616 /*
2615 2617 * Dump the blocks for the object specified by path inside the dataset.
2616 2618 */
2617 2619 static int
2618 2620 dump_path(char *ds, char *path)
2619 2621 {
2620 2622 int err;
2621 2623 objset_t *os;
2622 2624 uint64_t root_obj;
2623 2625
2624 2626 err = open_objset(ds, DMU_OST_ZFS, FTAG, &os);
2625 2627 if (err != 0)
2626 2628 return (err);
2627 2629
2628 2630 err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
2629 2631 if (err != 0) {
2630 2632 (void) fprintf(stderr, "can't lookup root znode: %s\n",
2631 2633 strerror(err));
2632 2634 		close_objset(os, FTAG);
2633 2635 return (EINVAL);
2634 2636 }
2635 2637
2636 2638 (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
2637 2639
2638 2640 err = dump_path_impl(os, root_obj, path);
2639 2641
2640 2642 close_objset(os, FTAG);
2641 2643 return (err);
2642 2644 }
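A usage sketch for dump_path(); the dataset and path here are hypothetical:

	if (dump_path("tank/fs", "home/user/file.txt") != 0)
		(void) fprintf(stderr, "dump_path failed\n");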
2643 2645
2644 2646 static int
2645 2647 dump_label(const char *dev)
2646 2648 {
2647 2649 int fd;
2648 2650 vdev_label_t label;
2649 2651 char path[MAXPATHLEN];
2650 2652 char *buf = label.vl_vdev_phys.vp_nvlist;
2651 2653 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
2652 2654 struct stat64 statbuf;
2653 2655 uint64_t psize, ashift;
2654 2656 boolean_t label_found = B_FALSE;
2655 2657
2656 2658 (void) strlcpy(path, dev, sizeof (path));
2657 2659 if (dev[0] == '/') {
2658 2660 if (strncmp(dev, ZFS_DISK_ROOTD,
2659 2661 strlen(ZFS_DISK_ROOTD)) == 0) {
2660 2662 (void) snprintf(path, sizeof (path), "%s%s",
2661 2663 ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD));
2662 2664 }
2663 2665 } else if (stat64(path, &statbuf) != 0) {
2664 2666 char *s;
2665 2667
2666 2668 (void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD,
2667 2669 dev);
2668 2670 if (((s = strrchr(dev, 's')) == NULL &&
2669 2671 (s = strchr(dev, 'p')) == NULL) ||
2670 2672 !isdigit(*(s + 1)))
2671 2673 (void) strlcat(path, "s0", sizeof (path));
2672 2674 }
2673 2675
2674 2676 if ((fd = open64(path, O_RDONLY)) < 0) {
2675 2677 (void) fprintf(stderr, "cannot open '%s': %s\n", path,
2676 2678 strerror(errno));
2677 2679 exit(1);
2678 2680 }
2679 2681
2680 2682 if (fstat64(fd, &statbuf) != 0) {
2681 2683 (void) fprintf(stderr, "failed to stat '%s': %s\n", path,
2682 2684 strerror(errno));
2683 2685 (void) close(fd);
2684 2686 exit(1);
2685 2687 }
2686 2688
2687 2689 if (S_ISBLK(statbuf.st_mode)) {
2688 2690 (void) fprintf(stderr,
2689 2691 "cannot use '%s': character device required\n", path);
2690 2692 (void) close(fd);
2691 2693 exit(1);
2692 2694 }
2693 2695
2694 2696 psize = statbuf.st_size;
2695 2697 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
2696 2698
2697 2699 for (int l = 0; l < VDEV_LABELS; l++) {
2698 2700 nvlist_t *config = NULL;
2699 2701
2700 2702 if (!dump_opt['q']) {
2701 2703 (void) printf("------------------------------------\n");
2702 2704 (void) printf("LABEL %d\n", l);
2703 2705 (void) printf("------------------------------------\n");
2704 2706 }
2705 2707
2706 2708 if (pread64(fd, &label, sizeof (label),
2707 2709 vdev_label_offset(psize, l, 0)) != sizeof (label)) {
2708 2710 if (!dump_opt['q'])
2709 2711 (void) printf("failed to read label %d\n", l);
2710 2712 continue;
2711 2713 }
2712 2714
2713 2715 if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
2714 2716 if (!dump_opt['q'])
2715 2717 (void) printf("failed to unpack label %d\n", l);
2716 2718 ashift = SPA_MINBLOCKSHIFT;
2717 2719 } else {
2718 2720 nvlist_t *vdev_tree = NULL;
2719 2721
2720 2722 if (!dump_opt['q'])
2721 2723 dump_nvlist(config, 4);
2722 2724 if ((nvlist_lookup_nvlist(config,
2723 2725 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
2724 2726 (nvlist_lookup_uint64(vdev_tree,
2725 2727 ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
2726 2728 ashift = SPA_MINBLOCKSHIFT;
2727 2729 nvlist_free(config);
2728 2730 label_found = B_TRUE;
2729 2731 }
2730 2732 if (dump_opt['u'])
2731 2733 dump_label_uberblocks(&label, ashift);
2732 2734 }
2733 2735
2734 2736 (void) close(fd);
2735 2737
2736 2738 return (label_found ? 0 : 2);
2737 2739 }
2738 2740
2739 2741 static uint64_t dataset_feature_count[SPA_FEATURES];
2740 2742 static uint64_t remap_deadlist_count = 0;
2741 2743
2742 2744 /*ARGSUSED*/
2743 2745 static int
2744 2746 dump_one_dir(const char *dsname, void *arg)
2745 2747 {
2746 2748 int error;
2747 2749 objset_t *os;
2748 2750
2749 2751 error = open_objset(dsname, DMU_OST_ANY, FTAG, &os);
2750 2752 if (error != 0)
2751 2753 return (0);
2752 2754
2753 2755 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
2754 2756 if (!dmu_objset_ds(os)->ds_feature_inuse[f])
2755 2757 continue;
2756 2758 ASSERT(spa_feature_table[f].fi_flags &
2757 2759 ZFEATURE_FLAG_PER_DATASET);
2758 2760 dataset_feature_count[f]++;
2759 2761 }
2760 2762
2761 2763 if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) {
2762 2764 remap_deadlist_count++;
2763 2765 }
2764 2766
2765 2767 dump_dir(os);
2766 2768 close_objset(os, FTAG);
2767 2769 fuid_table_destroy();
2768 2770 return (0);
2769 2771 }
2770 2772
2771 2773 /*
2772 2774 * Block statistics.
2773 2775 */
2774 2776 #define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
2775 2777 typedef struct zdb_blkstats {
2776 2778 uint64_t zb_asize;
2777 2779 uint64_t zb_lsize;
2778 2780 uint64_t zb_psize;
2779 2781 uint64_t zb_count;
2780 2782 uint64_t zb_gangs;
2781 2783 uint64_t zb_ditto_samevdev;
2782 2784 uint64_t zb_ditto_same_ms;
2783 2785 uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
2784 2786 } zdb_blkstats_t;
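PSIZE_HISTO_SIZE works out to one bucket per 512-byte multiple of psize plus two: 131072 / 512 + 2 = 258 buckets (indices 0 through 257). zdb_count_block() below clamps the bucket index, so anything larger than SPA_OLD_MAXBLOCKSIZE lands in the final bucket; for example:

	/* 128K block: 131072 >> 9 == 256, in range */
	/* 1M block: 2048 clamps to 256 + 1 == 257, the "other" bucket */
	unsigned idx = MIN(psize >> SPA_MINBLOCKSHIFT,
	    SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);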
2785 2787
2786 2788 /*
2787 2789 * Extended object types to report deferred frees and dedup auto-ditto blocks.
2788 2790 */
2789 2791 #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
2790 2792 #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1)
2791 2793 #define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2)
2792 2794 #define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3)
2793 2795
2794 2796 static const char *zdb_ot_extname[] = {
2795 2797 "deferred free",
2796 2798 "dedup ditto",
2797 2799 "other",
2798 2800 "Total",
2799 2801 };
2800 2802
2801 2803 #define ZB_TOTAL DN_MAX_LEVELS
2802 2804
2803 2805 typedef struct zdb_cb {
2804 2806 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
2805 2807 uint64_t zcb_removing_size;
2806 2808 uint64_t zcb_checkpoint_size;
2807 2809 uint64_t zcb_dedup_asize;
2808 2810 uint64_t zcb_dedup_blocks;
2809 2811 uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
2810 2812 uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
2811 2813 [BPE_PAYLOAD_SIZE];
2812 2814 uint64_t zcb_start;
2813 2815 hrtime_t zcb_lastprint;
2814 2816 uint64_t zcb_totalasize;
2815 2817 uint64_t zcb_errors[256];
2816 2818 int zcb_readfails;
2817 2819 int zcb_haderrors;
2818 2820 spa_t *zcb_spa;
2819 2821 uint32_t **zcb_vd_obsolete_counts;
2820 2822 } zdb_cb_t;
2821 2823
2822 2824 /* test if two DVA offsets from same vdev are within the same metaslab */
2823 2825 static boolean_t
2824 2826 same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2)
2825 2827 {
2826 2828 vdev_t *vd = vdev_lookup_top(spa, vdev);
2827 2829 uint64_t ms_shift = vd->vdev_ms_shift;
2828 2830
2829 2831 return ((off1 >> ms_shift) == (off2 >> ms_shift));
2830 2832 }
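Since vdev_ms_shift is log2 of the metaslab size, the comparison reduces to checking that both offsets fall in the same power-of-two window. A worked example with a hypothetical 512MB metaslab (ms_shift == 29):

	ASSERT((0x10000000ULL >> 29) == (0x1fffff00ULL >> 29));	/* both 0 */
	ASSERT((0x20000000ULL >> 29) != (0x1fffff00ULL >> 29));	/* 1 vs 0 */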
2831 2833
2832 2834 static void
2833 2835 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
2834 2836 dmu_object_type_t type)
2835 2837 {
2836 2838 uint64_t refcnt = 0;
2837 2839
2838 2840 ASSERT(type < ZDB_OT_TOTAL);
2839 2841
2840 2842 if (zilog && zil_bp_tree_add(zilog, bp) != 0)
2841 2843 return;
2842 2844
2843 2845 spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
2844 2846
2845 2847 for (int i = 0; i < 4; i++) {
2846 2848 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
2847 2849 int t = (i & 1) ? type : ZDB_OT_TOTAL;
2848 2850 int equal;
2849 2851 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
2850 2852
2851 2853 zb->zb_asize += BP_GET_ASIZE(bp);
2852 2854 zb->zb_lsize += BP_GET_LSIZE(bp);
2853 2855 zb->zb_psize += BP_GET_PSIZE(bp);
2854 2856 zb->zb_count++;
2855 2857
2856 2858 /*
2857 2859 * The histogram is only big enough to record blocks up to
2858 2860 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
2859 2861 * "other", bucket.
2860 2862 */
2861 2863 unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
2862 2864 idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
2863 2865 zb->zb_psize_histogram[idx]++;
2864 2866
2865 2867 zb->zb_gangs += BP_COUNT_GANG(bp);
2866 2868
2867 2869 switch (BP_GET_NDVAS(bp)) {
2868 2870 case 2:
2869 2871 if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2870 2872 DVA_GET_VDEV(&bp->blk_dva[1])) {
2871 2873 zb->zb_ditto_samevdev++;
2872 2874
2873 2875 if (same_metaslab(zcb->zcb_spa,
2874 2876 DVA_GET_VDEV(&bp->blk_dva[0]),
2875 2877 DVA_GET_OFFSET(&bp->blk_dva[0]),
2876 2878 DVA_GET_OFFSET(&bp->blk_dva[1])))
2877 2879 zb->zb_ditto_same_ms++;
2878 2880 }
2879 2881 break;
2880 2882 case 3:
2881 2883 equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2882 2884 DVA_GET_VDEV(&bp->blk_dva[1])) +
2883 2885 (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2884 2886 DVA_GET_VDEV(&bp->blk_dva[2])) +
2885 2887 (DVA_GET_VDEV(&bp->blk_dva[1]) ==
2886 2888 DVA_GET_VDEV(&bp->blk_dva[2]));
2887 2889 if (equal != 0) {
2888 2890 zb->zb_ditto_samevdev++;
2889 2891
2890 2892 if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2891 2893 DVA_GET_VDEV(&bp->blk_dva[1]) &&
2892 2894 same_metaslab(zcb->zcb_spa,
2893 2895 DVA_GET_VDEV(&bp->blk_dva[0]),
2894 2896 DVA_GET_OFFSET(&bp->blk_dva[0]),
2895 2897 DVA_GET_OFFSET(&bp->blk_dva[1])))
2896 2898 zb->zb_ditto_same_ms++;
2897 2899 else if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2898 2900 DVA_GET_VDEV(&bp->blk_dva[2]) &&
2899 2901 same_metaslab(zcb->zcb_spa,
2900 2902 DVA_GET_VDEV(&bp->blk_dva[0]),
2901 2903 DVA_GET_OFFSET(&bp->blk_dva[0]),
2902 2904 DVA_GET_OFFSET(&bp->blk_dva[2])))
2903 2905 zb->zb_ditto_same_ms++;
2904 2906 else if (DVA_GET_VDEV(&bp->blk_dva[1]) ==
2905 2907 DVA_GET_VDEV(&bp->blk_dva[2]) &&
2906 2908 same_metaslab(zcb->zcb_spa,
2907 2909 DVA_GET_VDEV(&bp->blk_dva[1]),
2908 2910 DVA_GET_OFFSET(&bp->blk_dva[1]),
2909 2911 DVA_GET_OFFSET(&bp->blk_dva[2])))
2910 2912 zb->zb_ditto_same_ms++;
2911 2913 }
2912 2914 break;
2913 2915 }
2914 2916 }
2915 2917
2916 2918 spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG);
2917 2919
2918 2920 if (BP_IS_EMBEDDED(bp)) {
2919 2921 zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
2920 2922 zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
2921 2923 [BPE_GET_PSIZE(bp)]++;
2922 2924 return;
2923 2925 }
2924 2926
2925 2927 if (dump_opt['L'])
2926 2928 return;
2927 2929
2928 2930 if (BP_GET_DEDUP(bp)) {
2929 2931 ddt_t *ddt;
2930 2932 ddt_entry_t *dde;
2931 2933
2932 2934 ddt = ddt_select(zcb->zcb_spa, bp);
2933 2935 ddt_enter(ddt);
2934 2936 dde = ddt_lookup(ddt, bp, B_FALSE);
2935 2937
2936 2938 if (dde == NULL) {
2937 2939 refcnt = 0;
2938 2940 } else {
2939 2941 ddt_phys_t *ddp = ddt_phys_select(dde, bp);
2940 2942 ddt_phys_decref(ddp);
2941 2943 refcnt = ddp->ddp_refcnt;
2942 2944 if (ddt_phys_total_refcnt(dde) == 0)
2943 2945 ddt_remove(ddt, dde);
2944 2946 }
2945 2947 ddt_exit(ddt);
2946 2948 }
2947 2949
2948 2950 VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
2949 2951 refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa),
2950 2952 bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
2951 2953 }
2952 2954
2953 2955 static void
2954 2956 zdb_blkptr_done(zio_t *zio)
2955 2957 {
2956 2958 spa_t *spa = zio->io_spa;
2957 2959 blkptr_t *bp = zio->io_bp;
2958 2960 int ioerr = zio->io_error;
2959 2961 zdb_cb_t *zcb = zio->io_private;
2960 2962 zbookmark_phys_t *zb = &zio->io_bookmark;
2961 2963
2962 2964 abd_free(zio->io_abd);
2963 2965
2964 2966 mutex_enter(&spa->spa_scrub_lock);
2965 2967 spa->spa_scrub_inflight--;
2966 2968 cv_broadcast(&spa->spa_scrub_io_cv);
2967 2969
2968 2970 if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
2969 2971 char blkbuf[BP_SPRINTF_LEN];
2970 2972
2971 2973 zcb->zcb_haderrors = 1;
2972 2974 zcb->zcb_errors[ioerr]++;
2973 2975
2974 2976 if (dump_opt['b'] >= 2)
2975 2977 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2976 2978 else
2977 2979 blkbuf[0] = '\0';
2978 2980
2979 2981 (void) printf("zdb_blkptr_cb: "
2980 2982 "Got error %d reading "
2981 2983 "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2982 2984 ioerr,
2983 2985 (u_longlong_t)zb->zb_objset,
2984 2986 (u_longlong_t)zb->zb_object,
2985 2987 (u_longlong_t)zb->zb_level,
2986 2988 (u_longlong_t)zb->zb_blkid,
2987 2989 blkbuf);
2988 2990 }
2989 2991 mutex_exit(&spa->spa_scrub_lock);
2990 2992 }
2991 2993
2992 2994 static int
2993 2995 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2994 2996 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
2995 2997 {
2996 2998 zdb_cb_t *zcb = arg;
2997 2999 dmu_object_type_t type;
2998 3000 boolean_t is_metadata;
2999 3001
3000 3002 if (bp == NULL)
3001 3003 return (0);
3002 3004
3003 3005 if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
3004 3006 char blkbuf[BP_SPRINTF_LEN];
3005 3007 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3006 3008 (void) printf("objset %llu object %llu "
3007 3009 "level %lld offset 0x%llx %s\n",
3008 3010 (u_longlong_t)zb->zb_objset,
3009 3011 (u_longlong_t)zb->zb_object,
3010 3012 (longlong_t)zb->zb_level,
3011 3013 (u_longlong_t)blkid2offset(dnp, bp, zb),
3012 3014 blkbuf);
3013 3015 }
3014 3016
3015 3017 if (BP_IS_HOLE(bp))
3016 3018 return (0);
3017 3019
3018 3020 type = BP_GET_TYPE(bp);
3019 3021
3020 3022 zdb_count_block(zcb, zilog, bp,
3021 3023 (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
3022 3024
3023 3025 is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
3024 3026
3025 3027 if (!BP_IS_EMBEDDED(bp) &&
3026 3028 (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
3027 3029 size_t size = BP_GET_PSIZE(bp);
3028 3030 abd_t *abd = abd_alloc(size, B_FALSE);
3029 3031 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
3030 3032
3031 3033 /* If it's an intent log block, failure is expected. */
3032 3034 if (zb->zb_level == ZB_ZIL_LEVEL)
3033 3035 flags |= ZIO_FLAG_SPECULATIVE;
3034 3036
3035 3037 mutex_enter(&spa->spa_scrub_lock);
3036 3038 while (spa->spa_scrub_inflight > max_inflight)
3037 3039 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
3038 3040 spa->spa_scrub_inflight++;
3039 3041 mutex_exit(&spa->spa_scrub_lock);
3040 3042
3041 3043 zio_nowait(zio_read(NULL, spa, bp, abd, size,
3042 3044 zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
3043 3045 }
3044 3046
3045 3047 zcb->zcb_readfails = 0;
3046 3048
3047 3049 /* only call gethrtime() every 100 blocks */
3048 3050 static int iters;
3049 3051 if (++iters > 100)
3050 3052 iters = 0;
3051 3053 else
3052 3054 return (0);
3053 3055
3054 3056 if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
3055 3057 uint64_t now = gethrtime();
3056 3058 char buf[10];
3057 3059 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
3058 3060 int kb_per_sec =
3059 3061 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
3060 3062 int sec_remaining =
3061 3063 (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
3062 3064
3063 3065 /* make sure nicenum has enough space */
3064 3066 CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ);
3065 3067
3066 3068 zfs_nicenum(bytes, buf, sizeof (buf));
3067 3069 (void) fprintf(stderr,
3068 3070 "\r%5s completed (%4dMB/s) "
3069 3071 "estimated time remaining: %uhr %02umin %02usec ",
3070 3072 buf, kb_per_sec / 1024,
3071 3073 sec_remaining / 60 / 60,
3072 3074 sec_remaining / 60 % 60,
3073 3075 sec_remaining % 60);
3074 3076
3075 3077 zcb->zcb_lastprint = now;
3076 3078 }
3077 3079
3078 3080 return (0);
3079 3081 }
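The progress line above is plain integer arithmetic; with illustrative numbers, 10 GiB counted in 100 seconds of a 100 GiB total gives:

	int kb_per_sec = 1 + 10737418240ULL / (1 + 100000);	/* 107374 */
	int sec_remaining = 96636764160ULL / 1024 / kb_per_sec;	/* 878 */

which prints as roughly 104MB/s with 0hr 14min 38sec remaining.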
3080 3082
3081 3083 static void
3082 3084 zdb_leak(void *arg, uint64_t start, uint64_t size)
3083 3085 {
3084 3086 vdev_t *vd = arg;
3085 3087
3086 3088 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
3087 3089 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
3088 3090 }
3089 3091
3090 3092 static metaslab_ops_t zdb_metaslab_ops = {
3091 3093 NULL /* alloc */
3092 3094 };
3093 3095
3094 3096 static void
3095 3097 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
3096 3098 {
3097 3099 ddt_bookmark_t ddb;
3098 3100 ddt_entry_t dde;
3099 3101 int error;
3100 3102
3103 + ASSERT(!dump_opt['L']);
3104 +
3101 3105 bzero(&ddb, sizeof (ddb));
3102 3106 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
3103 3107 blkptr_t blk;
3104 3108 ddt_phys_t *ddp = dde.dde_phys;
3105 3109
3106 3110 if (ddb.ddb_class == DDT_CLASS_UNIQUE)
3107 3111 return;
3108 3112
3109 3113 ASSERT(ddt_phys_total_refcnt(&dde) > 1);
3110 3114
3111 3115 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
3112 3116 if (ddp->ddp_phys_birth == 0)
3113 3117 continue;
3114 3118 ddt_bp_create(ddb.ddb_checksum,
3115 3119 &dde.dde_key, ddp, &blk);
3116 3120 if (p == DDT_PHYS_DITTO) {
3117 3121 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
3118 3122 } else {
3119 3123 zcb->zcb_dedup_asize +=
3120 3124 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
3121 3125 zcb->zcb_dedup_blocks++;
3122 3126 }
3123 3127 }
3124 - if (!dump_opt['L']) {
3125 - ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
3126 - ddt_enter(ddt);
3127 - VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
3128 - ddt_exit(ddt);
3129 - }
3128 + ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
3129 + ddt_enter(ddt);
3130 + VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
3131 + ddt_exit(ddt);
3130 3132 }
3131 3133
3132 3134 ASSERT(error == ENOENT);
3133 3135 }
3134 3136
3135 3137 /* ARGSUSED */
3136 3138 static void
3137 3139 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
3138 3140 uint64_t size, void *arg)
3139 3141 {
3140 3142 /*
3141 3143 * This callback was called through a remap from
3142 3144 * a device being removed. Therefore, the vdev that
3143 3145 * this callback is applied to is a concrete
3144 3146 * vdev.
3145 3147 */
3146 3148 ASSERT(vdev_is_concrete(vd));
3147 3149
3148 3150 VERIFY0(metaslab_claim_impl(vd, offset, size,
3149 3151 spa_min_claim_txg(vd->vdev_spa)));
3150 3152 }
3151 3153
3152 3154 static void
3153 3155 claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
3154 3156 {
3155 3157 vdev_t *vd = arg;
3156 3158
3157 3159 vdev_indirect_ops.vdev_op_remap(vd, offset, size,
3158 3160 claim_segment_impl_cb, NULL);
3159 3161 }
3160 3162
3160 3162
3161 3163 /*
3162 3164 * After accounting for all allocated blocks that are directly referenced,
3163 3165 * we might have missed a reference to a block from a partially complete
3164 3166 * (and thus unused) indirect mapping object. We perform a secondary pass
3165 3167 * through the metaslabs we have already mapped and claim the destination
3166 3168 * blocks.
3167 3169 */
3168 3170 static void
3169 3171 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
3170 3172 {
3173 + if (dump_opt['L'])
3174 + return;
3175 +
3171 3176 if (spa->spa_vdev_removal == NULL)
3172 3177 return;
3173 3178
3174 3179 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3175 3180
3176 3181 spa_vdev_removal_t *svr = spa->spa_vdev_removal;
3177 3182 vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
3178 3183 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3179 3184
3180 3185 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
3181 3186 metaslab_t *msp = vd->vdev_ms[msi];
3182 3187
3183 3188 if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim))
3184 3189 break;
3185 3190
3186 3191 ASSERT0(range_tree_space(svr->svr_allocd_segs));
3187 3192
3188 3193 if (msp->ms_sm != NULL) {
3189 3194 VERIFY0(space_map_load(msp->ms_sm,
3190 3195 svr->svr_allocd_segs, SM_ALLOC));
3191 3196
3192 3197 /*
3193 3198 * Clear everything past what has been synced unless
3194 3199 * it's past the spacemap, because we have not allocated
3195 3200 * mappings for it yet.
3196 3201 */
3197 3202 uint64_t vim_max_offset =
3198 3203 vdev_indirect_mapping_max_offset(vim);
3199 3204 uint64_t sm_end = msp->ms_sm->sm_start +
3200 3205 msp->ms_sm->sm_size;
3201 3206 if (sm_end > vim_max_offset)
3202 3207 range_tree_clear(svr->svr_allocd_segs,
3203 3208 vim_max_offset, sm_end - vim_max_offset);
3204 3209 }
3205 3210
3206 3211 zcb->zcb_removing_size +=
3207 3212 range_tree_space(svr->svr_allocd_segs);
3208 3213 range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);
3209 3214 }
3210 3215
3211 3216 spa_config_exit(spa, SCL_CONFIG, FTAG);
3212 3217 }
3213 3218
3214 3219 /* ARGSUSED */
3215 3220 static int
3216 3221 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
3217 3222 {
3218 3223 zdb_cb_t *zcb = arg;
3219 3224 spa_t *spa = zcb->zcb_spa;
3220 3225 vdev_t *vd;
3221 3226 const dva_t *dva = &bp->blk_dva[0];
3222 3227
3223 3228 ASSERT(!dump_opt['L']);
3224 3229 ASSERT3U(BP_GET_NDVAS(bp), ==, 1);
3225 3230
3226 3231 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
3227 3232 vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva));
3228 3233 ASSERT3P(vd, !=, NULL);
3229 3234 spa_config_exit(spa, SCL_VDEV, FTAG);
3230 3235
3231 3236 ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0);
3232 3237 ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL);
3233 3238
3234 3239 vdev_indirect_mapping_increment_obsolete_count(
3235 3240 vd->vdev_indirect_mapping,
3236 3241 DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva),
3237 3242 zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
3238 3243
3239 3244 return (0);
3240 3245 }
3241 3246
3242 3247 static uint32_t *
3243 3248 zdb_load_obsolete_counts(vdev_t *vd)
3244 3249 {
3245 3250 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3246 3251 spa_t *spa = vd->vdev_spa;
3247 3252 spa_condensing_indirect_phys_t *scip =
3248 3253 &spa->spa_condensing_indirect_phys;
3249 3254 uint32_t *counts;
3250 3255
3251 3256 EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL);
3252 3257 counts = vdev_indirect_mapping_load_obsolete_counts(vim);
3253 3258 if (vd->vdev_obsolete_sm != NULL) {
3254 3259 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3255 3260 vd->vdev_obsolete_sm);
3256 3261 }
3257 3262 if (scip->scip_vdev == vd->vdev_id &&
3258 3263 scip->scip_prev_obsolete_sm_object != 0) {
3259 3264 space_map_t *prev_obsolete_sm = NULL;
3260 3265 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
3261 3266 scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
3262 - space_map_update(prev_obsolete_sm);
3263 3267 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
3264 3268 prev_obsolete_sm);
3265 3269 space_map_close(prev_obsolete_sm);
3266 3270 }
3267 3271 return (counts);
3268 3272 }
3269 3273
3270 3274 typedef struct checkpoint_sm_exclude_entry_arg {
3271 3275 vdev_t *cseea_vd;
3272 3276 uint64_t cseea_checkpoint_size;
3273 3277 } checkpoint_sm_exclude_entry_arg_t;
3274 3278
3275 3279 static int
3276 3280 checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
3277 3281 {
3278 3282 checkpoint_sm_exclude_entry_arg_t *cseea = arg;
3279 3283 vdev_t *vd = cseea->cseea_vd;
3280 3284 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
3281 3285 uint64_t end = sme->sme_offset + sme->sme_run;
3282 3286
3283 3287 ASSERT(sme->sme_type == SM_FREE);
3284 3288
3285 3289 /*
3286 3290 	 * Since the vdev_checkpoint_sm exists at the vdev level
3287 3291 	 * and the ms_sm space maps exist at the metaslab level,
3288 3292 	 * an entry in the checkpoint space map could theoretically
3289 3293 	 * cross the boundaries of the metaslab that it belongs to.
3290 3294 *
3291 3295 * In reality, because of the way that we populate and
3292 3296 * manipulate the checkpoint's space maps currently,
3293 3297 * there shouldn't be any entries that cross metaslabs.
3294 3298 * Hence the assertion below.
3295 3299 *
3296 3300 * That said, there is no fundamental requirement that
3297 3301 * the checkpoint's space map entries should not cross
3298 3302 * metaslab boundaries. So if needed we could add code
3299 3303 * that handles metaslab-crossing segments in the future.
3300 3304 */
3301 3305 VERIFY3U(sme->sme_offset, >=, ms->ms_start);
3302 3306 VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
3303 3307
3304 3308 /*
3305 3309 * By removing the entry from the allocated segments we
3306 3310 * also verify that the entry is there to begin with.
3307 3311 */
3308 3312 mutex_enter(&ms->ms_lock);
3309 3313 range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
3310 3314 mutex_exit(&ms->ms_lock);
3311 3315
3312 3316 cseea->cseea_checkpoint_size += sme->sme_run;
3313 3317 return (0);
3314 3318 }
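/*
 * Worked example of the metaslab lookup above, assuming a
 * hypothetical vdev_ms_shift of 29 (512M metaslabs): an entry with
 * sme_offset 0x60000000 and sme_run 0x2000 selects
 * vdev_ms[0x60000000 >> 29] == vdev_ms[3], and the VERIFYs then
 * check that [0x60000000, 0x60002000) lies entirely inside that
 * metaslab.
 */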
3315 3319
3316 3320 static void
3317 3321 zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
3318 3322 {
3319 3323 spa_t *spa = vd->vdev_spa;
3320 3324 space_map_t *checkpoint_sm = NULL;
3321 3325 uint64_t checkpoint_sm_obj;
3322 3326
3323 3327 /*
3324 3328 * If there is no vdev_top_zap, we are in a pool whose
3325 3329 * version predates the pool checkpoint feature.
3326 3330 */
3327 3331 if (vd->vdev_top_zap == 0)
3328 3332 return;
3329 3333
3330 3334 /*
3331 3335 * If there is no reference of the vdev_checkpoint_sm in
3332 3336 * the vdev_top_zap, then one of the following scenarios
3333 3337 * is true:
3334 3338 *
3335 3339 * 1] There is no checkpoint
3336 3340 * 2] There is a checkpoint, but no checkpointed blocks
3337 3341 * have been freed yet
3338 3342 * 3] The current vdev is indirect
3339 3343 *
3340 3344 * In these cases we return immediately.
3341 3345 */
3342 3346 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
3343 3347 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
3344 3348 return;
3345 3349
3346 3350 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
3347 3351 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1,
3348 3352 &checkpoint_sm_obj));
3349 3353
3350 3354 checkpoint_sm_exclude_entry_arg_t cseea;
3351 3355 cseea.cseea_vd = vd;
3352 3356 cseea.cseea_checkpoint_size = 0;
3353 3357
3354 3358 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
3355 3359 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
3356 - space_map_update(checkpoint_sm);
3357 3360
3358 3361 VERIFY0(space_map_iterate(checkpoint_sm,
3362 + space_map_length(checkpoint_sm),
3359 3363 checkpoint_sm_exclude_entry_cb, &cseea));
3360 3364 space_map_close(checkpoint_sm);
3361 3365
3362 3366 zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size;
3363 3367 }
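/*
 * The zap_contains()/zap_lookup() pair above is the usual two-step
 * probe: zap_contains() cheaply tests whether the
 * VDEV_TOP_ZAP_POOL_CHECKPOINT_SM key exists, and only then does
 * zap_lookup() fetch the object number that space_map_open() opens.
 */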
3364 3368
3365 3369 static void
3366 3370 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
3367 3371 {
3372 + ASSERT(!dump_opt['L']);
3373 +
3368 3374 vdev_t *rvd = spa->spa_root_vdev;
3369 3375 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
3370 3376 ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
3371 3377 zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb);
3372 3378 }
3373 3379 }
3374 3380
3375 3381 static void
3376 3382 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype)
3377 3383 {
3378 3384 vdev_t *rvd = spa->spa_root_vdev;
3379 3385 for (uint64_t i = 0; i < rvd->vdev_children; i++) {
3380 3386 vdev_t *vd = rvd->vdev_child[i];
3381 3387
3382 3388 ASSERT3U(i, ==, vd->vdev_id);
3383 3389
3384 3390 if (vd->vdev_ops == &vdev_indirect_ops)
3385 3391 continue;
3386 3392
3387 3393 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3388 3394 metaslab_t *msp = vd->vdev_ms[m];
3389 3395
3390 3396 (void) fprintf(stderr,
3391 3397 "\rloading concrete vdev %llu, "
3392 3398 "metaslab %llu of %llu ...",
3393 3399 (longlong_t)vd->vdev_id,
3394 3400 (longlong_t)msp->ms_id,
3395 3401 (longlong_t)vd->vdev_ms_count);
3396 3402
3397 3403 mutex_enter(&msp->ms_lock);
3398 3404 metaslab_unload(msp);
3399 3405
3400 3406 /*
3401 3407 * We don't want to spend the CPU manipulating the
3402 3408 * size-ordered tree, so clear the range_tree ops.
3403 3409 */
3404 3410 msp->ms_allocatable->rt_ops = NULL;
3405 3411
3406 3412 if (msp->ms_sm != NULL) {
3407 3413 VERIFY0(space_map_load(msp->ms_sm,
3408 3414 msp->ms_allocatable, maptype));
3409 3415 }
3410 3416 if (!msp->ms_loaded)
3411 3417 msp->ms_loaded = B_TRUE;
3412 3418 mutex_exit(&msp->ms_lock);
3413 3419 }
3414 3420 }
3415 3421 }
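/*
 * The maptype argument flips the meaning of ms_allocatable: loading
 * with SM_ALLOC (as zdb_leak_init() does) fills the trees with
 * allocated segments for leak detection, while SM_FREE (as the
 * checkpoint verification code below does) preserves the normal
 * free-segment meaning.
 */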
3416 3422
3417 3423 /*
3418 3424  * vim_idxp is an in-out parameter which (for indirect vdevs) is the
3419 3425 * index in vim_entries that has the first entry in this metaslab.
3420 3426 * On return, it will be set to the first entry after this metaslab.
3421 3427 */
3422 3428 static void
3423 3429 load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp,
3424 3430 uint64_t *vim_idxp)
3425 3431 {
3426 3432 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3427 3433
3428 3434 mutex_enter(&msp->ms_lock);
3429 3435 metaslab_unload(msp);
3430 3436
3431 3437 /*
3432 3438 * We don't want to spend the CPU manipulating the
3433 3439 * size-ordered tree, so clear the range_tree ops.
3434 3440 */
3435 3441 msp->ms_allocatable->rt_ops = NULL;
3436 3442
3437 3443 for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim);
3438 3444 (*vim_idxp)++) {
3439 3445 vdev_indirect_mapping_entry_phys_t *vimep =
3440 3446 &vim->vim_entries[*vim_idxp];
3441 3447 uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
3442 3448 uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst);
3443 3449 ASSERT3U(ent_offset, >=, msp->ms_start);
3444 3450 if (ent_offset >= msp->ms_start + msp->ms_size)
3445 3451 break;
3446 3452
3447 3453 /*
3448 3454 * Mappings do not cross metaslab boundaries,
3449 3455 * because we create them by walking the metaslabs.
3450 3456 */
3451 3457 ASSERT3U(ent_offset + ent_len, <=,
3452 3458 msp->ms_start + msp->ms_size);
3453 3459 range_tree_add(msp->ms_allocatable, ent_offset, ent_len);
3454 3460 }
3455 3461
3456 3462 if (!msp->ms_loaded)
3457 3463 msp->ms_loaded = B_TRUE;
3458 3464 mutex_exit(&msp->ms_lock);
3459 3465 }
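/*
 * Sketch of the intended caller pattern for the in-out index,
 * mirroring the loop in zdb_leak_init_prepare_indirect_vdevs()
 * below:
 *
 *   uint64_t vim_idx = 0;
 *   for (uint64_t m = 0; m < vd->vdev_ms_count; m++)
 *       load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m],
 *           &vim_idx);
 *   ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim));
 */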
3460 3466
3461 3467 static void
3462 3468 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
3463 3469 {
3470 + ASSERT(!dump_opt['L']);
3471 +
3464 3472 vdev_t *rvd = spa->spa_root_vdev;
3465 3473 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
3466 3474 vdev_t *vd = rvd->vdev_child[c];
3467 3475
3468 3476 ASSERT3U(c, ==, vd->vdev_id);
3469 3477
3470 3478 if (vd->vdev_ops != &vdev_indirect_ops)
3471 3479 continue;
3472 3480
3473 3481 /*
3474 3482 * Note: we don't check for mapping leaks on
3475 3483 * removing vdevs because their ms_allocatable's
3476 3484 * are used to look for leaks in allocated space.
3477 3485 */
3478 3486 zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd);
3479 3487
3480 3488 /*
3481 3489 * Normally, indirect vdevs don't have any
3482 3490 * metaslabs. We want to set them up for
3483 3491 * zio_claim().
3484 3492 */
3485 3493 VERIFY0(vdev_metaslab_init(vd, 0));
3486 3494
3487 3495 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3488 3496 uint64_t vim_idx = 0;
3489 3497 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3490 3498
3491 3499 (void) fprintf(stderr,
3492 3500 "\rloading indirect vdev %llu, "
3493 3501 "metaslab %llu of %llu ...",
3494 3502 (longlong_t)vd->vdev_id,
3495 3503 (longlong_t)vd->vdev_ms[m]->ms_id,
3496 3504 (longlong_t)vd->vdev_ms_count);
3497 3505
3498 3506 load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m],
3499 3507 &vim_idx);
3500 3508 }
3501 3509 ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim));
3502 3510 }
3503 3511 }
3504 3512
3505 3513 static void
3506 3514 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
3507 3515 {
3508 3516 zcb->zcb_spa = spa;
3509 3517
3510 - if (!dump_opt['L']) {
3511 - dsl_pool_t *dp = spa->spa_dsl_pool;
3512 - vdev_t *rvd = spa->spa_root_vdev;
3518 + if (dump_opt['L'])
3519 + return;
3513 3520
3514 - /*
3515 - * We are going to be changing the meaning of the metaslab's
3516 - * ms_allocatable. Ensure that the allocator doesn't try to
3517 - * use the tree.
3518 - */
3519 - spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
3520 - spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
3521 + dsl_pool_t *dp = spa->spa_dsl_pool;
3522 + vdev_t *rvd = spa->spa_root_vdev;
3521 3523
3522 - zcb->zcb_vd_obsolete_counts =
3523 - umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
3524 - UMEM_NOFAIL);
3524 + /*
3525 + * We are going to be changing the meaning of the metaslab's
3526 + * ms_allocatable. Ensure that the allocator doesn't try to
3527 + * use the tree.
3528 + */
3529 + spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
3530 + spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
3525 3531
3526 - /*
3527 - * For leak detection, we overload the ms_allocatable trees
3528 - * to contain allocated segments instead of free segments.
3529 - * As a result, we can't use the normal metaslab_load/unload
3530 - * interfaces.
3531 - */
3532 - zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
3533 - load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
3532 + zcb->zcb_vd_obsolete_counts =
3533 + umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
3534 + UMEM_NOFAIL);
3534 3535
3535 - /*
3536 - * On load_concrete_ms_allocatable_trees() we loaded all the
3537 - * allocated entries from the ms_sm to the ms_allocatable for
3538 - * each metaslab. If the pool has a checkpoint or is in the
3539 - * middle of discarding a checkpoint, some of these blocks
3540 - * may have been freed but their ms_sm may not have been
3541 - * updated because they are referenced by the checkpoint. In
3542 - * order to avoid false-positives during leak-detection, we
3543 - * go through the vdev's checkpoint space map and exclude all
3544 - * its entries from their relevant ms_allocatable.
3545 - *
3546 - * We also aggregate the space held by the checkpoint and add
3547 - * it to zcb_checkpoint_size.
3548 - *
3549 - * Note that at this point we are also verifying that all the
3550 - * entries on the checkpoint_sm are marked as allocated in
3551 - * the ms_sm of their relevant metaslab.
3552 - * [see comment in checkpoint_sm_exclude_entry_cb()]
3553 - */
3554 - zdb_leak_init_exclude_checkpoint(spa, zcb);
3536 + /*
3537 + * For leak detection, we overload the ms_allocatable trees
3538 + * to contain allocated segments instead of free segments.
3539 + * As a result, we can't use the normal metaslab_load/unload
3540 + * interfaces.
3541 + */
3542 + zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
3543 + load_concrete_ms_allocatable_trees(spa, SM_ALLOC);
3555 3544
3556 - /* for cleaner progress output */
3557 - (void) fprintf(stderr, "\n");
3545 + /*
3546 + * On load_concrete_ms_allocatable_trees() we loaded all the
3547 + * allocated entries from the ms_sm to the ms_allocatable for
3548 + * each metaslab. If the pool has a checkpoint or is in the
3549 + * middle of discarding a checkpoint, some of these blocks
3550 + * may have been freed but their ms_sm may not have been
3551 + * updated because they are referenced by the checkpoint. In
3552 + * order to avoid false-positives during leak-detection, we
3553 + * go through the vdev's checkpoint space map and exclude all
3554 + * its entries from their relevant ms_allocatable.
3555 + *
3556 + * We also aggregate the space held by the checkpoint and add
3557 + * it to zcb_checkpoint_size.
3558 + *
3559 + * Note that at this point we are also verifying that all the
3560 + * entries on the checkpoint_sm are marked as allocated in
3561 + * the ms_sm of their relevant metaslab.
3562 + * [see comment in checkpoint_sm_exclude_entry_cb()]
3563 + */
3564 + zdb_leak_init_exclude_checkpoint(spa, zcb);
3565 + ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));
3558 3566
3559 - if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
3560 - ASSERT(spa_feature_is_enabled(spa,
3561 - SPA_FEATURE_DEVICE_REMOVAL));
3562 - (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
3563 - increment_indirect_mapping_cb, zcb, NULL);
3564 - }
3565 - } else {
3566 - /*
3567 - * If leak tracing is disabled, we still need to consider
3568 - * any checkpointed space in our space verification.
3569 - */
3570 - zcb->zcb_checkpoint_size += spa_get_checkpoint_space(spa);
3567 + /* for cleaner progress output */
3568 + (void) fprintf(stderr, "\n");
3569 +
3570 + if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
3571 + ASSERT(spa_feature_is_enabled(spa,
3572 + SPA_FEATURE_DEVICE_REMOVAL));
3573 + (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
3574 + increment_indirect_mapping_cb, zcb, NULL);
3571 3575 }
3572 3576
3573 3577 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3574 3578 zdb_ddt_leak_init(spa, zcb);
3575 3579 spa_config_exit(spa, SCL_CONFIG, FTAG);
3576 3580 }
3577 3581
3578 3582 static boolean_t
3579 3583 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
3580 3584 {
3581 3585 boolean_t leaks = B_FALSE;
3582 3586 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3583 3587 uint64_t total_leaked = 0;
3584 3588
3585 3589 ASSERT(vim != NULL);
3586 3590
3587 3591 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
3588 3592 vdev_indirect_mapping_entry_phys_t *vimep =
3589 3593 &vim->vim_entries[i];
3590 3594 uint64_t obsolete_bytes = 0;
3591 3595 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
3592 3596 metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
3593 3597
3594 3598 /*
3595 3599 * This is not very efficient but it's easy to
3596 3600 * verify correctness.
3597 3601 */
3598 3602 for (uint64_t inner_offset = 0;
3599 3603 inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst);
3600 3604 inner_offset += 1 << vd->vdev_ashift) {
3601 3605 if (range_tree_contains(msp->ms_allocatable,
3602 3606 offset + inner_offset, 1 << vd->vdev_ashift)) {
3603 3607 obsolete_bytes += 1 << vd->vdev_ashift;
3604 3608 }
3605 3609 }
3606 3610
3607 3611 int64_t bytes_leaked = obsolete_bytes -
3608 3612 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i];
3609 3613 ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=,
3610 3614 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]);
3611 3615 if (bytes_leaked != 0 &&
3612 3616 (vdev_obsolete_counts_are_precise(vd) ||
3613 3617 dump_opt['d'] >= 5)) {
3614 3618 (void) printf("obsolete indirect mapping count "
3615 3619 "mismatch on %llu:%llx:%llx : %llx bytes leaked\n",
3616 3620 (u_longlong_t)vd->vdev_id,
3617 3621 (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
3618 3622 (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
3619 3623 (u_longlong_t)bytes_leaked);
3620 3624 }
3621 3625 total_leaked += ABS(bytes_leaked);
3622 3626 }
3623 3627
3624 3628 if (!vdev_obsolete_counts_are_precise(vd) && total_leaked > 0) {
3625 3629 int pct_leaked = total_leaked * 100 /
3626 3630 vdev_indirect_mapping_bytes_mapped(vim);
3627 3631 (void) printf("cannot verify obsolete indirect mapping "
3628 3632 "counts of vdev %llu because precise feature was not "
3629 3633 	    "enabled when it was removed: %d%% (%llx bytes) of mapping "
3630 3634 	    "unreferenced\n",
3631 3635 (u_longlong_t)vd->vdev_id, pct_leaked,
3632 3636 (u_longlong_t)total_leaked);
3633 3637 } else if (total_leaked > 0) {
3634 3638 (void) printf("obsolete indirect mapping count mismatch "
3635 3639 "for vdev %llu -- %llx total bytes mismatched\n",
3636 3640 (u_longlong_t)vd->vdev_id,
3637 3641 (u_longlong_t)total_leaked);
3638 3642 leaks |= B_TRUE;
3639 3643 }
3640 3644
3641 3645 vdev_indirect_mapping_free_obsolete_counts(vim,
3642 3646 zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
3643 3647 zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;
3644 3648
3645 3649 return (leaks);
3646 3650 }
3647 3651
3648 3652 static boolean_t
3649 3653 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
3650 3654 {
3655 + if (dump_opt['L'])
3656 + return (B_FALSE);
3657 +
3651 3658 boolean_t leaks = B_FALSE;
3652 - if (!dump_opt['L']) {
3653 - vdev_t *rvd = spa->spa_root_vdev;
3654 - for (unsigned c = 0; c < rvd->vdev_children; c++) {
3655 - vdev_t *vd = rvd->vdev_child[c];
3656 - metaslab_group_t *mg = vd->vdev_mg;
3657 3659
3658 - if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
3659 - leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
3660 - }
3660 + vdev_t *rvd = spa->spa_root_vdev;
3661 + for (unsigned c = 0; c < rvd->vdev_children; c++) {
3662 + vdev_t *vd = rvd->vdev_child[c];
3663 +#if DEBUG
3664 + metaslab_group_t *mg = vd->vdev_mg;
3665 +#endif
3661 3666
3662 - for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3663 - metaslab_t *msp = vd->vdev_ms[m];
3664 - ASSERT3P(mg, ==, msp->ms_group);
3667 + if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
3668 + leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
3669 + }
3665 3670
3666 - /*
3667 - * ms_allocatable has been overloaded
3668 - * to contain allocated segments. Now that
3669 - * we finished traversing all blocks, any
3670 - * block that remains in the ms_allocatable
3671 - * represents an allocated block that we
3672 - * did not claim during the traversal.
3673 - * Claimed blocks would have been removed
3674 - * from the ms_allocatable. For indirect
3675 - * vdevs, space remaining in the tree
3676 - * represents parts of the mapping that are
3677 - * not referenced, which is not a bug.
3678 - */
3679 - if (vd->vdev_ops == &vdev_indirect_ops) {
3680 - range_tree_vacate(msp->ms_allocatable,
3681 - NULL, NULL);
3682 - } else {
3683 - range_tree_vacate(msp->ms_allocatable,
3684 - zdb_leak, vd);
3685 - }
3671 + for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
3672 + metaslab_t *msp = vd->vdev_ms[m];
3673 + ASSERT3P(mg, ==, msp->ms_group);
3686 3674
3687 - if (msp->ms_loaded) {
3688 - msp->ms_loaded = B_FALSE;
3689 - }
3675 + /*
3676 + * ms_allocatable has been overloaded
3677 + * to contain allocated segments. Now that
3678 + * we finished traversing all blocks, any
3679 + * block that remains in the ms_allocatable
3680 + * represents an allocated block that we
3681 + * did not claim during the traversal.
3682 + * Claimed blocks would have been removed
3683 + * from the ms_allocatable. For indirect
3684 + * vdevs, space remaining in the tree
3685 + * represents parts of the mapping that are
3686 + * not referenced, which is not a bug.
3687 + */
3688 + if (vd->vdev_ops == &vdev_indirect_ops) {
3689 + range_tree_vacate(msp->ms_allocatable,
3690 + NULL, NULL);
3691 + } else {
3692 + range_tree_vacate(msp->ms_allocatable,
3693 + zdb_leak, vd);
3690 3694 }
3695 +
3696 + if (msp->ms_loaded) {
3697 + msp->ms_loaded = B_FALSE;
3698 + }
3691 3699 }
3692 3700
3693 - umem_free(zcb->zcb_vd_obsolete_counts,
3694 - rvd->vdev_children * sizeof (uint32_t *));
3695 - zcb->zcb_vd_obsolete_counts = NULL;
3696 3701 }
3702 +
3703 + umem_free(zcb->zcb_vd_obsolete_counts,
3704 + rvd->vdev_children * sizeof (uint32_t *));
3705 + zcb->zcb_vd_obsolete_counts = NULL;
3706 +
3697 3707 return (leaks);
3698 3708 }
3699 3709
3700 3710 /* ARGSUSED */
3701 3711 static int
3702 3712 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
3703 3713 {
3704 3714 zdb_cb_t *zcb = arg;
3705 3715
3706 3716 if (dump_opt['b'] >= 5) {
3707 3717 char blkbuf[BP_SPRINTF_LEN];
3708 3718 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3709 3719 (void) printf("[%s] %s\n",
3710 3720 "deferred free", blkbuf);
3711 3721 }
3712 3722 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
3713 3723 return (0);
3714 3724 }
3715 3725
3716 3726 static int
3717 3727 dump_block_stats(spa_t *spa)
3718 3728 {
3719 3729 zdb_cb_t zcb;
3720 3730 zdb_blkstats_t *zb, *tzb;
3721 3731 uint64_t norm_alloc, norm_space, total_alloc, total_found;
3722 3732 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
3723 3733 boolean_t leaks = B_FALSE;
3724 3734 int err;
3725 3735
3726 3736 bzero(&zcb, sizeof (zcb));
3727 3737 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
3728 3738 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
3729 3739 (dump_opt['c'] == 1) ? "metadata " : "",
3730 3740 dump_opt['c'] ? "checksums " : "",
3731 3741 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
3732 3742 !dump_opt['L'] ? "nothing leaked " : "");
3733 3743
3734 3744 /*
3735 - * Load all space maps as SM_ALLOC maps, then traverse the pool
3736 - * claiming each block we discover. If the pool is perfectly
3737 - * consistent, the space maps will be empty when we're done.
3738 - * Anything left over is a leak; any block we can't claim (because
3739 - * it's not part of any space map) is a double allocation,
3740 - * reference to a freed block, or an unclaimed log block.
3745 + * When leak detection is enabled we load all space maps as SM_ALLOC
3746 + * maps, then traverse the pool claiming each block we discover. If
3747 + * the pool is perfectly consistent, the segment trees will be empty
3748 + * when we're done. Anything left over is a leak; any block we can't
3749 + * claim (because it's not part of any space map) is a double
3750 + * allocation, reference to a freed block, or an unclaimed log block.
3751 + *
3752 + * When leak detection is disabled (-L option) we still traverse the
3753 + * pool claiming each block we discover, but we skip opening any space
3754 + * maps.
3741 3755 */
3756 + bzero(&zcb, sizeof (zdb_cb_t));
3742 3757 zdb_leak_init(spa, &zcb);
3743 3758
3744 3759 /*
3745 3760 * If there's a deferred-free bplist, process that first.
3746 3761 */
3747 3762 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
3748 3763 count_block_cb, &zcb, NULL);
3749 3764
3750 3765 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
3751 3766 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
3752 3767 count_block_cb, &zcb, NULL);
3753 3768 }
3754 3769
3755 3770 zdb_claim_removing(spa, &zcb);
3756 3771
3757 3772 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
3758 3773 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
3759 3774 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
3760 3775 &zcb, NULL));
3761 3776 }
3762 3777
3763 3778 if (dump_opt['c'] > 1)
3764 3779 flags |= TRAVERSE_PREFETCH_DATA;
3765 3780
3766 3781 zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
3767 3782 zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
3768 3783 zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
3769 3784 zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
3770 3785 err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
3771 3786
3772 3787 /*
3773 3788 * If we've traversed the data blocks then we need to wait for those
3774 3789 * I/Os to complete. We leverage "The Godfather" zio to wait on
3775 3790 * all async I/Os to complete.
3776 3791 */
3777 3792 if (dump_opt['c']) {
3778 3793 for (int i = 0; i < max_ncpus; i++) {
3779 3794 (void) zio_wait(spa->spa_async_zio_root[i]);
3780 3795 spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
3781 3796 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
3782 3797 ZIO_FLAG_GODFATHER);
3783 3798 }
3784 3799 }
3785 3800
3786 3801 /*
3787 3802 * Done after zio_wait() since zcb_haderrors is modified in
3788 3803 * zdb_blkptr_done()
3789 3804 */
3790 3805 zcb.zcb_haderrors |= err;
3791 3806
3792 3807 if (zcb.zcb_haderrors) {
3793 3808 (void) printf("\nError counts:\n\n");
3794 3809 (void) printf("\t%5s %s\n", "errno", "count");
3795 3810 for (int e = 0; e < 256; e++) {
3796 3811 if (zcb.zcb_errors[e] != 0) {
3797 3812 (void) printf("\t%5d %llu\n",
3798 3813 e, (u_longlong_t)zcb.zcb_errors[e]);
3799 3814 }
3800 3815 }
3801 3816 }
3802 3817
3803 3818 /*
3804 3819 * Report any leaked segments.
3805 3820 */
3806 3821 leaks |= zdb_leak_fini(spa, &zcb);
3807 3822
3808 3823 tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
3809 3824
3810 3825 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
3811 3826 norm_space = metaslab_class_get_space(spa_normal_class(spa));
3812 3827
3813 3828 total_alloc = norm_alloc +
3814 3829 metaslab_class_get_alloc(spa_log_class(spa)) +
3815 3830 metaslab_class_get_alloc(spa_special_class(spa)) +
3816 3831 metaslab_class_get_alloc(spa_dedup_class(spa));
3817 3832 total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
3818 3833 zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
3819 3834
3820 - if (total_found == total_alloc) {
3821 - if (!dump_opt['L'])
3822 - (void) printf("\n\tNo leaks (block sum matches space"
3823 - " maps exactly)\n");
3824 - } else {
3835 + if (total_found == total_alloc && !dump_opt['L']) {
3836 + (void) printf("\n\tNo leaks (block sum matches space"
3837 + " maps exactly)\n");
3838 + } else if (!dump_opt['L']) {
3825 3839 (void) printf("block traversal size %llu != alloc %llu "
3826 3840 "(%s %lld)\n",
3827 3841 (u_longlong_t)total_found,
3828 3842 (u_longlong_t)total_alloc,
3829 3843 	    "leaked",
3830 3844 (longlong_t)(total_alloc - total_found));
3831 3845 leaks = B_TRUE;
3832 3846 }
3833 3847
3834 3848 if (tzb->zb_count == 0)
3835 3849 return (2);
3836 3850
3837 3851 (void) printf("\n");
3838 3852 (void) printf("\t%-16s %14llu\n", "bp count:",
3839 3853 (u_longlong_t)tzb->zb_count);
3840 3854 (void) printf("\t%-16s %14llu\n", "ganged count:",
3841 3855 (longlong_t)tzb->zb_gangs);
3842 3856 (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:",
3843 3857 (u_longlong_t)tzb->zb_lsize,
3844 3858 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
3845 3859 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n",
3846 3860 "bp physical:", (u_longlong_t)tzb->zb_psize,
3847 3861 (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
3848 3862 (double)tzb->zb_lsize / tzb->zb_psize);
3849 3863 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n",
3850 3864 "bp allocated:", (u_longlong_t)tzb->zb_asize,
3851 3865 (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
3852 3866 (double)tzb->zb_lsize / tzb->zb_asize);
3853 3867 (void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n",
3854 3868 "bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize,
3855 3869 (u_longlong_t)zcb.zcb_dedup_blocks,
3856 3870 (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
3857 3871 (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
3858 3872 (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
3859 3873
3860 3874 if (spa_special_class(spa)->mc_rotor != NULL) {
3861 3875 uint64_t alloc = metaslab_class_get_alloc(
3862 3876 spa_special_class(spa));
3863 3877 uint64_t space = metaslab_class_get_space(
3864 3878 spa_special_class(spa));
3865 3879
3866 3880 (void) printf("\t%-16s %14llu used: %5.2f%%\n",
3867 3881 "Special class", (u_longlong_t)alloc,
3868 3882 100.0 * alloc / space);
3869 3883 }
3870 3884
3871 3885 if (spa_dedup_class(spa)->mc_rotor != NULL) {
3872 3886 uint64_t alloc = metaslab_class_get_alloc(
3873 3887 spa_dedup_class(spa));
3874 3888 uint64_t space = metaslab_class_get_space(
3875 3889 spa_dedup_class(spa));
3876 3890
3877 3891 (void) printf("\t%-16s %14llu used: %5.2f%%\n",
3878 3892 "Dedup class", (u_longlong_t)alloc,
3879 3893 100.0 * alloc / space);
3880 3894 }
3881 3895
3882 3896 for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
3883 3897 if (zcb.zcb_embedded_blocks[i] == 0)
3884 3898 continue;
3885 3899 (void) printf("\n");
3886 3900 (void) printf("\tadditional, non-pointer bps of type %u: "
3887 3901 "%10llu\n",
3888 3902 i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
3889 3903
3890 3904 if (dump_opt['b'] >= 3) {
3891 3905 (void) printf("\t number of (compressed) bytes: "
3892 3906 "number of bps\n");
3893 3907 dump_histogram(zcb.zcb_embedded_histogram[i],
3894 3908 sizeof (zcb.zcb_embedded_histogram[i]) /
3895 3909 sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
3896 3910 }
3897 3911 }
3898 3912
3899 3913 if (tzb->zb_ditto_samevdev != 0) {
3900 3914 (void) printf("\tDittoed blocks on same vdev: %llu\n",
3901 3915 (longlong_t)tzb->zb_ditto_samevdev);
3902 3916 }
3903 3917 if (tzb->zb_ditto_same_ms != 0) {
3904 3918 (void) printf("\tDittoed blocks in same metaslab: %llu\n",
3905 3919 (longlong_t)tzb->zb_ditto_same_ms);
3906 3920 }
3907 3921
3908 3922 for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) {
3909 3923 vdev_t *vd = spa->spa_root_vdev->vdev_child[v];
3910 3924 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
3911 3925
3912 3926 if (vim == NULL) {
3913 3927 continue;
3914 3928 }
3915 3929
3916 3930 char mem[32];
3917 3931 		zdb_nicenum(vdev_indirect_mapping_size(vim), mem,
3918 3932 		    sizeof (mem));
3919 3933
3920 3934 (void) printf("\tindirect vdev id %llu has %llu segments "
3921 3935 "(%s in memory)\n",
3922 3936 (longlong_t)vd->vdev_id,
3923 3937 (longlong_t)vdev_indirect_mapping_num_entries(vim), mem);
3924 3938 }
3925 3939
3926 3940 if (dump_opt['b'] >= 2) {
3927 3941 int l, t, level;
3928 3942 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
3929 3943 "\t avg\t comp\t%%Total\tType\n");
3930 3944
3931 3945 for (t = 0; t <= ZDB_OT_TOTAL; t++) {
3932 3946 char csize[32], lsize[32], psize[32], asize[32];
3933 3947 char avg[32], gang[32];
3934 3948 const char *typename;
3935 3949
3936 3950 /* make sure nicenum has enough space */
3937 3951 CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ);
3938 3952 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
3939 3953 CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ);
3940 3954 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
3941 3955 CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ);
3942 3956 CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ);
3943 3957
3944 3958 if (t < DMU_OT_NUMTYPES)
3945 3959 typename = dmu_ot[t].ot_name;
3946 3960 else
3947 3961 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
3948 3962
3949 3963 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
3950 3964 (void) printf("%6s\t%5s\t%5s\t%5s"
3951 3965 "\t%5s\t%5s\t%6s\t%s\n",
3952 3966 "-",
3953 3967 "-",
3954 3968 "-",
3955 3969 "-",
3956 3970 "-",
3957 3971 "-",
3958 3972 "-",
3959 3973 typename);
3960 3974 continue;
3961 3975 }
3962 3976
3963 3977 for (l = ZB_TOTAL - 1; l >= -1; l--) {
3964 3978 level = (l == -1 ? ZB_TOTAL : l);
3965 3979 zb = &zcb.zcb_type[level][t];
3966 3980
3967 3981 if (zb->zb_asize == 0)
3968 3982 continue;
3969 3983
3970 3984 if (dump_opt['b'] < 3 && level != ZB_TOTAL)
3971 3985 continue;
3972 3986
3973 3987 if (level == 0 && zb->zb_asize ==
3974 3988 zcb.zcb_type[ZB_TOTAL][t].zb_asize)
3975 3989 continue;
3976 3990
3977 3991 zdb_nicenum(zb->zb_count, csize,
3978 3992 sizeof (csize));
3979 3993 zdb_nicenum(zb->zb_lsize, lsize,
3980 3994 sizeof (lsize));
3981 3995 zdb_nicenum(zb->zb_psize, psize,
3982 3996 sizeof (psize));
3983 3997 zdb_nicenum(zb->zb_asize, asize,
3984 3998 sizeof (asize));
3985 3999 zdb_nicenum(zb->zb_asize / zb->zb_count, avg,
3986 4000 sizeof (avg));
3987 4001 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang));
3988 4002
3989 4003 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
3990 4004 "\t%5.2f\t%6.2f\t",
3991 4005 csize, lsize, psize, asize, avg,
3992 4006 (double)zb->zb_lsize / zb->zb_psize,
3993 4007 100.0 * zb->zb_asize / tzb->zb_asize);
3994 4008
3995 4009 if (level == ZB_TOTAL)
3996 4010 (void) printf("%s\n", typename);
3997 4011 else
3998 4012 (void) printf(" L%d %s\n",
3999 4013 level, typename);
4000 4014
4001 4015 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
4002 4016 (void) printf("\t number of ganged "
4003 4017 "blocks: %s\n", gang);
4004 4018 }
4005 4019
4006 4020 if (dump_opt['b'] >= 4) {
4007 4021 (void) printf("psize "
4008 4022 "(in 512-byte sectors): "
4009 4023 "number of blocks\n");
4010 4024 dump_histogram(zb->zb_psize_histogram,
4011 4025 PSIZE_HISTO_SIZE, 0);
4012 4026 }
4013 4027 }
4014 4028 }
4015 4029 }
4016 4030
4017 4031 (void) printf("\n");
4018 4032
4019 4033 if (leaks)
4020 4034 return (2);
4021 4035
4022 4036 if (zcb.zcb_haderrors)
4023 4037 return (3);
4024 4038
4025 4039 return (0);
4026 4040 }
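/*
 * The space accounting in dump_block_stats() balances
 *
 *   total_found = zb_asize - zcb_dedup_asize
 *       + zcb_removing_size + zcb_checkpoint_size
 *
 * against the allocated space summed over the normal, log, special
 * and dedup classes; with leak detection enabled, any difference is
 * reported as leaked.
 */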
4027 4041
4028 4042 typedef struct zdb_ddt_entry {
4029 4043 ddt_key_t zdde_key;
4030 4044 uint64_t zdde_ref_blocks;
4031 4045 uint64_t zdde_ref_lsize;
4032 4046 uint64_t zdde_ref_psize;
4033 4047 uint64_t zdde_ref_dsize;
4034 4048 avl_node_t zdde_node;
4035 4049 } zdb_ddt_entry_t;
4036 4050
4037 4051 /* ARGSUSED */
4038 4052 static int
4039 4053 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
4040 4054 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
4041 4055 {
4042 4056 avl_tree_t *t = arg;
4043 4057 avl_index_t where;
4044 4058 zdb_ddt_entry_t *zdde, zdde_search;
4045 4059
4046 4060 if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
4047 4061 return (0);
4048 4062
4049 4063 if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
4050 4064 (void) printf("traversing objset %llu, %llu objects, "
4051 4065 "%lu blocks so far\n",
4052 4066 (u_longlong_t)zb->zb_objset,
4053 4067 (u_longlong_t)BP_GET_FILL(bp),
4054 4068 avl_numnodes(t));
4055 4069 }
4056 4070
4057 4071 if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
4058 4072 BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
4059 4073 return (0);
4060 4074
4061 4075 ddt_key_fill(&zdde_search.zdde_key, bp);
4062 4076
4063 4077 zdde = avl_find(t, &zdde_search, &where);
4064 4078
4065 4079 if (zdde == NULL) {
4066 4080 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
4067 4081 zdde->zdde_key = zdde_search.zdde_key;
4068 4082 avl_insert(t, zdde, where);
4069 4083 }
4070 4084
4071 4085 zdde->zdde_ref_blocks += 1;
4072 4086 zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
4073 4087 zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
4074 4088 zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
4075 4089
4076 4090 return (0);
4077 4091 }
4078 4092
4079 4093 static void
4080 4094 dump_simulated_ddt(spa_t *spa)
4081 4095 {
4082 4096 avl_tree_t t;
4083 4097 void *cookie = NULL;
4084 4098 zdb_ddt_entry_t *zdde;
4085 4099 ddt_histogram_t ddh_total;
4086 4100 ddt_stat_t dds_total;
4087 4101
4088 4102 bzero(&ddh_total, sizeof (ddh_total));
4089 4103 bzero(&dds_total, sizeof (dds_total));
4090 4104 avl_create(&t, ddt_entry_compare,
4091 4105 sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
4092 4106
4093 4107 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
4094 4108
4095 4109 (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
4096 4110 zdb_ddt_add_cb, &t);
4097 4111
4098 4112 spa_config_exit(spa, SCL_CONFIG, FTAG);
4099 4113
4100 4114 while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
4101 4115 ddt_stat_t dds;
4102 4116 uint64_t refcnt = zdde->zdde_ref_blocks;
4103 4117 ASSERT(refcnt != 0);
4104 4118
4105 4119 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
4106 4120 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
4107 4121 dds.dds_psize = zdde->zdde_ref_psize / refcnt;
4108 4122 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
4109 4123
4110 4124 dds.dds_ref_blocks = zdde->zdde_ref_blocks;
4111 4125 dds.dds_ref_lsize = zdde->zdde_ref_lsize;
4112 4126 dds.dds_ref_psize = zdde->zdde_ref_psize;
4113 4127 dds.dds_ref_dsize = zdde->zdde_ref_dsize;
4114 4128
4115 4129 ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
4116 4130 &dds, 0);
4117 4131
4118 4132 umem_free(zdde, sizeof (*zdde));
4119 4133 }
4120 4134
4121 4135 avl_destroy(&t);
4122 4136
4123 4137 ddt_histogram_stat(&dds_total, &ddh_total);
4124 4138
4125 4139 (void) printf("Simulated DDT histogram:\n");
4126 4140
4127 4141 zpool_dump_ddt(&dds_total, &ddh_total);
4128 4142
4129 4143 dump_dedup_ratio(&dds_total);
4130 4144 }
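/*
 * Note on the bucketing above: ddh_stat[highbit64(refcnt) - 1]
 * groups entries by power-of-two reference count, e.g. refcnt 1
 * lands in ddh_stat[0], refcnt 2-3 in ddh_stat[1], refcnt 4-7 in
 * ddh_stat[2]; dividing the dds_ref_* totals by refcnt yields the
 * per-unique-block figures stored in dds_blocks/lsize/psize/dsize.
 */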
4131 4145
4132 4146 static int
4133 4147 verify_device_removal_feature_counts(spa_t *spa)
4134 4148 {
4135 4149 uint64_t dr_feature_refcount = 0;
4136 4150 uint64_t oc_feature_refcount = 0;
4137 4151 uint64_t indirect_vdev_count = 0;
4138 4152 uint64_t precise_vdev_count = 0;
4139 4153 uint64_t obsolete_counts_object_count = 0;
4140 4154 uint64_t obsolete_sm_count = 0;
4141 4155 uint64_t obsolete_counts_count = 0;
4142 4156 uint64_t scip_count = 0;
4143 4157 uint64_t obsolete_bpobj_count = 0;
4144 4158 int ret = 0;
4145 4159
4146 4160 spa_condensing_indirect_phys_t *scip =
4147 4161 &spa->spa_condensing_indirect_phys;
4148 4162 if (scip->scip_next_mapping_object != 0) {
4149 4163 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev];
4150 4164 ASSERT(scip->scip_prev_obsolete_sm_object != 0);
4151 4165 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
4152 4166
4153 4167 (void) printf("Condensing indirect vdev %llu: new mapping "
4154 4168 "object %llu, prev obsolete sm %llu\n",
4155 4169 (u_longlong_t)scip->scip_vdev,
4156 4170 (u_longlong_t)scip->scip_next_mapping_object,
4157 4171 (u_longlong_t)scip->scip_prev_obsolete_sm_object);
4158 4172 if (scip->scip_prev_obsolete_sm_object != 0) {
4159 4173 space_map_t *prev_obsolete_sm = NULL;
4160 4174 VERIFY0(space_map_open(&prev_obsolete_sm,
4161 4175 spa->spa_meta_objset,
4162 4176 scip->scip_prev_obsolete_sm_object,
4163 4177 0, vd->vdev_asize, 0));
4164 - space_map_update(prev_obsolete_sm);
4165 4178 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
4166 4179 (void) printf("\n");
4167 4180 space_map_close(prev_obsolete_sm);
4168 4181 }
4169 4182
4170 4183 scip_count += 2;
4171 4184 }
4172 4185
4173 4186 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
4174 4187 vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
4175 4188 vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
4176 4189
4177 4190 if (vic->vic_mapping_object != 0) {
4178 4191 ASSERT(vd->vdev_ops == &vdev_indirect_ops ||
4179 4192 vd->vdev_removing);
4180 4193 indirect_vdev_count++;
4181 4194
4182 4195 if (vd->vdev_indirect_mapping->vim_havecounts) {
4183 4196 obsolete_counts_count++;
4184 4197 }
4185 4198 }
4186 4199 if (vdev_obsolete_counts_are_precise(vd)) {
4187 4200 ASSERT(vic->vic_mapping_object != 0);
4188 4201 precise_vdev_count++;
4189 4202 }
4190 4203 if (vdev_obsolete_sm_object(vd) != 0) {
4191 4204 ASSERT(vic->vic_mapping_object != 0);
4192 4205 obsolete_sm_count++;
4193 4206 }
4194 4207 }
4195 4208
4196 4209 (void) feature_get_refcount(spa,
4197 4210 &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL],
4198 4211 &dr_feature_refcount);
4199 4212 (void) feature_get_refcount(spa,
4200 4213 &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS],
4201 4214 &oc_feature_refcount);
4202 4215
4203 4216 if (dr_feature_refcount != indirect_vdev_count) {
4204 4217 ret = 1;
4205 4218 (void) printf("Number of indirect vdevs (%llu) " \
4206 4219 "does not match feature count (%llu)\n",
4207 4220 (u_longlong_t)indirect_vdev_count,
4208 4221 (u_longlong_t)dr_feature_refcount);
4209 4222 } else {
4210 4223 (void) printf("Verified device_removal feature refcount " \
4211 4224 "of %llu is correct\n",
4212 4225 (u_longlong_t)dr_feature_refcount);
4213 4226 }
4214 4227
4215 4228 if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
4216 4229 DMU_POOL_OBSOLETE_BPOBJ) == 0) {
4217 4230 obsolete_bpobj_count++;
4218 4231 }
4219 4232
4220 4233
4221 4234 obsolete_counts_object_count = precise_vdev_count;
4222 4235 obsolete_counts_object_count += obsolete_sm_count;
4223 4236 obsolete_counts_object_count += obsolete_counts_count;
4224 4237 obsolete_counts_object_count += scip_count;
4225 4238 obsolete_counts_object_count += obsolete_bpobj_count;
4226 4239 obsolete_counts_object_count += remap_deadlist_count;
4227 4240
4228 4241 if (oc_feature_refcount != obsolete_counts_object_count) {
4229 4242 ret = 1;
4230 4243 (void) printf("Number of obsolete counts objects (%llu) " \
4231 4244 "does not match feature count (%llu)\n",
4232 4245 (u_longlong_t)obsolete_counts_object_count,
4233 4246 (u_longlong_t)oc_feature_refcount);
4234 4247 (void) printf("pv:%llu os:%llu oc:%llu sc:%llu "
4235 4248 "ob:%llu rd:%llu\n",
4236 4249 (u_longlong_t)precise_vdev_count,
4237 4250 (u_longlong_t)obsolete_sm_count,
4238 4251 (u_longlong_t)obsolete_counts_count,
4239 4252 (u_longlong_t)scip_count,
4240 4253 (u_longlong_t)obsolete_bpobj_count,
4241 4254 (u_longlong_t)remap_deadlist_count);
4242 4255 } else {
4243 4256 (void) printf("Verified indirect_refcount feature refcount " \
4244 4257 "of %llu is correct\n",
4245 4258 (u_longlong_t)oc_feature_refcount);
4246 4259 }
4247 4260 return (ret);
4248 4261 }
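/*
 * Hypothetical worked example of the refcount check above: a pool
 * with one removed vdev whose obsolete counts are precise, which
 * has both an obsolete space map and per-entry counts, no condense
 * in progress, an obsolete bpobj, and no remap deadlists would be
 * expected to show an obsolete_counts refcount of
 * 1 + 1 + 1 + 0 + 1 + 0 = 4.
 */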
4249 4262
4250 4263 static void
4251 4264 zdb_set_skip_mmp(char *target)
4252 4265 {
4253 4266 spa_t *spa;
4254 4267
4255 4268 /*
4256 4269 * Disable the activity check to allow examination of
4257 4270 * active pools.
4258 4271 */
4259 4272 mutex_enter(&spa_namespace_lock);
4260 4273 if ((spa = spa_lookup(target)) != NULL) {
4261 4274 spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP;
4262 4275 }
4263 4276 mutex_exit(&spa_namespace_lock);
4264 4277 }
4265 4278
4266 4279 #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE"
4267 4280 /*
4268 4281 * Import the checkpointed state of the pool specified by the target
4269 4282 * parameter as readonly. The function also accepts a pool config
4270 4283  * as an optional parameter; otherwise it attempts to infer the config
4271 4284  * from the name of the target pool.
4272 4285 *
4273 4286 * Note that the checkpointed state's pool name will be the name of
4274 4287  * the original pool with the above suffix appended to it. In addition,
4275 4288 * if the target is not a pool name (e.g. a path to a dataset) then
4276 4289 * the new_path parameter is populated with the updated path to
4277 4290 * reflect the fact that we are looking into the checkpointed state.
4278 4291 *
4279 4292 * The function returns a newly-allocated copy of the name of the
4280 4293 * pool containing the checkpointed state. When this copy is no
4281 4294 * longer needed it should be freed with free(3C). Same thing
4282 4295 * applies to the new_path parameter if allocated.
4283 4296 */
4284 4297 static char *
4285 4298 import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
4286 4299 {
4287 4300 int error = 0;
4288 4301 char *poolname, *bogus_name;
4289 4302
4290 4303 	/* If the target is not a pool, then extract the pool name */
4291 4304 char *path_start = strchr(target, '/');
4292 4305 if (path_start != NULL) {
4293 4306 size_t poolname_len = path_start - target;
4294 4307 poolname = strndup(target, poolname_len);
4295 4308 } else {
4296 4309 poolname = target;
4297 4310 }
4298 4311
4299 4312 if (cfg == NULL) {
4300 4313 zdb_set_skip_mmp(poolname);
4301 4314 error = spa_get_stats(poolname, &cfg, NULL, 0);
4302 4315 if (error != 0) {
4303 4316 fatal("Tried to read config of pool \"%s\" but "
4304 4317 "spa_get_stats() failed with error %d\n",
4305 4318 poolname, error);
4306 4319 }
4307 4320 }
4308 4321
4309 4322 (void) asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX);
4310 4323 fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name);
4311 4324
4312 4325 error = spa_import(bogus_name, cfg, NULL,
4313 4326 ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT |
4314 4327 ZFS_IMPORT_SKIP_MMP);
4315 4328 if (error != 0) {
4316 4329 fatal("Tried to import pool \"%s\" but spa_import() failed "
4317 4330 "with error %d\n", bogus_name, error);
4318 4331 }
4319 4332
4320 4333 if (new_path != NULL && path_start != NULL)
4321 4334 (void) asprintf(new_path, "%s%s", bogus_name, path_start);
4322 4335
4323 4336 if (target != poolname)
4324 4337 free(poolname);
4325 4338
4326 4339 return (bogus_name);
4327 4340 }
4328 4341
4329 4342 typedef struct verify_checkpoint_sm_entry_cb_arg {
4330 4343 vdev_t *vcsec_vd;
4331 4344
4332 4345 /* the following fields are only used for printing progress */
4333 4346 uint64_t vcsec_entryid;
4334 4347 uint64_t vcsec_num_entries;
4335 4348 } verify_checkpoint_sm_entry_cb_arg_t;
4336 4349
4337 4350 #define ENTRIES_PER_PROGRESS_UPDATE 10000
4338 4351
4339 4352 static int
4340 4353 verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg)
4341 4354 {
4342 4355 verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg;
4343 4356 vdev_t *vd = vcsec->vcsec_vd;
4344 4357 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
4345 4358 uint64_t end = sme->sme_offset + sme->sme_run;
4346 4359
4347 4360 ASSERT(sme->sme_type == SM_FREE);
4348 4361
4349 4362 if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) {
4350 4363 (void) fprintf(stderr,
4351 4364 "\rverifying vdev %llu, space map entry %llu of %llu ...",
4352 4365 (longlong_t)vd->vdev_id,
4353 4366 (longlong_t)vcsec->vcsec_entryid,
4354 4367 (longlong_t)vcsec->vcsec_num_entries);
4355 4368 }
4356 4369 vcsec->vcsec_entryid++;
4357 4370
4358 4371 /*
4359 4372 * See comment in checkpoint_sm_exclude_entry_cb()
4360 4373 */
4361 4374 VERIFY3U(sme->sme_offset, >=, ms->ms_start);
4362 4375 VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
4363 4376
4364 4377 /*
4365 4378 * The entries in the vdev_checkpoint_sm should be marked as
4366 4379 * allocated in the checkpointed state of the pool, therefore
4367 4380 	 * their respective ms_allocatable trees should not contain them.
4368 4381 */
4369 4382 mutex_enter(&ms->ms_lock);
4370 - range_tree_verify(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
4383 + range_tree_verify_not_present(ms->ms_allocatable,
4384 + sme->sme_offset, sme->sme_run);
4371 4385 mutex_exit(&ms->ms_lock);
4372 4386
4373 4387 return (0);
4374 4388 }
4375 4389
4376 4390 /*
4377 4391 * Verify that all segments in the vdev_checkpoint_sm are allocated
4378 4392 * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's
4379 4393 * ms_allocatable).
4380 4394 *
4381 4395 * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of
4382 4396 * each vdev in the current state of the pool to the metaslab space maps
4383 4397 * (ms_sm) of the checkpointed state of the pool.
4384 4398 *
4385 4399 * Note that the function changes the state of the ms_allocatable
4386 4400 * trees of the current spa_t. The entries of these ms_allocatable
4387 4401  * trees are cleared out and then repopulated with the free
4388 4402 * entries of their respective ms_sm space maps.
4389 4403 */
4390 4404 static void
4391 4405 verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
4392 4406 {
4393 4407 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
4394 4408 vdev_t *current_rvd = current->spa_root_vdev;
4395 4409
4396 4410 load_concrete_ms_allocatable_trees(checkpoint, SM_FREE);
4397 4411
4398 4412 for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) {
4399 4413 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c];
4400 4414 vdev_t *current_vd = current_rvd->vdev_child[c];
4401 4415
4402 4416 space_map_t *checkpoint_sm = NULL;
4403 4417 uint64_t checkpoint_sm_obj;
4404 4418
4405 4419 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
4406 4420 /*
4407 4421 * Since we don't allow device removal in a pool
4408 4422 * that has a checkpoint, we expect that all removed
4409 4423 * vdevs were removed from the pool before the
4410 4424 * checkpoint.
4411 4425 */
4412 4426 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
4413 4427 continue;
4414 4428 }
4415 4429
4416 4430 /*
4417 4431 * If the checkpoint space map doesn't exist, then nothing
4418 4432 * here is checkpointed so there's nothing to verify.
4419 4433 */
4420 4434 if (current_vd->vdev_top_zap == 0 ||
4421 4435 zap_contains(spa_meta_objset(current),
4422 4436 current_vd->vdev_top_zap,
4423 4437 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
4424 4438 continue;
4425 4439
4426 4440 VERIFY0(zap_lookup(spa_meta_objset(current),
4427 4441 current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
4428 4442 sizeof (uint64_t), 1, &checkpoint_sm_obj));
4429 4443
4430 4444 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
4431 4445 checkpoint_sm_obj, 0, current_vd->vdev_asize,
4432 4446 current_vd->vdev_ashift));
4433 - space_map_update(checkpoint_sm);
4434 4447
4435 4448 verify_checkpoint_sm_entry_cb_arg_t vcsec;
4436 4449 vcsec.vcsec_vd = ckpoint_vd;
4437 4450 vcsec.vcsec_entryid = 0;
4438 4451 vcsec.vcsec_num_entries =
4439 4452 space_map_length(checkpoint_sm) / sizeof (uint64_t);
4440 4453 VERIFY0(space_map_iterate(checkpoint_sm,
4454 + space_map_length(checkpoint_sm),
4441 4455 verify_checkpoint_sm_entry_cb, &vcsec));
4442 4456 dump_spacemap(current->spa_meta_objset, checkpoint_sm);
4443 4457 space_map_close(checkpoint_sm);
4444 4458 }
4445 4459
4446 4460 /*
4447 4461 * If we've added vdevs since we took the checkpoint, ensure
4448 4462 * that their checkpoint space maps are empty.
4449 4463 */
4450 4464 if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) {
4451 4465 for (uint64_t c = ckpoint_rvd->vdev_children;
4452 4466 c < current_rvd->vdev_children; c++) {
4453 4467 vdev_t *current_vd = current_rvd->vdev_child[c];
4454 4468 ASSERT3P(current_vd->vdev_checkpoint_sm, ==, NULL);
4455 4469 }
4456 4470 }
4457 4471
4458 4472 /* for cleaner progress output */
4459 4473 (void) fprintf(stderr, "\n");
4460 4474 }
4461 4475
4462 4476 /*
4463 4477 * Verifies that all space that's allocated in the checkpoint is
4464 4478 * still allocated in the current version, by checking that everything
4465 4479 * in checkpoint's ms_allocatable (which is actually allocated, not
4466 4480 * allocatable/free) is not present in current's ms_allocatable.
4467 4481 *
4468 4482 * Note that the function changes the state of the ms_allocatable
4469 4483 * trees of both spas when called. The entries of all ms_allocatable
4470 4484 * trees are cleared out and then repopulated from their respective
4471 4485 * ms_sm space maps. In the checkpointed state we load the allocated
4472 4486 * entries, and in the current state we load the free entries.
4473 4487 */
4474 4488 static void
4475 4489 verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
4476 4490 {
4477 4491 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
4478 4492 vdev_t *current_rvd = current->spa_root_vdev;
4479 4493
4480 4494 load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC);
4481 4495 load_concrete_ms_allocatable_trees(current, SM_FREE);
4482 4496
4483 4497 for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) {
4484 4498 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i];
4485 4499 vdev_t *current_vd = current_rvd->vdev_child[i];
4486 4500
4487 4501 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
4488 4502 /*
4489 4503 * See comment in verify_checkpoint_vdev_spacemaps()
4490 4504 */
4491 4505 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
4492 4506 continue;
4493 4507 }
4494 4508
4495 4509 for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) {
4496 4510 metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m];
4497 4511 metaslab_t *current_msp = current_vd->vdev_ms[m];
4498 4512
4499 4513 (void) fprintf(stderr,
4500 4514 "\rverifying vdev %llu of %llu, "
4501 4515 "metaslab %llu of %llu ...",
4502 4516 (longlong_t)current_vd->vdev_id,
4503 4517 (longlong_t)current_rvd->vdev_children,
4504 4518 (longlong_t)current_vd->vdev_ms[m]->ms_id,
4505 4519 (longlong_t)current_vd->vdev_ms_count);
4506 4520
4507 4521 /*
4508 4522 * We walk through the ms_allocatable trees that
4509 4523 * are loaded with the allocated blocks from the
4510 4524 * ms_sm spacemaps of the checkpoint. For each
4511 4525 * one of these ranges we ensure that none of them
4512 4526 * exists in the ms_allocatable trees of the
4513 4527 * current state which are loaded with the ranges
4514 4528 * that are currently free.
4515 4529 *
4516 4530 * This way we ensure that none of the blocks that
4517 4531 * are part of the checkpoint were freed by mistake.
4518 4532 */
4519 4533 range_tree_walk(ckpoint_msp->ms_allocatable,
4520 - (range_tree_func_t *)range_tree_verify,
4534 + (range_tree_func_t *)range_tree_verify_not_present,
4521 4535 current_msp->ms_allocatable);
4522 4536 }
4523 4537 }
4524 4538
4525 4539 /* for cleaner progress output */
4526 4540 (void) fprintf(stderr, "\n");
4527 4541 }
4528 4542
4529 4543 static void
4530 4544 verify_checkpoint_blocks(spa_t *spa)
4531 4545 {
4546 + ASSERT(!dump_opt['L']);
4547 +
4532 4548 spa_t *checkpoint_spa;
4533 4549 char *checkpoint_pool;
4534 4550 nvlist_t *config = NULL;
4535 4551 int error = 0;
4536 4552
4537 4553 /*
4538 4554 * We import the checkpointed state of the pool (under a different
4539 4555 * name) so we can do verification on it against the current state
4540 4556 * of the pool.
4541 4557 */
4542 4558 checkpoint_pool = import_checkpointed_state(spa->spa_name, config,
4543 4559 NULL);
4544 4560 ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
4545 4561
4546 4562 error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG);
4547 4563 if (error != 0) {
4548 4564 fatal("Tried to open pool \"%s\" but spa_open() failed with "
4549 4565 "error %d\n", checkpoint_pool, error);
4550 4566 }
4551 4567
4552 4568 /*
4553 4569 * Ensure that ranges in the checkpoint space maps of each vdev
4554 4570 * are allocated according to the checkpointed state's metaslab
4555 4571 * space maps.
4556 4572 */
4557 4573 verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa);
4558 4574
4559 4575 /*
4560 4576 * Ensure that allocated ranges in the checkpoint's metaslab
4561 4577 * space maps remain allocated in the metaslab space maps of
4562 4578 * the current state.
4563 4579 */
4564 4580 verify_checkpoint_ms_spacemaps(checkpoint_spa, spa);
4565 4581
4566 4582 /*
4567 4583 * Once we are done, we get rid of the checkpointed state.
4568 4584 */
4569 4585 spa_close(checkpoint_spa, FTAG);
4570 4586 free(checkpoint_pool);
4571 4587 }
4572 4588
4573 4589 static void
4574 4590 dump_leftover_checkpoint_blocks(spa_t *spa)
4575 4591 {
4576 4592 vdev_t *rvd = spa->spa_root_vdev;
4577 4593
4578 4594 for (uint64_t i = 0; i < rvd->vdev_children; i++) {
4579 4595 vdev_t *vd = rvd->vdev_child[i];
4580 4596
4581 4597 space_map_t *checkpoint_sm = NULL;
4582 4598 uint64_t checkpoint_sm_obj;
4583 4599
4584 4600 if (vd->vdev_top_zap == 0)
4585 4601 continue;
4586 4602
4587 4603 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
4588 4604 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
4589 4605 continue;
4590 4606
4591 4607 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
4592 4608 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
4593 4609 sizeof (uint64_t), 1, &checkpoint_sm_obj));
4594 4610
4595 4611 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
4596 4612 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
4597 - space_map_update(checkpoint_sm);
4598 4613 dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
4599 4614 space_map_close(checkpoint_sm);
4600 4615 }
4601 4616 }
4602 4617
4603 4618 static int
4604 4619 verify_checkpoint(spa_t *spa)
4605 4620 {
4606 4621 uberblock_t checkpoint;
4607 4622 int error;
4608 4623
4609 4624 if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
4610 4625 return (0);
4611 4626
4612 4627 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
4613 4628 DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
4614 4629 sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
4615 4630
4616 4631 if (error == ENOENT && !dump_opt['L']) {
4617 4632 /*
4618 4633 * If the feature is active but the uberblock is missing
4619 4634 * then we must be in the middle of discarding the
4620 4635 * checkpoint.
4621 4636 */
4622 4637 (void) printf("\nPartially discarded checkpoint "
4623 4638 "state found:\n");
4624 4639 dump_leftover_checkpoint_blocks(spa);
4625 4640 return (0);
4626 4641 } else if (error != 0) {
4627 4642 (void) printf("lookup error %d when looking for "
4628 4643 "checkpointed uberblock in MOS\n", error);
4629 4644 return (error);
4630 4645 }
4631 4646 dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n");
4632 4647
4633 4648 if (checkpoint.ub_checkpoint_txg == 0) {
4634 4649 (void) printf("\nub_checkpoint_txg not set in checkpointed "
4635 4650 "uberblock\n");
4636 4651 error = 3;
4637 4652 }
4638 4653
4639 4654 if (error == 0 && !dump_opt['L'])
4640 4655 verify_checkpoint_blocks(spa);
4641 4656
4642 4657 return (error);
4643 4658 }
4644 4659
4645 4660 /* ARGSUSED */
4646 4661 static void
4647 4662 mos_leaks_cb(void *arg, uint64_t start, uint64_t size)
4648 4663 {
4649 4664 for (uint64_t i = start; i < start + size; i++) {
4650 4665 (void) printf("MOS object %llu referenced but not allocated\n",
4651 4666 (u_longlong_t)i);
4652 4667 }
4653 4668 }
4654 4669
4655 4670 static range_tree_t *mos_refd_objs;
4656 4671
4657 4672 static void
4658 4673 mos_obj_refd(uint64_t obj)
4659 4674 {
4660 4675 if (obj != 0 && mos_refd_objs != NULL)
4661 4676 range_tree_add(mos_refd_objs, obj, 1);
4662 4677 }
4663 4678
4664 4679 static void
4665 4680 mos_leak_vdev(vdev_t *vd)
4666 4681 {
4667 4682 mos_obj_refd(vd->vdev_dtl_object);
4668 4683 mos_obj_refd(vd->vdev_ms_array);
4669 4684 mos_obj_refd(vd->vdev_top_zap);
4670 4685 mos_obj_refd(vd->vdev_indirect_config.vic_births_object);
4671 4686 mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object);
4672 4687 mos_obj_refd(vd->vdev_leaf_zap);
4673 4688 if (vd->vdev_checkpoint_sm != NULL)
4674 4689 mos_obj_refd(vd->vdev_checkpoint_sm->sm_object);
4675 4690 if (vd->vdev_indirect_mapping != NULL) {
4676 4691 mos_obj_refd(vd->vdev_indirect_mapping->
4677 4692 vim_phys->vimp_counts_object);
4678 4693 }
4679 4694 if (vd->vdev_obsolete_sm != NULL)
4680 4695 mos_obj_refd(vd->vdev_obsolete_sm->sm_object);
4681 4696
4682 4697 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
4683 4698 metaslab_t *ms = vd->vdev_ms[m];
4684 4699 mos_obj_refd(space_map_object(ms->ms_sm));
4685 4700 }
4686 4701
4687 4702 for (uint64_t c = 0; c < vd->vdev_children; c++) {
4688 4703 mos_leak_vdev(vd->vdev_child[c]);
4689 4704 }
4690 4705 }
4691 4706
4692 4707 static int
4693 4708 dump_mos_leaks(spa_t *spa)
4694 4709 {
4695 4710 int rv = 0;
4696 4711 objset_t *mos = spa->spa_meta_objset;
4697 4712 dsl_pool_t *dp = spa->spa_dsl_pool;
4698 4713
4699 4714 /* Visit and mark all referenced objects in the MOS */
4700 4715
4701 4716 mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT);
4702 4717 mos_obj_refd(spa->spa_pool_props_object);
4703 4718 mos_obj_refd(spa->spa_config_object);
4704 4719 mos_obj_refd(spa->spa_ddt_stat_object);
4705 4720 mos_obj_refd(spa->spa_feat_desc_obj);
4706 4721 mos_obj_refd(spa->spa_feat_enabled_txg_obj);
4707 4722 mos_obj_refd(spa->spa_feat_for_read_obj);
4708 4723 mos_obj_refd(spa->spa_feat_for_write_obj);
4709 4724 mos_obj_refd(spa->spa_history);
4710 4725 mos_obj_refd(spa->spa_errlog_last);
4711 4726 mos_obj_refd(spa->spa_errlog_scrub);
4712 4727 mos_obj_refd(spa->spa_all_vdev_zaps);
4713 4728 mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj);
4714 4729 mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj);
4715 4730 mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj);
4716 4731 bpobj_count_refd(&spa->spa_deferred_bpobj);
4717 4732 mos_obj_refd(dp->dp_empty_bpobj);
4718 4733 bpobj_count_refd(&dp->dp_obsolete_bpobj);
4719 4734 bpobj_count_refd(&dp->dp_free_bpobj);
4720 4735 mos_obj_refd(spa->spa_l2cache.sav_object);
4721 4736 mos_obj_refd(spa->spa_spares.sav_object);
4722 4737
4723 4738 mos_obj_refd(spa->spa_condensing_indirect_phys.
4724 4739 scip_next_mapping_object);
4725 4740 mos_obj_refd(spa->spa_condensing_indirect_phys.
4726 4741 scip_prev_obsolete_sm_object);
4727 4742 if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) {
4728 4743 vdev_indirect_mapping_t *vim =
4729 4744 vdev_indirect_mapping_open(mos,
4730 4745 spa->spa_condensing_indirect_phys.scip_next_mapping_object);
4731 4746 mos_obj_refd(vim->vim_phys->vimp_counts_object);
4732 4747 vdev_indirect_mapping_close(vim);
4733 4748 }
4734 4749
4735 4750 if (dp->dp_origin_snap != NULL) {
4736 4751 dsl_dataset_t *ds;
4737 4752
4738 4753 dsl_pool_config_enter(dp, FTAG);
4739 4754 VERIFY0(dsl_dataset_hold_obj(dp,
4740 4755 dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj,
4741 4756 FTAG, &ds));
4742 4757 count_ds_mos_objects(ds);
4743 4758 dump_deadlist(&ds->ds_deadlist);
4744 4759 dsl_dataset_rele(ds, FTAG);
4745 4760 dsl_pool_config_exit(dp, FTAG);
4746 4761
4747 4762 count_ds_mos_objects(dp->dp_origin_snap);
4748 4763 dump_deadlist(&dp->dp_origin_snap->ds_deadlist);
4749 4764 }
4750 4765 count_dir_mos_objects(dp->dp_mos_dir);
4751 4766 if (dp->dp_free_dir != NULL)
4752 4767 count_dir_mos_objects(dp->dp_free_dir);
4753 4768 if (dp->dp_leak_dir != NULL)
4754 4769 count_dir_mos_objects(dp->dp_leak_dir);
4755 4770
4756 4771 mos_leak_vdev(spa->spa_root_vdev);
4757 4772
4758 4773 for (uint64_t class = 0; class < DDT_CLASSES; class++) {
4759 4774 for (uint64_t type = 0; type < DDT_TYPES; type++) {
4760 4775 for (uint64_t cksum = 0;
4761 4776 cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
4762 4777 ddt_t *ddt = spa->spa_ddt[cksum];
4763 4778 mos_obj_refd(ddt->ddt_object[type][class]);
4764 4779 }
4765 4780 }
4766 4781 }
4767 4782
4768 4783 /*
4769 4784 * Visit all allocated objects and make sure they are referenced.
4770 4785 */
4771 4786 uint64_t object = 0;
4772 4787 while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) {
4773 4788 if (range_tree_contains(mos_refd_objs, object, 1)) {
4774 4789 range_tree_remove(mos_refd_objs, object, 1);
4775 4790 } else {
4776 4791 dmu_object_info_t doi;
4777 4792 const char *name;
4778 4793 dmu_object_info(mos, object, &doi);
4779 4794 if (doi.doi_type & DMU_OT_NEWTYPE) {
4780 4795 dmu_object_byteswap_t bswap =
4781 4796 DMU_OT_BYTESWAP(doi.doi_type);
4782 4797 name = dmu_ot_byteswap[bswap].ob_name;
4783 4798 } else {
4784 4799 name = dmu_ot[doi.doi_type].ot_name;
4785 4800 }
4786 4801
4787 4802 (void) printf("MOS object %llu (%s) leaked\n",
4788 4803 (u_longlong_t)object, name);
4789 4804 rv = 2;
4790 4805 }
4791 4806 }
4792 4807 (void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL);
4793 4808 if (!range_tree_is_empty(mos_refd_objs))
4794 4809 rv = 2;
4795 4810 range_tree_vacate(mos_refd_objs, NULL, NULL);
4796 4811 range_tree_destroy(mos_refd_objs);
4797 4812 return (rv);
4798 4813 }
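
dump_mos_leaks() is a mark-and-sweep pass: every object number that anything in the pool references is recorded in mos_refd_objs as a width-1 range, and the sweep then removes each allocated object it finds. Whatever remains was referenced but never allocated, and any allocated object missing from the set is a leak. The same idea as a standalone sketch, with plain arrays standing in for the range tree and made-up object numbers:

#include <stdio.h>
#include <stdbool.h>

#define	NOBJ	8	/* hypothetical MOS object count */

int
main(void)
{
	bool referenced[NOBJ] = { false };
	bool allocated[NOBJ] = { false };

	/* Mark phase: record every object number something points at. */
	referenced[1] = referenced[3] = referenced[5] = true;

	/* The set of objects actually allocated on disk. */
	allocated[1] = allocated[3] = allocated[4] = true;

	/* Sweep phase: a mismatch in either direction is reported. */
	for (int i = 0; i < NOBJ; i++) {
		if (allocated[i] && !referenced[i])
			(void) printf("object %d leaked\n", i);
		if (referenced[i] && !allocated[i])
			(void) printf("object %d referenced but "
			    "not allocated\n", i);
	}
	return (0);
}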
4799 4814
4800 4815 static void
4801 4816 dump_zpool(spa_t *spa)
4802 4817 {
4803 4818 dsl_pool_t *dp = spa_get_dsl(spa);
4804 4819 int rc = 0;
4805 4820
4806 4821 if (dump_opt['S']) {
4807 4822 dump_simulated_ddt(spa);
4808 4823 return;
4809 4824 }
4810 4825
4811 4826 if (!dump_opt['e'] && dump_opt['C'] > 1) {
4812 4827 (void) printf("\nCached configuration:\n");
4813 4828 dump_nvlist(spa->spa_config, 8);
4814 4829 }
4815 4830
4816 4831 if (dump_opt['C'])
4817 4832 dump_config(spa);
4818 4833
4819 4834 if (dump_opt['u'])
4820 4835 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
4821 4836
4822 4837 if (dump_opt['D'])
4823 4838 dump_all_ddts(spa);
4824 4839
4825 4840 if (dump_opt['d'] > 2 || dump_opt['m'])
4826 4841 dump_metaslabs(spa);
4827 4842 if (dump_opt['M'])
4828 4843 dump_metaslab_groups(spa);
4829 4844
4830 4845 if (dump_opt['d'] || dump_opt['i']) {
4831 4846 mos_refd_objs = range_tree_create(NULL, NULL);
4832 4847 dump_dir(dp->dp_meta_objset);
4833 4848
4834 4849 if (dump_opt['d'] >= 3) {
4835 4850 dsl_pool_t *dp = spa->spa_dsl_pool;
4836 4851 dump_full_bpobj(&spa->spa_deferred_bpobj,
4837 4852 "Deferred frees", 0);
4838 4853 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
4839 4854 dump_full_bpobj(&dp->dp_free_bpobj,
4840 4855 "Pool snapshot frees", 0);
4841 4856 }
4842 4857 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
4843 4858 ASSERT(spa_feature_is_enabled(spa,
4844 4859 SPA_FEATURE_DEVICE_REMOVAL));
4845 4860 dump_full_bpobj(&dp->dp_obsolete_bpobj,
4846 4861 "Pool obsolete blocks", 0);
4847 4862 }
4848 4863
4849 4864 if (spa_feature_is_active(spa,
4850 4865 SPA_FEATURE_ASYNC_DESTROY)) {
4851 4866 dump_bptree(spa->spa_meta_objset,
4852 4867 dp->dp_bptree_obj,
4853 4868 "Pool dataset frees");
4854 4869 }
4855 4870 dump_dtl(spa->spa_root_vdev, 0);
4856 4871 }
4857 4872 (void) dmu_objset_find(spa_name(spa), dump_one_dir,
4858 4873 NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
4859 4874
4860 4875 if (rc == 0 && !dump_opt['L'])
4861 4876 rc = dump_mos_leaks(spa);
4862 4877
4863 4878 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
4864 4879 uint64_t refcount;
4865 4880
4866 4881 if (!(spa_feature_table[f].fi_flags &
4867 4882 ZFEATURE_FLAG_PER_DATASET) ||
4868 4883 !spa_feature_is_enabled(spa, f)) {
4869 4884 ASSERT0(dataset_feature_count[f]);
4870 4885 continue;
4871 4886 }
4872 4887 (void) feature_get_refcount(spa,
4873 4888 &spa_feature_table[f], &refcount);
4874 4889 if (dataset_feature_count[f] != refcount) {
4875 4890 (void) printf("%s feature refcount mismatch: "
4876 4891 "%lld datasets != %lld refcount\n",
4877 4892 spa_feature_table[f].fi_uname,
4878 4893 (longlong_t)dataset_feature_count[f],
4879 4894 (longlong_t)refcount);
4880 4895 rc = 2;
4881 4896 } else {
4882 4897 (void) printf("Verified %s feature refcount "
4883 4898 "of %llu is correct\n",
4884 4899 spa_feature_table[f].fi_uname,
4885 4900 (longlong_t)refcount);
4886 4901 }
4887 4902 }
4888 4903
4889 4904 if (rc == 0) {
4890 4905 rc = verify_device_removal_feature_counts(spa);
4891 4906 }
4892 4907 }
4893 4908
4894 4909 if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
4895 4910 rc = dump_block_stats(spa);
4896 4911
4897 4912 if (rc == 0)
4898 4913 rc = verify_spacemap_refcounts(spa);
4899 4914
4900 4915 if (dump_opt['s'])
4901 4916 show_pool_stats(spa);
4902 4917
4903 4918 if (dump_opt['h'])
4904 4919 dump_history(spa);
4905 4920
4906 4921 if (rc == 0)
4907 4922 rc = verify_checkpoint(spa);
4908 4923
4909 4924 if (rc != 0) {
4910 4925 dump_debug_buffer();
4911 4926 exit(rc);
4912 4927 }
4913 4928 }
4914 4929
4915 4930 #define ZDB_FLAG_CHECKSUM 0x0001
4916 4931 #define ZDB_FLAG_DECOMPRESS 0x0002
4917 4932 #define ZDB_FLAG_BSWAP 0x0004
4918 4933 #define ZDB_FLAG_GBH 0x0008
4919 4934 #define ZDB_FLAG_INDIRECT 0x0010
4920 4935 #define ZDB_FLAG_PHYS 0x0020
4921 4936 #define ZDB_FLAG_RAW 0x0040
4922 4937 #define ZDB_FLAG_PRINT_BLKPTR 0x0080
4923 4938
4924 4939 static int flagbits[256];
4925 4940
4926 4941 static void
4927 4942 zdb_print_blkptr(blkptr_t *bp, int flags)
4928 4943 {
4929 4944 char blkbuf[BP_SPRINTF_LEN];
4930 4945
4931 4946 if (flags & ZDB_FLAG_BSWAP)
4932 4947 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
4933 4948
4934 4949 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
4935 4950 (void) printf("%s\n", blkbuf);
4936 4951 }
4937 4952
4938 4953 static void
4939 4954 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
4940 4955 {
4941 4956 int i;
4942 4957
4943 4958 for (i = 0; i < nbps; i++)
4944 4959 zdb_print_blkptr(&bp[i], flags);
4945 4960 }
4946 4961
4947 4962 static void
4948 4963 zdb_dump_gbh(void *buf, int flags)
4949 4964 {
4950 4965 zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
4951 4966 }
4952 4967
4953 4968 static void
4954 4969 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
4955 4970 {
4956 4971 if (flags & ZDB_FLAG_BSWAP)
4957 4972 byteswap_uint64_array(buf, size);
4958 4973 (void) write(1, buf, size);
4959 4974 }
4960 4975
4961 4976 static void
4962 4977 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
4963 4978 {
4964 4979 uint64_t *d = (uint64_t *)buf;
4965 4980 unsigned nwords = size / sizeof (uint64_t);
4966 4981 int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
4967 4982 unsigned i, j;
4968 4983 const char *hdr;
4969 4984 char *c;
4970 4985
4971 4986
4972 4987 if (do_bswap)
4973 4988 hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8";
4974 4989 else
4975 4990 hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f";
4976 4991
4977 4992 (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr);
4978 4993
4979 4994 for (i = 0; i < nwords; i += 2) {
4980 4995 (void) printf("%06llx: %016llx %016llx ",
4981 4996 (u_longlong_t)(i * sizeof (uint64_t)),
4982 4997 (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
4983 4998 (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
4984 4999
4985 5000 c = (char *)&d[i];
4986 5001 for (j = 0; j < 2 * sizeof (uint64_t); j++)
4987 5002 (void) printf("%c", isprint(c[j]) ? c[j] : '.');
4988 5003 (void) printf("\n");
4989 5004 }
4990 5005 }
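
Each line printed by the loop above consists of a hex byte offset, two 64-bit words, and the same sixteen bytes rendered as ASCII (non-printable bytes become dots). With made-up data, one output line would look roughly like:

	000010: 0000000000bab10c 00000000000000fe  ................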
4991 5006
4992 5007 /*
4993 5008 * There are two acceptable formats:
4994 5009 * leaf_name - For example: c1t0d0 or /tmp/ztest.0a
4995 5010 * child[.child]* - For example: 0.1.1
4996 5011 *
4997 5012 * The second form can be used to specify arbitrary vdevs anywhere
4998 5013 * in the hierarchy. For example, in a pool with a mirror of
4999 5014 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
5000 5015 */
5001 5016 static vdev_t *
5002 5017 zdb_vdev_lookup(vdev_t *vdev, const char *path)
5003 5018 {
5004 5019 char *s, *p, *q;
5005 5020 unsigned i;
5006 5021
5007 5022 if (vdev == NULL)
5008 5023 return (NULL);
5009 5024
5010 5025 /* First, assume the x.x.x.x format */
5011 5026 i = strtoul(path, &s, 10);
5012 5027 if (s == path || (s && *s != '.' && *s != '\0'))
5013 5028 goto name;
5014 5029 if (i >= vdev->vdev_children)
5015 5030 return (NULL);
5016 5031
5017 5032 vdev = vdev->vdev_child[i];
5018 5033 if (*s == '\0')
5019 5034 return (vdev);
5020 5035 return (zdb_vdev_lookup(vdev, s+1));
5021 5036
5022 5037 name:
5023 5038 for (i = 0; i < vdev->vdev_children; i++) {
5024 5039 vdev_t *vc = vdev->vdev_child[i];
5025 5040
5026 5041 if (vc->vdev_path == NULL) {
5027 5042 vc = zdb_vdev_lookup(vc, path);
5028 5043 if (vc == NULL)
5029 5044 continue;
5030 5045 else
5031 5046 return (vc);
5032 5047 }
5033 5048
5034 5049 p = strrchr(vc->vdev_path, '/');
5035 5050 p = p ? p + 1 : vc->vdev_path;
5036 5051 q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
5037 5052
5038 5053 if (strcmp(vc->vdev_path, path) == 0)
5039 5054 return (vc);
5040 5055 if (strcmp(p, path) == 0)
5041 5056 return (vc);
5042 5057 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
5043 5058 return (vc);
5044 5059 }
5045 5060
5046 5061 return (NULL);
5047 5062 }
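
The numeric child[.child]* form above is consumed one index at a time: strtoul() parses the leading number, and the recursion resumes after the dot. A standalone sketch of that descent, using a hypothetical specifier string:

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char spec[] = "0.1.1";	/* hypothetical vdev specifier */
	char *s = spec;

	while (*s != '\0') {
		unsigned long i = strtoul(s, &s, 10);
		(void) printf("descend to child %lu\n", i);
		if (*s == '.')
			s++;	/* skip the dot, parse the next level */
	}
	return (0);
}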
5048 5063
5049 5064 /* ARGSUSED */
5050 5065 static int
5051 5066 random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
5052 5067 {
5053 5068 return (random_get_pseudo_bytes(buf, len));
5054 5069 }
5055 5070
5056 5071 /*
5057 5072 * Read a block from a pool and print it out. The syntax of the
5058 5073 * block descriptor is:
5059 5074 *
5060 5075 * pool:vdev_specifier:offset:size[:flags]
5061 5076 *
5062 5077 * pool - The name of the pool you wish to read from
5063 5078 * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
5064 5079 * offset - offset, in hex, in bytes
5065 5080 * size - Amount of data to read, in hex, in bytes
5066 5081 * flags - A string of characters specifying options
5067 5082 * b: Decode a blkptr at given offset within block
5068 5083 * *c: Calculate and display checksums
5069 5084 * d: Decompress data before dumping
5070 5085 * e: Byteswap data before dumping
5071 5086 * g: Display data as a gang block header
5072 5087 * i: Display as an indirect block
5073 5088 * p: Do I/O to physical offset
5074 5089 * r: Dump raw data to stdout
5075 5090 *
5076 5091 * * = not yet implemented
5077 5092 */
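
A hypothetical invocation tying these fields together (the pool name, offset, and size are invented; `d` is the decompress flag listed above):

	# zdb -R tank:0:400000:20000:d

This requests 0x20000 bytes at hex offset 0x400000 on top-level vdev 0 of pool tank, decompressed before dumping.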
5078 5093 static void
5079 5094 zdb_read_block(char *thing, spa_t *spa)
5080 5095 {
5081 5096 blkptr_t blk, *bp = &blk;
5082 5097 dva_t *dva = bp->blk_dva;
5083 5098 int flags = 0;
5084 5099 uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
5085 5100 zio_t *zio;
5086 5101 vdev_t *vd;
5087 5102 abd_t *pabd;
5088 5103 void *lbuf, *buf;
5089 5104 const char *s, *vdev;
5090 5105 char *p, *dup, *flagstr;
5091 5106 int i, error;
5092 5107
5093 5108 dup = strdup(thing);
5094 5109 s = strtok(dup, ":");
5095 5110 vdev = s ? s : "";
5096 5111 s = strtok(NULL, ":");
5097 5112 offset = strtoull(s ? s : "", NULL, 16);
5098 5113 s = strtok(NULL, ":");
5099 5114 size = strtoull(s ? s : "", NULL, 16);
5100 5115 s = strtok(NULL, ":");
5101 5116 if (s)
5102 5117 flagstr = strdup(s);
5103 5118 else
5104 5119 flagstr = strdup("");
5105 5120
5106 5121 s = NULL;
5107 5122 if (size == 0)
5108 5123 s = "size must not be zero";
5109 5124 if (!IS_P2ALIGNED(size, DEV_BSIZE))
5110 5125 s = "size must be a multiple of sector size";
5111 5126 if (!IS_P2ALIGNED(offset, DEV_BSIZE))
5112 5127 s = "offset must be a multiple of sector size";
5113 5128 if (s) {
5114 5129 (void) printf("Invalid block specifier: %s - %s\n", thing, s);
5115 5130 free(dup);
5116 5131 return;
5117 5132 }
5118 5133
5119 5134 for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
5120 5135 for (i = 0; flagstr[i]; i++) {
5121 5136 int bit = flagbits[(uchar_t)flagstr[i]];
5122 5137
5123 5138 if (bit == 0) {
5124 5139 (void) printf("***Invalid flag: %c\n",
5125 5140 flagstr[i]);
5126 5141 continue;
5127 5142 }
5128 5143 flags |= bit;
5129 5144
5130 5145 /* If it's not something with an argument, keep going */
5131 5146 if ((bit & (ZDB_FLAG_CHECKSUM |
5132 5147 ZDB_FLAG_PRINT_BLKPTR)) == 0)
5133 5148 continue;
5134 5149
5135 5150 p = &flagstr[i + 1];
5136 5151 if (bit == ZDB_FLAG_PRINT_BLKPTR)
5137 5152 blkptr_offset = strtoull(p, &p, 16);
5138 5153 if (*p != ':' && *p != '\0') {
5139 5154 (void) printf("***Invalid flag arg: '%s'\n", s);
5140 5155 free(dup);
5141 5156 return;
5142 5157 }
5143 5158 }
5144 5159 }
5145 5160 free(flagstr);
5146 5161
5147 5162 vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
5148 5163 if (vd == NULL) {
5149 5164 (void) printf("***Invalid vdev: %s\n", vdev);
5150 5165 free(dup);
5151 5166 return;
5152 5167 } else {
5153 5168 if (vd->vdev_path)
5154 5169 (void) fprintf(stderr, "Found vdev: %s\n",
5155 5170 vd->vdev_path);
5156 5171 else
5157 5172 (void) fprintf(stderr, "Found vdev type: %s\n",
5158 5173 vd->vdev_ops->vdev_op_type);
5159 5174 }
5160 5175
5161 5176 psize = size;
5162 5177 lsize = size;
5163 5178
5164 5179 pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
5165 5180 lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
5166 5181
5167 5182 BP_ZERO(bp);
5168 5183
5169 5184 DVA_SET_VDEV(&dva[0], vd->vdev_id);
5170 5185 DVA_SET_OFFSET(&dva[0], offset);
5171 5186 DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
5172 5187 DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
5173 5188
5174 5189 BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
5175 5190
5176 5191 BP_SET_LSIZE(bp, lsize);
5177 5192 BP_SET_PSIZE(bp, psize);
5178 5193 BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
5179 5194 BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
5180 5195 BP_SET_TYPE(bp, DMU_OT_NONE);
5181 5196 BP_SET_LEVEL(bp, 0);
5182 5197 BP_SET_DEDUP(bp, 0);
5183 5198 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
5184 5199
5185 5200 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
5186 5201 zio = zio_root(spa, NULL, NULL, 0);
5187 5202
5188 5203 if (vd == vd->vdev_top) {
5189 5204 /*
5190 5205 * Treat this as a normal block read.
5191 5206 */
5192 5207 zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
5193 5208 ZIO_PRIORITY_SYNC_READ,
5194 5209 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
5195 5210 } else {
5196 5211 /*
5197 5212 * Treat this as a vdev child I/O.
5198 5213 */
5199 5214 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
5200 5215 psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
5201 5216 ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
5202 5217 ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
5203 5218 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
5204 5219 NULL, NULL));
5205 5220 }
5206 5221
5207 5222 error = zio_wait(zio);
5208 5223 spa_config_exit(spa, SCL_STATE, FTAG);
5209 5224
5210 5225 if (error) {
5211 5226 (void) printf("Read of %s failed, error: %d\n", thing, error);
5212 5227 goto out;
5213 5228 }
5214 5229
5215 5230 if (flags & ZDB_FLAG_DECOMPRESS) {
5216 5231 /*
5217 5232 * We don't know how the data was compressed, so just try
5218 5233 * every decompress function at every inflated blocksize.
5219 5234 */
5220 5235 enum zio_compress c;
5221 5236 void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
5222 5237 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
5223 5238
5224 5239 abd_copy_to_buf(pbuf2, pabd, psize);
5225 5240
5226 5241 VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
5227 5242 random_get_pseudo_bytes_cb, NULL));
5228 5243
5229 5244 VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
5230 5245 SPA_MAXBLOCKSIZE - psize));
5231 5246
5232 5247 for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
5233 5248 lsize -= SPA_MINBLOCKSIZE) {
5234 5249 for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
5235 5250 if (zio_decompress_data(c, pabd,
5236 5251 lbuf, psize, lsize) == 0 &&
5237 5252 zio_decompress_data_buf(c, pbuf2,
5238 5253 lbuf2, psize, lsize) == 0 &&
5239 5254 bcmp(lbuf, lbuf2, lsize) == 0)
5240 5255 break;
5241 5256 }
5242 5257 if (c != ZIO_COMPRESS_FUNCTIONS)
5243 5258 break;
5245 5260 }
5246 5261
5247 5262 umem_free(pbuf2, SPA_MAXBLOCKSIZE);
5248 5263 umem_free(lbuf2, SPA_MAXBLOCKSIZE);
5249 5264
5250 5265 if (lsize <= psize) {
5251 5266 (void) printf("Decompress of %s failed\n", thing);
5252 5267 goto out;
5253 5268 }
5254 5269 buf = lbuf;
5255 5270 size = lsize;
5256 5271 } else {
5257 5272 buf = abd_to_buf(pabd);
5258 5273 size = psize;
5259 5274 }
5260 5275
5261 5276 if (flags & ZDB_FLAG_PRINT_BLKPTR)
5262 5277 zdb_print_blkptr((blkptr_t *)(void *)
5263 5278 ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
5264 5279 else if (flags & ZDB_FLAG_RAW)
5265 5280 zdb_dump_block_raw(buf, size, flags);
5266 5281 else if (flags & ZDB_FLAG_INDIRECT)
5267 5282 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
5268 5283 flags);
5269 5284 else if (flags & ZDB_FLAG_GBH)
5270 5285 zdb_dump_gbh(buf, flags);
5271 5286 else
5272 5287 zdb_dump_block(thing, buf, size, flags);
5273 5288
5274 5289 out:
5275 5290 abd_free(pabd);
5276 5291 umem_free(lbuf, SPA_MAXBLOCKSIZE);
5277 5292 free(dup);
5278 5293 }
5279 5294
5280 5295 static void
5281 5296 zdb_embedded_block(char *thing)
5282 5297 {
5283 5298 blkptr_t bp;
5284 5299 unsigned long long *words = (void *)&bp;
5285 5300 char *buf;
5286 5301 int err;
5287 5302
5288 5303 bzero(&bp, sizeof (bp));
5289 5304 err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:"
5290 5305 "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
5291 5306 words + 0, words + 1, words + 2, words + 3,
5292 5307 words + 4, words + 5, words + 6, words + 7,
5293 5308 words + 8, words + 9, words + 10, words + 11,
5294 5309 words + 12, words + 13, words + 14, words + 15);
5295 5310 if (err != 16) {
5296 5311 (void) fprintf(stderr, "invalid input format\n");
5297 5312 exit(1);
5298 5313 }
5299 5314 ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE);
5300 5315 buf = malloc(SPA_MAXBLOCKSIZE);
5301 5316 if (buf == NULL) {
5302 5317 (void) fprintf(stderr, "out of memory\n");
5303 5318 exit(1);
5304 5319 }
5305 5320 err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp));
5306 5321 if (err != 0) {
5307 5322 (void) fprintf(stderr, "decode failed: %u\n", err);
5308 5323 exit(1);
5309 5324 }
5310 5325 zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0);
5311 5326 free(buf);
5312 5327 }
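
The argument must supply all sixteen 64-bit words of the embedded block pointer, in hex and colon-separated, exactly as the sscanf() above expects. The shape of a (hypothetical) invocation, where each wN stands for one hex word:

	# zdb -E w0:w1:w2:w3:w4:w5:w6:w7:w8:w9:w10:w11:w12:w13:w14:w15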
5313 5328
5314 5329 int
5315 5330 main(int argc, char **argv)
5316 5331 {
5317 5332 int c;
5318 5333 struct rlimit rl = { 1024, 1024 };
5319 5334 spa_t *spa = NULL;
5320 5335 objset_t *os = NULL;
5321 5336 int dump_all = 1;
5322 5337 int verbose = 0;
5323 5338 int error = 0;
5324 5339 char **searchdirs = NULL;
5325 5340 int nsearch = 0;
5326 5341 char *target, *target_pool;
5327 5342 nvlist_t *policy = NULL;
5328 5343 uint64_t max_txg = UINT64_MAX;
5329 5344 int flags = ZFS_IMPORT_MISSING_LOG;
5330 5345 int rewind = ZPOOL_NEVER_REWIND;
5331 5346 char *spa_config_path_env;
5332 5347 boolean_t target_is_spa = B_TRUE;
5333 5348 nvlist_t *cfg = NULL;
5334 5349
5335 5350 (void) setrlimit(RLIMIT_NOFILE, &rl);
5336 5351 (void) enable_extended_FILE_stdio(-1, -1);
5337 5352
5338 5353 dprintf_setup(&argc, argv);
5339 5354
5340 5355 /*
5341 5356 * If the SPA_CONFIG_PATH environment variable is set, it overrides
5342 5357 * the default spa_config_path setting. If the -U flag is also given,
5343 5358 * it overrides the environment variable in turn.
5344 5359 */
5345 5360 spa_config_path_env = getenv("SPA_CONFIG_PATH");
5346 5361 if (spa_config_path_env != NULL)
5347 5362 spa_config_path = spa_config_path_env;
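
For example (hypothetical paths and pool name), the following run reads its pool configuration from the environment-supplied cache file, while adding -U /another/zpool.cache would take precedence over the variable:

	# SPA_CONFIG_PATH=/tmp/zpool.cache zdb -C tank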
5348 5363
5349 5364 while ((c = getopt(argc, argv,
5350 5365 "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
5351 5366 switch (c) {
5352 5367 case 'b':
5353 5368 case 'c':
5354 5369 case 'C':
5355 5370 case 'd':
5356 5371 case 'D':
5357 5372 case 'E':
5358 5373 case 'G':
5359 5374 case 'h':
5360 5375 case 'i':
5361 5376 case 'l':
5362 5377 case 'm':
5363 5378 case 'M':
5364 5379 case 'O':
5365 5380 case 'R':
5366 5381 case 's':
5367 5382 case 'S':
5368 5383 case 'u':
5369 5384 dump_opt[c]++;
5370 5385 dump_all = 0;
5371 5386 break;
5372 5387 case 'A':
5373 5388 case 'e':
5374 5389 case 'F':
5375 5390 case 'k':
5376 5391 case 'L':
5377 5392 case 'P':
5378 5393 case 'q':
5379 5394 case 'X':
5380 5395 dump_opt[c]++;
5381 5396 break;
5382 5397 /* NB: Sort single match options below. */
5383 5398 case 'I':
5384 5399 max_inflight = strtoull(optarg, NULL, 0);
5385 5400 if (max_inflight == 0) {
5386 5401 (void) fprintf(stderr, "maximum number "
5387 5402 "of inflight I/Os must be greater "
5388 5403 "than 0\n");
5389 5404 usage();
5390 5405 }
5391 5406 break;
5392 5407 case 'o':
5393 5408 error = set_global_var(optarg);
5394 5409 if (error != 0)
5395 5410 usage();
5396 5411 break;
5397 5412 case 'p':
5398 5413 if (searchdirs == NULL) {
5399 5414 searchdirs = umem_alloc(sizeof (char *),
5400 5415 UMEM_NOFAIL);
5401 5416 } else {
5402 5417 char **tmp = umem_alloc((nsearch + 1) *
5403 5418 sizeof (char *), UMEM_NOFAIL);
5404 5419 bcopy(searchdirs, tmp, nsearch *
5405 5420 sizeof (char *));
5406 5421 umem_free(searchdirs,
5407 5422 nsearch * sizeof (char *));
5408 5423 searchdirs = tmp;
5409 5424 }
5410 5425 searchdirs[nsearch++] = optarg;
5411 5426 break;
5412 5427 case 't':
5413 5428 max_txg = strtoull(optarg, NULL, 0);
5414 5429 if (max_txg < TXG_INITIAL) {
5415 5430 (void) fprintf(stderr, "incorrect txg "
5416 5431 "specified: %s\n", optarg);
5417 5432 usage();
5418 5433 }
5419 5434 break;
5420 5435 case 'U':
5421 5436 spa_config_path = optarg;
5422 5437 if (spa_config_path[0] != '/') {
5423 5438 (void) fprintf(stderr,
5424 5439 "cachefile must be an absolute path "
5425 5440 "(i.e. start with a slash)\n");
5426 5441 usage();
5427 5442 }
5428 5443 break;
5429 5444 case 'v':
5430 5445 verbose++;
5431 5446 break;
5432 5447 case 'V':
5433 5448 flags = ZFS_IMPORT_VERBATIM;
5434 5449 break;
5435 5450 case 'x':
5436 5451 vn_dumpdir = optarg;
5437 5452 break;
5438 5453 default:
5439 5454 usage();
5440 5455 break;
5441 5456 }
5442 5457 }
5443 5458
5444 5459 if (!dump_opt['e'] && searchdirs != NULL) {
5445 5460 (void) fprintf(stderr, "-p option requires use of -e\n");
5446 5461 usage();
5447 5462 }
5448 5463
5449 5464 /*
5450 5465 * ZDB does not typically re-read blocks; therefore limit the ARC
5451 5466 * to 256 MB, which can be used entirely for metadata.
5452 5467 */
5453 5468 zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
5454 5469
5455 5470 /*
5456 5471 * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
5457 5472 * "zdb -b" uses traversal prefetch which uses async reads.
5458 5473 * For good performance, let several of them be active at once.
5459 5474 */
5460 5475 zfs_vdev_async_read_max_active = 10;
5461 5476
5462 5477 /*
5463 5478 * Disable reference tracking for better performance.
5464 5479 */
5465 5480 reference_tracking_enable = B_FALSE;
5466 5481
5467 5482 /*
5468 5483 * Do not fail spa_load when spa_load_verify fails. This is needed
5469 5484 * to load non-idle pools.
5470 5485 */
5471 5486 spa_load_verify_dryrun = B_TRUE;
5472 5487
5473 5488 kernel_init(FREAD);
5474 5489 g_zfs = libzfs_init();
5475 5490 ASSERT(g_zfs != NULL);
5476 5491
5477 5492 if (dump_all)
5478 5493 verbose = MAX(verbose, 1);
5479 5494
5480 5495 for (c = 0; c < 256; c++) {
5481 5496 if (dump_all && strchr("AeEFklLOPRSX", c) == NULL)
5482 5497 dump_opt[c] = 1;
5483 5498 if (dump_opt[c])
5484 5499 dump_opt[c] += verbose;
5485 5500 }
5486 5501
5487 5502 aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
5488 5503 zfs_recover = (dump_opt['A'] > 1);
5489 5504
5490 5505 argc -= optind;
5491 5506 argv += optind;
5492 5507
5493 5508 if (argc < 2 && dump_opt['R'])
5494 5509 usage();
5495 5510
5496 5511 if (dump_opt['E']) {
5497 5512 if (argc != 1)
5498 5513 usage();
5499 5514 zdb_embedded_block(argv[0]);
5500 5515 return (0);
5501 5516 }
5502 5517
5503 5518 if (argc < 1) {
5504 5519 if (!dump_opt['e'] && dump_opt['C']) {
5505 5520 dump_cachefile(spa_config_path);
5506 5521 return (0);
5507 5522 }
5508 5523 usage();
5509 5524 }
5510 5525
5511 5526 if (dump_opt['l'])
5512 5527 return (dump_label(argv[0]));
5513 5528
5514 5529 if (dump_opt['O']) {
5515 5530 if (argc != 2)
5516 5531 usage();
5517 5532 dump_opt['v'] = verbose + 3;
5518 5533 return (dump_path(argv[0], argv[1]));
5519 5534 }
5520 5535
5521 5536 if (dump_opt['X'] || dump_opt['F'])
5522 5537 rewind = ZPOOL_DO_REWIND |
5523 5538 (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
5524 5539
5525 5540 if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
5526 5541 nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 ||
5527 5542 nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0)
5528 5543 fatal("internal error: %s", strerror(ENOMEM));
5529 5544
5530 5545 error = 0;
5531 5546 target = argv[0];
5532 5547
5533 5548 if (strpbrk(target, "/@") != NULL) {
5534 5549 size_t targetlen;
5535 5550
5536 5551 target_pool = strdup(target);
5537 5552 *strpbrk(target_pool, "/@") = '\0';
5538 5553
5539 5554 target_is_spa = B_FALSE;
5540 5555 targetlen = strlen(target);
5541 5556 if (targetlen && target[targetlen - 1] == '/')
5542 5557 target[targetlen - 1] = '\0';
5543 5558 } else {
5544 5559 target_pool = target;
5545 5560 }
5546 5561
5547 5562 if (dump_opt['e']) {
5548 5563 importargs_t args = { 0 };
5549 5564
5550 5565 args.paths = nsearch;
5551 5566 args.path = searchdirs;
5552 5567 args.can_be_active = B_TRUE;
5553 5568
5554 5569 error = zpool_tryimport(g_zfs, target_pool, &cfg, &args);
5555 5570
5556 5571 if (error == 0) {
5557 5572
5558 5573 if (nvlist_add_nvlist(cfg,
5559 5574 ZPOOL_LOAD_POLICY, policy) != 0) {
5560 5575 fatal("can't open '%s': %s",
5561 5576 target, strerror(ENOMEM));
5562 5577 }
5563 5578
5564 5579 if (dump_opt['C'] > 1) {
5565 5580 (void) printf("\nConfiguration for import:\n");
5566 5581 dump_nvlist(cfg, 8);
5567 5582 }
5568 5583
5569 5584 /*
5570 5585 * Disable the activity check to allow examination of
5571 5586 * active pools.
5572 5587 */
5573 5588 error = spa_import(target_pool, cfg, NULL,
5574 5589 flags | ZFS_IMPORT_SKIP_MMP);
5575 5590 }
5576 5591 }
5577 5592
5578 5593 char *checkpoint_pool = NULL;
5579 5594 char *checkpoint_target = NULL;
5580 5595 if (dump_opt['k']) {
5581 5596 checkpoint_pool = import_checkpointed_state(target, cfg,
5582 5597 &checkpoint_target);
5583 5598
5584 5599 if (checkpoint_target != NULL)
5585 5600 target = checkpoint_target;
5586 5601
5587 5602 }
5588 5603
5589 5604 if (error == 0) {
5590 5605 if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
5591 5606 ASSERT(checkpoint_pool != NULL);
5592 5607 ASSERT(checkpoint_target == NULL);
5593 5608
5594 5609 error = spa_open(checkpoint_pool, &spa, FTAG);
5595 5610 if (error != 0) {
5596 5611 fatal("Tried to open pool \"%s\" but "
5597 5612 "spa_open() failed with error %d\n",
5598 5613 checkpoint_pool, error);
5599 5614 }
5600 5615
5601 5616 } else if (target_is_spa || dump_opt['R']) {
5602 5617 zdb_set_skip_mmp(target);
5603 5618 error = spa_open_rewind(target, &spa, FTAG, policy,
5604 5619 NULL);
5605 5620 if (error) {
5606 5621 /*
5607 5622 * If we're missing the log device then
5608 5623 * try opening the pool after clearing the
5609 5624 * log state.
5610 5625 */
5611 5626 mutex_enter(&spa_namespace_lock);
5612 5627 if ((spa = spa_lookup(target)) != NULL &&
5613 5628 spa->spa_log_state == SPA_LOG_MISSING) {
5614 5629 spa->spa_log_state = SPA_LOG_CLEAR;
5615 5630 error = 0;
5616 5631 }
5617 5632 mutex_exit(&spa_namespace_lock);
5618 5633
5619 5634 if (!error) {
5620 5635 error = spa_open_rewind(target, &spa,
5621 5636 FTAG, policy, NULL);
5622 5637 }
5623 5638 }
5624 5639 } else {
5625 5640 zdb_set_skip_mmp(target);
5626 5641 error = open_objset(target, DMU_OST_ANY, FTAG, &os);
5627 5642 }
5628 5643 }
5629 5644 nvlist_free(policy);
5630 5645
5631 5646 if (error)
5632 5647 fatal("can't open '%s': %s", target, strerror(error));
5633 5648
5634 5649 argv++;
5635 5650 argc--;
5636 5651 if (!dump_opt['R']) {
5637 5652 if (argc > 0) {
5638 5653 zopt_objects = argc;
5639 5654 zopt_object = calloc(zopt_objects, sizeof (uint64_t));
5640 5655 for (unsigned i = 0; i < zopt_objects; i++) {
5641 5656 errno = 0;
5642 5657 zopt_object[i] = strtoull(argv[i], NULL, 0);
5643 5658 if (zopt_object[i] == 0 && errno != 0)
5644 5659 fatal("bad number %s: %s",
5645 5660 argv[i], strerror(errno));
5646 5661 }
5647 5662 }
5648 5663 if (os != NULL) {
5649 5664 dump_dir(os);
5650 5665 } else if (zopt_objects > 0 && !dump_opt['m']) {
5651 5666 dump_dir(spa->spa_meta_objset);
5652 5667 } else {
5653 5668 dump_zpool(spa);
5654 5669 }
5655 5670 } else {
5656 5671 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
5657 5672 flagbits['c'] = ZDB_FLAG_CHECKSUM;
5658 5673 flagbits['d'] = ZDB_FLAG_DECOMPRESS;
5659 5674 flagbits['e'] = ZDB_FLAG_BSWAP;
5660 5675 flagbits['g'] = ZDB_FLAG_GBH;
5661 5676 flagbits['i'] = ZDB_FLAG_INDIRECT;
5662 5677 flagbits['p'] = ZDB_FLAG_PHYS;
5663 5678 flagbits['r'] = ZDB_FLAG_RAW;
5664 5679
5665 5680 for (int i = 0; i < argc; i++)
5666 5681 zdb_read_block(argv[i], spa);
5667 5682 }
5668 5683
5669 5684 if (dump_opt['k']) {
5670 5685 free(checkpoint_pool);
5671 5686 if (!target_is_spa)
5672 5687 free(checkpoint_target);
5673 5688 }
5674 5689
5675 5690 if (os != NULL)
5676 5691 close_objset(os, FTAG);
5677 5692 else
5678 5693 spa_close(spa, FTAG);
5679 5694
5680 5695 fuid_table_destroy();
5681 5696
5682 5697 dump_debug_buffer();
5683 5698
5684 5699 libzfs_fini(g_zfs);
5685 5700 kernel_fini();
5686 5701
5687 5702 return (error);
5688 5703 }