Print this page
4374 dn_free_ranges should use range_tree_t
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Max Grossman <max.grossman@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/zdb/zdb.c
+++ new/usr/src/cmd/zdb/zdb.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 - * Copyright (c) 2013 by Delphix. All rights reserved.
24 + * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
25 25 */
26 26
27 27 #include <stdio.h>
28 28 #include <unistd.h>
29 29 #include <stdio_ext.h>
30 30 #include <stdlib.h>
31 31 #include <ctype.h>
32 32 #include <sys/zfs_context.h>
33 33 #include <sys/spa.h>
34 34 #include <sys/spa_impl.h>
35 35 #include <sys/dmu.h>
36 36 #include <sys/zap.h>
37 37 #include <sys/fs/zfs.h>
38 38 #include <sys/zfs_znode.h>
39 39 #include <sys/zfs_sa.h>
40 40 #include <sys/sa.h>
41 41 #include <sys/sa_impl.h>
42 42 #include <sys/vdev.h>
43 43 #include <sys/vdev_impl.h>
44 44 #include <sys/metaslab_impl.h>
45 45 #include <sys/dmu_objset.h>
46 46 #include <sys/dsl_dir.h>
47 47 #include <sys/dsl_dataset.h>
48 48 #include <sys/dsl_pool.h>
49 49 #include <sys/dbuf.h>
50 50 #include <sys/zil.h>
51 51 #include <sys/zil_impl.h>
52 52 #include <sys/stat.h>
53 53 #include <sys/resource.h>
54 54 #include <sys/dmu_traverse.h>
55 55 #include <sys/zio_checksum.h>
56 56 #include <sys/zio_compress.h>
57 57 #include <sys/zfs_fuid.h>
58 58 #include <sys/arc.h>
59 59 #include <sys/ddt.h>
60 60 #include <sys/zfeature.h>
61 61 #include <zfs_comutil.h>
62 62 #undef ZFS_MAXNAMELEN
63 63 #undef verify
64 64 #include <libzfs.h>
65 65
66 66 #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
67 67 zio_compress_table[(idx)].ci_name : "UNKNOWN")
68 68 #define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
69 69 zio_checksum_table[(idx)].ci_name : "UNKNOWN")
70 70 #define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
71 71 dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? \
72 72 dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
73 73 #define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \
74 74 (((idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA) ? \
75 75 DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES))
76 76
77 77 #ifndef lint
78 78 extern int zfs_recover;
79 79 #else
80 80 int zfs_recover;
81 81 #endif
82 82
83 83 const char cmdname[] = "zdb";
84 84 uint8_t dump_opt[256];
85 85
86 86 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
87 87
88 88 extern void dump_intent_log(zilog_t *);
89 89 uint64_t *zopt_object = NULL;
90 90 int zopt_objects = 0;
91 91 libzfs_handle_t *g_zfs;
92 92 uint64_t max_inflight = 200;
93 93
/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */

/*
 * Default value for the $UMEM_DEBUG setting.
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose");	/* $UMEM_DEBUG setting */
}

/*
 * Default value for the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	return ("fail,contents");	/* $UMEM_LOGGING setting */
}
109 109
110 110 static void
111 111 usage(void)
112 112 {
113 113 (void) fprintf(stderr,
114 114 "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
115 115 "[-U config] [-M inflight I/Os] poolname [object...]\n"
116 116 " %s [-divPA] [-e -p path...] [-U config] dataset "
117 117 "[object...]\n"
118 118 " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
119 119 "poolname [vdev [metaslab...]]\n"
120 120 " %s -R [-A] [-e [-p path...]] poolname "
121 121 "vdev:offset:size[:flags]\n"
122 122 " %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
123 123 " %s -l [-uA] device\n"
124 124 " %s -C [-A] [-U config]\n\n",
125 125 cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
126 126
127 127 (void) fprintf(stderr, " Dataset name must include at least one "
128 128 "separator character '/' or '@'\n");
129 129 (void) fprintf(stderr, " If dataset name is specified, only that "
130 130 "dataset is dumped\n");
131 131 (void) fprintf(stderr, " If object numbers are specified, only "
132 132 "those objects are dumped\n\n");
133 133 (void) fprintf(stderr, " Options to control amount of output:\n");
134 134 (void) fprintf(stderr, " -u uberblock\n");
135 135 (void) fprintf(stderr, " -d dataset(s)\n");
136 136 (void) fprintf(stderr, " -i intent logs\n");
137 137 (void) fprintf(stderr, " -C config (or cachefile if alone)\n");
138 138 (void) fprintf(stderr, " -h pool history\n");
139 139 (void) fprintf(stderr, " -b block statistics\n");
140 140 (void) fprintf(stderr, " -m metaslabs\n");
141 141 (void) fprintf(stderr, " -c checksum all metadata (twice for "
142 142 "all data) blocks\n");
143 143 (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
144 144 (void) fprintf(stderr, " -D dedup statistics\n");
145 145 (void) fprintf(stderr, " -S simulate dedup to measure effect\n");
146 146 (void) fprintf(stderr, " -v verbose (applies to all others)\n");
147 147 (void) fprintf(stderr, " -l dump label contents\n");
148 148 (void) fprintf(stderr, " -L disable leak tracking (do not "
149 149 "load spacemaps)\n");
150 150 (void) fprintf(stderr, " -R read and display block from a "
151 151 "device\n\n");
152 152 (void) fprintf(stderr, " Below options are intended for use "
153 153 "with other options (except -l):\n");
154 154 (void) fprintf(stderr, " -A ignore assertions (-A), enable "
155 155 "panic recovery (-AA) or both (-AAA)\n");
156 156 (void) fprintf(stderr, " -F attempt automatic rewind within "
157 157 "safe range of transaction groups\n");
158 158 (void) fprintf(stderr, " -U <cachefile_path> -- use alternate "
159 159 "cachefile\n");
160 160 (void) fprintf(stderr, " -X attempt extreme rewind (does not "
161 161 "work with dataset)\n");
162 162 (void) fprintf(stderr, " -e pool is exported/destroyed/"
163 163 "has altroot/not in a cachefile\n");
164 164 (void) fprintf(stderr, " -p <path> -- use one or more with "
165 165 "-e to specify path to vdev dir\n");
166 166 (void) fprintf(stderr, " -P print numbers in parseable form\n");
167 167 (void) fprintf(stderr, " -t <txg> -- highest txg to use when "
168 168 "searching for uberblocks\n");
169 169 (void) fprintf(stderr, " -M <number of inflight I/Os> -- "
170 170 "specify the maximum number of checksumming I/Os [default is 200]");
171 171 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
172 172 "to make only that option verbose\n");
173 173 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
174 174 exit(1);
175 175 }
176 176
177 177 /*
178 178 * Called for usage errors that are discovered after a call to spa_open(),
179 179 * dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
180 180 */
181 181
182 182 static void
183 183 fatal(const char *fmt, ...)
184 184 {
185 185 va_list ap;
186 186
187 187 va_start(ap, fmt);
188 188 (void) fprintf(stderr, "%s: ", cmdname);
189 189 (void) vfprintf(stderr, fmt, ap);
190 190 va_end(ap);
191 191 (void) fprintf(stderr, "\n");
192 192
193 193 exit(1);
194 194 }
195 195
196 196 /* ARGSUSED */
197 197 static void
198 198 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
199 199 {
200 200 nvlist_t *nv;
201 201 size_t nvsize = *(uint64_t *)data;
202 202 char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
203 203
204 204 VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
205 205
206 206 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
207 207
208 208 umem_free(packed, nvsize);
209 209
210 210 dump_nvlist(nv, 8);
211 211
212 212 nvlist_free(nv);
213 213 }
214 214
215 215 /* ARGSUSED */
216 216 static void
217 217 dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
218 218 {
219 219 spa_history_phys_t *shp = data;
220 220
221 221 if (shp == NULL)
222 222 return;
223 223
224 224 (void) printf("\t\tpool_create_len = %llu\n",
225 225 (u_longlong_t)shp->sh_pool_create_len);
226 226 (void) printf("\t\tphys_max_off = %llu\n",
227 227 (u_longlong_t)shp->sh_phys_max_off);
228 228 (void) printf("\t\tbof = %llu\n",
229 229 (u_longlong_t)shp->sh_bof);
230 230 (void) printf("\t\teof = %llu\n",
231 231 (u_longlong_t)shp->sh_eof);
232 232 (void) printf("\t\trecords_lost = %llu\n",
233 233 (u_longlong_t)shp->sh_records_lost);
234 234 }
235 235
236 236 static void
237 237 zdb_nicenum(uint64_t num, char *buf)
238 238 {
239 239 if (dump_opt['P'])
240 240 (void) sprintf(buf, "%llu", (longlong_t)num);
241 241 else
242 242 nicenum(num, buf);
243 243 }
244 244
/* Full-width bar; shorter bars are printed by starting further into it. */
const char histo_stars[] = "****************************************";
const int histo_width = sizeof (histo_stars) - 1;

/*
 * Print one line per bucket of 'histo' (which has 'size' buckets), from the
 * lowest non-empty bucket to the highest non-empty one.  Each line shows
 * the bucket index plus 'offset', the bucket count, and a bar of stars
 * scaled against the largest count (or against histo_width if every count
 * is smaller than that).
 */
static void
dump_histogram(const uint64_t *histo, int size, int offset)
{
	int first = size - 1;
	int last = 0;
	uint64_t peak = 0;
	int b;

	/* Locate the non-empty range and the tallest bucket. */
	for (b = 0; b < size; b++) {
		if (histo[b] == 0)
			continue;
		if (histo[b] > peak)
			peak = histo[b];
		if (b > last)
			last = b;
		if (b < first)
			first = b;
	}

	/* Never scale against less than one star per count. */
	if (peak < histo_width)
		peak = histo_width;

	for (b = first; b <= last; b++) {
		/* Number of leading stars to skip; histo_width yields "". */
		uint64_t skip = (peak - histo[b]) * histo_width / peak;

		(void) printf("\t\t\t%3u: %6llu %s\n",
		    b + offset, (unsigned long long)histo[b],
		    histo_stars + skip);
	}
}
274 274
275 275 static void
276 276 dump_zap_stats(objset_t *os, uint64_t object)
277 277 {
278 278 int error;
279 279 zap_stats_t zs;
280 280
281 281 error = zap_get_stats(os, object, &zs);
282 282 if (error)
283 283 return;
284 284
285 285 if (zs.zs_ptrtbl_len == 0) {
286 286 ASSERT(zs.zs_num_blocks == 1);
287 287 (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
288 288 (u_longlong_t)zs.zs_blocksize,
289 289 (u_longlong_t)zs.zs_num_entries);
290 290 return;
291 291 }
292 292
293 293 (void) printf("\tFat ZAP stats:\n");
294 294
295 295 (void) printf("\t\tPointer table:\n");
296 296 (void) printf("\t\t\t%llu elements\n",
297 297 (u_longlong_t)zs.zs_ptrtbl_len);
298 298 (void) printf("\t\t\tzt_blk: %llu\n",
299 299 (u_longlong_t)zs.zs_ptrtbl_zt_blk);
300 300 (void) printf("\t\t\tzt_numblks: %llu\n",
301 301 (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
302 302 (void) printf("\t\t\tzt_shift: %llu\n",
303 303 (u_longlong_t)zs.zs_ptrtbl_zt_shift);
304 304 (void) printf("\t\t\tzt_blks_copied: %llu\n",
305 305 (u_longlong_t)zs.zs_ptrtbl_blks_copied);
306 306 (void) printf("\t\t\tzt_nextblk: %llu\n",
307 307 (u_longlong_t)zs.zs_ptrtbl_nextblk);
308 308
309 309 (void) printf("\t\tZAP entries: %llu\n",
310 310 (u_longlong_t)zs.zs_num_entries);
311 311 (void) printf("\t\tLeaf blocks: %llu\n",
312 312 (u_longlong_t)zs.zs_num_leafs);
313 313 (void) printf("\t\tTotal blocks: %llu\n",
314 314 (u_longlong_t)zs.zs_num_blocks);
315 315 (void) printf("\t\tzap_block_type: 0x%llx\n",
316 316 (u_longlong_t)zs.zs_block_type);
317 317 (void) printf("\t\tzap_magic: 0x%llx\n",
318 318 (u_longlong_t)zs.zs_magic);
319 319 (void) printf("\t\tzap_salt: 0x%llx\n",
320 320 (u_longlong_t)zs.zs_salt);
321 321
322 322 (void) printf("\t\tLeafs with 2^n pointers:\n");
323 323 dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
324 324
325 325 (void) printf("\t\tBlocks with n*5 entries:\n");
326 326 dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
327 327
328 328 (void) printf("\t\tBlocks n/10 full:\n");
329 329 dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
330 330
331 331 (void) printf("\t\tEntries with n chunks:\n");
332 332 dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
333 333
334 334 (void) printf("\t\tBuckets with n entries:\n");
335 335 dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
336 336 }
337 337
/*
 * Object viewer that prints nothing.
 */
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}
343 343
/*
 * Object viewer that only reports that the object type is not known.
 */
/*ARGSUSED*/
static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) printf("\tUNKNOWN OBJECT TYPE\n");
}
350 350
/*
 * Object viewer with an intentionally empty body; nothing is printed.
 * Unlike its siblings this one is not declared static.
 */
/*ARGSUSED*/
void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}
356 356
/*
 * Object viewer with an intentionally empty body; nothing is printed.
 */
/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
}
362 362
363 363 /*ARGSUSED*/
364 364 static void
365 365 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
366 366 {
367 367 zap_cursor_t zc;
368 368 zap_attribute_t attr;
369 369 void *prop;
370 370 int i;
371 371
372 372 dump_zap_stats(os, object);
373 373 (void) printf("\n");
374 374
375 375 for (zap_cursor_init(&zc, os, object);
376 376 zap_cursor_retrieve(&zc, &attr) == 0;
377 377 zap_cursor_advance(&zc)) {
378 378 (void) printf("\t\t%s = ", attr.za_name);
379 379 if (attr.za_num_integers == 0) {
380 380 (void) printf("\n");
381 381 continue;
382 382 }
383 383 prop = umem_zalloc(attr.za_num_integers *
384 384 attr.za_integer_length, UMEM_NOFAIL);
385 385 (void) zap_lookup(os, object, attr.za_name,
386 386 attr.za_integer_length, attr.za_num_integers, prop);
387 387 if (attr.za_integer_length == 1) {
388 388 (void) printf("%s", (char *)prop);
389 389 } else {
390 390 for (i = 0; i < attr.za_num_integers; i++) {
391 391 switch (attr.za_integer_length) {
392 392 case 2:
393 393 (void) printf("%u ",
394 394 ((uint16_t *)prop)[i]);
395 395 break;
396 396 case 4:
397 397 (void) printf("%u ",
398 398 ((uint32_t *)prop)[i]);
399 399 break;
400 400 case 8:
401 401 (void) printf("%lld ",
402 402 (u_longlong_t)((int64_t *)prop)[i]);
403 403 break;
404 404 }
405 405 }
406 406 }
407 407 (void) printf("\n");
408 408 umem_free(prop, attr.za_num_integers * attr.za_integer_length);
409 409 }
410 410 zap_cursor_fini(&zc);
411 411 }
412 412
/*
 * Object viewer for DDT ZAP objects: prints only the ZAP statistics.
 */
/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
	dump_zap_stats(os, object);
	/* contents are printed elsewhere, properly decoded */
}
420 420
421 421 /*ARGSUSED*/
422 422 static void
423 423 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
424 424 {
425 425 zap_cursor_t zc;
426 426 zap_attribute_t attr;
427 427
428 428 dump_zap_stats(os, object);
429 429 (void) printf("\n");
430 430
431 431 for (zap_cursor_init(&zc, os, object);
432 432 zap_cursor_retrieve(&zc, &attr) == 0;
433 433 zap_cursor_advance(&zc)) {
434 434 (void) printf("\t\t%s = ", attr.za_name);
435 435 if (attr.za_num_integers == 0) {
436 436 (void) printf("\n");
437 437 continue;
438 438 }
439 439 (void) printf(" %llx : [%d:%d:%d]\n",
440 440 (u_longlong_t)attr.za_first_integer,
441 441 (int)ATTR_LENGTH(attr.za_first_integer),
442 442 (int)ATTR_BSWAP(attr.za_first_integer),
443 443 (int)ATTR_NUM(attr.za_first_integer));
444 444 }
445 445 zap_cursor_fini(&zc);
446 446 }
447 447
448 448 /*ARGSUSED*/
449 449 static void
450 450 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
451 451 {
452 452 zap_cursor_t zc;
453 453 zap_attribute_t attr;
454 454 uint16_t *layout_attrs;
455 455 int i;
456 456
457 457 dump_zap_stats(os, object);
458 458 (void) printf("\n");
459 459
460 460 for (zap_cursor_init(&zc, os, object);
461 461 zap_cursor_retrieve(&zc, &attr) == 0;
462 462 zap_cursor_advance(&zc)) {
463 463 (void) printf("\t\t%s = [", attr.za_name);
464 464 if (attr.za_num_integers == 0) {
465 465 (void) printf("\n");
466 466 continue;
467 467 }
468 468
469 469 VERIFY(attr.za_integer_length == 2);
470 470 layout_attrs = umem_zalloc(attr.za_num_integers *
471 471 attr.za_integer_length, UMEM_NOFAIL);
472 472
473 473 VERIFY(zap_lookup(os, object, attr.za_name,
474 474 attr.za_integer_length,
475 475 attr.za_num_integers, layout_attrs) == 0);
476 476
477 477 for (i = 0; i != attr.za_num_integers; i++)
478 478 (void) printf(" %d ", (int)layout_attrs[i]);
479 479 (void) printf("]\n");
480 480 umem_free(layout_attrs,
481 481 attr.za_num_integers * attr.za_integer_length);
482 482 }
483 483 zap_cursor_fini(&zc);
484 484 }
485 485
486 486 /*ARGSUSED*/
487 487 static void
488 488 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
489 489 {
490 490 zap_cursor_t zc;
491 491 zap_attribute_t attr;
492 492 const char *typenames[] = {
493 493 /* 0 */ "not specified",
494 494 /* 1 */ "FIFO",
495 495 /* 2 */ "Character Device",
496 496 /* 3 */ "3 (invalid)",
497 497 /* 4 */ "Directory",
498 498 /* 5 */ "5 (invalid)",
499 499 /* 6 */ "Block Device",
500 500 /* 7 */ "7 (invalid)",
501 501 /* 8 */ "Regular File",
502 502 /* 9 */ "9 (invalid)",
503 503 /* 10 */ "Symbolic Link",
504 504 /* 11 */ "11 (invalid)",
505 505 /* 12 */ "Socket",
506 506 /* 13 */ "Door",
507 507 /* 14 */ "Event Port",
508 508 /* 15 */ "15 (invalid)",
509 509 };
510 510
511 511 dump_zap_stats(os, object);
512 512 (void) printf("\n");
513 513
514 514 for (zap_cursor_init(&zc, os, object);
515 515 zap_cursor_retrieve(&zc, &attr) == 0;
516 516 zap_cursor_advance(&zc)) {
517 517 (void) printf("\t\t%s = %lld (type: %s)\n",
518 518 attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
519 519 typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
520 520 }
521 521 zap_cursor_fini(&zc);
522 522 }
523 523
524 524 int
525 525 get_dtl_refcount(vdev_t *vd)
526 526 {
527 527 int refcount = 0;
528 528
529 529 if (vd->vdev_ops->vdev_op_leaf) {
530 530 space_map_t *sm = vd->vdev_dtl_sm;
531 531
532 532 if (sm != NULL &&
533 533 sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
534 534 return (1);
535 535 return (0);
536 536 }
537 537
538 538 for (int c = 0; c < vd->vdev_children; c++)
539 539 refcount += get_dtl_refcount(vd->vdev_child[c]);
540 540 return (refcount);
541 541 }
542 542
543 543 int
544 544 get_metaslab_refcount(vdev_t *vd)
545 545 {
546 546 int refcount = 0;
547 547
548 548 if (vd->vdev_top == vd) {
549 549 for (int m = 0; m < vd->vdev_ms_count; m++) {
550 550 space_map_t *sm = vd->vdev_ms[m]->ms_sm;
551 551
552 552 if (sm != NULL &&
553 553 sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
554 554 refcount++;
555 555 }
556 556 }
557 557 for (int c = 0; c < vd->vdev_children; c++)
558 558 refcount += get_metaslab_refcount(vd->vdev_child[c]);
559 559
560 560 return (refcount);
561 561 }
562 562
563 563 static int
564 564 verify_spacemap_refcounts(spa_t *spa)
565 565 {
566 566 uint64_t expected_refcount = 0;
567 567 uint64_t actual_refcount;
568 568
569 569 (void) feature_get_refcount(spa,
570 570 &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
571 571 &expected_refcount);
572 572 actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
573 573 actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
574 574
575 575 if (expected_refcount != actual_refcount) {
576 576 (void) printf("space map refcount mismatch: expected %lld != "
577 577 "actual %lld\n",
578 578 (longlong_t)expected_refcount,
579 579 (longlong_t)actual_refcount);
580 580 return (2);
581 581 }
582 582 return (0);
583 583 }
584 584
/*
 * Print every entry of space map 'sm', read from object set 'os', and check
 * that the net allocation implied by the entries matches the allocated
 * amount reported by space_map_allocated().  A NULL 'sm' prints nothing.
 */
static void
dump_spacemap(objset_t *os, space_map_t *sm)
{
	uint64_t alloc, offset, entry;
	/* Names for the debug-entry action codes, indexed by the code. */
	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
	    "INVALID", "INVALID", "INVALID", "INVALID" };

	if (sm == NULL)
		return;

	/*
	 * Print out the freelist entries in both encoded and decoded form.
	 */
	alloc = 0;
	for (offset = 0; offset < space_map_length(sm);
	    offset += sizeof (entry)) {
		uint8_t mapshift = sm->sm_shift;

		/* Entries are read one 64-bit word at a time. */
		VERIFY0(dmu_read(os, space_map_object(sm), offset,
		    sizeof (entry), &entry, DMU_READ_PREFETCH));
		if (SM_DEBUG_DECODE(entry)) {

			/* Debug entry: action, txg and sync pass. */
			(void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
			    (u_longlong_t)(offset / sizeof (entry)),
			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
		} else {
			/*
			 * Range entry: offset and run are stored in units of
			 * 2^mapshift relative to sm_start.
			 */
			(void) printf("\t [%6llu] %c range:"
			    " %010llx-%010llx size: %06llx\n",
			    (u_longlong_t)(offset / sizeof (entry)),
			    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
			    mapshift) + sm->sm_start),
			    (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
			    mapshift) + sm->sm_start +
			    (SM_RUN_DECODE(entry) << mapshift)),
			    (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
			/* Allocations add to the running total, frees subtract. */
			if (SM_TYPE_DECODE(entry) == SM_ALLOC)
				alloc += SM_RUN_DECODE(entry) << mapshift;
			else
				alloc -= SM_RUN_DECODE(entry) << mapshift;
		}
	}
	if (alloc != space_map_allocated(sm)) {
		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
		    "with space map summary (%llu)\n",
		    (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
	}
}
635 635
636 636 static void
637 637 dump_metaslab_stats(metaslab_t *msp)
638 638 {
639 639 char maxbuf[32];
640 640 range_tree_t *rt = msp->ms_tree;
641 641 avl_tree_t *t = &msp->ms_size_tree;
642 642 int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
643 643
644 644 zdb_nicenum(metaslab_block_maxsize(msp), maxbuf);
645 645
646 646 (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
647 647 "segments", avl_numnodes(t), "maxsize", maxbuf,
648 648 "freepct", free_pct);
649 649 (void) printf("\tIn-memory histogram:\n");
650 650 dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
651 651 }
652 652
/*
 * Print a one-line summary of a metaslab and, depending on how many times
 * -m / -d were given, its in-memory statistics, its on-disk histogram and
 * the full contents of its space map.
 */
static void
dump_metaslab(metaslab_t *msp)
{
	vdev_t *vd = msp->ms_group->mg_vd;
	spa_t *spa = vd->vdev_spa;
	space_map_t *sm = msp->ms_sm;
	char freebuf[32];

	/*
	 * NOTE(review): sm is checked against NULL further down, so the
	 * space_map_allocated()/space_map_object() calls here presumably
	 * tolerate a NULL space map -- confirm against space_map.c.
	 */
	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf);

	(void) printf(
	    "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n",
	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
	    (u_longlong_t)space_map_object(sm), freebuf);

	/* In-memory statistics need the metaslab loaded; skipped with -L. */
	if (dump_opt['m'] > 2 && !dump_opt['L']) {
		mutex_enter(&msp->ms_lock);
		metaslab_load_wait(msp);
		if (!msp->ms_loaded) {
			VERIFY0(metaslab_load(msp));
			range_tree_stat_verify(msp->ms_tree);
		}
		dump_metaslab_stats(msp);
		/* The metaslab is unloaded even if it was already loaded. */
		metaslab_unload(msp);
		mutex_exit(&msp->ms_lock);
	}

	if (dump_opt['m'] > 1 && sm != NULL &&
	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
		/*
		 * The space map histogram represents free space in chunks
		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
		 */
		(void) printf("\tOn-disk histogram:\n");
		dump_histogram(sm->sm_phys->smp_histogram,
		    SPACE_MAP_HISTOGRAM_SIZE(sm), sm->sm_shift);
	}

	/* Full space map dump; dump_spacemap() ignores a NULL space map. */
	if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
		ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));

		mutex_enter(&msp->ms_lock);
		dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
		mutex_exit(&msp->ms_lock);
	}
}
699 699
/*
 * Print the per-vdev heading for the metaslab listing: the vdev id, the
 * metaslab count with the column titles, and a row of dashes underneath.
 */
static void
print_vdev_metaslab_header(vdev_t *vd)
{
	(void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s %-10s\n",
	    (u_longlong_t)vd->vdev_id,
	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
	    "offset", "spacemap", "free");
	(void) printf("\t%15s %19s %15s %10s\n",
	    "---------------", "-------------------",
	    "---------------", "-------------");
}
711 711
712 712 static void
713 713 dump_metaslabs(spa_t *spa)
714 714 {
715 715 vdev_t *vd, *rvd = spa->spa_root_vdev;
716 716 uint64_t m, c = 0, children = rvd->vdev_children;
717 717
718 718 (void) printf("\nMetaslabs:\n");
719 719
720 720 if (!dump_opt['d'] && zopt_objects > 0) {
721 721 c = zopt_object[0];
722 722
723 723 if (c >= children)
724 724 (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
725 725
726 726 if (zopt_objects > 1) {
727 727 vd = rvd->vdev_child[c];
728 728 print_vdev_metaslab_header(vd);
729 729
730 730 for (m = 1; m < zopt_objects; m++) {
731 731 if (zopt_object[m] < vd->vdev_ms_count)
732 732 dump_metaslab(
733 733 vd->vdev_ms[zopt_object[m]]);
734 734 else
735 735 (void) fprintf(stderr, "bad metaslab "
736 736 "number %llu\n",
737 737 (u_longlong_t)zopt_object[m]);
738 738 }
739 739 (void) printf("\n");
740 740 return;
741 741 }
742 742 children = c + 1;
743 743 }
744 744 for (; c < children; c++) {
745 745 vd = rvd->vdev_child[c];
746 746 print_vdev_metaslab_header(vd);
747 747
748 748 for (m = 0; m < vd->vdev_ms_count; m++)
749 749 dump_metaslab(vd->vdev_ms[m]);
750 750 (void) printf("\n");
751 751 }
752 752 }
753 753
754 754 static void
755 755 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
756 756 {
757 757 const ddt_phys_t *ddp = dde->dde_phys;
758 758 const ddt_key_t *ddk = &dde->dde_key;
759 759 char *types[4] = { "ditto", "single", "double", "triple" };
760 760 char blkbuf[BP_SPRINTF_LEN];
761 761 blkptr_t blk;
762 762
763 763 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
764 764 if (ddp->ddp_phys_birth == 0)
765 765 continue;
766 766 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
767 767 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
768 768 (void) printf("index %llx refcnt %llu %s %s\n",
769 769 (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
770 770 types[p], blkbuf);
771 771 }
772 772 }
773 773
774 774 static void
775 775 dump_dedup_ratio(const ddt_stat_t *dds)
776 776 {
777 777 double rL, rP, rD, D, dedup, compress, copies;
778 778
779 779 if (dds->dds_blocks == 0)
780 780 return;
781 781
782 782 rL = (double)dds->dds_ref_lsize;
783 783 rP = (double)dds->dds_ref_psize;
784 784 rD = (double)dds->dds_ref_dsize;
785 785 D = (double)dds->dds_dsize;
786 786
787 787 dedup = rD / D;
788 788 compress = rL / rP;
789 789 copies = rD / rP;
790 790
791 791 (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
792 792 "dedup * compress / copies = %.2f\n\n",
793 793 dedup, compress, copies, dedup * compress / copies);
794 794 }
795 795
796 796 static void
797 797 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
798 798 {
799 799 char name[DDT_NAMELEN];
800 800 ddt_entry_t dde;
801 801 uint64_t walk = 0;
802 802 dmu_object_info_t doi;
803 803 uint64_t count, dspace, mspace;
804 804 int error;
805 805
806 806 error = ddt_object_info(ddt, type, class, &doi);
807 807
808 808 if (error == ENOENT)
809 809 return;
810 810 ASSERT(error == 0);
811 811
812 812 if ((count = ddt_object_count(ddt, type, class)) == 0)
813 813 return;
814 814
815 815 dspace = doi.doi_physical_blocks_512 << 9;
816 816 mspace = doi.doi_fill_count * doi.doi_data_block_size;
817 817
818 818 ddt_object_name(ddt, type, class, name);
819 819
820 820 (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
821 821 name,
822 822 (u_longlong_t)count,
823 823 (u_longlong_t)(dspace / count),
824 824 (u_longlong_t)(mspace / count));
825 825
826 826 if (dump_opt['D'] < 3)
827 827 return;
828 828
829 829 zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
830 830
831 831 if (dump_opt['D'] < 4)
832 832 return;
833 833
834 834 if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
835 835 return;
836 836
837 837 (void) printf("%s contents:\n\n", name);
838 838
839 839 while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
840 840 dump_dde(ddt, &dde, walk);
841 841
842 842 ASSERT(error == ENOENT);
843 843
844 844 (void) printf("\n");
845 845 }
846 846
847 847 static void
848 848 dump_all_ddts(spa_t *spa)
849 849 {
850 850 ddt_histogram_t ddh_total = { 0 };
851 851 ddt_stat_t dds_total = { 0 };
852 852
853 853 for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
854 854 ddt_t *ddt = spa->spa_ddt[c];
855 855 for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
856 856 for (enum ddt_class class = 0; class < DDT_CLASSES;
857 857 class++) {
858 858 dump_ddt(ddt, type, class);
859 859 }
860 860 }
861 861 }
862 862
863 863 ddt_get_dedup_stats(spa, &dds_total);
864 864
865 865 if (dds_total.dds_blocks == 0) {
866 866 (void) printf("All DDTs are empty\n");
867 867 return;
868 868 }
869 869
870 870 (void) printf("\n");
871 871
872 872 if (dump_opt['D'] > 1) {
873 873 (void) printf("DDT histogram (aggregated over all DDTs):\n");
874 874 ddt_get_dedup_histogram(spa, &ddh_total);
875 875 zpool_dump_ddt(&dds_total, &ddh_total);
876 876 }
877 877
878 878 dump_dedup_ratio(&dds_total);
879 879 }
880 880
881 881 static void
882 882 dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
883 883 {
884 884 char *prefix = arg;
885 885
886 886 (void) printf("%s [%llu,%llu) length %llu\n",
887 887 prefix,
888 888 (u_longlong_t)start,
889 889 (u_longlong_t)(start + size),
890 890 (u_longlong_t)(size));
891 891 }
892 892
/*
 * Print the dirty time logs of 'vd' and, recursively, of all its children.
 * 'indent' is the number of columns to indent this vdev's output; the
 * top-level call passes 0, which also prints the section heading.
 */
static void
dump_dtl(vdev_t *vd, int indent)
{
	spa_t *spa = vd->vdev_spa;
	boolean_t required;
	/* Display names for the DTL types, indexed by type. */
	char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
	char prefix[256];

	/* vdev_dtl_required() is evaluated inside a vdev state section. */
	spa_vdev_state_enter(spa, SCL_NONE);
	required = vdev_dtl_required(vd);
	(void) spa_vdev_state_exit(spa, NULL, 0);

	if (indent == 0)
		(void) printf("\nDirty time logs:\n\n");

	/* Label: vdev path, else the vdev type, else (root) the pool name. */
	(void) printf("\t%*s%s [%s]\n", indent, "",
	    vd->vdev_path ? vd->vdev_path :
	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
	    required ? "DTL-required" : "DTL-expendable");

	for (int t = 0; t < DTL_TYPES; t++) {
		range_tree_t *rt = vd->vdev_dtl[t];
		/* Empty DTLs are not shown. */
		if (range_tree_space(rt) == 0)
			continue;
		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
		    indent + 2, "", name[t]);
		/* The tree is walked with its own lock held. */
		mutex_enter(rt->rt_lock);
		range_tree_walk(rt, dump_dtl_seg, prefix);
		mutex_exit(rt->rt_lock);
		/*
		 * With -d given more than five times, also dump the on-disk
		 * DTL space map of childless vdevs.  Note this runs once
		 * per non-empty DTL type.
		 */
		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
			dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		dump_dtl(vd->vdev_child[c], indent + 4);
}
929 929
930 930 static void
931 931 dump_history(spa_t *spa)
932 932 {
933 933 nvlist_t **events = NULL;
934 934 char buf[SPA_MAXBLOCKSIZE];
935 935 uint64_t resid, len, off = 0;
936 936 uint_t num = 0;
937 937 int error;
938 938 time_t tsec;
939 939 struct tm t;
940 940 char tbuf[30];
941 941 char internalstr[MAXPATHLEN];
942 942
943 943 do {
944 944 len = sizeof (buf);
945 945
946 946 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
947 947 (void) fprintf(stderr, "Unable to read history: "
948 948 "error %d\n", error);
949 949 return;
950 950 }
951 951
952 952 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
953 953 break;
954 954
955 955 off -= resid;
956 956 } while (len != 0);
957 957
958 958 (void) printf("\nHistory:\n");
959 959 for (int i = 0; i < num; i++) {
960 960 uint64_t time, txg, ievent;
961 961 char *cmd, *intstr;
962 962 boolean_t printed = B_FALSE;
963 963
964 964 if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
965 965 &time) != 0)
966 966 goto next;
967 967 if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
968 968 &cmd) != 0) {
969 969 if (nvlist_lookup_uint64(events[i],
970 970 ZPOOL_HIST_INT_EVENT, &ievent) != 0)
971 971 goto next;
972 972 verify(nvlist_lookup_uint64(events[i],
973 973 ZPOOL_HIST_TXG, &txg) == 0);
974 974 verify(nvlist_lookup_string(events[i],
975 975 ZPOOL_HIST_INT_STR, &intstr) == 0);
976 976 if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
977 977 goto next;
978 978
979 979 (void) snprintf(internalstr,
980 980 sizeof (internalstr),
981 981 "[internal %s txg:%lld] %s",
982 982 zfs_history_event_names[ievent], txg,
983 983 intstr);
984 984 cmd = internalstr;
985 985 }
986 986 tsec = time;
987 987 (void) localtime_r(&tsec, &t);
988 988 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
989 989 (void) printf("%s %s\n", tbuf, cmd);
990 990 printed = B_TRUE;
991 991
992 992 next:
993 993 if (dump_opt['h'] > 1) {
994 994 if (!printed)
995 995 (void) printf("unrecognized record:\n");
996 996 dump_nvlist(events[i], 2);
997 997 }
998 998 }
999 999 }
1000 1000
/*
 * Object viewer with an empty body: it prints nothing for the object it
 * is handed.  It is referenced from the object_viewer[] table.
 */
1001 1001 /*ARGSUSED*/
1002 1002 static void
1003 1003 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
1004 1004 {
1005 1005 }
1006 1006
1007 1007 static uint64_t
1008 1008 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
1009 1009 {
1010 1010 if (dnp == NULL) {
1011 1011 ASSERT(zb->zb_level < 0);
1012 1012 if (zb->zb_object == 0)
1013 1013 return (zb->zb_blkid);
1014 1014 return (zb->zb_blkid * BP_GET_LSIZE(bp));
1015 1015 }
1016 1016
1017 1017 ASSERT(zb->zb_level >= 0);
1018 1018
1019 1019 return ((zb->zb_blkid <<
1020 1020 (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
1021 1021 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
1022 1022 }
1023 1023
1024 1024 static void
1025 1025 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
1026 1026 {
1027 1027 const dva_t *dva = bp->blk_dva;
1028 1028 int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
1029 1029
1030 1030 if (dump_opt['b'] >= 6) {
1031 1031 snprintf_blkptr(blkbuf, buflen, bp);
1032 1032 return;
1033 1033 }
1034 1034
1035 1035 blkbuf[0] = '\0';
1036 1036
1037 1037 for (int i = 0; i < ndvas; i++)
1038 1038 (void) snprintf(blkbuf + strlen(blkbuf),
1039 1039 buflen - strlen(blkbuf), "%llu:%llx:%llx ",
1040 1040 (u_longlong_t)DVA_GET_VDEV(&dva[i]),
1041 1041 (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
1042 1042 (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
1043 1043
1044 1044 if (BP_IS_HOLE(bp)) {
1045 1045 (void) snprintf(blkbuf + strlen(blkbuf),
1046 1046 buflen - strlen(blkbuf), "B=%llu",
1047 1047 (u_longlong_t)bp->blk_birth);
1048 1048 } else {
1049 1049 (void) snprintf(blkbuf + strlen(blkbuf),
1050 1050 buflen - strlen(blkbuf),
1051 1051 "%llxL/%llxP F=%llu B=%llu/%llu",
1052 1052 (u_longlong_t)BP_GET_LSIZE(bp),
1053 1053 (u_longlong_t)BP_GET_PSIZE(bp),
1054 1054 (u_longlong_t)bp->blk_fill,
1055 1055 (u_longlong_t)bp->blk_birth,
1056 1056 (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
1057 1057 }
1058 1058 }
1059 1059
1060 1060 static void
1061 1061 print_indirect(blkptr_t *bp, const zbookmark_t *zb,
1062 1062 const dnode_phys_t *dnp)
1063 1063 {
1064 1064 char blkbuf[BP_SPRINTF_LEN];
1065 1065 int l;
1066 1066
1067 1067 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1068 1068 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1069 1069
1070 1070 (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
1071 1071
1072 1072 ASSERT(zb->zb_level >= 0);
1073 1073
1074 1074 for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
1075 1075 if (l == zb->zb_level) {
1076 1076 (void) printf("L%llx", (u_longlong_t)zb->zb_level);
1077 1077 } else {
1078 1078 (void) printf(" ");
1079 1079 }
1080 1080 }
1081 1081
1082 1082 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1083 1083 (void) printf("%s\n", blkbuf);
1084 1084 }
1085 1085
1086 1086 static int
1087 1087 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
1088 1088 blkptr_t *bp, const zbookmark_t *zb)
1089 1089 {
1090 1090 int err = 0;
1091 1091
1092 1092 if (bp->blk_birth == 0)
1093 1093 return (0);
1094 1094
1095 1095 print_indirect(bp, zb, dnp);
1096 1096
1097 1097 if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
1098 1098 uint32_t flags = ARC_WAIT;
1099 1099 int i;
1100 1100 blkptr_t *cbp;
1101 1101 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
1102 1102 arc_buf_t *buf;
1103 1103 uint64_t fill = 0;
1104 1104
1105 1105 err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
1106 1106 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
1107 1107 if (err)
1108 1108 return (err);
1109 1109 ASSERT(buf->b_data);
1110 1110
1111 1111 /* recursively visit blocks below this */
1112 1112 cbp = buf->b_data;
1113 1113 for (i = 0; i < epb; i++, cbp++) {
1114 1114 zbookmark_t czb;
1115 1115
1116 1116 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
1117 1117 zb->zb_level - 1,
1118 1118 zb->zb_blkid * epb + i);
1119 1119 err = visit_indirect(spa, dnp, cbp, &czb);
1120 1120 if (err)
1121 1121 break;
1122 1122 fill += cbp->blk_fill;
1123 1123 }
1124 1124 if (!err)
1125 1125 ASSERT3U(fill, ==, bp->blk_fill);
1126 1126 (void) arc_buf_remove_ref(buf, &buf);
1127 1127 }
1128 1128
1129 1129 return (err);
1130 1130 }
1131 1131
1132 1132 /*ARGSUSED*/
1133 1133 static void
1134 1134 dump_indirect(dnode_t *dn)
1135 1135 {
1136 1136 dnode_phys_t *dnp = dn->dn_phys;
1137 1137 int j;
1138 1138 zbookmark_t czb;
1139 1139
1140 1140 (void) printf("Indirect blocks:\n");
1141 1141
1142 1142 SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1143 1143 dn->dn_object, dnp->dn_nlevels - 1, 0);
1144 1144 for (j = 0; j < dnp->dn_nblkptr; j++) {
1145 1145 czb.zb_blkid = j;
1146 1146 (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1147 1147 &dnp->dn_blkptr[j], &czb);
1148 1148 }
1149 1149
1150 1150 (void) printf("\n");
1151 1151 }
1152 1152
1153 1153 /*ARGSUSED*/
1154 1154 static void
1155 1155 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1156 1156 {
1157 1157 dsl_dir_phys_t *dd = data;
1158 1158 time_t crtime;
1159 1159 char nice[32];
1160 1160
1161 1161 if (dd == NULL)
1162 1162 return;
1163 1163
1164 1164 ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1165 1165
1166 1166 crtime = dd->dd_creation_time;
1167 1167 (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1168 1168 (void) printf("\t\thead_dataset_obj = %llu\n",
1169 1169 (u_longlong_t)dd->dd_head_dataset_obj);
1170 1170 (void) printf("\t\tparent_dir_obj = %llu\n",
1171 1171 (u_longlong_t)dd->dd_parent_obj);
1172 1172 (void) printf("\t\torigin_obj = %llu\n",
1173 1173 (u_longlong_t)dd->dd_origin_obj);
1174 1174 (void) printf("\t\tchild_dir_zapobj = %llu\n",
1175 1175 (u_longlong_t)dd->dd_child_dir_zapobj);
1176 1176 zdb_nicenum(dd->dd_used_bytes, nice);
1177 1177 (void) printf("\t\tused_bytes = %s\n", nice);
1178 1178 zdb_nicenum(dd->dd_compressed_bytes, nice);
1179 1179 (void) printf("\t\tcompressed_bytes = %s\n", nice);
1180 1180 zdb_nicenum(dd->dd_uncompressed_bytes, nice);
1181 1181 (void) printf("\t\tuncompressed_bytes = %s\n", nice);
1182 1182 zdb_nicenum(dd->dd_quota, nice);
1183 1183 (void) printf("\t\tquota = %s\n", nice);
1184 1184 zdb_nicenum(dd->dd_reserved, nice);
1185 1185 (void) printf("\t\treserved = %s\n", nice);
1186 1186 (void) printf("\t\tprops_zapobj = %llu\n",
1187 1187 (u_longlong_t)dd->dd_props_zapobj);
1188 1188 (void) printf("\t\tdeleg_zapobj = %llu\n",
1189 1189 (u_longlong_t)dd->dd_deleg_zapobj);
1190 1190 (void) printf("\t\tflags = %llx\n",
1191 1191 (u_longlong_t)dd->dd_flags);
1192 1192
1193 1193 #define DO(which) \
1194 1194 zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
1195 1195 (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1196 1196 DO(HEAD);
1197 1197 DO(SNAP);
1198 1198 DO(CHILD);
1199 1199 DO(CHILD_RSRV);
1200 1200 DO(REFRSRV);
1201 1201 #undef DO
1202 1202 }
1203 1203
1204 1204 /*ARGSUSED*/
1205 1205 static void
1206 1206 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1207 1207 {
1208 1208 dsl_dataset_phys_t *ds = data;
1209 1209 time_t crtime;
1210 1210 char used[32], compressed[32], uncompressed[32], unique[32];
1211 1211 char blkbuf[BP_SPRINTF_LEN];
1212 1212
1213 1213 if (ds == NULL)
1214 1214 return;
1215 1215
1216 1216 ASSERT(size == sizeof (*ds));
1217 1217 crtime = ds->ds_creation_time;
1218 1218 zdb_nicenum(ds->ds_referenced_bytes, used);
1219 1219 zdb_nicenum(ds->ds_compressed_bytes, compressed);
1220 1220 zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
1221 1221 zdb_nicenum(ds->ds_unique_bytes, unique);
1222 1222 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
1223 1223
1224 1224 (void) printf("\t\tdir_obj = %llu\n",
1225 1225 (u_longlong_t)ds->ds_dir_obj);
1226 1226 (void) printf("\t\tprev_snap_obj = %llu\n",
1227 1227 (u_longlong_t)ds->ds_prev_snap_obj);
1228 1228 (void) printf("\t\tprev_snap_txg = %llu\n",
1229 1229 (u_longlong_t)ds->ds_prev_snap_txg);
1230 1230 (void) printf("\t\tnext_snap_obj = %llu\n",
1231 1231 (u_longlong_t)ds->ds_next_snap_obj);
1232 1232 (void) printf("\t\tsnapnames_zapobj = %llu\n",
1233 1233 (u_longlong_t)ds->ds_snapnames_zapobj);
1234 1234 (void) printf("\t\tnum_children = %llu\n",
1235 1235 (u_longlong_t)ds->ds_num_children);
1236 1236 (void) printf("\t\tuserrefs_obj = %llu\n",
1237 1237 (u_longlong_t)ds->ds_userrefs_obj);
1238 1238 (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1239 1239 (void) printf("\t\tcreation_txg = %llu\n",
1240 1240 (u_longlong_t)ds->ds_creation_txg);
1241 1241 (void) printf("\t\tdeadlist_obj = %llu\n",
1242 1242 (u_longlong_t)ds->ds_deadlist_obj);
1243 1243 (void) printf("\t\tused_bytes = %s\n", used);
1244 1244 (void) printf("\t\tcompressed_bytes = %s\n", compressed);
1245 1245 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1246 1246 (void) printf("\t\tunique = %s\n", unique);
1247 1247 (void) printf("\t\tfsid_guid = %llu\n",
1248 1248 (u_longlong_t)ds->ds_fsid_guid);
1249 1249 (void) printf("\t\tguid = %llu\n",
1250 1250 (u_longlong_t)ds->ds_guid);
1251 1251 (void) printf("\t\tflags = %llx\n",
1252 1252 (u_longlong_t)ds->ds_flags);
1253 1253 (void) printf("\t\tnext_clones_obj = %llu\n",
1254 1254 (u_longlong_t)ds->ds_next_clones_obj);
1255 1255 (void) printf("\t\tprops_obj = %llu\n",
1256 1256 (u_longlong_t)ds->ds_props_obj);
1257 1257 (void) printf("\t\tbp = %s\n", blkbuf);
1258 1258 }
1259 1259
1260 1260 /* ARGSUSED */
1261 1261 static int
1262 1262 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1263 1263 {
1264 1264 char blkbuf[BP_SPRINTF_LEN];
1265 1265
1266 1266 if (bp->blk_birth != 0) {
1267 1267 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
1268 1268 (void) printf("\t%s\n", blkbuf);
1269 1269 }
1270 1270 return (0);
1271 1271 }
1272 1272
1273 1273 static void
1274 1274 dump_bptree(objset_t *os, uint64_t obj, char *name)
1275 1275 {
1276 1276 char bytes[32];
1277 1277 bptree_phys_t *bt;
1278 1278 dmu_buf_t *db;
1279 1279
1280 1280 if (dump_opt['d'] < 3)
1281 1281 return;
1282 1282
1283 1283 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1284 1284 bt = db->db_data;
1285 1285 zdb_nicenum(bt->bt_bytes, bytes);
1286 1286 (void) printf("\n %s: %llu datasets, %s\n",
1287 1287 name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1288 1288 dmu_buf_rele(db, FTAG);
1289 1289
1290 1290 if (dump_opt['d'] < 5)
1291 1291 return;
1292 1292
1293 1293 (void) printf("\n");
1294 1294
1295 1295 (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1296 1296 }
1297 1297
1298 1298 /* ARGSUSED */
1299 1299 static int
1300 1300 dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1301 1301 {
1302 1302 char blkbuf[BP_SPRINTF_LEN];
1303 1303
1304 1304 ASSERT(bp->blk_birth != 0);
1305 1305 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1306 1306 (void) printf("\t%s\n", blkbuf);
1307 1307 return (0);
1308 1308 }
1309 1309
1310 1310 static void
1311 1311 dump_bpobj(bpobj_t *bpo, char *name, int indent)
1312 1312 {
1313 1313 char bytes[32];
1314 1314 char comp[32];
1315 1315 char uncomp[32];
1316 1316
1317 1317 if (dump_opt['d'] < 3)
1318 1318 return;
1319 1319
1320 1320 zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
1321 1321 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1322 1322 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
1323 1323 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
1324 1324 (void) printf(" %*s: object %llu, %llu local blkptrs, "
1325 1325 "%llu subobjs, %s (%s/%s comp)\n",
1326 1326 indent * 8, name,
1327 1327 (u_longlong_t)bpo->bpo_object,
1328 1328 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1329 1329 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1330 1330 bytes, comp, uncomp);
1331 1331
1332 1332 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1333 1333 uint64_t subobj;
1334 1334 bpobj_t subbpo;
1335 1335 int error;
1336 1336 VERIFY0(dmu_read(bpo->bpo_os,
1337 1337 bpo->bpo_phys->bpo_subobjs,
1338 1338 i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1339 1339 error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1340 1340 if (error != 0) {
1341 1341 (void) printf("ERROR %u while trying to open "
1342 1342 "subobj id %llu\n",
1343 1343 error, (u_longlong_t)subobj);
1344 1344 continue;
1345 1345 }
1346 1346 dump_bpobj(&subbpo, "subobj", indent + 1);
1347 1347 bpobj_close(&subbpo);
1348 1348 }
1349 1349 } else {
1350 1350 (void) printf(" %*s: object %llu, %llu blkptrs, %s\n",
1351 1351 indent * 8, name,
1352 1352 (u_longlong_t)bpo->bpo_object,
1353 1353 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1354 1354 bytes);
1355 1355 }
1356 1356
1357 1357 if (dump_opt['d'] < 5)
1358 1358 return;
1359 1359
1360 1360
1361 1361 if (indent == 0) {
1362 1362 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1363 1363 (void) printf("\n");
1364 1364 }
1365 1365 }
1366 1366
1367 1367 static void
1368 1368 dump_deadlist(dsl_deadlist_t *dl)
1369 1369 {
1370 1370 dsl_deadlist_entry_t *dle;
1371 1371 uint64_t unused;
1372 1372 char bytes[32];
1373 1373 char comp[32];
1374 1374 char uncomp[32];
1375 1375
1376 1376 if (dump_opt['d'] < 3)
1377 1377 return;
1378 1378
1379 1379 zdb_nicenum(dl->dl_phys->dl_used, bytes);
1380 1380 zdb_nicenum(dl->dl_phys->dl_comp, comp);
1381 1381 zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
1382 1382 (void) printf("\n Deadlist: %s (%s/%s comp)\n",
1383 1383 bytes, comp, uncomp);
1384 1384
1385 1385 if (dump_opt['d'] < 4)
1386 1386 return;
1387 1387
1388 1388 (void) printf("\n");
1389 1389
1390 1390 /* force the tree to be loaded */
1391 1391 dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
1392 1392
1393 1393 for (dle = avl_first(&dl->dl_tree); dle;
1394 1394 dle = AVL_NEXT(&dl->dl_tree, dle)) {
1395 1395 if (dump_opt['d'] >= 5) {
1396 1396 char buf[128];
1397 1397 (void) snprintf(buf, sizeof (buf), "mintxg %llu -> ",
1398 1398 (longlong_t)dle->dle_mintxg,
1399 1399 (longlong_t)dle->dle_bpobj.bpo_object);
1400 1400
1401 1401 dump_bpobj(&dle->dle_bpobj, buf, 0);
1402 1402 } else {
1403 1403 (void) printf("mintxg %llu -> obj %llu\n",
1404 1404 (longlong_t)dle->dle_mintxg,
1405 1405 (longlong_t)dle->dle_bpobj.bpo_object);
1406 1406
1407 1407 }
1408 1408 }
1409 1409 }
1410 1410
/*
 * State shared by the znode viewers below.
 *
 * idx_tree and domain_tree are filled in by dump_uidgid() the first time
 * a FUID is seen and torn down by fuid_table_destroy();
 * fuid_table_loaded records whether they are currently populated.
 * sa_loaded records that dump_znode() has already run sa_setup(), which
 * is what fills in sa_attr_table.
 */
1411 1411 static avl_tree_t idx_tree;
1412 1412 static avl_tree_t domain_tree;
1413 1413 static boolean_t fuid_table_loaded;
1414 1414 static boolean_t sa_loaded;
1415 1415 sa_attr_type_t *sa_attr_table;
1416 1416
1417 1417 static void
1418 1418 fuid_table_destroy()
1419 1419 {
1420 1420 if (fuid_table_loaded) {
1421 1421 zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1422 1422 fuid_table_loaded = B_FALSE;
1423 1423 }
1424 1424 }
1425 1425
1426 1426 /*
1427 1427 * print uid or gid information.
1428 1428 * For normal POSIX id just the id is printed in decimal format.
1429 1429 * For CIFS files with FUID the fuid is printed in hex followed by
1430 1430 * the domain-rid string.
1431 1431 */
1432 1432 static void
1433 1433 print_idstr(uint64_t id, const char *id_type)
1434 1434 {
1435 1435 if (FUID_INDEX(id)) {
1436 1436 char *domain;
1437 1437
1438 1438 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1439 1439 (void) printf("\t%s %llx [%s-%d]\n", id_type,
1440 1440 (u_longlong_t)id, domain, (int)FUID_RID(id));
1441 1441 } else {
1442 1442 (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id);
1443 1443 }
1444 1444
1445 1445 }
1446 1446
1447 1447 static void
1448 1448 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1449 1449 {
1450 1450 uint32_t uid_idx, gid_idx;
1451 1451
1452 1452 uid_idx = FUID_INDEX(uid);
1453 1453 gid_idx = FUID_INDEX(gid);
1454 1454
1455 1455 /* Load domain table, if not already loaded */
1456 1456 if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1457 1457 uint64_t fuid_obj;
1458 1458
1459 1459 /* first find the fuid object. It lives in the master node */
1460 1460 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1461 1461 8, 1, &fuid_obj) == 0);
1462 1462 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1463 1463 (void) zfs_fuid_table_load(os, fuid_obj,
1464 1464 &idx_tree, &domain_tree);
1465 1465 fuid_table_loaded = B_TRUE;
1466 1466 }
1467 1467
1468 1468 print_idstr(uid, "uid");
1469 1469 print_idstr(gid, "gid");
1470 1470 }
1471 1471
1472 1472 /*ARGSUSED*/
1473 1473 static void
1474 1474 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1475 1475 {
1476 1476 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */
1477 1477 sa_handle_t *hdl;
1478 1478 uint64_t xattr, rdev, gen;
1479 1479 uint64_t uid, gid, mode, fsize, parent, links;
1480 1480 uint64_t pflags;
1481 1481 uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1482 1482 time_t z_crtime, z_atime, z_mtime, z_ctime;
1483 1483 sa_bulk_attr_t bulk[12];
1484 1484 int idx = 0;
1485 1485 int error;
1486 1486
1487 1487 if (!sa_loaded) {
1488 1488 uint64_t sa_attrs = 0;
1489 1489 uint64_t version;
1490 1490
1491 1491 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1492 1492 8, 1, &version) == 0);
1493 1493 if (version >= ZPL_VERSION_SA) {
1494 1494 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1495 1495 8, 1, &sa_attrs) == 0);
1496 1496 }
1497 1497 if ((error = sa_setup(os, sa_attrs, zfs_attr_table,
1498 1498 ZPL_END, &sa_attr_table)) != 0) {
1499 1499 (void) printf("sa_setup failed errno %d, can't "
1500 1500 "display znode contents\n", error);
1501 1501 return;
1502 1502 }
1503 1503 sa_loaded = B_TRUE;
1504 1504 }
1505 1505
1506 1506 if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1507 1507 (void) printf("Failed to get handle for SA znode\n");
1508 1508 return;
1509 1509 }
1510 1510
1511 1511 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1512 1512 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1513 1513 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1514 1514 &links, 8);
1515 1515 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1516 1516 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1517 1517 &mode, 8);
1518 1518 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1519 1519 NULL, &parent, 8);
1520 1520 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1521 1521 &fsize, 8);
1522 1522 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1523 1523 acctm, 16);
1524 1524 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1525 1525 modtm, 16);
1526 1526 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1527 1527 crtm, 16);
1528 1528 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1529 1529 chgtm, 16);
1530 1530 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
1531 1531 &pflags, 8);
1532 1532
1533 1533 if (sa_bulk_lookup(hdl, bulk, idx)) {
1534 1534 (void) sa_handle_destroy(hdl);
1535 1535 return;
1536 1536 }
1537 1537
1538 1538 error = zfs_obj_to_path(os, object, path, sizeof (path));
1539 1539 if (error != 0) {
1540 1540 (void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
1541 1541 (u_longlong_t)object);
1542 1542 }
1543 1543 if (dump_opt['d'] < 3) {
1544 1544 (void) printf("\t%s\n", path);
1545 1545 (void) sa_handle_destroy(hdl);
1546 1546 return;
1547 1547 }
1548 1548
1549 1549 z_crtime = (time_t)crtm[0];
1550 1550 z_atime = (time_t)acctm[0];
1551 1551 z_mtime = (time_t)modtm[0];
1552 1552 z_ctime = (time_t)chgtm[0];
1553 1553
1554 1554 (void) printf("\tpath %s\n", path);
1555 1555 dump_uidgid(os, uid, gid);
1556 1556 (void) printf("\tatime %s", ctime(&z_atime));
1557 1557 (void) printf("\tmtime %s", ctime(&z_mtime));
1558 1558 (void) printf("\tctime %s", ctime(&z_ctime));
1559 1559 (void) printf("\tcrtime %s", ctime(&z_crtime));
1560 1560 (void) printf("\tgen %llu\n", (u_longlong_t)gen);
1561 1561 (void) printf("\tmode %llo\n", (u_longlong_t)mode);
1562 1562 (void) printf("\tsize %llu\n", (u_longlong_t)fsize);
1563 1563 (void) printf("\tparent %llu\n", (u_longlong_t)parent);
1564 1564 (void) printf("\tlinks %llu\n", (u_longlong_t)links);
1565 1565 (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
1566 1566 if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1567 1567 sizeof (uint64_t)) == 0)
1568 1568 (void) printf("\txattr %llu\n", (u_longlong_t)xattr);
1569 1569 if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1570 1570 sizeof (uint64_t)) == 0)
1571 1571 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev);
1572 1572 sa_handle_destroy(hdl);
1573 1573 }
1574 1574
/*
 * Object viewer with an empty body: it prints nothing for the object it
 * is handed.  It is referenced from the object_viewer[] table.
 */
1575 1575 /*ARGSUSED*/
1576 1576 static void
1577 1577 dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
1578 1578 {
1579 1579 }
1580 1580
/*
 * Object viewer with an empty body: it prints nothing for the object it
 * is handed.  It is referenced from the object_viewer[] table.
 */
1581 1581 /*ARGSUSED*/
1582 1582 static void
1583 1583 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
1584 1584 {
1585 1585 }
1586 1586
/*
 * Table of viewer callbacks, one per object type plus a final catch-all.
 * dump_object() indexes it with ZDB_OT_TYPE() of an object's bonus type
 * and of its data type.  The comment next to each entry names the object
 * type that the slot stands for; the order of the entries is therefore
 * significant.
 */
1587 1587 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
1588 1588 dump_none, /* unallocated */
1589 1589 dump_zap, /* object directory */
1590 1590 dump_uint64, /* object array */
1591 1591 dump_none, /* packed nvlist */
1592 1592 dump_packed_nvlist, /* packed nvlist size */
1593 1593 dump_none, /* bplist */
1594 1594 dump_none, /* bplist header */
1595 1595 dump_none, /* SPA space map header */
1596 1596 dump_none, /* SPA space map */
1597 1597 dump_none, /* ZIL intent log */
1598 1598 dump_dnode, /* DMU dnode */
1599 1599 dump_dmu_objset, /* DMU objset */
1600 1600 dump_dsl_dir, /* DSL directory */
1601 1601 dump_zap, /* DSL directory child map */
1602 1602 dump_zap, /* DSL dataset snap map */
1603 1603 dump_zap, /* DSL props */
1604 1604 dump_dsl_dataset, /* DSL dataset */
1605 1605 dump_znode, /* ZFS znode */
1606 1606 dump_acl, /* ZFS V0 ACL */
1607 1607 dump_uint8, /* ZFS plain file */
1608 1608 dump_zpldir, /* ZFS directory */
1609 1609 dump_zap, /* ZFS master node */
1610 1610 dump_zap, /* ZFS delete queue */
1611 1611 dump_uint8, /* zvol object */
1612 1612 dump_zap, /* zvol prop */
1613 1613 dump_uint8, /* other uint8[] */
1614 1614 dump_uint64, /* other uint64[] */
1615 1615 dump_zap, /* other ZAP */
1616 1616 dump_zap, /* persistent error log */
1617 1617 dump_uint8, /* SPA history */
1618 1618 dump_history_offsets, /* SPA history offsets */
1619 1619 dump_zap, /* Pool properties */
1620 1620 dump_zap, /* DSL permissions */
1621 1621 dump_acl, /* ZFS ACL */
1622 1622 dump_uint8, /* ZFS SYSACL */
1623 1623 dump_none, /* FUID nvlist */
1624 1624 dump_packed_nvlist, /* FUID nvlist size */
1625 1625 dump_zap, /* DSL dataset next clones */
1626 1626 dump_zap, /* DSL scrub queue */
1627 1627 dump_zap, /* ZFS user/group used */
1628 1628 dump_zap, /* ZFS user/group quota */
1629 1629 dump_zap, /* snapshot refcount tags */
1630 1630 dump_ddt_zap, /* DDT ZAP object */
1631 1631 dump_zap, /* DDT statistics */
1632 1632 dump_znode, /* SA object */
1633 1633 dump_zap, /* SA Master Node */
1634 1634 dump_sa_attrs, /* SA attribute registration */
1635 1635 dump_sa_layouts, /* SA attribute layouts */
1636 1636 dump_zap, /* DSL scrub translations */
1637 1637 dump_none, /* fake dedup BP */
1638 1638 dump_zap, /* deadlist */
1639 1639 dump_none, /* deadlist hdr */
1640 1640 dump_zap, /* dsl clones */
1641 1641 dump_none, /* bpobj subobjs */
1642 1642 dump_unknown, /* Unknown type, must be last */
1643 1643 };
1644 1644
1645 1645 static void
1646 1646 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1647 1647 {
1648 1648 dmu_buf_t *db = NULL;
1649 1649 dmu_object_info_t doi;
1650 1650 dnode_t *dn;
1651 1651 void *bonus = NULL;
1652 1652 size_t bsize = 0;
1653 1653 char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
1654 1654 char bonus_size[32];
1655 1655 char aux[50];
1656 1656 int error;
1657 1657
1658 1658 if (*print_header) {
1659 1659 (void) printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n",
1660 1660 "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1661 1661 "%full", "type");
1662 1662 *print_header = 0;
1663 1663 }
1664 1664
1665 1665 if (object == 0) {
1666 1666 dn = DMU_META_DNODE(os);
1667 1667 } else {
1668 1668 error = dmu_bonus_hold(os, object, FTAG, &db);
1669 1669 if (error)
1670 1670 fatal("dmu_bonus_hold(%llu) failed, errno %u",
1671 1671 object, error);
1672 1672 bonus = db->db_data;
1673 1673 bsize = db->db_size;
1674 1674 dn = DB_DNODE((dmu_buf_impl_t *)db);
1675 1675 }
1676 1676 dmu_object_info_from_dnode(dn, &doi);
1677 1677
1678 1678 zdb_nicenum(doi.doi_metadata_block_size, iblk);
1679 1679 zdb_nicenum(doi.doi_data_block_size, dblk);
1680 1680 zdb_nicenum(doi.doi_max_offset, lsize);
1681 1681 zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
1682 1682 zdb_nicenum(doi.doi_bonus_size, bonus_size);
1683 1683 (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1684 1684 doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1685 1685 doi.doi_max_offset);
1686 1686
1687 1687 aux[0] = '\0';
1688 1688
1689 1689 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1690 1690 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1691 1691 ZDB_CHECKSUM_NAME(doi.doi_checksum));
1692 1692 }
1693 1693
1694 1694 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1695 1695 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1696 1696 ZDB_COMPRESS_NAME(doi.doi_compress));
1697 1697 }
1698 1698
1699 1699 (void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n",
1700 1700 (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1701 1701 asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1702 1702
1703 1703 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1704 1704 (void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n",
1705 1705 "", "", "", "", "", bonus_size, "bonus",
1706 1706 ZDB_OT_NAME(doi.doi_bonus_type));
1707 1707 }
1708 1708
1709 1709 if (verbosity >= 4) {
1710 1710 (void) printf("\tdnode flags: %s%s%s\n",
1711 1711 (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1712 1712 "USED_BYTES " : "",
1713 1713 (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1714 1714 "USERUSED_ACCOUNTED " : "",
1715 1715 (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
1716 1716 "SPILL_BLKPTR" : "");
1717 1717 (void) printf("\tdnode maxblkid: %llu\n",
1718 1718 (longlong_t)dn->dn_phys->dn_maxblkid);
1719 1719
1720 1720 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1721 1721 bonus, bsize);
1722 1722 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1723 1723 *print_header = 1;
1724 1724 }
1725 1725
1726 1726 if (verbosity >= 5)
1727 1727 dump_indirect(dn);
1728 1728
1729 1729 if (verbosity >= 5) {
1730 1730 /*
1731 1731 * Report the list of segments that comprise the object.
1732 1732 */
1733 1733 uint64_t start = 0;
1734 1734 uint64_t end;
1735 1735 uint64_t blkfill = 1;
1736 1736 int minlvl = 1;
1737 1737
1738 1738 if (dn->dn_type == DMU_OT_DNODE) {
1739 1739 minlvl = 0;
1740 1740 blkfill = DNODES_PER_BLOCK;
1741 1741 }
1742 1742
1743 1743 for (;;) {
1744 1744 char segsize[32];
1745 1745 error = dnode_next_offset(dn,
1746 1746 0, &start, minlvl, blkfill, 0);
1747 1747 if (error)
1748 1748 break;
1749 1749 end = start;
1750 1750 error = dnode_next_offset(dn,
1751 1751 DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1752 1752 zdb_nicenum(end - start, segsize);
1753 1753 (void) printf("\t\tsegment [%016llx, %016llx)"
1754 1754 " size %5s\n", (u_longlong_t)start,
1755 1755 (u_longlong_t)end, segsize);
1756 1756 if (error)
1757 1757 break;
1758 1758 start = end;
1759 1759 }
1760 1760 }
1761 1761
1762 1762 if (db != NULL)
1763 1763 dmu_buf_rele(db, FTAG);
1764 1764 }
1765 1765
/*
 * Printable names of the objset types; dump_dir() indexes this array with
 * dds_type and falls back to "UNKNOWN" for values outside of it.
 */
1766 1766 static char *objset_types[DMU_OST_NUMTYPES] = {
1767 1767 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1768 1768
1769 1769 static void
1770 1770 dump_dir(objset_t *os)
1771 1771 {
1772 1772 dmu_objset_stats_t dds;
1773 1773 uint64_t object, object_count;
1774 1774 uint64_t refdbytes, usedobjs, scratch;
1775 1775 char numbuf[32];
1776 1776 char blkbuf[BP_SPRINTF_LEN + 20];
1777 1777 char osname[MAXNAMELEN];
1778 1778 char *type = "UNKNOWN";
1779 1779 int verbosity = dump_opt['d'];
1780 1780 int print_header = 1;
1781 1781 int i, error;
1782 1782
1783 1783 dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
1784 1784 dmu_objset_fast_stat(os, &dds);
1785 1785 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
1786 1786
1787 1787 if (dds.dds_type < DMU_OST_NUMTYPES)
1788 1788 type = objset_types[dds.dds_type];
1789 1789
1790 1790 if (dds.dds_type == DMU_OST_META) {
1791 1791 dds.dds_creation_txg = TXG_INITIAL;
1792 1792 usedobjs = os->os_rootbp->blk_fill;
1793 1793 refdbytes = os->os_spa->spa_dsl_pool->
1794 1794 dp_mos_dir->dd_phys->dd_used_bytes;
1795 1795 } else {
1796 1796 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1797 1797 }
1798 1798
1799 1799 ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
1800 1800
1801 1801 zdb_nicenum(refdbytes, numbuf);
1802 1802
1803 1803 if (verbosity >= 4) {
1804 1804 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
1805 1805 (void) snprintf_blkptr(blkbuf + strlen(blkbuf),
1806 1806 sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
1807 1807 } else {
1808 1808 blkbuf[0] = '\0';
1809 1809 }
1810 1810
1811 1811 dmu_objset_name(os, osname);
1812 1812
1813 1813 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1814 1814 "%s, %llu objects%s\n",
1815 1815 osname, type, (u_longlong_t)dmu_objset_id(os),
1816 1816 (u_longlong_t)dds.dds_creation_txg,
1817 1817 numbuf, (u_longlong_t)usedobjs, blkbuf);
1818 1818
1819 1819 if (zopt_objects != 0) {
1820 1820 for (i = 0; i < zopt_objects; i++)
1821 1821 dump_object(os, zopt_object[i], verbosity,
1822 1822 &print_header);
1823 1823 (void) printf("\n");
1824 1824 return;
1825 1825 }
1826 1826
1827 1827 if (dump_opt['i'] != 0 || verbosity >= 2)
1828 1828 dump_intent_log(dmu_objset_zil(os));
1829 1829
1830 1830 if (dmu_objset_ds(os) != NULL)
1831 1831 dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
1832 1832
1833 1833 if (verbosity < 2)
1834 1834 return;
1835 1835
1836 1836 if (BP_IS_HOLE(os->os_rootbp))
1837 1837 return;
1838 1838
1839 1839 dump_object(os, 0, verbosity, &print_header);
1840 1840 object_count = 0;
1841 1841 if (DMU_USERUSED_DNODE(os) != NULL &&
1842 1842 DMU_USERUSED_DNODE(os)->dn_type != 0) {
1843 1843 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
1844 1844 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
1845 1845 }
1846 1846
1847 1847 object = 0;
1848 1848 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1849 1849 dump_object(os, object, verbosity, &print_header);
1850 1850 object_count++;
1851 1851 }
1852 1852
1853 1853 ASSERT3U(object_count, ==, usedobjs);
1854 1854
1855 1855 (void) printf("\n");
1856 1856
1857 1857 if (error != ESRCH) {
1858 1858 (void) fprintf(stderr, "dmu_object_next() = %d\n", error);
1859 1859 abort();
1860 1860 }
1861 1861 }
1862 1862
1863 1863 static void
1864 1864 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
1865 1865 {
1866 1866 time_t timestamp = ub->ub_timestamp;
1867 1867
1868 1868 (void) printf(header ? header : "");
1869 1869 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1870 1870 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1871 1871 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1872 1872 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1873 1873 (void) printf("\ttimestamp = %llu UTC = %s",
1874 1874 (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp)));
1875 1875 if (dump_opt['u'] >= 3) {
1876 1876 char blkbuf[BP_SPRINTF_LEN];
1877 1877 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
1878 1878 (void) printf("\trootbp = %s\n", blkbuf);
1879 1879 }
1880 1880 (void) printf(footer ? footer : "");
1881 1881 }
1882 1882
1883 1883 static void
1884 1884 dump_config(spa_t *spa)
1885 1885 {
1886 1886 dmu_buf_t *db;
1887 1887 size_t nvsize = 0;
1888 1888 int error = 0;
1889 1889
1890 1890
1891 1891 error = dmu_bonus_hold(spa->spa_meta_objset,
1892 1892 spa->spa_config_object, FTAG, &db);
1893 1893
1894 1894 if (error == 0) {
1895 1895 nvsize = *(uint64_t *)db->db_data;
1896 1896 dmu_buf_rele(db, FTAG);
1897 1897
1898 1898 (void) printf("\nMOS Configuration:\n");
1899 1899 dump_packed_nvlist(spa->spa_meta_objset,
1900 1900 spa->spa_config_object, (void *)&nvsize, 1);
1901 1901 } else {
1902 1902 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
1903 1903 (u_longlong_t)spa->spa_config_object, error);
1904 1904 }
1905 1905 }
1906 1906
1907 1907 static void
1908 1908 dump_cachefile(const char *cachefile)
1909 1909 {
1910 1910 int fd;
1911 1911 struct stat64 statbuf;
1912 1912 char *buf;
1913 1913 nvlist_t *config;
1914 1914
1915 1915 if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1916 1916 (void) printf("cannot open '%s': %s\n", cachefile,
1917 1917 strerror(errno));
1918 1918 exit(1);
1919 1919 }
1920 1920
1921 1921 if (fstat64(fd, &statbuf) != 0) {
1922 1922 (void) printf("failed to stat '%s': %s\n", cachefile,
1923 1923 strerror(errno));
1924 1924 exit(1);
1925 1925 }
1926 1926
1927 1927 if ((buf = malloc(statbuf.st_size)) == NULL) {
1928 1928 (void) fprintf(stderr, "failed to allocate %llu bytes\n",
1929 1929 (u_longlong_t)statbuf.st_size);
1930 1930 exit(1);
1931 1931 }
1932 1932
1933 1933 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1934 1934 (void) fprintf(stderr, "failed to read %llu bytes\n",
1935 1935 (u_longlong_t)statbuf.st_size);
1936 1936 exit(1);
1937 1937 }
1938 1938
1939 1939 (void) close(fd);
1940 1940
1941 1941 if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1942 1942 (void) fprintf(stderr, "failed to unpack nvlist\n");
1943 1943 exit(1);
1944 1944 }
1945 1945
1946 1946 free(buf);
1947 1947
1948 1948 dump_nvlist(config, 0);
1949 1949
1950 1950 nvlist_free(config);
1951 1951 }
1952 1952
1953 1953 #define ZDB_MAX_UB_HEADER_SIZE 32
1954 1954
1955 1955 static void
1956 1956 dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
1957 1957 {
1958 1958 vdev_t vd;
1959 1959 vdev_t *vdp = &vd;
1960 1960 char header[ZDB_MAX_UB_HEADER_SIZE];
1961 1961
1962 1962 vd.vdev_ashift = ashift;
1963 1963 vdp->vdev_top = vdp;
1964 1964
1965 1965 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
1966 1966 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
1967 1967 uberblock_t *ub = (void *)((char *)lbl + uoff);
1968 1968
1969 1969 if (uberblock_verify(ub))
1970 1970 continue;
1971 1971 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
1972 1972 "Uberblock[%d]\n", i);
1973 1973 dump_uberblock(ub, header, "");
1974 1974 }
1975 1975 }
1976 1976
1977 1977 static void
1978 1978 dump_label(const char *dev)
1979 1979 {
1980 1980 int fd;
1981 1981 vdev_label_t label;
1982 1982 char *path, *buf = label.vl_vdev_phys.vp_nvlist;
1983 1983 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1984 1984 struct stat64 statbuf;
1985 1985 uint64_t psize, ashift;
1986 1986 int len = strlen(dev) + 1;
1987 1987
1988 1988 if (strncmp(dev, "/dev/dsk/", 9) == 0) {
1989 1989 len++;
1990 1990 path = malloc(len);
1991 1991 (void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
1992 1992 } else {
1993 1993 path = strdup(dev);
1994 1994 }
1995 1995
1996 1996 if ((fd = open64(path, O_RDONLY)) < 0) {
1997 1997 (void) printf("cannot open '%s': %s\n", path, strerror(errno));
1998 1998 free(path);
1999 1999 exit(1);
2000 2000 }
2001 2001
2002 2002 if (fstat64(fd, &statbuf) != 0) {
2003 2003 (void) printf("failed to stat '%s': %s\n", path,
2004 2004 strerror(errno));
2005 2005 free(path);
2006 2006 (void) close(fd);
2007 2007 exit(1);
2008 2008 }
2009 2009
2010 2010 if (S_ISBLK(statbuf.st_mode)) {
2011 2011 (void) printf("cannot use '%s': character device required\n",
2012 2012 path);
2013 2013 free(path);
2014 2014 (void) close(fd);
2015 2015 exit(1);
2016 2016 }
2017 2017
2018 2018 psize = statbuf.st_size;
2019 2019 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
2020 2020
2021 2021 for (int l = 0; l < VDEV_LABELS; l++) {
2022 2022 nvlist_t *config = NULL;
2023 2023
2024 2024 (void) printf("--------------------------------------------\n");
2025 2025 (void) printf("LABEL %d\n", l);
2026 2026 (void) printf("--------------------------------------------\n");
2027 2027
2028 2028 if (pread64(fd, &label, sizeof (label),
2029 2029 vdev_label_offset(psize, l, 0)) != sizeof (label)) {
2030 2030 (void) printf("failed to read label %d\n", l);
2031 2031 continue;
2032 2032 }
2033 2033
2034 2034 if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
2035 2035 (void) printf("failed to unpack label %d\n", l);
2036 2036 ashift = SPA_MINBLOCKSHIFT;
2037 2037 } else {
2038 2038 nvlist_t *vdev_tree = NULL;
2039 2039
2040 2040 dump_nvlist(config, 4);
2041 2041 if ((nvlist_lookup_nvlist(config,
2042 2042 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
2043 2043 (nvlist_lookup_uint64(vdev_tree,
2044 2044 ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
2045 2045 ashift = SPA_MINBLOCKSHIFT;
2046 2046 nvlist_free(config);
2047 2047 }
2048 2048 if (dump_opt['u'])
2049 2049 dump_label_uberblocks(&label, ashift);
2050 2050 }
2051 2051
2052 2052 free(path);
2053 2053 (void) close(fd);
2054 2054 }
2055 2055
2056 2056 /*ARGSUSED*/
2057 2057 static int
2058 2058 dump_one_dir(const char *dsname, void *arg)
2059 2059 {
2060 2060 int error;
2061 2061 objset_t *os;
2062 2062
2063 2063 error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
2064 2064 if (error) {
2065 2065 (void) printf("Could not open %s, error %d\n", dsname, error);
2066 2066 return (0);
2067 2067 }
2068 2068 dump_dir(os);
2069 2069 dmu_objset_disown(os, FTAG);
2070 2070 fuid_table_destroy();
2071 2071 sa_loaded = B_FALSE;
2072 2072 return (0);
2073 2073 }
2074 2074
2075 2075 /*
2076 2076 * Block statistics.
2077 2077 */
2078 2078 #define PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1)
2079 2079 typedef struct zdb_blkstats {
2080 2080 uint64_t zb_asize;
2081 2081 uint64_t zb_lsize;
2082 2082 uint64_t zb_psize;
2083 2083 uint64_t zb_count;
2084 2084 uint64_t zb_gangs;
2085 2085 uint64_t zb_ditto_samevdev;
2086 2086 uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
2087 2087 } zdb_blkstats_t;
2088 2088
2089 2089 /*
2090 2090 * Extended object types to report deferred frees and dedup auto-ditto blocks.
2091 2091 */
2092 2092 #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
2093 2093 #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1)
2094 2094 #define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2)
2095 2095 #define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3)
2096 2096
2097 2097 static char *zdb_ot_extname[] = {
2098 2098 "deferred free",
2099 2099 "dedup ditto",
2100 2100 "other",
2101 2101 "Total",
2102 2102 };
2103 2103
2104 2104 #define ZB_TOTAL DN_MAX_LEVELS
2105 2105
2106 2106 typedef struct zdb_cb {
2107 2107 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
2108 2108 uint64_t zcb_dedup_asize;
2109 2109 uint64_t zcb_dedup_blocks;
2110 2110 uint64_t zcb_start;
2111 2111 uint64_t zcb_lastprint;
2112 2112 uint64_t zcb_totalasize;
2113 2113 uint64_t zcb_errors[256];
2114 2114 int zcb_readfails;
2115 2115 int zcb_haderrors;
2116 2116 spa_t *zcb_spa;
2117 2117 } zdb_cb_t;
2118 2118
2119 2119 static void
2120 2120 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
2121 2121 dmu_object_type_t type)
2122 2122 {
2123 2123 uint64_t refcnt = 0;
2124 2124
2125 2125 ASSERT(type < ZDB_OT_TOTAL);
2126 2126
2127 2127 if (zilog && zil_bp_tree_add(zilog, bp) != 0)
2128 2128 return;
2129 2129
2130 2130 for (int i = 0; i < 4; i++) {
2131 2131 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
2132 2132 int t = (i & 1) ? type : ZDB_OT_TOTAL;
2133 2133 int equal;
2134 2134 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
2135 2135
2136 2136 zb->zb_asize += BP_GET_ASIZE(bp);
2137 2137 zb->zb_lsize += BP_GET_LSIZE(bp);
2138 2138 zb->zb_psize += BP_GET_PSIZE(bp);
2139 2139 zb->zb_count++;
2140 2140 zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
2141 2141
2142 2142 zb->zb_gangs += BP_COUNT_GANG(bp);
2143 2143
2144 2144 switch (BP_GET_NDVAS(bp)) {
2145 2145 case 2:
2146 2146 if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2147 2147 DVA_GET_VDEV(&bp->blk_dva[1]))
2148 2148 zb->zb_ditto_samevdev++;
2149 2149 break;
2150 2150 case 3:
2151 2151 equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2152 2152 DVA_GET_VDEV(&bp->blk_dva[1])) +
2153 2153 (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2154 2154 DVA_GET_VDEV(&bp->blk_dva[2])) +
2155 2155 (DVA_GET_VDEV(&bp->blk_dva[1]) ==
2156 2156 DVA_GET_VDEV(&bp->blk_dva[2]));
2157 2157 if (equal != 0)
2158 2158 zb->zb_ditto_samevdev++;
2159 2159 break;
2160 2160 }
2161 2161
2162 2162 }
2163 2163
2164 2164 if (dump_opt['L'])
2165 2165 return;
2166 2166
2167 2167 if (BP_GET_DEDUP(bp)) {
2168 2168 ddt_t *ddt;
2169 2169 ddt_entry_t *dde;
2170 2170
2171 2171 ddt = ddt_select(zcb->zcb_spa, bp);
2172 2172 ddt_enter(ddt);
2173 2173 dde = ddt_lookup(ddt, bp, B_FALSE);
2174 2174
2175 2175 if (dde == NULL) {
2176 2176 refcnt = 0;
2177 2177 } else {
2178 2178 ddt_phys_t *ddp = ddt_phys_select(dde, bp);
2179 2179 ddt_phys_decref(ddp);
2180 2180 refcnt = ddp->ddp_refcnt;
2181 2181 if (ddt_phys_total_refcnt(dde) == 0)
2182 2182 ddt_remove(ddt, dde);
2183 2183 }
2184 2184 ddt_exit(ddt);
2185 2185 }
2186 2186
2187 2187 VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
2188 2188 refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
2189 2189 bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
2190 2190 }
2191 2191
2192 2192 static void
2193 2193 zdb_blkptr_done(zio_t *zio)
2194 2194 {
2195 2195 spa_t *spa = zio->io_spa;
2196 2196 blkptr_t *bp = zio->io_bp;
2197 2197 int ioerr = zio->io_error;
2198 2198 zdb_cb_t *zcb = zio->io_private;
2199 2199 zbookmark_t *zb = &zio->io_bookmark;
2200 2200
2201 2201 zio_data_buf_free(zio->io_data, zio->io_size);
2202 2202
2203 2203 mutex_enter(&spa->spa_scrub_lock);
2204 2204 spa->spa_scrub_inflight--;
2205 2205 cv_broadcast(&spa->spa_scrub_io_cv);
2206 2206
2207 2207 if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
2208 2208 char blkbuf[BP_SPRINTF_LEN];
2209 2209
2210 2210 zcb->zcb_haderrors = 1;
2211 2211 zcb->zcb_errors[ioerr]++;
2212 2212
2213 2213 if (dump_opt['b'] >= 2)
2214 2214 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2215 2215 else
2216 2216 blkbuf[0] = '\0';
2217 2217
2218 2218 (void) printf("zdb_blkptr_cb: "
2219 2219 "Got error %d reading "
2220 2220 "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2221 2221 ioerr,
2222 2222 (u_longlong_t)zb->zb_objset,
2223 2223 (u_longlong_t)zb->zb_object,
2224 2224 (u_longlong_t)zb->zb_level,
2225 2225 (u_longlong_t)zb->zb_blkid,
2226 2226 blkbuf);
2227 2227 }
2228 2228 mutex_exit(&spa->spa_scrub_lock);
2229 2229 }
2230 2230
2231 2231 static int
2232 2232 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2233 2233 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2234 2234 {
2235 2235 zdb_cb_t *zcb = arg;
2236 2236 dmu_object_type_t type;
2237 2237 boolean_t is_metadata;
2238 2238
2239 2239 if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
2240 2240 char blkbuf[BP_SPRINTF_LEN];
2241 2241 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2242 2242 (void) printf("objset %llu object %llu "
2243 2243 "level %lld offset 0x%llx %s\n",
2244 2244 (u_longlong_t)zb->zb_objset,
2245 2245 (u_longlong_t)zb->zb_object,
2246 2246 (longlong_t)zb->zb_level,
2247 2247 (u_longlong_t)blkid2offset(dnp, bp, zb),
2248 2248 blkbuf);
2249 2249 }
2250 2250
2251 2251 if (BP_IS_HOLE(bp))
2252 2252 return (0);
2253 2253
2254 2254 type = BP_GET_TYPE(bp);
2255 2255
2256 2256 zdb_count_block(zcb, zilog, bp,
2257 2257 (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
2258 2258
2259 2259 is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
2260 2260
2261 2261 if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
2262 2262 size_t size = BP_GET_PSIZE(bp);
2263 2263 void *data = zio_data_buf_alloc(size);
2264 2264 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
2265 2265
2266 2266 /* If it's an intent log block, failure is expected. */
2267 2267 if (zb->zb_level == ZB_ZIL_LEVEL)
2268 2268 flags |= ZIO_FLAG_SPECULATIVE;
2269 2269
2270 2270 mutex_enter(&spa->spa_scrub_lock);
2271 2271 while (spa->spa_scrub_inflight > max_inflight)
2272 2272 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
2273 2273 spa->spa_scrub_inflight++;
2274 2274 mutex_exit(&spa->spa_scrub_lock);
2275 2275
2276 2276 zio_nowait(zio_read(NULL, spa, bp, data, size,
2277 2277 zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
2278 2278 }
2279 2279
2280 2280 zcb->zcb_readfails = 0;
2281 2281
2282 2282 if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) &&
2283 2283 gethrtime() > zcb->zcb_lastprint + NANOSEC) {
2284 2284 uint64_t now = gethrtime();
2285 2285 char buf[10];
2286 2286 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
2287 2287 int kb_per_sec =
2288 2288 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
2289 2289 int sec_remaining =
2290 2290 (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
2291 2291
2292 2292 zfs_nicenum(bytes, buf, sizeof (buf));
2293 2293 (void) fprintf(stderr,
2294 2294 "\r%5s completed (%4dMB/s) "
2295 2295 "estimated time remaining: %uhr %02umin %02usec ",
2296 2296 buf, kb_per_sec / 1024,
2297 2297 sec_remaining / 60 / 60,
2298 2298 sec_remaining / 60 % 60,
2299 2299 sec_remaining % 60);
2300 2300
2301 2301 zcb->zcb_lastprint = now;
2302 2302 }
2303 2303
2304 2304 return (0);
2305 2305 }
2306 2306
2307 2307 static void
2308 2308 zdb_leak(void *arg, uint64_t start, uint64_t size)
2309 2309 {
2310 2310 vdev_t *vd = arg;
2311 2311
2312 2312 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
2313 2313 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
2314 2314 }
2315 2315
2316 2316 static metaslab_ops_t zdb_metaslab_ops = {
2317 2317 NULL, /* alloc */
2318 2318 NULL /* fragmented */
2319 2319 };
2320 2320
2321 2321 static void
2322 2322 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2323 2323 {
2324 2324 ddt_bookmark_t ddb = { 0 };
2325 2325 ddt_entry_t dde;
2326 2326 int error;
2327 2327
2328 2328 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2329 2329 blkptr_t blk;
2330 2330 ddt_phys_t *ddp = dde.dde_phys;
2331 2331
2332 2332 if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2333 2333 return;
2334 2334
2335 2335 ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2336 2336
2337 2337 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2338 2338 if (ddp->ddp_phys_birth == 0)
2339 2339 continue;
2340 2340 ddt_bp_create(ddb.ddb_checksum,
2341 2341 &dde.dde_key, ddp, &blk);
2342 2342 if (p == DDT_PHYS_DITTO) {
2343 2343 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
2344 2344 } else {
2345 2345 zcb->zcb_dedup_asize +=
2346 2346 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2347 2347 zcb->zcb_dedup_blocks++;
2348 2348 }
2349 2349 }
2350 2350 if (!dump_opt['L']) {
2351 2351 ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2352 2352 ddt_enter(ddt);
2353 2353 VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2354 2354 ddt_exit(ddt);
2355 2355 }
2356 2356 }
2357 2357
2358 2358 ASSERT(error == ENOENT);
2359 2359 }
2360 2360
2361 2361 static void
2362 2362 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
2363 2363 {
2364 2364 zcb->zcb_spa = spa;
2365 2365
2366 2366 if (!dump_opt['L']) {
2367 2367 vdev_t *rvd = spa->spa_root_vdev;
2368 2368 for (int c = 0; c < rvd->vdev_children; c++) {
2369 2369 vdev_t *vd = rvd->vdev_child[c];
2370 2370 for (int m = 0; m < vd->vdev_ms_count; m++) {
2371 2371 metaslab_t *msp = vd->vdev_ms[m];
2372 2372 mutex_enter(&msp->ms_lock);
2373 2373 metaslab_unload(msp);
2374 2374
2375 2375 /*
2376 2376 * For leak detection, we overload the metaslab
2377 2377 * ms_tree to contain allocated segments
2378 2378 * instead of free segments. As a result,
2379 2379 * we can't use the normal metaslab_load/unload
2380 2380 * interfaces.
2381 2381 */
2382 2382 if (msp->ms_sm != NULL) {
2383 2383 msp->ms_ops = &zdb_metaslab_ops;
2384 2384 VERIFY0(space_map_load(msp->ms_sm,
2385 2385 msp->ms_tree, SM_ALLOC));
2386 2386 msp->ms_loaded = B_TRUE;
2387 2387 }
2388 2388 mutex_exit(&msp->ms_lock);
2389 2389 }
2390 2390 }
2391 2391 }
2392 2392
2393 2393 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2394 2394
2395 2395 zdb_ddt_leak_init(spa, zcb);
2396 2396
2397 2397 spa_config_exit(spa, SCL_CONFIG, FTAG);
2398 2398 }
2399 2399
2400 2400 static void
2401 2401 zdb_leak_fini(spa_t *spa)
2402 2402 {
2403 2403 if (!dump_opt['L']) {
2404 2404 vdev_t *rvd = spa->spa_root_vdev;
2405 2405 for (int c = 0; c < rvd->vdev_children; c++) {
2406 2406 vdev_t *vd = rvd->vdev_child[c];
2407 2407 for (int m = 0; m < vd->vdev_ms_count; m++) {
2408 2408 metaslab_t *msp = vd->vdev_ms[m];
2409 2409 mutex_enter(&msp->ms_lock);
2410 2410
2411 2411 /*
2412 2412 * The ms_tree has been overloaded to
2413 2413 * contain allocated segments. Now that we
2414 2414 * finished traversing all blocks, any
2415 2415 * block that remains in the ms_tree
2416 2416 * represents an allocated block that we
2417 2417 * did not claim during the traversal.
2418 2418 * Claimed blocks would have been removed
2419 2419 * from the ms_tree.
2420 2420 */
2421 2421 range_tree_vacate(msp->ms_tree, zdb_leak, vd);
2422 2422 msp->ms_loaded = B_FALSE;
2423 2423
2424 2424 mutex_exit(&msp->ms_lock);
2425 2425 }
2426 2426 }
2427 2427 }
2428 2428 }
2429 2429
2430 2430 /* ARGSUSED */
2431 2431 static int
2432 2432 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2433 2433 {
2434 2434 zdb_cb_t *zcb = arg;
2435 2435
2436 2436 if (dump_opt['b'] >= 5) {
2437 2437 char blkbuf[BP_SPRINTF_LEN];
2438 2438 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2439 2439 (void) printf("[%s] %s\n",
2440 2440 "deferred free", blkbuf);
2441 2441 }
2442 2442 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
2443 2443 return (0);
2444 2444 }
2445 2445
2446 2446 static int
2447 2447 dump_block_stats(spa_t *spa)
2448 2448 {
2449 2449 zdb_cb_t zcb = { 0 };
2450 2450 zdb_blkstats_t *zb, *tzb;
2451 2451 uint64_t norm_alloc, norm_space, total_alloc, total_found;
2452 2452 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
2453 2453 int leaks = 0;
2454 2454
2455 2455 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
2456 2456 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
2457 2457 (dump_opt['c'] == 1) ? "metadata " : "",
2458 2458 dump_opt['c'] ? "checksums " : "",
2459 2459 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
2460 2460 !dump_opt['L'] ? "nothing leaked " : "");
2461 2461
2462 2462 /*
2463 2463 * Load all space maps as SM_ALLOC maps, then traverse the pool
2464 2464 * claiming each block we discover. If the pool is perfectly
2465 2465 * consistent, the space maps will be empty when we're done.
2466 2466 * Anything left over is a leak; any block we can't claim (because
2467 2467 * it's not part of any space map) is a double allocation,
2468 2468 * reference to a freed block, or an unclaimed log block.
2469 2469 */
2470 2470 zdb_leak_init(spa, &zcb);
2471 2471
2472 2472 /*
2473 2473 * If there's a deferred-free bplist, process that first.
2474 2474 */
2475 2475 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
2476 2476 count_block_cb, &zcb, NULL);
2477 2477 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2478 2478 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
2479 2479 count_block_cb, &zcb, NULL);
2480 2480 }
2481 2481 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
2482 2482 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
2483 2483 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
2484 2484 &zcb, NULL));
2485 2485 }
2486 2486
2487 2487 if (dump_opt['c'] > 1)
2488 2488 flags |= TRAVERSE_PREFETCH_DATA;
2489 2489
2490 2490 zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
2491 2491 zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
2492 2492 zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2493 2493
2494 2494 /*
2495 2495 * If we've traversed the data blocks then we need to wait for those
2496 2496 * I/Os to complete. We leverage "The Godfather" zio to wait on
2497 2497 * all async I/Os to complete.
2498 2498 */
2499 2499 if (dump_opt['c']) {
2500 2500 (void) zio_wait(spa->spa_async_zio_root);
2501 2501 spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
2502 2502 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
2503 2503 ZIO_FLAG_GODFATHER);
2504 2504 }
2505 2505
2506 2506 if (zcb.zcb_haderrors) {
2507 2507 (void) printf("\nError counts:\n\n");
2508 2508 (void) printf("\t%5s %s\n", "errno", "count");
2509 2509 for (int e = 0; e < 256; e++) {
2510 2510 if (zcb.zcb_errors[e] != 0) {
2511 2511 (void) printf("\t%5d %llu\n",
2512 2512 e, (u_longlong_t)zcb.zcb_errors[e]);
2513 2513 }
2514 2514 }
2515 2515 }
2516 2516
2517 2517 /*
2518 2518 * Report any leaked segments.
2519 2519 */
2520 2520 zdb_leak_fini(spa);
2521 2521
2522 2522 tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2523 2523
2524 2524 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2525 2525 norm_space = metaslab_class_get_space(spa_normal_class(spa));
2526 2526
2527 2527 total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2528 2528 total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2529 2529
2530 2530 if (total_found == total_alloc) {
2531 2531 if (!dump_opt['L'])
2532 2532 (void) printf("\n\tNo leaks (block sum matches space"
2533 2533 " maps exactly)\n");
2534 2534 } else {
2535 2535 (void) printf("block traversal size %llu != alloc %llu "
2536 2536 "(%s %lld)\n",
2537 2537 (u_longlong_t)total_found,
2538 2538 (u_longlong_t)total_alloc,
2539 2539 (dump_opt['L']) ? "unreachable" : "leaked",
2540 2540 (longlong_t)(total_alloc - total_found));
2541 2541 leaks = 1;
2542 2542 }
2543 2543
2544 2544 if (tzb->zb_count == 0)
2545 2545 return (2);
2546 2546
2547 2547 (void) printf("\n");
2548 2548 (void) printf("\tbp count: %10llu\n",
2549 2549 (u_longlong_t)tzb->zb_count);
2550 2550 (void) printf("\tganged count: %10llu\n",
2551 2551 (longlong_t)tzb->zb_gangs);
2552 2552 (void) printf("\tbp logical: %10llu avg: %6llu\n",
2553 2553 (u_longlong_t)tzb->zb_lsize,
2554 2554 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
2555 2555 (void) printf("\tbp physical: %10llu avg:"
2556 2556 " %6llu compression: %6.2f\n",
2557 2557 (u_longlong_t)tzb->zb_psize,
2558 2558 (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
2559 2559 (double)tzb->zb_lsize / tzb->zb_psize);
2560 2560 (void) printf("\tbp allocated: %10llu avg:"
2561 2561 " %6llu compression: %6.2f\n",
2562 2562 (u_longlong_t)tzb->zb_asize,
2563 2563 (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
2564 2564 (double)tzb->zb_lsize / tzb->zb_asize);
2565 2565 (void) printf("\tbp deduped: %10llu ref>1:"
2566 2566 " %6llu deduplication: %6.2f\n",
2567 2567 (u_longlong_t)zcb.zcb_dedup_asize,
2568 2568 (u_longlong_t)zcb.zcb_dedup_blocks,
2569 2569 (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
2570 2570 (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
2571 2571 (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
2572 2572
2573 2573 if (tzb->zb_ditto_samevdev != 0) {
2574 2574 (void) printf("\tDittoed blocks on same vdev: %llu\n",
2575 2575 (longlong_t)tzb->zb_ditto_samevdev);
2576 2576 }
2577 2577
2578 2578 if (dump_opt['b'] >= 2) {
2579 2579 int l, t, level;
2580 2580 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2581 2581 "\t avg\t comp\t%%Total\tType\n");
2582 2582
2583 2583 for (t = 0; t <= ZDB_OT_TOTAL; t++) {
2584 2584 char csize[32], lsize[32], psize[32], asize[32];
2585 2585 char avg[32], gang[32];
2586 2586 char *typename;
2587 2587
2588 2588 if (t < DMU_OT_NUMTYPES)
2589 2589 typename = dmu_ot[t].ot_name;
2590 2590 else
2591 2591 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
2592 2592
2593 2593 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
2594 2594 (void) printf("%6s\t%5s\t%5s\t%5s"
2595 2595 "\t%5s\t%5s\t%6s\t%s\n",
2596 2596 "-",
2597 2597 "-",
2598 2598 "-",
2599 2599 "-",
2600 2600 "-",
2601 2601 "-",
2602 2602 "-",
2603 2603 typename);
2604 2604 continue;
2605 2605 }
2606 2606
2607 2607 for (l = ZB_TOTAL - 1; l >= -1; l--) {
2608 2608 level = (l == -1 ? ZB_TOTAL : l);
2609 2609 zb = &zcb.zcb_type[level][t];
2610 2610
2611 2611 if (zb->zb_asize == 0)
2612 2612 continue;
2613 2613
2614 2614 if (dump_opt['b'] < 3 && level != ZB_TOTAL)
2615 2615 continue;
2616 2616
2617 2617 if (level == 0 && zb->zb_asize ==
2618 2618 zcb.zcb_type[ZB_TOTAL][t].zb_asize)
2619 2619 continue;
2620 2620
2621 2621 zdb_nicenum(zb->zb_count, csize);
2622 2622 zdb_nicenum(zb->zb_lsize, lsize);
2623 2623 zdb_nicenum(zb->zb_psize, psize);
2624 2624 zdb_nicenum(zb->zb_asize, asize);
2625 2625 zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2626 2626 zdb_nicenum(zb->zb_gangs, gang);
2627 2627
2628 2628 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
2629 2629 "\t%5.2f\t%6.2f\t",
2630 2630 csize, lsize, psize, asize, avg,
2631 2631 (double)zb->zb_lsize / zb->zb_psize,
2632 2632 100.0 * zb->zb_asize / tzb->zb_asize);
2633 2633
2634 2634 if (level == ZB_TOTAL)
2635 2635 (void) printf("%s\n", typename);
2636 2636 else
2637 2637 (void) printf(" L%d %s\n",
2638 2638 level, typename);
2639 2639
2640 2640 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
2641 2641 (void) printf("\t number of ganged "
2642 2642 "blocks: %s\n", gang);
2643 2643 }
2644 2644
2645 2645 if (dump_opt['b'] >= 4) {
2646 2646 (void) printf("psize "
2647 2647 "(in 512-byte sectors): "
2648 2648 "number of blocks\n");
2649 2649 dump_histogram(zb->zb_psize_histogram,
2650 2650 PSIZE_HISTO_SIZE, 0);
2651 2651 }
2652 2652 }
2653 2653 }
2654 2654 }
2655 2655
2656 2656 (void) printf("\n");
2657 2657
2658 2658 if (leaks)
2659 2659 return (2);
2660 2660
2661 2661 if (zcb.zcb_haderrors)
2662 2662 return (3);
2663 2663
2664 2664 return (0);
2665 2665 }
2666 2666
2667 2667 typedef struct zdb_ddt_entry {
2668 2668 ddt_key_t zdde_key;
2669 2669 uint64_t zdde_ref_blocks;
2670 2670 uint64_t zdde_ref_lsize;
2671 2671 uint64_t zdde_ref_psize;
2672 2672 uint64_t zdde_ref_dsize;
2673 2673 avl_node_t zdde_node;
2674 2674 } zdb_ddt_entry_t;
2675 2675
2676 2676 /* ARGSUSED */
2677 2677 static int
2678 2678 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2679 2679 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
2680 2680 {
2681 2681 avl_tree_t *t = arg;
2682 2682 avl_index_t where;
2683 2683 zdb_ddt_entry_t *zdde, zdde_search;
2684 2684
2685 2685 if (BP_IS_HOLE(bp))
2686 2686 return (0);
2687 2687
2688 2688 if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
2689 2689 (void) printf("traversing objset %llu, %llu objects, "
2690 2690 "%lu blocks so far\n",
2691 2691 (u_longlong_t)zb->zb_objset,
2692 2692 (u_longlong_t)bp->blk_fill,
2693 2693 avl_numnodes(t));
2694 2694 }
2695 2695
2696 2696 if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
2697 2697 BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
2698 2698 return (0);
2699 2699
2700 2700 ddt_key_fill(&zdde_search.zdde_key, bp);
2701 2701
2702 2702 zdde = avl_find(t, &zdde_search, &where);
2703 2703
2704 2704 if (zdde == NULL) {
2705 2705 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
2706 2706 zdde->zdde_key = zdde_search.zdde_key;
2707 2707 avl_insert(t, zdde, where);
2708 2708 }
2709 2709
2710 2710 zdde->zdde_ref_blocks += 1;
2711 2711 zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
2712 2712 zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
2713 2713 zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
2714 2714
2715 2715 return (0);
2716 2716 }
2717 2717
2718 2718 static void
2719 2719 dump_simulated_ddt(spa_t *spa)
2720 2720 {
2721 2721 avl_tree_t t;
2722 2722 void *cookie = NULL;
2723 2723 zdb_ddt_entry_t *zdde;
2724 2724 ddt_histogram_t ddh_total = { 0 };
2725 2725 ddt_stat_t dds_total = { 0 };
2726 2726
2727 2727 avl_create(&t, ddt_entry_compare,
2728 2728 sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
2729 2729
2730 2730 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2731 2731
2732 2732 (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
2733 2733 zdb_ddt_add_cb, &t);
2734 2734
2735 2735 spa_config_exit(spa, SCL_CONFIG, FTAG);
2736 2736
2737 2737 while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
2738 2738 ddt_stat_t dds;
2739 2739 uint64_t refcnt = zdde->zdde_ref_blocks;
2740 2740 ASSERT(refcnt != 0);
2741 2741
|
↓ open down ↓ |
2707 lines elided |
↑ open up ↑ |
2742 2742 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
2743 2743 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
2744 2744 dds.dds_psize = zdde->zdde_ref_psize / refcnt;
2745 2745 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
2746 2746
2747 2747 dds.dds_ref_blocks = zdde->zdde_ref_blocks;
2748 2748 dds.dds_ref_lsize = zdde->zdde_ref_lsize;
2749 2749 dds.dds_ref_psize = zdde->zdde_ref_psize;
2750 2750 dds.dds_ref_dsize = zdde->zdde_ref_dsize;
2751 2751
2752 - ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
2752 + ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
2753 + &dds, 0);
2753 2754
2754 2755 umem_free(zdde, sizeof (*zdde));
2755 2756 }
2756 2757
2757 2758 avl_destroy(&t);
2758 2759
2759 2760 ddt_histogram_stat(&dds_total, &ddh_total);
2760 2761
2761 2762 (void) printf("Simulated DDT histogram:\n");
2762 2763
2763 2764 zpool_dump_ddt(&dds_total, &ddh_total);
2764 2765
2765 2766 dump_dedup_ratio(&dds_total);
2766 2767 }
2767 2768
2768 2769 static void
2769 2770 dump_zpool(spa_t *spa)
2770 2771 {
2771 2772 dsl_pool_t *dp = spa_get_dsl(spa);
2772 2773 int rc = 0;
2773 2774
2774 2775 if (dump_opt['S']) {
2775 2776 dump_simulated_ddt(spa);
2776 2777 return;
2777 2778 }
2778 2779
2779 2780 if (!dump_opt['e'] && dump_opt['C'] > 1) {
2780 2781 (void) printf("\nCached configuration:\n");
2781 2782 dump_nvlist(spa->spa_config, 8);
2782 2783 }
2783 2784
2784 2785 if (dump_opt['C'])
2785 2786 dump_config(spa);
2786 2787
2787 2788 if (dump_opt['u'])
2788 2789 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
2789 2790
2790 2791 if (dump_opt['D'])
2791 2792 dump_all_ddts(spa);
2792 2793
2793 2794 if (dump_opt['d'] > 2 || dump_opt['m'])
2794 2795 dump_metaslabs(spa);
2795 2796
2796 2797 if (dump_opt['d'] || dump_opt['i']) {
2797 2798 dump_dir(dp->dp_meta_objset);
2798 2799 if (dump_opt['d'] >= 3) {
2799 2800 dump_bpobj(&spa->spa_deferred_bpobj,
2800 2801 "Deferred frees", 0);
2801 2802 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2802 2803 dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
2803 2804 "Pool snapshot frees", 0);
2804 2805 }
2805 2806
2806 2807 if (spa_feature_is_active(spa,
2807 2808 SPA_FEATURE_ASYNC_DESTROY)) {
2808 2809 dump_bptree(spa->spa_meta_objset,
2809 2810 spa->spa_dsl_pool->dp_bptree_obj,
2810 2811 "Pool dataset frees");
2811 2812 }
2812 2813 dump_dtl(spa->spa_root_vdev, 0);
2813 2814 }
2814 2815 (void) dmu_objset_find(spa_name(spa), dump_one_dir,
2815 2816 NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
2816 2817 }
2817 2818 if (dump_opt['b'] || dump_opt['c'])
2818 2819 rc = dump_block_stats(spa);
2819 2820
2820 2821 if (rc == 0)
2821 2822 rc = verify_spacemap_refcounts(spa);
2822 2823
2823 2824 if (dump_opt['s'])
2824 2825 show_pool_stats(spa);
2825 2826
2826 2827 if (dump_opt['h'])
2827 2828 dump_history(spa);
2828 2829
2829 2830 if (rc != 0)
2830 2831 exit(rc);
2831 2832 }
2832 2833
/*
 * Bits collected by zdb_read_block() from the flags field of a block
 * specifier.  Each bit is associated with one flag character through
 * the flagbits[] table below.
 */
#define	ZDB_FLAG_CHECKSUM	(1 << 0)
#define	ZDB_FLAG_DECOMPRESS	(1 << 1)
#define	ZDB_FLAG_BSWAP		(1 << 2)
#define	ZDB_FLAG_GBH		(1 << 3)
#define	ZDB_FLAG_INDIRECT	(1 << 4)
#define	ZDB_FLAG_PHYS		(1 << 5)
#define	ZDB_FLAG_RAW		(1 << 6)
#define	ZDB_FLAG_PRINT_BLKPTR	(1 << 7)

/*
 * Flag character -> ZDB_FLAG_* bit.  Entries left at zero are reported
 * as invalid flags by zdb_read_block().
 */
int flagbits[256];
2843 2844
2844 2845 static void
2845 2846 zdb_print_blkptr(blkptr_t *bp, int flags)
2846 2847 {
2847 2848 char blkbuf[BP_SPRINTF_LEN];
2848 2849
2849 2850 if (flags & ZDB_FLAG_BSWAP)
2850 2851 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
2851 2852
2852 2853 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2853 2854 (void) printf("%s\n", blkbuf);
2854 2855 }
2855 2856
2856 2857 static void
2857 2858 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
2858 2859 {
2859 2860 int i;
2860 2861
2861 2862 for (i = 0; i < nbps; i++)
2862 2863 zdb_print_blkptr(&bp[i], flags);
2863 2864 }
2864 2865
2865 2866 static void
2866 2867 zdb_dump_gbh(void *buf, int flags)
2867 2868 {
2868 2869 zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
2869 2870 }
2870 2871
2871 2872 static void
2872 2873 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
2873 2874 {
2874 2875 if (flags & ZDB_FLAG_BSWAP)
2875 2876 byteswap_uint64_array(buf, size);
2876 2877 (void) write(1, buf, size);
2877 2878 }
2878 2879
2879 2880 static void
2880 2881 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
2881 2882 {
2882 2883 uint64_t *d = (uint64_t *)buf;
2883 2884 int nwords = size / sizeof (uint64_t);
2884 2885 int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
2885 2886 int i, j;
2886 2887 char *hdr, *c;
2887 2888
2888 2889
2889 2890 if (do_bswap)
2890 2891 hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8";
2891 2892 else
2892 2893 hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f";
2893 2894
2894 2895 (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr);
2895 2896
2896 2897 for (i = 0; i < nwords; i += 2) {
2897 2898 (void) printf("%06llx: %016llx %016llx ",
2898 2899 (u_longlong_t)(i * sizeof (uint64_t)),
2899 2900 (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
2900 2901 (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
2901 2902
2902 2903 c = (char *)&d[i];
2903 2904 for (j = 0; j < 2 * sizeof (uint64_t); j++)
2904 2905 (void) printf("%c", isprint(c[j]) ? c[j] : '.');
2905 2906 (void) printf("\n");
2906 2907 }
2907 2908 }
2908 2909
2909 2910 /*
2910 2911 * There are two acceptable formats:
2911 2912 * leaf_name - For example: c1t0d0 or /tmp/ztest.0a
2912 2913 * child[.child]* - For example: 0.1.1
2913 2914 *
2914 2915 * The second form can be used to specify arbitrary vdevs anywhere
2915 2916 * in the heirarchy. For example, in a pool with a mirror of
2916 2917 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
2917 2918 */
2918 2919 static vdev_t *
2919 2920 zdb_vdev_lookup(vdev_t *vdev, char *path)
2920 2921 {
2921 2922 char *s, *p, *q;
2922 2923 int i;
2923 2924
2924 2925 if (vdev == NULL)
2925 2926 return (NULL);
2926 2927
2927 2928 /* First, assume the x.x.x.x format */
2928 2929 i = (int)strtoul(path, &s, 10);
2929 2930 if (s == path || (s && *s != '.' && *s != '\0'))
2930 2931 goto name;
2931 2932 if (i < 0 || i >= vdev->vdev_children)
2932 2933 return (NULL);
2933 2934
2934 2935 vdev = vdev->vdev_child[i];
2935 2936 if (*s == '\0')
2936 2937 return (vdev);
2937 2938 return (zdb_vdev_lookup(vdev, s+1));
2938 2939
2939 2940 name:
2940 2941 for (i = 0; i < vdev->vdev_children; i++) {
2941 2942 vdev_t *vc = vdev->vdev_child[i];
2942 2943
2943 2944 if (vc->vdev_path == NULL) {
2944 2945 vc = zdb_vdev_lookup(vc, path);
2945 2946 if (vc == NULL)
2946 2947 continue;
2947 2948 else
2948 2949 return (vc);
2949 2950 }
2950 2951
2951 2952 p = strrchr(vc->vdev_path, '/');
2952 2953 p = p ? p + 1 : vc->vdev_path;
2953 2954 q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
2954 2955
2955 2956 if (strcmp(vc->vdev_path, path) == 0)
2956 2957 return (vc);
2957 2958 if (strcmp(p, path) == 0)
2958 2959 return (vc);
2959 2960 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
2960 2961 return (vc);
2961 2962 }
2962 2963
2963 2964 return (NULL);
2964 2965 }
2965 2966
2966 2967 /*
2967 2968 * Read a block from a pool and print it out. The syntax of the
2968 2969 * block descriptor is:
2969 2970 *
2970 2971 * pool:vdev_specifier:offset:size[:flags]
2971 2972 *
2972 2973 * pool - The name of the pool you wish to read from
2973 2974 * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
2974 2975 * offset - offset, in hex, in bytes
2975 2976 * size - Amount of data to read, in hex, in bytes
2976 2977 * flags - A string of characters specifying options
2977 2978 * b: Decode a blkptr at given offset within block
2978 2979 * *c: Calculate and display checksums
2979 2980 * d: Decompress data before dumping
2980 2981 * e: Byteswap data before dumping
2981 2982 * g: Display data as a gang block header
2982 2983 * i: Display as an indirect block
2983 2984 * p: Do I/O to physical offset
2984 2985 * r: Dump raw data to stdout
2985 2986 *
2986 2987 * * = not yet implemented
2987 2988 */
2988 2989 static void
2989 2990 zdb_read_block(char *thing, spa_t *spa)
2990 2991 {
2991 2992 blkptr_t blk, *bp = &blk;
2992 2993 dva_t *dva = bp->blk_dva;
2993 2994 int flags = 0;
2994 2995 uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
2995 2996 zio_t *zio;
2996 2997 vdev_t *vd;
2997 2998 void *pbuf, *lbuf, *buf;
2998 2999 char *s, *p, *dup, *vdev, *flagstr;
2999 3000 int i, error;
3000 3001
3001 3002 dup = strdup(thing);
3002 3003 s = strtok(dup, ":");
3003 3004 vdev = s ? s : "";
3004 3005 s = strtok(NULL, ":");
3005 3006 offset = strtoull(s ? s : "", NULL, 16);
3006 3007 s = strtok(NULL, ":");
3007 3008 size = strtoull(s ? s : "", NULL, 16);
3008 3009 s = strtok(NULL, ":");
3009 3010 flagstr = s ? s : "";
3010 3011
3011 3012 s = NULL;
3012 3013 if (size == 0)
3013 3014 s = "size must not be zero";
3014 3015 if (!IS_P2ALIGNED(size, DEV_BSIZE))
3015 3016 s = "size must be a multiple of sector size";
3016 3017 if (!IS_P2ALIGNED(offset, DEV_BSIZE))
3017 3018 s = "offset must be a multiple of sector size";
3018 3019 if (s) {
3019 3020 (void) printf("Invalid block specifier: %s - %s\n", thing, s);
3020 3021 free(dup);
3021 3022 return;
3022 3023 }
3023 3024
3024 3025 for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
3025 3026 for (i = 0; flagstr[i]; i++) {
3026 3027 int bit = flagbits[(uchar_t)flagstr[i]];
3027 3028
3028 3029 if (bit == 0) {
3029 3030 (void) printf("***Invalid flag: %c\n",
3030 3031 flagstr[i]);
3031 3032 continue;
3032 3033 }
3033 3034 flags |= bit;
3034 3035
3035 3036 /* If it's not something with an argument, keep going */
3036 3037 if ((bit & (ZDB_FLAG_CHECKSUM |
3037 3038 ZDB_FLAG_PRINT_BLKPTR)) == 0)
3038 3039 continue;
3039 3040
3040 3041 p = &flagstr[i + 1];
3041 3042 if (bit == ZDB_FLAG_PRINT_BLKPTR)
3042 3043 blkptr_offset = strtoull(p, &p, 16);
3043 3044 if (*p != ':' && *p != '\0') {
3044 3045 (void) printf("***Invalid flag arg: '%s'\n", s);
3045 3046 free(dup);
3046 3047 return;
3047 3048 }
3048 3049 }
3049 3050 }
3050 3051
3051 3052 vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
3052 3053 if (vd == NULL) {
3053 3054 (void) printf("***Invalid vdev: %s\n", vdev);
3054 3055 free(dup);
3055 3056 return;
3056 3057 } else {
3057 3058 if (vd->vdev_path)
3058 3059 (void) fprintf(stderr, "Found vdev: %s\n",
3059 3060 vd->vdev_path);
3060 3061 else
3061 3062 (void) fprintf(stderr, "Found vdev type: %s\n",
3062 3063 vd->vdev_ops->vdev_op_type);
3063 3064 }
3064 3065
3065 3066 psize = size;
3066 3067 lsize = size;
3067 3068
3068 3069 pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3069 3070 lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3070 3071
3071 3072 BP_ZERO(bp);
3072 3073
3073 3074 DVA_SET_VDEV(&dva[0], vd->vdev_id);
3074 3075 DVA_SET_OFFSET(&dva[0], offset);
3075 3076 DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
3076 3077 DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
3077 3078
3078 3079 BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
3079 3080
3080 3081 BP_SET_LSIZE(bp, lsize);
3081 3082 BP_SET_PSIZE(bp, psize);
3082 3083 BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
3083 3084 BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
3084 3085 BP_SET_TYPE(bp, DMU_OT_NONE);
3085 3086 BP_SET_LEVEL(bp, 0);
3086 3087 BP_SET_DEDUP(bp, 0);
3087 3088 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
3088 3089
3089 3090 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
3090 3091 zio = zio_root(spa, NULL, NULL, 0);
3091 3092
3092 3093 if (vd == vd->vdev_top) {
3093 3094 /*
3094 3095 * Treat this as a normal block read.
3095 3096 */
3096 3097 zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
3097 3098 ZIO_PRIORITY_SYNC_READ,
3098 3099 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
3099 3100 } else {
3100 3101 /*
3101 3102 * Treat this as a vdev child I/O.
3102 3103 */
3103 3104 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
3104 3105 ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
3105 3106 ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
3106 3107 ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
3107 3108 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
3108 3109 }
3109 3110
3110 3111 error = zio_wait(zio);
3111 3112 spa_config_exit(spa, SCL_STATE, FTAG);
3112 3113
3113 3114 if (error) {
3114 3115 (void) printf("Read of %s failed, error: %d\n", thing, error);
3115 3116 goto out;
3116 3117 }
3117 3118
3118 3119 if (flags & ZDB_FLAG_DECOMPRESS) {
3119 3120 /*
3120 3121 * We don't know how the data was compressed, so just try
3121 3122 * every decompress function at every inflated blocksize.
3122 3123 */
3123 3124 enum zio_compress c;
3124 3125 void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3125 3126 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3126 3127
3127 3128 bcopy(pbuf, pbuf2, psize);
3128 3129
3129 3130 VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
3130 3131 SPA_MAXBLOCKSIZE - psize) == 0);
3131 3132
3132 3133 VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
3133 3134 SPA_MAXBLOCKSIZE - psize) == 0);
3134 3135
3135 3136 for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
3136 3137 lsize -= SPA_MINBLOCKSIZE) {
3137 3138 for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
3138 3139 if (zio_decompress_data(c, pbuf, lbuf,
3139 3140 psize, lsize) == 0 &&
3140 3141 zio_decompress_data(c, pbuf2, lbuf2,
3141 3142 psize, lsize) == 0 &&
3142 3143 bcmp(lbuf, lbuf2, lsize) == 0)
3143 3144 break;
3144 3145 }
3145 3146 if (c != ZIO_COMPRESS_FUNCTIONS)
3146 3147 break;
3147 3148 lsize -= SPA_MINBLOCKSIZE;
3148 3149 }
3149 3150
3150 3151 umem_free(pbuf2, SPA_MAXBLOCKSIZE);
3151 3152 umem_free(lbuf2, SPA_MAXBLOCKSIZE);
3152 3153
3153 3154 if (lsize <= psize) {
3154 3155 (void) printf("Decompress of %s failed\n", thing);
3155 3156 goto out;
3156 3157 }
3157 3158 buf = lbuf;
3158 3159 size = lsize;
3159 3160 } else {
3160 3161 buf = pbuf;
3161 3162 size = psize;
3162 3163 }
3163 3164
3164 3165 if (flags & ZDB_FLAG_PRINT_BLKPTR)
3165 3166 zdb_print_blkptr((blkptr_t *)(void *)
3166 3167 ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
3167 3168 else if (flags & ZDB_FLAG_RAW)
3168 3169 zdb_dump_block_raw(buf, size, flags);
3169 3170 else if (flags & ZDB_FLAG_INDIRECT)
3170 3171 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
3171 3172 flags);
3172 3173 else if (flags & ZDB_FLAG_GBH)
3173 3174 zdb_dump_gbh(buf, flags);
3174 3175 else
3175 3176 zdb_dump_block(thing, buf, size, flags);
3176 3177
3177 3178 out:
3178 3179 umem_free(pbuf, SPA_MAXBLOCKSIZE);
3179 3180 umem_free(lbuf, SPA_MAXBLOCKSIZE);
3180 3181 free(dup);
3181 3182 }
3182 3183
3183 3184 static boolean_t
3184 3185 pool_match(nvlist_t *cfg, char *tgt)
3185 3186 {
3186 3187 uint64_t v, guid = strtoull(tgt, NULL, 0);
3187 3188 char *s;
3188 3189
3189 3190 if (guid != 0) {
3190 3191 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
3191 3192 return (v == guid);
3192 3193 } else {
3193 3194 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
3194 3195 return (strcmp(s, tgt) == 0);
3195 3196 }
3196 3197 return (B_FALSE);
3197 3198 }
3198 3199
3199 3200 static char *
3200 3201 find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
3201 3202 {
3202 3203 nvlist_t *pools;
3203 3204 nvlist_t *match = NULL;
3204 3205 char *name = NULL;
3205 3206 char *sepp = NULL;
3206 3207 char sep;
3207 3208 int count = 0;
3208 3209 importargs_t args = { 0 };
3209 3210
3210 3211 args.paths = dirc;
3211 3212 args.path = dirv;
3212 3213 args.can_be_active = B_TRUE;
3213 3214
3214 3215 if ((sepp = strpbrk(*target, "/@")) != NULL) {
3215 3216 sep = *sepp;
3216 3217 *sepp = '\0';
3217 3218 }
3218 3219
3219 3220 pools = zpool_search_import(g_zfs, &args);
3220 3221
3221 3222 if (pools != NULL) {
3222 3223 nvpair_t *elem = NULL;
3223 3224 while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
3224 3225 verify(nvpair_value_nvlist(elem, configp) == 0);
3225 3226 if (pool_match(*configp, *target)) {
3226 3227 count++;
3227 3228 if (match != NULL) {
3228 3229 /* print previously found config */
3229 3230 if (name != NULL) {
3230 3231 (void) printf("%s\n", name);
3231 3232 dump_nvlist(match, 8);
3232 3233 name = NULL;
3233 3234 }
3234 3235 (void) printf("%s\n",
3235 3236 nvpair_name(elem));
3236 3237 dump_nvlist(*configp, 8);
3237 3238 } else {
3238 3239 match = *configp;
3239 3240 name = nvpair_name(elem);
3240 3241 }
3241 3242 }
3242 3243 }
3243 3244 }
3244 3245 if (count > 1)
3245 3246 (void) fatal("\tMatched %d pools - use pool GUID "
3246 3247 "instead of pool name or \n"
3247 3248 "\tpool name part of a dataset name to select pool", count);
3248 3249
3249 3250 if (sepp)
3250 3251 *sepp = sep;
3251 3252 /*
3252 3253 * If pool GUID was specified for pool id, replace it with pool name
3253 3254 */
3254 3255 if (name && (strstr(*target, name) != *target)) {
3255 3256 int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
3256 3257
3257 3258 *target = umem_alloc(sz, UMEM_NOFAIL);
3258 3259 (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
3259 3260 }
3260 3261
3261 3262 *configp = name ? match : NULL;
3262 3263
3263 3264 return (name);
3264 3265 }
3265 3266
3266 3267 int
3267 3268 main(int argc, char **argv)
3268 3269 {
3269 3270 int i, c;
3270 3271 struct rlimit rl = { 1024, 1024 };
3271 3272 spa_t *spa = NULL;
3272 3273 objset_t *os = NULL;
3273 3274 int dump_all = 1;
3274 3275 int verbose = 0;
3275 3276 int error = 0;
3276 3277 char **searchdirs = NULL;
3277 3278 int nsearch = 0;
3278 3279 char *target;
3279 3280 nvlist_t *policy = NULL;
3280 3281 uint64_t max_txg = UINT64_MAX;
3281 3282 int rewind = ZPOOL_NEVER_REWIND;
3282 3283
3283 3284 (void) setrlimit(RLIMIT_NOFILE, &rl);
3284 3285 (void) enable_extended_FILE_stdio(-1, -1);
3285 3286
3286 3287 dprintf_setup(&argc, argv);
3287 3288
3288 3289 while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
3289 3290 switch (c) {
3290 3291 case 'b':
3291 3292 case 'c':
3292 3293 case 'd':
3293 3294 case 'h':
3294 3295 case 'i':
3295 3296 case 'l':
3296 3297 case 'm':
3297 3298 case 's':
3298 3299 case 'u':
3299 3300 case 'C':
3300 3301 case 'D':
3301 3302 case 'R':
3302 3303 case 'S':
3303 3304 dump_opt[c]++;
3304 3305 dump_all = 0;
3305 3306 break;
3306 3307 case 'A':
3307 3308 case 'F':
3308 3309 case 'L':
3309 3310 case 'X':
3310 3311 case 'e':
3311 3312 case 'P':
3312 3313 dump_opt[c]++;
3313 3314 break;
3314 3315 case 'v':
3315 3316 verbose++;
3316 3317 break;
3317 3318 case 'M':
3318 3319 max_inflight = strtoull(optarg, NULL, 0);
3319 3320 if (max_inflight == 0) {
3320 3321 (void) fprintf(stderr, "maximum number "
3321 3322 "of inflight I/Os must be greater "
3322 3323 "than 0\n");
3323 3324 usage();
3324 3325 }
3325 3326 break;
3326 3327 case 'p':
3327 3328 if (searchdirs == NULL) {
3328 3329 searchdirs = umem_alloc(sizeof (char *),
3329 3330 UMEM_NOFAIL);
3330 3331 } else {
3331 3332 char **tmp = umem_alloc((nsearch + 1) *
3332 3333 sizeof (char *), UMEM_NOFAIL);
3333 3334 bcopy(searchdirs, tmp, nsearch *
3334 3335 sizeof (char *));
3335 3336 umem_free(searchdirs,
3336 3337 nsearch * sizeof (char *));
3337 3338 searchdirs = tmp;
3338 3339 }
3339 3340 searchdirs[nsearch++] = optarg;
3340 3341 break;
3341 3342 case 't':
3342 3343 max_txg = strtoull(optarg, NULL, 0);
3343 3344 if (max_txg < TXG_INITIAL) {
3344 3345 (void) fprintf(stderr, "incorrect txg "
3345 3346 "specified: %s\n", optarg);
3346 3347 usage();
3347 3348 }
3348 3349 break;
3349 3350 case 'U':
3350 3351 spa_config_path = optarg;
3351 3352 break;
3352 3353 default:
3353 3354 usage();
3354 3355 break;
3355 3356 }
3356 3357 }
3357 3358
3358 3359 if (!dump_opt['e'] && searchdirs != NULL) {
3359 3360 (void) fprintf(stderr, "-p option requires use of -e\n");
3360 3361 usage();
3361 3362 }
3362 3363
3363 3364 kernel_init(FREAD);
3364 3365 g_zfs = libzfs_init();
3365 3366 ASSERT(g_zfs != NULL);
3366 3367
3367 3368 if (dump_all)
3368 3369 verbose = MAX(verbose, 1);
3369 3370
3370 3371 for (c = 0; c < 256; c++) {
3371 3372 if (dump_all && !strchr("elAFLRSXP", c))
3372 3373 dump_opt[c] = 1;
3373 3374 if (dump_opt[c])
3374 3375 dump_opt[c] += verbose;
3375 3376 }
3376 3377
3377 3378 aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
3378 3379 zfs_recover = (dump_opt['A'] > 1);
3379 3380
3380 3381 argc -= optind;
3381 3382 argv += optind;
3382 3383
3383 3384 if (argc < 2 && dump_opt['R'])
3384 3385 usage();
3385 3386 if (argc < 1) {
3386 3387 if (!dump_opt['e'] && dump_opt['C']) {
3387 3388 dump_cachefile(spa_config_path);
3388 3389 return (0);
3389 3390 }
3390 3391 usage();
3391 3392 }
3392 3393
3393 3394 if (dump_opt['l']) {
3394 3395 dump_label(argv[0]);
3395 3396 return (0);
3396 3397 }
3397 3398
3398 3399 if (dump_opt['X'] || dump_opt['F'])
3399 3400 rewind = ZPOOL_DO_REWIND |
3400 3401 (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
3401 3402
3402 3403 if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
3403 3404 nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
3404 3405 nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
3405 3406 fatal("internal error: %s", strerror(ENOMEM));
3406 3407
3407 3408 error = 0;
3408 3409 target = argv[0];
3409 3410
3410 3411 if (dump_opt['e']) {
3411 3412 nvlist_t *cfg = NULL;
3412 3413 char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
3413 3414
3414 3415 error = ENOENT;
3415 3416 if (name) {
3416 3417 if (dump_opt['C'] > 1) {
3417 3418 (void) printf("\nConfiguration for import:\n");
3418 3419 dump_nvlist(cfg, 8);
3419 3420 }
3420 3421 if (nvlist_add_nvlist(cfg,
3421 3422 ZPOOL_REWIND_POLICY, policy) != 0) {
3422 3423 fatal("can't open '%s': %s",
3423 3424 target, strerror(ENOMEM));
3424 3425 }
3425 3426 if ((error = spa_import(name, cfg, NULL,
3426 3427 ZFS_IMPORT_MISSING_LOG)) != 0) {
3427 3428 error = spa_import(name, cfg, NULL,
3428 3429 ZFS_IMPORT_VERBATIM);
3429 3430 }
3430 3431 }
3431 3432 }
3432 3433
3433 3434 if (error == 0) {
3434 3435 if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
3435 3436 error = spa_open_rewind(target, &spa, FTAG, policy,
3436 3437 NULL);
3437 3438 if (error) {
3438 3439 /*
3439 3440 * If we're missing the log device then
3440 3441 * try opening the pool after clearing the
3441 3442 * log state.
3442 3443 */
3443 3444 mutex_enter(&spa_namespace_lock);
3444 3445 if ((spa = spa_lookup(target)) != NULL &&
3445 3446 spa->spa_log_state == SPA_LOG_MISSING) {
3446 3447 spa->spa_log_state = SPA_LOG_CLEAR;
3447 3448 error = 0;
3448 3449 }
3449 3450 mutex_exit(&spa_namespace_lock);
3450 3451
3451 3452 if (!error) {
3452 3453 error = spa_open_rewind(target, &spa,
3453 3454 FTAG, policy, NULL);
3454 3455 }
3455 3456 }
3456 3457 } else {
3457 3458 error = dmu_objset_own(target, DMU_OST_ANY,
3458 3459 B_TRUE, FTAG, &os);
3459 3460 }
3460 3461 }
3461 3462 nvlist_free(policy);
3462 3463
3463 3464 if (error)
3464 3465 fatal("can't open '%s': %s", target, strerror(error));
3465 3466
3466 3467 argv++;
3467 3468 argc--;
3468 3469 if (!dump_opt['R']) {
3469 3470 if (argc > 0) {
3470 3471 zopt_objects = argc;
3471 3472 zopt_object = calloc(zopt_objects, sizeof (uint64_t));
3472 3473 for (i = 0; i < zopt_objects; i++) {
3473 3474 errno = 0;
3474 3475 zopt_object[i] = strtoull(argv[i], NULL, 0);
3475 3476 if (zopt_object[i] == 0 && errno != 0)
3476 3477 fatal("bad number %s: %s",
3477 3478 argv[i], strerror(errno));
3478 3479 }
3479 3480 }
3480 3481 if (os != NULL) {
3481 3482 dump_dir(os);
3482 3483 } else if (zopt_objects > 0 && !dump_opt['m']) {
3483 3484 dump_dir(spa->spa_meta_objset);
3484 3485 } else {
3485 3486 dump_zpool(spa);
3486 3487 }
3487 3488 } else {
3488 3489 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
3489 3490 flagbits['c'] = ZDB_FLAG_CHECKSUM;
3490 3491 flagbits['d'] = ZDB_FLAG_DECOMPRESS;
3491 3492 flagbits['e'] = ZDB_FLAG_BSWAP;
3492 3493 flagbits['g'] = ZDB_FLAG_GBH;
3493 3494 flagbits['i'] = ZDB_FLAG_INDIRECT;
3494 3495 flagbits['p'] = ZDB_FLAG_PHYS;
3495 3496 flagbits['r'] = ZDB_FLAG_RAW;
3496 3497
3497 3498 for (i = 0; i < argc; i++)
3498 3499 zdb_read_block(argv[i], spa);
3499 3500 }
3500 3501
3501 3502 (os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
3502 3503
3503 3504 fuid_table_destroy();
3504 3505 sa_loaded = B_FALSE;
3505 3506
3506 3507 libzfs_fini(g_zfs);
3507 3508 kernel_fini();
3508 3509
3509 3510 return (0);
3510 3511 }
|
↓ open down ↓ |
748 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX