1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Nexenta Systems, Inc.
24 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
25 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
26 */
27
28 /* Portions Copyright 2010 Robert Milkowski */
29
30 #include <mdb/mdb_ctf.h>
31 #include <sys/zfs_context.h>
32 #include <sys/mdb_modapi.h>
33 #include <sys/dbuf.h>
34 #include <sys/dmu_objset.h>
35 #include <sys/dsl_dir.h>
36 #include <sys/dsl_pool.h>
37 #include <sys/metaslab_impl.h>
38 #include <sys/space_map.h>
39 #include <sys/list.h>
40 #include <sys/vdev_impl.h>
41 #include <sys/zap_leaf.h>
42 #include <sys/zap_impl.h>
43 #include <ctype.h>
44 #include <sys/zfs_acl.h>
45 #include <sys/sa_impl.h>
46 #include <sys/multilist.h>
47
/*
 * ZFS_OBJ_NAME names the object whose symbols and CTF types this module
 * looks up: "zfs" when built with _KERNEL defined, "libzpool.so.1" otherwise.
 */
#ifdef _KERNEL
#define ZFS_OBJ_NAME "zfs"
extern int64_t mdb_gethrtime(void);
#else
#define ZFS_OBJ_NAME "libzpool.so.1"
#endif

/*
 * String prefix that scopes a struct name to ZFS_OBJ_NAME; callers paste the
 * bare struct tag after it, e.g. ZFS_STRUCT "dsl_dir".
 */
#define ZFS_STRUCT "struct " ZFS_OBJ_NAME "`"

/* Defined only in the non-_KERNEL build; not used in the code visible here. */
#ifndef _KERNEL
int aok;
#endif
60
/*
 * Single-bit option flags; each enumerator occupies its own bit so they can
 * be OR-ed together (see SPA_FLAG_ALL_VDEV below).
 */
enum spa_flags {
	SPA_FLAG_CONFIG = 1 << 0,
	SPA_FLAG_VDEVS = 1 << 1,
	SPA_FLAG_ERRORS = 1 << 2,
	SPA_FLAG_METASLAB_GROUPS = 1 << 3,
	SPA_FLAG_METASLABS = 1 << 4,
	SPA_FLAG_HISTOGRAMS = 1 << 5
};

/*
 * If any of these flags are set, call spa_vdevs in spa_print
 */
#define SPA_FLAG_ALL_VDEV \
	(SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | SPA_FLAG_METASLAB_GROUPS | \
	SPA_FLAG_METASLABS)
76
77 static int
78 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
79 const char *member, int len, void *buf)
80 {
81 mdb_ctf_id_t id;
82 ulong_t off;
83 char name[64];
84
85 if (idp == NULL) {
86 if (mdb_ctf_lookup_by_name(type, &id) == -1) {
87 mdb_warn("couldn't find type %s", type);
88 return (DCMD_ERR);
89 }
90 idp = &id;
91 } else {
92 type = name;
93 mdb_ctf_type_name(*idp, name, sizeof (name));
94 }
95
96 if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
97 mdb_warn("couldn't find member %s of type %s\n", member, type);
98 return (DCMD_ERR);
99 }
100 if (off % 8 != 0) {
101 mdb_warn("member %s of type %s is unsupported bitfield",
102 member, type);
103 return (DCMD_ERR);
104 }
105 off /= 8;
106
107 if (mdb_vread(buf, len, addr + off) == -1) {
108 mdb_warn("failed to read %s from %s at %p",
109 member, type, addr + off);
110 return (DCMD_ERR);
111 }
112 /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
113
114 return (0);
115 }
116
/*
 * Read `member` of the ZFS struct named `structname` at target address
 * `addr` into the lvalue `dest`.  The type is resolved by name on each use,
 * and sizeof (dest) bytes are read.  Evaluates to getmember()'s result
 * (0 on success).
 */
#define GETMEMB(addr, structname, member, dest) \
	getmember(addr, ZFS_STRUCT structname, NULL, #member, \
	sizeof (dest), &(dest))

/*
 * Same as GETMEMB(), except the type is given as a pointer to an
 * already-resolved mdb_ctf_id_t instead of a name.
 */
#define GETMEMBID(addr, ctfid, member, dest) \
	getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
123
124 static boolean_t
125 strisprint(const char *cp)
126 {
127 for (; *cp; cp++) {
128 if (!isprint(*cp))
129 return (B_FALSE);
130 }
131 return (B_TRUE);
132 }
133
/* Size of the output buffer mdb_nicenum() formats into. */
#define NICENUM_BUFLEN 6

/*
 * Format numerator / denom into buf as "<whole>.<fraction>" with exactly
 * frac_digits fractional digits (truncated, not rounded).
 *
 * The fraction is produced by long division on the remainder, one decimal
 * digit at a time.  This yields the same digits as
 * (10^frac_digits * numerator) / denom - 10^frac_digits * whole, but the
 * largest intermediate value is 10 * (denom - 1) rather than
 * 10^frac_digits * numerator, so it does not wrap for large numerators.
 * It is exact as long as denom <= UINT64_MAX / 10.
 *
 * denom must be non-zero.  Returns the value of mdb_snprintf().
 */
static int
snprintfrac(char *buf, int len,
    uint64_t numerator, uint64_t denom, int frac_digits)
{
	unsigned int whole = numerator / denom;
	unsigned int frac = 0;
	uint64_t rem = numerator % denom;
	int i;

	for (i = 0; i < frac_digits; i++) {
		rem *= 10;
		frac = frac * 10 + (unsigned int)(rem / denom);
		rem %= denom;
	}

	return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac));
}
149
150 static void
151 mdb_nicenum(uint64_t num, char *buf)
152 {
153 uint64_t n = num;
154 int index = 0;
155 char *u;
156
157 while (n >= 1024) {
158 n = (n + (1024 / 2)) / 1024; /* Round up or down */
159 index++;
160 }
161
162 u = &" \0K\0M\0G\0T\0P\0E\0"[index*2];
163
164 if (index == 0) {
165 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu",
166 (u_longlong_t)n);
167 } else if (n < 10 && (num & (num - 1)) != 0) {
168 (void) snprintfrac(buf, NICENUM_BUFLEN,
169 num, 1ULL << 10 * index, 2);
170 strcat(buf, u);
171 } else if (n < 100 && (num & (num - 1)) != 0) {
172 (void) snprintfrac(buf, NICENUM_BUFLEN,
173 num, 1ULL << 10 * index, 1);
174 strcat(buf, u);
175 } else {
176 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s",
177 (u_longlong_t)n, u);
178 }
179 }
180
/* When non-zero, freelist_walk_step() also prints each entry's raw value. */
static int verbose;
182
183 static int
184 freelist_walk_init(mdb_walk_state_t *wsp)
185 {
186 if (wsp->walk_addr == NULL) {
187 mdb_warn("must supply starting address\n");
188 return (WALK_ERR);
189 }
190
191 wsp->walk_data = 0; /* Index into the freelist */
192 return (WALK_NEXT);
193 }
194
195 static int
196 freelist_walk_step(mdb_walk_state_t *wsp)
197 {
198 uint64_t entry;
199 uintptr_t number = (uintptr_t)wsp->walk_data;
200 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
201 "INVALID", "INVALID", "INVALID", "INVALID" };
202 int mapshift = SPA_MINBLOCKSHIFT;
203
204 if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
205 mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
206 return (WALK_DONE);
207 }
208 wsp->walk_addr += sizeof (entry);
209 wsp->walk_data = (void *)(number + 1);
210
211 if (SM_DEBUG_DECODE(entry)) {
212 mdb_printf("DEBUG: %3u %10s: txg=%llu pass=%llu\n",
213 number,
214 ddata[SM_DEBUG_ACTION_DECODE(entry)],
215 SM_DEBUG_TXG_DECODE(entry),
216 SM_DEBUG_SYNCPASS_DECODE(entry));
217 } else {
218 mdb_printf("Entry: %3u offsets=%08llx-%08llx type=%c "
219 "size=%06llx", number,
220 SM_OFFSET_DECODE(entry) << mapshift,
221 (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
222 mapshift,
223 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
224 SM_RUN_DECODE(entry) << mapshift);
225 if (verbose)
226 mdb_printf(" (raw=%012llx)\n", entry);
227 mdb_printf("\n");
228 }
229 return (WALK_NEXT);
230 }
231
232 static int
233 mdb_dsl_dir_name(uintptr_t addr, char *buf)
234 {
235 static int gotid;
236 static mdb_ctf_id_t dd_id;
237 uintptr_t dd_parent;
238 char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
239
240 if (!gotid) {
241 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir",
242 &dd_id) == -1) {
243 mdb_warn("couldn't find struct dsl_dir");
244 return (DCMD_ERR);
245 }
246 gotid = TRUE;
247 }
248 if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
249 GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
250 return (DCMD_ERR);
251 }
252
253 if (dd_parent) {
254 if (mdb_dsl_dir_name(dd_parent, buf))
255 return (DCMD_ERR);
256 strcat(buf, "/");
257 }
258
259 if (dd_myname[0])
260 strcat(buf, dd_myname);
261 else
262 strcat(buf, "???");
263
264 return (0);
265 }
266
267 static int
268 objset_name(uintptr_t addr, char *buf)
269 {
270 static int gotid;
271 static mdb_ctf_id_t os_id, ds_id;
272 uintptr_t os_dsl_dataset;
273 char ds_snapname[ZFS_MAX_DATASET_NAME_LEN];
274 uintptr_t ds_dir;
275
276 buf[0] = '\0';
277
278 if (!gotid) {
279 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset",
280 &os_id) == -1) {
281 mdb_warn("couldn't find struct objset");
282 return (DCMD_ERR);
283 }
284 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset",
285 &ds_id) == -1) {
286 mdb_warn("couldn't find struct dsl_dataset");
287 return (DCMD_ERR);
288 }
289
290 gotid = TRUE;
291 }
292
293 if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset))
294 return (DCMD_ERR);
295
296 if (os_dsl_dataset == 0) {
297 strcat(buf, "mos");
298 return (0);
299 }
300
301 if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
302 GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
303 return (DCMD_ERR);
304 }
305
306 if (ds_dir && mdb_dsl_dir_name(ds_dir, buf))
307 return (DCMD_ERR);
308
309 if (ds_snapname[0]) {
310 strcat(buf, "@");
311 strcat(buf, ds_snapname);
312 }
313 return (0);
314 }
315
316 static int
317 enum_lookup(char *type, int val, const char *prefix, size_t size, char *out)
318 {
319 const char *cp;
320 size_t len = strlen(prefix);
321 mdb_ctf_id_t enum_type;
322
323 if (mdb_ctf_lookup_by_name(type, &enum_type) != 0) {
324 mdb_warn("Could not find enum for %s", type);
325 return (-1);
326 }
327
328 if ((cp = mdb_ctf_enum_name(enum_type, val)) != NULL) {
329 if (strncmp(cp, prefix, len) == 0)
330 cp += len;
331 (void) strncpy(out, cp, size);
332 } else {
333 mdb_snprintf(out, size, "? (%d)", val);
334 }
335 return (0);
336 }
337
338 /* ARGSUSED */
339 static int
340 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
341 {
342 /*
343 * This table can be approximately generated by running:
344 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
345 */
346 static const char *params[] = {
347 "arc_lotsfree_percent",
348 "arc_pages_pp_reserve",
349 "arc_reduce_dnlc_percent",
350 "arc_swapfs_reserve",
351 "arc_zio_arena_free_shift",
352 "dbuf_cache_hiwater_pct",
353 "dbuf_cache_lowater_pct",
354 "dbuf_cache_max_bytes",
355 "dbuf_cache_shift",
356 "dbuf_metadata_cache_max_bytes",
357 "dbuf_metadata_cache_overflow",
358 "dbuf_metadata_cache_shift",
359 "ddt_zap_indirect_blockshift",
360 "ddt_zap_leaf_blockshift",
361 "ditto_same_vdev_distance_shift",
362 "dmu_find_threads",
363 "dmu_rescan_dnode_threshold",
364 "dsl_scan_delay_completion",
365 "fzap_default_block_shift",
366 "krrp_debug",
367 "l2arc_feed_again",
368 "l2arc_feed_min_ms",
369 "l2arc_feed_secs",
370 "l2arc_headroom",
371 "l2arc_headroom_boost",
372 "l2arc_noprefetch",
373 "l2arc_norw",
374 "l2arc_rebuild_enabled",
375 "l2arc_write_boost",
376 "l2arc_write_max",
377 "last_free_memory",
378 "last_free_reason",
379 "metaslab_aliquot",
380 "metaslab_alloc_dva_algorithm",
381 "metaslab_bias_enabled",
382 "metaslab_debug_load",
383 "metaslab_debug_unload",
384 "metaslab_df_alloc_threshold",
385 "metaslab_df_free_pct",
386 "metaslab_fragmentation_factor_enabled",
387 "metaslab_gang_bang",
388 "metaslab_lba_weighting_enabled",
389 "metaslab_load_pct",
390 "metaslab_min_alloc_size",
391 "metaslab_ndf_clump_shift",
392 "metaslab_preload_enabled",
393 "metaslab_preload_limit",
394 "metaslab_trace_enabled",
395 "metaslab_trace_max_entries",
396 "metaslab_unload_delay",
397 "metaslabs_per_vdev",
398 "nms_worm_transition_time",
399 "rrw_tsd_key",
400 "send_holes_without_birth_time",
401 "spa_asize_inflation",
402 "spa_avg_stat_update_ticks",
403 "spa_load_verify_data",
404 "spa_load_verify_maxinflight",
405 "spa_load_verify_metadata",
406 "spa_max_replication_override",
407 "spa_min_latency_delta",
408 "spa_min_slop",
409 "spa_mode_global",
410 "spa_namespace_lock",
411 "spa_obj_mtx_sz",
412 "spa_rotor_load_adjusting",
413 "spa_rotor_use_weight",
414 "spa_slop_shift",
415 "spa_special_factor",
416 "spa_special_to_normal_delta",
417 "spa_special_to_normal_ratio",
418 "spa_static_routing_percentage",
419 "space_map_blksz",
420 "vdev_mirror_shift",
421 "vdev_raidz_default_to_general",
422 "wbc_arc_enabled",
423 "wbc_force_trigger",
424 "wbc_idle_delay_ms",
425 "wbc_max_move_tasks_count",
426 "wbc_min_move_tasks_count",
427 "wbc_mv_cancel_threshold_cap",
428 "wbc_mv_cancel_threshold_initial",
429 "wbc_mv_cancel_threshold_step",
430 "wbc_spa_util_high_wm",
431 "wbc_spa_util_low_wm",
432 "wbc_throttle_move_delay_ms",
433 "wbc_update_statistics_interval_ms",
434 "wbc_window_roll_delay_ms",
435 "zcr_blksz_max",
436 "zcr_blksz_min",
437 "zfs_abd_chunk_size",
438 "zfs_abd_scatter_enabled",
439 "zfs_arc_average_blocksize",
440 "zfs_arc_ddt_limit",
441 "zfs_arc_evict_batch_limit",
442 "zfs_arc_grow_retry",
443 "zfs_arc_max",
444 "zfs_arc_meta_limit",
445 "zfs_arc_meta_min",
446 "zfs_arc_min",
447 "zfs_arc_p_min_shift",
448 "zfs_arc_segregate_ddt",
449 "zfs_arc_shrink_shift",
450 "zfs_commit_timeout_pct",
451 "zfs_compressed_arc_enabled",
452 "zfs_condense_pct",
453 "zfs_dbgmsg_maxsize",
454 "zfs_dbgmsg_size",
455 "zfs_dbuf_evict_key",
456 "zfs_ddt_byte_ceiling",
457 "zfs_ddt_limit_type",
458 "zfs_ddts_msize",
459 "zfs_deadman_checktime_ms",
460 "zfs_deadman_enabled",
461 "zfs_deadman_synctime_ms",
462 "zfs_dedup_prefetch",
463 "zfs_default_bs",
464 "zfs_default_ibs",
465 "zfs_delay_max_ns",
466 "zfs_delay_min_dirty_percent",
467 "zfs_delay_resolution_ns",
468 "zfs_delay_scale",
469 "zfs_dequeue_run_bonus_ms",
470 "zfs_dirty_data_max",
471 "zfs_dirty_data_max_max",
472 "zfs_dirty_data_max_percent",
473 "zfs_dirty_data_sync",
474 "zfs_do_async_free",
475 "zfs_fastflush",
476 "zfs_flags",
477 "zfs_flush_ntasks",
478 "zfs_free_bpobj_enabled",
479 "zfs_free_leak_on_eio",
480 "zfs_free_max_blocks",
481 "zfs_free_min_time_ms",
482 "zfs_fsync_sync_cnt",
483 "zfs_fsyncer_key",
484 "zfs_immediate_write_sz",
485 "zfs_l2arc_async_evict",
486 "zfs_li",
487 "zfs_lua_check_instrlimit_interval",
488 "zfs_lua_max_instrlimit",
489 "zfs_lua_max_memlimit",
490 "zfs_max_recordsize",
491 "zfs_mdcomp_disable",
492 "zfs_metaslab_condense_block_threshold",
493 "zfs_metaslab_fragmentation_threshold",
494 "zfs_metaslab_segment_weight_enabled",
495 "zfs_metaslab_switch_threshold",
496 "zfs_mg_fragmentation_threshold",
497 "zfs_mg_noalloc_threshold",
498 "zfs_multilist_num_sublists",
499 "zfs_no_scrub_io",
500 "zfs_no_scrub_prefetch",
501 "zfs_nocacheflush",
502 "zfs_nopwrite_enabled",
503 "zfs_pd_bytes_max",
504 "zfs_per_txg_dirty_frees_percent",
505 "zfs_prefetch_disable",
506 "zfs_read_chunk_size",
507 "zfs_recover",
508 "zfs_recv_queue_length",
509 "zfs_redundant_metadata_most_ditto_level",
510 "zfs_resilver_min_time_ms",
511 "zfs_root_latency_alpha",
512 "zfs_scan_checkpoint_intval",
513 "zfs_scan_dequeue_min",
514 "zfs_scan_dequeue_run_target_ms",
515 "zfs_scan_direct",
516 "zfs_scan_fill_weight",
517 "zfs_scan_max_ext_gap",
518 "zfs_scan_mem_lim_fact",
519 "zfs_scan_mem_lim_min",
520 "zfs_scan_mem_lim_soft_fact",
521 "zfs_scan_mem_lim_soft_max",
522 "zfs_scan_min_time_ms",
523 "zfs_scrub_limit",
524 "zfs_send_corrupt_data",
525 "zfs_send_queue_length",
526 "zfs_send_set_freerecords_bit",
527 "zfs_send_timeout",
528 "zfs_share_lock",
529 "zfs_smartcomp_interval",
530 "zfs_smartcomp_interval_exp",
531 "zfs_smartcomp_threshold_factor",
532 "zfs_sync_pass_deferred_free",
533 "zfs_sync_pass_dont_compress",
534 "zfs_sync_pass_rewrite",
535 "zfs_sync_taskq_batch_pct",
536 "zfs_top_maxinflight",
537 "zfs_trim",
538 "zfs_trim_min_ext_sz",
539 "zfs_txg_timeout",
540 "zfs_vdev_aggregation_limit",
541 "zfs_vdev_async_read_max_active",
542 "zfs_vdev_async_read_min_active",
543 "zfs_vdev_async_write_active_max_dirty_percent",
544 "zfs_vdev_async_write_active_min_dirty_percent",
545 "zfs_vdev_async_write_max_active",
546 "zfs_vdev_async_write_min_active",
547 "zfs_vdev_cache_bshift",
548 "zfs_vdev_cache_max",
549 "zfs_vdev_cache_size",
550 "zfs_vdev_max_active",
551 "zfs_vdev_queue_depth_pct",
552 "zfs_vdev_read_gap_limit",
553 "zfs_vdev_resilver_max_active",
554 "zfs_vdev_resilver_min_active",
555 "zfs_vdev_scrub_max_active",
556 "zfs_vdev_scrub_min_active",
557 "zfs_vdev_sync_read_max_active",
558 "zfs_vdev_sync_read_min_active",
559 "zfs_vdev_sync_write_max_active",
560 "zfs_vdev_sync_write_min_active",
561 "zfs_vdev_write_gap_limit",
562 "zfs_vn_rele_max_tasks",
563 "zfs_vs_latency_alpha",
564 "zfs_wbc_data_max",
565 "zfs_wbc_schedtmo",
566 "zfs_write_implies_delete_child",
567 "zfs_zil_clean_taskq_maxalloc",
568 "zfs_zil_clean_taskq_minalloc",
569 "zfs_zil_clean_taskq_nthr_pct",
570 "zil_replay_disable",
571 "zil_slog_bulk",
572 "zio_buf_debug_limit",
573 "zio_dva_throttle_enabled",
574 "zio_faulty_vdev_delay_us",
575 "zio_faulty_vdev_enabled",
576 "zio_faulty_vdev_guid",
577 "zio_injection_enabled",
578 "zvol_immediate_write_sz",
579 "zvol_maxphys",
580 "zvol_unmap_enabled",
581 "zvol_unmap_sync_enabled",
582 };
583
584 for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
585 int sz;
586 uint64_t val64;
587 uint32_t *val32p = (uint32_t *)&val64;
588
589 sz = mdb_readvar(&val64, params[i]);
590 if (sz == 4) {
591 mdb_printf("%s = 0x%x\n", params[i], *val32p);
592 } else if (sz == 8) {
593 mdb_printf("%s = 0x%llx\n", params[i], val64);
594 } else {
595 mdb_warn("variable %s not found", params[i]);
596 }
597 }
598
599 return (DCMD_OK);
600 }
601
602 /* ARGSUSED */
603 static int
604 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
605 {
606 char type[80], checksum[80], compress[80];
607 blkptr_t blk, *bp = &blk;
608 char buf[BP_SPRINTF_LEN];
609
610 if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) {
611 mdb_warn("failed to read blkptr_t");
612 return (DCMD_ERR);
613 }
614
615 if (enum_lookup("enum dmu_object_type", BP_GET_TYPE(bp), "DMU_OT_",
616 sizeof (type), type) == -1 ||
617 enum_lookup("enum zio_checksum", BP_GET_CHECKSUM(bp),
618 "ZIO_CHECKSUM_", sizeof (checksum), checksum) == -1 ||
619 enum_lookup("enum zio_compress", BP_GET_COMPRESS(bp),
620 "ZIO_COMPRESS_", sizeof (compress), compress) == -1) {
621 mdb_warn("Could not find blkptr enumerated types");
622 return (DCMD_ERR);
623 }
624
625 SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type,
626 checksum, compress);
627
628 mdb_printf("%s\n", buf);
629
630 return (DCMD_OK);
631 }
632
/*
 * Debugger-side view of the dmu_buf_impl fields that dbuf() prints.  It is
 * filled in by mdb_ctf_vread() from the target's dmu_buf_impl type.
 */
typedef struct mdb_dmu_buf_impl {
	struct {
		uint64_t db_object;	/* object number; "mdn" if meta dnode */
		uintptr_t db_data;
	} db;
	uintptr_t db_objset;		/* target address passed to objset_name() */
	uint64_t db_level;
	uint64_t db_blkid;		/* block id; "bonus" if DMU_BONUS_BLKID */
	struct {
		uint64_t rc_count;	/* printed in the "holds" column */
	} db_holds;
} mdb_dmu_buf_impl_t;
645
646 /* ARGSUSED */
647 static int
648 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
649 {
650 mdb_dmu_buf_impl_t db;
651 char objectname[32];
652 char blkidname[32];
653 char path[ZFS_MAX_DATASET_NAME_LEN];
654 int ptr_width = (int)(sizeof (void *)) * 2;
655
656 if (DCMD_HDRSPEC(flags))
657 mdb_printf("%*s %8s %3s %9s %5s %s\n",
658 ptr_width, "addr", "object", "lvl", "blkid", "holds", "os");
659
660 if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t",
661 addr, 0) == -1)
662 return (DCMD_ERR);
663
664 if (db.db.db_object == DMU_META_DNODE_OBJECT)
665 (void) strcpy(objectname, "mdn");
666 else
667 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
668 (u_longlong_t)db.db.db_object);
669
670 if (db.db_blkid == DMU_BONUS_BLKID)
671 (void) strcpy(blkidname, "bonus");
672 else
673 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
674 (u_longlong_t)db.db_blkid);
675
676 if (objset_name(db.db_objset, path)) {
677 return (DCMD_ERR);
678 }
679
680 mdb_printf("%*p %8s %3u %9s %5llu %s\n", ptr_width, addr,
681 objectname, (int)db.db_level, blkidname,
682 db.db_holds.rc_count, path);
683
684 return (DCMD_OK);
685 }
686
687 /* ARGSUSED */
688 static int
689 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
690 {
691 #define HISTOSZ 32
692 uintptr_t dbp;
693 dmu_buf_impl_t db;
694 dbuf_hash_table_t ht;
695 uint64_t bucket, ndbufs;
696 uint64_t histo[HISTOSZ];
697 uint64_t histo2[HISTOSZ];
698 int i, maxidx;
699
700 if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
701 mdb_warn("failed to read 'dbuf_hash_table'");
702 return (DCMD_ERR);
703 }
704
705 for (i = 0; i < HISTOSZ; i++) {
706 histo[i] = 0;
707 histo2[i] = 0;
708 }
709
710 ndbufs = 0;
711 for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
712 int len;
713
714 if (mdb_vread(&dbp, sizeof (void *),
715 (uintptr_t)(ht.hash_table+bucket)) == -1) {
716 mdb_warn("failed to read hash bucket %u at %p",
717 bucket, ht.hash_table+bucket);
718 return (DCMD_ERR);
719 }
720
721 len = 0;
722 while (dbp != 0) {
723 if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
724 dbp) == -1) {
725 mdb_warn("failed to read dbuf at %p", dbp);
726 return (DCMD_ERR);
727 }
728 dbp = (uintptr_t)db.db_hash_next;
729 for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
730 histo2[i]++;
731 len++;
732 ndbufs++;
733 }
734
735 if (len >= HISTOSZ)
736 len = HISTOSZ-1;
737 histo[len]++;
738 }
739
740 mdb_printf("hash table has %llu buckets, %llu dbufs "
741 "(avg %llu buckets/dbuf)\n",
742 ht.hash_table_mask+1, ndbufs,
743 (ht.hash_table_mask+1)/ndbufs);
744
745 mdb_printf("\n");
746 maxidx = 0;
747 for (i = 0; i < HISTOSZ; i++)
748 if (histo[i] > 0)
749 maxidx = i;
750 mdb_printf("hash chain length number of buckets\n");
751 for (i = 0; i <= maxidx; i++)
752 mdb_printf("%u %llu\n", i, histo[i]);
753
754 mdb_printf("\n");
755 maxidx = 0;
756 for (i = 0; i < HISTOSZ; i++)
757 if (histo2[i] > 0)
758 maxidx = i;
759 mdb_printf("hash chain depth number of dbufs\n");
760 for (i = 0; i <= maxidx; i++)
761 mdb_printf("%u or more %llu %llu%%\n",
762 i, histo2[i], histo2[i]*100/ndbufs);
763
764
765 return (DCMD_OK);
766 }
767
768 #define CHAIN_END 0xffff
769 /*
770 * ::zap_leaf [-v]
771 *
772 * Print a zap_leaf_phys_t, assumed to be 16k
773 */
774 /* ARGSUSED */
775 static int
776 zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
777 {
778 char buf[16*1024];
779 int verbose = B_FALSE;
780 int four = B_FALSE;
781 dmu_buf_t l_dbuf;
782 zap_leaf_t l;
783 zap_leaf_phys_t *zlp = (void *)buf;
784 int i;
785
786 if (mdb_getopts(argc, argv,
787 'v', MDB_OPT_SETBITS, TRUE, &verbose,
788 '4', MDB_OPT_SETBITS, TRUE, &four,
789 NULL) != argc)
790 return (DCMD_USAGE);
791
792 l_dbuf.db_data = zlp;
793 l.l_dbuf = &l_dbuf;
794 l.l_bs = 14; /* assume 16k blocks */
795 if (four)
796 l.l_bs = 12;
797
798 if (!(flags & DCMD_ADDRSPEC)) {
799 return (DCMD_USAGE);
800 }
801
802 if (mdb_vread(buf, sizeof (buf), addr) == -1) {
803 mdb_warn("failed to read zap_leaf_phys_t at %p", addr);
804 return (DCMD_ERR);
805 }
806
807 if (zlp->l_hdr.lh_block_type != ZBT_LEAF ||
808 zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) {
809 mdb_warn("This does not appear to be a zap_leaf_phys_t");
810 return (DCMD_ERR);
811 }
812
813 mdb_printf("zap_leaf_phys_t at %p:\n", addr);
814 mdb_printf(" lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len);
815 mdb_printf(" lh_prefix = %llx\n", zlp->l_hdr.lh_prefix);
816 mdb_printf(" lh_nentries = %u\n", zlp->l_hdr.lh_nentries);
817 mdb_printf(" lh_nfree = %u\n", zlp->l_hdr.lh_nfree,
818 zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l)));
819 mdb_printf(" lh_freelist = %u\n", zlp->l_hdr.lh_freelist);
820 mdb_printf(" lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags,
821 zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ?
822 "ENTRIES_CDSORTED" : "");
823
824 if (verbose) {
825 mdb_printf(" hash table:\n");
826 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) {
827 if (zlp->l_hash[i] != CHAIN_END)
828 mdb_printf(" %u: %u\n", i, zlp->l_hash[i]);
829 }
830 }
831
832 mdb_printf(" chunks:\n");
833 for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
834 /* LINTED: alignment */
835 zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i);
836 switch (zlc->l_entry.le_type) {
837 case ZAP_CHUNK_FREE:
838 if (verbose) {
839 mdb_printf(" %u: free; lf_next = %u\n",
840 i, zlc->l_free.lf_next);
841 }
842 break;
843 case ZAP_CHUNK_ENTRY:
844 mdb_printf(" %u: entry\n", i);
845 if (verbose) {
846 mdb_printf(" le_next = %u\n",
847 zlc->l_entry.le_next);
848 }
849 mdb_printf(" le_name_chunk = %u\n",
850 zlc->l_entry.le_name_chunk);
851 mdb_printf(" le_name_numints = %u\n",
852 zlc->l_entry.le_name_numints);
853 mdb_printf(" le_value_chunk = %u\n",
854 zlc->l_entry.le_value_chunk);
855 mdb_printf(" le_value_intlen = %u\n",
856 zlc->l_entry.le_value_intlen);
857 mdb_printf(" le_value_numints = %u\n",
858 zlc->l_entry.le_value_numints);
859 mdb_printf(" le_cd = %u\n",
860 zlc->l_entry.le_cd);
861 mdb_printf(" le_hash = %llx\n",
862 zlc->l_entry.le_hash);
863 break;
864 case ZAP_CHUNK_ARRAY:
865 mdb_printf(" %u: array", i);
866 if (strisprint((char *)zlc->l_array.la_array))
867 mdb_printf(" \"%s\"", zlc->l_array.la_array);
868 mdb_printf("\n");
869 if (verbose) {
870 int j;
871 mdb_printf(" ");
872 for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) {
873 mdb_printf("%02x ",
874 zlc->l_array.la_array[j]);
875 }
876 mdb_printf("\n");
877 }
878 if (zlc->l_array.la_next != CHAIN_END) {
879 mdb_printf(" lf_next = %u\n",
880 zlc->l_array.la_next);
881 }
882 break;
883 default:
884 mdb_printf(" %u: undefined type %u\n",
885 zlc->l_entry.le_type);
886 }
887 }
888
889 return (DCMD_OK);
890 }
891
/*
 * Filter state shared between dbufs() and its walk callback dbufs_cb().
 * A numeric field equal to DBUFS_UNSET, or a NULL osname, means "do not
 * filter on this attribute".
 */
typedef struct dbufs_data {
	mdb_ctf_id_t id;	/* CTF id of dmu_buf_impl, resolved by dbufs() */
	uint64_t objset;	/* objset address to match */
	uint64_t object;	/* object number to match */
	uint64_t level;		/* level to match */
	uint64_t blkid;		/* block id to match */
	char *osname;		/* objset name to match, or NULL */
} dbufs_data_t;

/* Sentinel marking a numeric dbufs_data_t filter as not specified. */
#define DBUFS_UNSET (0xbaddcafedeadbeefULL)
902
903 /* ARGSUSED */
904 static int
905 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
906 {
907 dbufs_data_t *data = arg;
908 uintptr_t objset;
909 dmu_buf_t db;
910 uint8_t level;
911 uint64_t blkid;
912 char osname[ZFS_MAX_DATASET_NAME_LEN];
913
914 if (GETMEMBID(addr, &data->id, db_objset, objset) ||
915 GETMEMBID(addr, &data->id, db, db) ||
916 GETMEMBID(addr, &data->id, db_level, level) ||
917 GETMEMBID(addr, &data->id, db_blkid, blkid)) {
918 return (WALK_ERR);
919 }
920
921 if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
922 (data->osname == NULL || (objset_name(objset, osname) == 0 &&
923 strcmp(data->osname, osname) == 0)) &&
924 (data->object == DBUFS_UNSET || data->object == db.db_object) &&
925 (data->level == DBUFS_UNSET || data->level == level) &&
926 (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
927 mdb_printf("%#lr\n", addr);
928 }
929 return (WALK_NEXT);
930 }
931
932 /* ARGSUSED */
933 static int
934 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
935 {
936 dbufs_data_t data;
937 char *object = NULL;
938 char *blkid = NULL;
939
940 data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
941 data.osname = NULL;
942
943 if (mdb_getopts(argc, argv,
944 'O', MDB_OPT_UINT64, &data.objset,
945 'n', MDB_OPT_STR, &data.osname,
946 'o', MDB_OPT_STR, &object,
947 'l', MDB_OPT_UINT64, &data.level,
948 'b', MDB_OPT_STR, &blkid) != argc) {
949 return (DCMD_USAGE);
950 }
951
952 if (object) {
953 if (strcmp(object, "mdn") == 0) {
954 data.object = DMU_META_DNODE_OBJECT;
955 } else {
956 data.object = mdb_strtoull(object);
957 }
958 }
959
960 if (blkid) {
961 if (strcmp(blkid, "bonus") == 0) {
962 data.blkid = DMU_BONUS_BLKID;
963 } else {
964 data.blkid = mdb_strtoull(blkid);
965 }
966 }
967
968 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) {
969 mdb_warn("couldn't find struct dmu_buf_impl_t");
970 return (DCMD_ERR);
971 }
972
973 if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
974 mdb_warn("can't walk dbufs");
975 return (DCMD_ERR);
976 }
977
978 return (DCMD_OK);
979 }
980
/* Search state shared between abuf_find() and its callback abuf_find_cb(). */
typedef struct abuf_find_data {
	dva_t dva;		/* the two DVA words to look for */
	mdb_ctf_id_t id;	/* CTF id of arc_buf_hdr, set by abuf_find() */
} abuf_find_data_t;
985
986 /* ARGSUSED */
987 static int
988 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
989 {
990 abuf_find_data_t *data = arg;
991 dva_t dva;
992
993 if (GETMEMBID(addr, &data->id, b_dva, dva)) {
994 return (WALK_ERR);
995 }
996
997 if (dva.dva_word[0] == data->dva.dva_word[0] &&
998 dva.dva_word[1] == data->dva.dva_word[1]) {
999 mdb_printf("%#lr\n", addr);
1000 }
1001 return (WALK_NEXT);
1002 }
1003
1004 /* ARGSUSED */
1005 static int
1006 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1007 {
1008 abuf_find_data_t data;
1009 GElf_Sym sym;
1010 int i;
1011 const char *syms[] = {
1012 "ARC_mru",
1013 "ARC_mru_ghost",
1014 "ARC_mfu",
1015 "ARC_mfu_ghost",
1016 };
1017
1018 if (argc != 2)
1019 return (DCMD_USAGE);
1020
1021 for (i = 0; i < 2; i ++) {
1022 switch (argv[i].a_type) {
1023 case MDB_TYPE_STRING:
1024 data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
1025 break;
1026 case MDB_TYPE_IMMEDIATE:
1027 data.dva.dva_word[i] = argv[i].a_un.a_val;
1028 break;
1029 default:
1030 return (DCMD_USAGE);
1031 }
1032 }
1033
1034 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) {
1035 mdb_warn("couldn't find struct arc_buf_hdr");
1036 return (DCMD_ERR);
1037 }
1038
1039 for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
1040 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) {
1041 mdb_warn("can't find symbol %s", syms[i]);
1042 return (DCMD_ERR);
1043 }
1044
1045 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
1046 mdb_warn("can't walk %s", syms[i]);
1047 return (DCMD_ERR);
1048 }
1049 }
1050
1051 return (DCMD_OK);
1052 }
1053
1054
/* Options passed from dbgmsg() to its walk callback dbgmsg_cb(). */
typedef struct dbgmsg_arg {
	boolean_t da_verbose;	/* also print timestamp and run ::whatis */
	boolean_t da_address;	/* prefix each message with its address */
} dbgmsg_arg_t;
1059
1060 /* ARGSUSED */
1061 static int
1062 dbgmsg_cb(uintptr_t addr, const void *unknown, void *arg)
1063 {
1064 static mdb_ctf_id_t id;
1065 static boolean_t gotid;
1066 static ulong_t off;
1067
1068 dbgmsg_arg_t *da = arg;
1069 time_t timestamp;
1070 char buf[1024];
1071
1072 if (!gotid) {
1073 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) ==
1074 -1) {
1075 mdb_warn("couldn't find struct zfs_dbgmsg");
1076 return (WALK_ERR);
1077 }
1078 gotid = TRUE;
1079 if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) {
1080 mdb_warn("couldn't find zdm_msg");
1081 return (WALK_ERR);
1082 }
1083 off /= 8;
1084 }
1085
1086
1087 if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) {
1088 return (WALK_ERR);
1089 }
1090
1091 if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) {
1092 mdb_warn("failed to read zdm_msg at %p\n", addr + off);
1093 return (DCMD_ERR);
1094 }
1095
1096 if (da->da_address)
1097 mdb_printf("%p ", addr);
1098 if (da->da_verbose)
1099 mdb_printf("%Y ", timestamp);
1100
1101 mdb_printf("%s\n", buf);
1102
1103 if (da->da_verbose)
1104 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
1105
1106 return (WALK_NEXT);
1107 }
1108
1109 /* ARGSUSED */
1110 static int
1111 dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1112 {
1113 GElf_Sym sym;
1114 dbgmsg_arg_t da = { 0 };
1115
1116 if (mdb_getopts(argc, argv,
1117 'v', MDB_OPT_SETBITS, B_TRUE, &da.da_verbose,
1118 'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address,
1119 NULL) != argc)
1120 return (DCMD_USAGE);
1121
1122 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) {
1123 mdb_warn("can't find zfs_dbgmsgs");
1124 return (DCMD_ERR);
1125 }
1126
1127 if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) {
1128 mdb_warn("can't walk zfs_dbgmsgs");
1129 return (DCMD_ERR);
1130 }
1131
1132 return (DCMD_OK);
1133 }
1134
1135 /*ARGSUSED*/
1136 static int
1137 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1138 {
1139 kstat_named_t *stats;
1140 GElf_Sym sym;
1141 int nstats, i;
1142 uint_t opt_a = FALSE;
1143 uint_t opt_b = FALSE;
1144 uint_t shift = 0;
1145 const char *suffix;
1146
1147 static const char *bytestats[] = {
1148 "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size",
1149 "arc_meta_used", "arc_meta_limit", "arc_meta_max",
1150 "arc_meta_min", "hdr_size", "data_size", "metadata_size",
1151 "ddt_size", "other_size", "anon_size", "anon_evictable_data",
1152 "anon_evictable_metadata", "anon_evictable_ddt", "mru_size",
1153 "mru_evictable_data", "mru_evictable_metadata",
1154 "mru_evictable_ddt", "mru_ghost_size",
1155 "mru_ghost_evictable_data", "mru_ghost_evictable_metadata",
1156 "mru_ghost_evictable_ddt", "mfu_size", "mfu_evictable_data",
1157 "mfu_evictable_metadata", "mfu_evictable_ddt",
1158 "mfu_ghost_size", "mfu_ghost_evictable_data",
1159 "mfu_ghost_evictable_metadata", "mfu_ghost_evictable_ddt",
1160 "evict_l2_cached", "evict_l2_eligible", "evict_l2_ineligible",
1161 "l2_read_bytes", "l2_ddt_read_bytes", "l2_write_bytes",
1162 "l2_ddt_write_bytes", "l2_size", "l2_asize",
1163 "l2_hdr_size", "compressed_size", "uncompressed_size",
1164 "overhead_size",
1165 NULL
1166 };
1167
1168 static const char *extras[] = {
1169 "arc_no_grow", "arc_tempreserve",
1170 NULL
1171 };
1172
1173 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) {
1174 mdb_warn("failed to find 'arc_stats'");
1175 return (DCMD_ERR);
1176 }
1177
1178 stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
1179
1180 if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
1181 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
1182 return (DCMD_ERR);
1183 }
1184
1185 nstats = sym.st_size / sizeof (kstat_named_t);
1186
1187 /* NB: -a / opt_a are ignored for backwards compatability */
1188 if (mdb_getopts(argc, argv,
1189 'a', MDB_OPT_SETBITS, TRUE, &opt_a,
1190 'b', MDB_OPT_SETBITS, TRUE, &opt_b,
1191 'k', MDB_OPT_SETBITS, 10, &shift,
1192 'm', MDB_OPT_SETBITS, 20, &shift,
1193 'g', MDB_OPT_SETBITS, 30, &shift,
1194 NULL) != argc)
1195 return (DCMD_USAGE);
1196
1197 if (!opt_b && !shift)
1198 shift = 20;
1199
1200 switch (shift) {
1201 case 0:
1202 suffix = "B";
1203 break;
1204 case 10:
1205 suffix = "KB";
1206 break;
1207 case 20:
1208 suffix = "MB";
1209 break;
1210 case 30:
1211 suffix = "GB";
1212 break;
1213 default:
1214 suffix = "XX";
1215 }
1216
1217 for (i = 0; i < nstats; i++) {
1218 int j;
1219 boolean_t bytes = B_FALSE;
1220
1221 for (j = 0; bytestats[j]; j++) {
1222 if (strcmp(stats[i].name, bytestats[j]) == 0) {
1223 bytes = B_TRUE;
1224 break;
1225 }
1226 }
1227
1228 if (bytes) {
1229 mdb_printf("%-25s = %9llu %s\n", stats[i].name,
1230 stats[i].value.ui64 >> shift, suffix);
1231 } else {
1232 mdb_printf("%-25s = %9llu\n", stats[i].name,
1233 stats[i].value.ui64);
1234 }
1235 }
1236
1237 for (i = 0; extras[i]; i++) {
1238 uint64_t buf;
1239
1240 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) {
1241 mdb_warn("failed to find '%s'", extras[i]);
1242 return (DCMD_ERR);
1243 }
1244
1245 if (sym.st_size != sizeof (uint64_t) &&
1246 sym.st_size != sizeof (uint32_t)) {
1247 mdb_warn("expected scalar for variable '%s'\n",
1248 extras[i]);
1249 return (DCMD_ERR);
1250 }
1251
1252 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
1253 mdb_warn("couldn't read '%s'", extras[i]);
1254 return (DCMD_ERR);
1255 }
1256
1257 mdb_printf("%-25s = ", extras[i]);
1258
1259 /* NB: all the 64-bit extras happen to be byte counts */
1260 if (sym.st_size == sizeof (uint64_t))
1261 mdb_printf("%9llu %s\n", buf >> shift, suffix);
1262
1263 if (sym.st_size == sizeof (uint32_t))
1264 mdb_printf("%9d\n", *((uint32_t *)&buf));
1265 }
1266 return (DCMD_OK);
1267 }
1268
1269 typedef struct mdb_spa_print {
1270 pool_state_t spa_state;
1271 char spa_name[ZFS_MAX_DATASET_NAME_LEN];
1272 uintptr_t spa_normal_class;
1273 } mdb_spa_print_t;
1274
1275
1276 const char histo_stars[] = "****************************************";
1277 const int histo_width = sizeof (histo_stars) - 1;
1278
1279 static void
1280 dump_histogram(const uint64_t *histo, int size, int offset)
1281 {
1282 int i;
1283 int minidx = size - 1;
1284 int maxidx = 0;
1285 uint64_t max = 0;
1286
1287 for (i = 0; i < size; i++) {
1288 if (histo[i] > max)
1289 max = histo[i];
1290 if (histo[i] > 0 && i > maxidx)
1291 maxidx = i;
1292 if (histo[i] > 0 && i < minidx)
1293 minidx = i;
1294 }
1295
1296 if (max < histo_width)
1297 max = histo_width;
1298
1299 for (i = minidx; i <= maxidx; i++) {
1300 mdb_printf("%3u: %6llu %s\n",
1301 i + offset, (u_longlong_t)histo[i],
1302 &histo_stars[(max - histo[i]) * histo_width / max]);
1303 }
1304 }
1305
1306 typedef struct mdb_metaslab_class {
1307 uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE];
1308 } mdb_metaslab_class_t;
1309
1310 /*
1311 * spa_class_histogram(uintptr_t class_addr)
1312 *
1313 * Prints free space histogram for a device class
1314 *
1315 * Returns DCMD_OK, or DCMD_ERR.
1316 */
1317 static int
1318 spa_class_histogram(uintptr_t class_addr)
1319 {
1320 mdb_metaslab_class_t mc;
1321 if (mdb_ctf_vread(&mc, "metaslab_class_t",
1322 "mdb_metaslab_class_t", class_addr, 0) == -1)
1323 return (DCMD_ERR);
1324
1325 mdb_inc_indent(4);
1326 dump_histogram(mc.mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1327 mdb_dec_indent(4);
1328 return (DCMD_OK);
1329 }
1330
1331 /*
1332 * ::spa
1333 *
1334 * -c Print configuration information as well
1335 * -v Print vdev state
1336 * -e Print vdev error stats
1337 * -m Print vdev metaslab info
1338 * -M print vdev metaslab group info
1339 * -h Print histogram info (must be combined with -m or -M)
1340 *
1341 * Print a summarized spa_t. When given no arguments, prints out a table of all
1342 * active pools on the system.
1343 */
1344 /* ARGSUSED */
1345 static int
1346 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1347 {
1348 const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
1349 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
1350 const char *state;
1351 int spa_flags = 0;
1352
1353 if (mdb_getopts(argc, argv,
1354 'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags,
1355 'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags,
1356 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1357 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1358 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1359 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1360 NULL) != argc)
1361 return (DCMD_USAGE);
1362
1363 if (!(flags & DCMD_ADDRSPEC)) {
1364 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
1365 mdb_warn("can't walk spa");
1366 return (DCMD_ERR);
1367 }
1368
1369 return (DCMD_OK);
1370 }
1371
1372 if (flags & DCMD_PIPE_OUT) {
1373 mdb_printf("%#lr\n", addr);
1374 return (DCMD_OK);
1375 }
1376
1377 if (DCMD_HDRSPEC(flags))
1378 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
1379 sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
1380
1381 mdb_spa_print_t spa;
1382 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1)
1383 return (DCMD_ERR);
1384
1385 if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
1386 state = "UNKNOWN";
1387 else
1388 state = statetab[spa.spa_state];
1389
1390 mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
1391 if (spa_flags & SPA_FLAG_HISTOGRAMS)
1392 spa_class_histogram(spa.spa_normal_class);
1393
1394 if (spa_flags & SPA_FLAG_CONFIG) {
1395 mdb_printf("\n");
1396 mdb_inc_indent(4);
1397 if (mdb_call_dcmd("spa_config", addr, flags, 0,
1398 NULL) != DCMD_OK)
1399 return (DCMD_ERR);
1400 mdb_dec_indent(4);
1401 }
1402
1403 if (spa_flags & SPA_FLAG_ALL_VDEV) {
1404 mdb_arg_t v;
1405 char opts[100] = "-";
1406 int args =
1407 (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1;
1408
1409 if (spa_flags & SPA_FLAG_ERRORS)
1410 strcat(opts, "e");
1411 if (spa_flags & SPA_FLAG_METASLABS)
1412 strcat(opts, "m");
1413 if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
1414 strcat(opts, "M");
1415 if (spa_flags & SPA_FLAG_HISTOGRAMS)
1416 strcat(opts, "h");
1417
1418 v.a_type = MDB_TYPE_STRING;
1419 v.a_un.a_str = opts;
1420
1421 mdb_printf("\n");
1422 mdb_inc_indent(4);
1423 if (mdb_call_dcmd("spa_vdevs", addr, flags, args,
1424 &v) != DCMD_OK)
1425 return (DCMD_ERR);
1426 mdb_dec_indent(4);
1427 }
1428
1429 return (DCMD_OK);
1430 }
1431
1432 typedef struct mdb_spa_config_spa {
1433 uintptr_t spa_config;
1434 } mdb_spa_config_spa_t;
1435
1436 /*
1437 * ::spa_config
1438 *
1439 * Given a spa_t, print the configuration information stored in spa_config.
1440 * Since it's just an nvlist, format it as an indented list of name=value pairs.
1441 * We simply read the value of spa_config and pass off to ::nvlist.
1442 */
1443 /* ARGSUSED */
1444 static int
1445 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1446 {
1447 mdb_spa_config_spa_t spa;
1448
1449 if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1450 return (DCMD_USAGE);
1451
1452 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t",
1453 addr, 0) == -1)
1454 return (DCMD_ERR);
1455
1456 if (spa.spa_config == 0) {
1457 mdb_printf("(none)\n");
1458 return (DCMD_OK);
1459 }
1460
1461 return (mdb_call_dcmd("nvlist", spa.spa_config, flags,
1462 0, NULL));
1463 }
1464
1465
1466
1467 typedef struct mdb_range_tree {
1468 uint64_t rt_space;
1469 } mdb_range_tree_t;
1470
1471 typedef struct mdb_metaslab_group {
1472 uint64_t mg_fragmentation;
1473 uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
1474 uintptr_t mg_vd;
1475 } mdb_metaslab_group_t;
1476
1477 typedef struct mdb_metaslab {
1478 uint64_t ms_id;
1479 uint64_t ms_start;
1480 uint64_t ms_size;
1481 int64_t ms_deferspace;
1482 uint64_t ms_fragmentation;
1483 uint64_t ms_weight;
1484 uintptr_t ms_alloctree[TXG_SIZE];
1485 uintptr_t ms_freeingtree;
1486 uintptr_t ms_freedtree;
1487 uintptr_t ms_tree;
1488 uintptr_t ms_sm;
1489 } mdb_metaslab_t;
1490
1491 typedef struct mdb_space_map_phys_t {
1492 uint64_t smp_alloc;
1493 uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
1494 } mdb_space_map_phys_t;
1495
1496 typedef struct mdb_space_map {
1497 uint64_t sm_size;
1498 uint8_t sm_shift;
1499 uint64_t sm_alloc;
1500 uintptr_t sm_phys;
1501 } mdb_space_map_t;
1502
1503 typedef struct mdb_vdev {
1504 uintptr_t vdev_path;
1505 uintptr_t vdev_ms;
1506 uintptr_t vdev_ops;
1507 uint64_t vdev_ms_count;
1508 uint64_t vdev_id;
1509 vdev_stat_t vdev_stat;
1510 } mdb_vdev_t;
1511
1512 typedef struct mdb_vdev_ops {
1513 char vdev_op_type[16];
1514 } mdb_vdev_ops_t;
1515
1516 static int
1517 metaslab_stats(uintptr_t addr, int spa_flags)
1518 {
1519 mdb_vdev_t vdev;
1520 uintptr_t *vdev_ms;
1521
1522 if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t",
1523 (uintptr_t)addr, 0) == -1) {
1524 mdb_warn("failed to read vdev at %p\n", addr);
1525 return (DCMD_ERR);
1526 }
1527
1528 mdb_inc_indent(4);
1529 mdb_printf("%<u>%-?s %6s %20s %10s %9s%</u>\n", "ADDR", "ID",
1530 "OFFSET", "FREE", "FRAGMENTATION");
1531
1532 vdev_ms = mdb_alloc(vdev.vdev_ms_count * sizeof (void *),
1533 UM_SLEEP | UM_GC);
1534 if (mdb_vread(vdev_ms, vdev.vdev_ms_count * sizeof (void *),
1535 (uintptr_t)vdev.vdev_ms) == -1) {
1536 mdb_warn("failed to read vdev_ms at %p\n", vdev.vdev_ms);
1537 return (DCMD_ERR);
1538 }
1539
1540 for (int m = 0; m < vdev.vdev_ms_count; m++) {
1541 mdb_metaslab_t ms;
1542 mdb_space_map_t sm = { 0 };
1543 char free[NICENUM_BUFLEN];
1544
1545 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1546 (uintptr_t)vdev_ms[m], 0) == -1)
1547 return (DCMD_ERR);
1548
1549 if (ms.ms_sm != NULL &&
1550 mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t",
1551 ms.ms_sm, 0) == -1)
1552 return (DCMD_ERR);
1553
1554 mdb_nicenum(ms.ms_size - sm.sm_alloc, free);
1555
1556 mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id,
1557 ms.ms_start, free);
1558 if (ms.ms_fragmentation == ZFS_FRAG_INVALID)
1559 mdb_printf("%9s\n", "-");
1560 else
1561 mdb_printf("%9llu%%\n", ms.ms_fragmentation);
1562
1563 if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != NULL) {
1564 mdb_space_map_phys_t smp;
1565
1566 if (sm.sm_phys == NULL)
1567 continue;
1568
1569 (void) mdb_ctf_vread(&smp, "space_map_phys_t",
1570 "mdb_space_map_phys_t", sm.sm_phys, 0);
1571
1572 dump_histogram(smp.smp_histogram,
1573 SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift);
1574 }
1575 }
1576 mdb_dec_indent(4);
1577 return (DCMD_OK);
1578 }
1579
1580 static int
1581 metaslab_group_stats(uintptr_t addr, int spa_flags)
1582 {
1583 mdb_metaslab_group_t mg;
1584 if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t",
1585 (uintptr_t)addr, 0) == -1) {
1586 mdb_warn("failed to read vdev_mg at %p\n", addr);
1587 return (DCMD_ERR);
1588 }
1589
1590 mdb_inc_indent(4);
1591 mdb_printf("%<u>%-?s %15s%</u>\n", "ADDR", "FRAGMENTATION");
1592 if (mg.mg_fragmentation == ZFS_FRAG_INVALID)
1593 mdb_printf("%0?p %15s\n", addr, "-");
1594 else
1595 mdb_printf("%0?p %15llu%%\n", addr, mg.mg_fragmentation);
1596
1597 if (spa_flags & SPA_FLAG_HISTOGRAMS)
1598 dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1599 mdb_dec_indent(4);
1600 return (DCMD_OK);
1601 }
1602
1603 /*
1604 * ::vdev
1605 *
1606 * Print out a summarized vdev_t, in the following form:
1607 *
1608 * ADDR STATE AUX DESC
1609 * fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0
1610 *
1611 * If '-r' is specified, recursively visit all children.
1612 *
1613 * With '-e', the statistics associated with the vdev are printed as well.
1614 */
1615 static int
1616 do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive,
1617 int spa_flags)
1618 {
1619 vdev_t vdev;
1620 char desc[MAXNAMELEN];
1621 int c, children;
1622 uintptr_t *child;
1623 const char *state, *aux;
1624
1625 if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
1626 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
1627 return (DCMD_ERR);
1628 }
1629
1630 if (flags & DCMD_PIPE_OUT) {
1631 mdb_printf("%#lr\n", addr);
1632 } else {
1633 if (vdev.vdev_path != NULL) {
1634 if (mdb_readstr(desc, sizeof (desc),
1635 (uintptr_t)vdev.vdev_path) == -1) {
1636 mdb_warn("failed to read vdev_path at %p\n",
1637 vdev.vdev_path);
1638 return (DCMD_ERR);
1639 }
1640 } else if (vdev.vdev_ops != NULL) {
1641 vdev_ops_t ops;
1642 if (mdb_vread(&ops, sizeof (ops),
1643 (uintptr_t)vdev.vdev_ops) == -1) {
1644 mdb_warn("failed to read vdev_ops at %p\n",
1645 vdev.vdev_ops);
1646 return (DCMD_ERR);
1647 }
1648 (void) strcpy(desc, ops.vdev_op_type);
1649 } else {
1650 (void) strcpy(desc, "<unknown>");
1651 }
1652
1653 if (depth == 0 && DCMD_HDRSPEC(flags))
1654 mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1655 "ADDR", "STATE", "AUX",
1656 sizeof (uintptr_t) == 4 ? 43 : 35,
1657 "DESCRIPTION");
1658
1659 mdb_printf("%0?p ", addr);
1660
1661 switch (vdev.vdev_state) {
1662 case VDEV_STATE_CLOSED:
1663 state = "CLOSED";
1664 break;
1665 case VDEV_STATE_OFFLINE:
1666 state = "OFFLINE";
1667 break;
1668 case VDEV_STATE_CANT_OPEN:
1669 state = "CANT_OPEN";
1670 break;
1671 case VDEV_STATE_DEGRADED:
1672 state = "DEGRADED";
1673 break;
1674 case VDEV_STATE_HEALTHY:
1675 state = "HEALTHY";
1676 break;
1677 case VDEV_STATE_REMOVED:
1678 state = "REMOVED";
1679 break;
1680 case VDEV_STATE_FAULTED:
1681 state = "FAULTED";
1682 break;
1683 default:
1684 state = "UNKNOWN";
1685 break;
1686 }
1687
1688 switch (vdev.vdev_stat.vs_aux) {
1689 case VDEV_AUX_NONE:
1690 aux = "-";
1691 break;
1692 case VDEV_AUX_OPEN_FAILED:
1693 aux = "OPEN_FAILED";
1694 break;
1695 case VDEV_AUX_CORRUPT_DATA:
1696 aux = "CORRUPT_DATA";
1697 break;
1698 case VDEV_AUX_NO_REPLICAS:
1699 aux = "NO_REPLICAS";
1700 break;
1701 case VDEV_AUX_BAD_GUID_SUM:
1702 aux = "BAD_GUID_SUM";
1703 break;
1704 case VDEV_AUX_TOO_SMALL:
1705 aux = "TOO_SMALL";
1706 break;
1707 case VDEV_AUX_BAD_LABEL:
1708 aux = "BAD_LABEL";
1709 break;
1710 case VDEV_AUX_VERSION_NEWER:
1711 aux = "VERS_NEWER";
1712 break;
1713 case VDEV_AUX_VERSION_OLDER:
1714 aux = "VERS_OLDER";
1715 break;
1716 case VDEV_AUX_UNSUP_FEAT:
1717 aux = "UNSUP_FEAT";
1718 break;
1719 case VDEV_AUX_SPARED:
1720 aux = "SPARED";
1721 break;
1722 case VDEV_AUX_ERR_EXCEEDED:
1723 aux = "ERR_EXCEEDED";
1724 break;
1725 case VDEV_AUX_IO_FAILURE:
1726 aux = "IO_FAILURE";
1727 break;
1728 case VDEV_AUX_BAD_LOG:
1729 aux = "BAD_LOG";
1730 break;
1731 case VDEV_AUX_EXTERNAL:
1732 aux = "EXTERNAL";
1733 break;
1734 case VDEV_AUX_SPLIT_POOL:
1735 aux = "SPLIT_POOL";
1736 break;
1737 default:
1738 aux = "UNKNOWN";
1739 break;
1740 }
1741
1742 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1743
1744 if (spa_flags & SPA_FLAG_ERRORS) {
1745 vdev_stat_t *vs = &vdev.vdev_stat;
1746 int i;
1747
1748 mdb_inc_indent(4);
1749 mdb_printf("\n");
1750 mdb_printf("%<u> %12s %12s %12s %12s "
1751 "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1752 "IOCTL");
1753 mdb_printf("OPS ");
1754 for (i = 1; i < ZIO_TYPES; i++)
1755 mdb_printf("%11#llx%s", vs->vs_ops[i],
1756 i == ZIO_TYPES - 1 ? "" : " ");
1757 mdb_printf("\n");
1758 mdb_printf("BYTES ");
1759 for (i = 1; i < ZIO_TYPES; i++)
1760 mdb_printf("%11#llx%s", vs->vs_bytes[i],
1761 i == ZIO_TYPES - 1 ? "" : " ");
1762
1763
1764 mdb_printf("\n");
1765 mdb_printf("EREAD %10#llx\n", vs->vs_read_errors);
1766 mdb_printf("EWRITE %10#llx\n", vs->vs_write_errors);
1767 mdb_printf("ECKSUM %10#llx\n",
1768 vs->vs_checksum_errors);
1769 mdb_dec_indent(4);
1770 mdb_printf("\n");
1771 }
1772
1773 if (spa_flags & SPA_FLAG_METASLAB_GROUPS &&
1774 vdev.vdev_mg != NULL) {
1775 metaslab_group_stats((uintptr_t)vdev.vdev_mg,
1776 spa_flags);
1777 }
1778 if (spa_flags & SPA_FLAG_METASLABS && vdev.vdev_ms != NULL) {
1779 metaslab_stats((uintptr_t)addr, spa_flags);
1780 }
1781 }
1782
1783 children = vdev.vdev_children;
1784
1785 if (children == 0 || !recursive)
1786 return (DCMD_OK);
1787
1788 child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1789 if (mdb_vread(child, children * sizeof (void *),
1790 (uintptr_t)vdev.vdev_child) == -1) {
1791 mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1792 return (DCMD_ERR);
1793 }
1794
1795 for (c = 0; c < children; c++) {
1796 if (do_print_vdev(child[c], flags, depth + 2, recursive,
1797 spa_flags)) {
1798 return (DCMD_ERR);
1799 }
1800 }
1801
1802 return (DCMD_OK);
1803 }
1804
1805 static int
1806 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1807 {
1808 uint64_t depth = 0;
1809 boolean_t recursive = B_FALSE;
1810 int spa_flags = 0;
1811
1812 if (mdb_getopts(argc, argv,
1813 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
1814 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
1815 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
1816 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
1817 'r', MDB_OPT_SETBITS, TRUE, &recursive,
1818 'd', MDB_OPT_UINT64, &depth, NULL) != argc)
1819 return (DCMD_USAGE);
1820
1821 if (!(flags & DCMD_ADDRSPEC)) {
1822 mdb_warn("no vdev_t address given\n");
1823 return (DCMD_ERR);
1824 }
1825
1826 return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags));
1827 }
1828
/*
 * The members of metaslab_alloc_trace_t that metaslab_trace() reads from the
 * target with mdb_ctf_vread().
 */
typedef struct mdb_metaslab_alloc_trace {
	/* Target address of the metaslab group; may be zero. */
	uintptr_t mat_mg;
	/* Target address of the metaslab; may be zero. */
	uintptr_t mat_msp;
	uint64_t mat_size;
	uint64_t mat_weight;
	/* Negative values (as int64_t) are printed as enum names. */
	uint64_t mat_offset;
	uint32_t mat_dva_id;
} mdb_metaslab_alloc_trace_t;
1837
1838 static void
1839 metaslab_print_weight(uint64_t weight)
1840 {
1841 char buf[100];
1842
1843 if (WEIGHT_IS_SPACEBASED(weight)) {
1844 mdb_nicenum(
1845 weight & ~(METASLAB_ACTIVE_MASK | METASLAB_WEIGHT_TYPE),
1846 buf);
1847 } else {
1848 char size[NICENUM_BUFLEN];
1849 mdb_nicenum(1ULL << WEIGHT_GET_INDEX(weight), size);
1850 (void) mdb_snprintf(buf, sizeof (buf), "%llu x %s",
1851 WEIGHT_GET_COUNT(weight), size);
1852 }
1853 mdb_printf("%11s ", buf);
1854 }
1855
1856 /* ARGSUSED */
1857 static int
1858 metaslab_weight(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1859 {
1860 uint64_t weight = 0;
1861 char active;
1862
1863 if (argc == 0 && (flags & DCMD_ADDRSPEC)) {
1864 if (mdb_vread(&weight, sizeof (uint64_t), addr) == -1) {
1865 mdb_warn("failed to read weight at %p\n", addr);
1866 return (DCMD_ERR);
1867 }
1868 } else if (argc == 1 && !(flags & DCMD_ADDRSPEC)) {
1869 weight = (argv[0].a_type == MDB_TYPE_IMMEDIATE) ?
1870 argv[0].a_un.a_val : mdb_strtoull(argv[0].a_un.a_str);
1871 } else {
1872 return (DCMD_USAGE);
1873 }
1874
1875 if (DCMD_HDRSPEC(flags)) {
1876 mdb_printf("%<u>%-6s %9s %9s%</u>\n",
1877 "ACTIVE", "ALGORITHM", "WEIGHT");
1878 }
1879
1880 if (weight & METASLAB_WEIGHT_PRIMARY)
1881 active = 'P';
1882 else if (weight & METASLAB_WEIGHT_SECONDARY)
1883 active = 'S';
1884 else
1885 active = '-';
1886 mdb_printf("%6c %8s ", active,
1887 WEIGHT_IS_SPACEBASED(weight) ? "SPACE" : "SEGMENT");
1888 metaslab_print_weight(weight);
1889 mdb_printf("\n");
1890
1891 return (DCMD_OK);
1892 }
1893
1894 /* ARGSUSED */
1895 static int
1896 metaslab_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1897 {
1898 mdb_metaslab_alloc_trace_t mat;
1899 mdb_metaslab_group_t mg = { 0 };
1900 char result_type[100];
1901
1902 if (mdb_ctf_vread(&mat, "metaslab_alloc_trace_t",
1903 "mdb_metaslab_alloc_trace_t", addr, 0) == -1) {
1904 return (DCMD_ERR);
1905 }
1906
1907 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
1908 mdb_printf("%<u>%6s %6s %8s %11s %18s %18s%</u>\n",
1909 "MSID", "DVA", "ASIZE", "WEIGHT", "RESULT", "VDEV");
1910 }
1911
1912 if (mat.mat_msp != NULL) {
1913 mdb_metaslab_t ms;
1914
1915 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
1916 mat.mat_msp, 0) == -1) {
1917 return (DCMD_ERR);
1918 }
1919 mdb_printf("%6llu ", ms.ms_id);
1920 } else {
1921 mdb_printf("%6s ", "-");
1922 }
1923
1924 mdb_printf("%6d %8llx ", mat.mat_dva_id, mat.mat_size);
1925
1926 metaslab_print_weight(mat.mat_weight);
1927
1928 if ((int64_t)mat.mat_offset < 0) {
1929 if (enum_lookup("enum trace_alloc_type", mat.mat_offset,
1930 "TRACE_", sizeof (result_type), result_type) == -1) {
1931 mdb_warn("Could not find enum for trace_alloc_type");
1932 return (DCMD_ERR);
1933 }
1934 mdb_printf("%18s ", result_type);
1935 } else {
1936 mdb_printf("%<b>%18llx%</b> ", mat.mat_offset);
1937 }
1938
1939 if (mat.mat_mg != NULL &&
1940 mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t",
1941 mat.mat_mg, 0) == -1) {
1942 return (DCMD_ERR);
1943 }
1944
1945 if (mg.mg_vd != NULL) {
1946 mdb_vdev_t vdev;
1947 char desc[MAXNAMELEN];
1948
1949 if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t",
1950 mg.mg_vd, 0) == -1) {
1951 return (DCMD_ERR);
1952 }
1953
1954 if (vdev.vdev_path != NULL) {
1955 char path[MAXNAMELEN];
1956
1957 if (mdb_readstr(path, sizeof (path),
1958 vdev.vdev_path) == -1) {
1959 mdb_warn("failed to read vdev_path at %p\n",
1960 vdev.vdev_path);
1961 return (DCMD_ERR);
1962 }
1963 char *slash;
1964 if ((slash = strrchr(path, '/')) != NULL) {
1965 strcpy(desc, slash + 1);
1966 } else {
1967 strcpy(desc, path);
1968 }
1969 } else if (vdev.vdev_ops != NULL) {
1970 mdb_vdev_ops_t ops;
1971 if (mdb_ctf_vread(&ops, "vdev_ops_t", "mdb_vdev_ops_t",
1972 vdev.vdev_ops, 0) == -1) {
1973 mdb_warn("failed to read vdev_ops at %p\n",
1974 vdev.vdev_ops);
1975 return (DCMD_ERR);
1976 }
1977 (void) mdb_snprintf(desc, sizeof (desc),
1978 "%s-%llu", ops.vdev_op_type, vdev.vdev_id);
1979 } else {
1980 (void) strcpy(desc, "<unknown>");
1981 }
1982 mdb_printf("%18s\n", desc);
1983 }
1984
1985 return (DCMD_OK);
1986 }
1987
1988 typedef struct metaslab_walk_data {
1989 uint64_t mw_numvdevs;
1990 uintptr_t *mw_vdevs;
1991 int mw_curvdev;
1992 uint64_t mw_nummss;
1993 uintptr_t *mw_mss;
1994 int mw_curms;
1995 } metaslab_walk_data_t;
1996
1997 static int
1998 metaslab_walk_step(mdb_walk_state_t *wsp)
1999 {
2000 metaslab_walk_data_t *mw = wsp->walk_data;
2001 metaslab_t ms;
2002 uintptr_t msp;
2003
2004 if (mw->mw_curvdev >= mw->mw_numvdevs)
2005 return (WALK_DONE);
2006
2007 if (mw->mw_mss == NULL) {
2008 uintptr_t mssp;
2009 uintptr_t vdevp;
2010
2011 ASSERT(mw->mw_curms == 0);
2012 ASSERT(mw->mw_nummss == 0);
2013
2014 vdevp = mw->mw_vdevs[mw->mw_curvdev];
2015 if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) ||
2016 GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) {
2017 return (WALK_ERR);
2018 }
2019
2020 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
2021 UM_SLEEP | UM_GC);
2022 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
2023 mssp) == -1) {
2024 mdb_warn("failed to read vdev_ms at %p", mssp);
2025 return (WALK_ERR);
2026 }
2027 }
2028
2029 if (mw->mw_curms >= mw->mw_nummss) {
2030 mw->mw_mss = NULL;
2031 mw->mw_curms = 0;
2032 mw->mw_nummss = 0;
2033 mw->mw_curvdev++;
2034 return (WALK_NEXT);
2035 }
2036
2037 msp = mw->mw_mss[mw->mw_curms];
2038 if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
2039 mdb_warn("failed to read metaslab_t at %p", msp);
2040 return (WALK_ERR);
2041 }
2042
2043 mw->mw_curms++;
2044
2045 return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
2046 }
2047
2048 static int
2049 metaslab_walk_init(mdb_walk_state_t *wsp)
2050 {
2051 metaslab_walk_data_t *mw;
2052 uintptr_t root_vdevp;
2053 uintptr_t childp;
2054
2055 if (wsp->walk_addr == NULL) {
2056 mdb_warn("must supply address of spa_t\n");
2057 return (WALK_ERR);
2058 }
2059
2060 mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
2061
2062 if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) ||
2063 GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) ||
2064 GETMEMB(root_vdevp, "vdev", vdev_child, childp)) {
2065 return (DCMD_ERR);
2066 }
2067
2068 mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
2069 UM_SLEEP | UM_GC);
2070 if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
2071 childp) == -1) {
2072 mdb_warn("failed to read root vdev children at %p", childp);
2073 return (DCMD_ERR);
2074 }
2075
2076 wsp->walk_data = mw;
2077
2078 return (WALK_NEXT);
2079 }
2080
2081 typedef struct mdb_spa {
2082 uintptr_t spa_dsl_pool;
2083 uintptr_t spa_root_vdev;
2084 } mdb_spa_t;
2085
2086 typedef struct mdb_dsl_pool {
2087 uintptr_t dp_root_dir;
2088 } mdb_dsl_pool_t;
2089
2090 typedef struct mdb_dsl_dir {
2091 uintptr_t dd_dbuf;
2092 int64_t dd_space_towrite[TXG_SIZE];
2093 } mdb_dsl_dir_t;
2094
2095 typedef struct mdb_dsl_dir_phys {
2096 uint64_t dd_used_bytes;
2097 uint64_t dd_compressed_bytes;
2098 uint64_t dd_uncompressed_bytes;
2099 } mdb_dsl_dir_phys_t;
2100
2101 typedef struct space_data {
2102 uint64_t ms_alloctree[TXG_SIZE];
2103 uint64_t ms_freeingtree;
2104 uint64_t ms_freedtree;
2105 uint64_t ms_tree;
2106 int64_t ms_deferspace;
2107 uint64_t avail;
2108 uint64_t nowavail;
2109 } space_data_t;
2110
2111 /* ARGSUSED */
2112 static int
2113 space_cb(uintptr_t addr, const void *unknown, void *arg)
2114 {
2115 space_data_t *sd = arg;
2116 mdb_metaslab_t ms;
2117 mdb_range_tree_t rt;
2118 mdb_space_map_t sm = { 0 };
2119 mdb_space_map_phys_t smp = { 0 };
2120 int i;
2121
2122 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t",
2123 addr, 0) == -1)
2124 return (WALK_ERR);
2125
2126 for (i = 0; i < TXG_SIZE; i++) {
2127 if (mdb_ctf_vread(&rt, "range_tree_t",
2128 "mdb_range_tree_t", ms.ms_alloctree[i], 0) == -1)
2129 return (WALK_ERR);
2130
2131 sd->ms_alloctree[i] += rt.rt_space;
2132
2133 }
2134
2135 if (mdb_ctf_vread(&rt, "range_tree_t",
2136 "mdb_range_tree_t", ms.ms_freeingtree, 0) == -1)
2137 return (WALK_ERR);
2138 sd->ms_freeingtree += rt.rt_space;
2139
2140 if (mdb_ctf_vread(&rt, "range_tree_t",
2141 "mdb_range_tree_t", ms.ms_freedtree, 0) == -1)
2142 return (WALK_ERR);
2143 sd->ms_freedtree += rt.rt_space;
2144
2145 if (mdb_ctf_vread(&rt, "range_tree_t",
2146 "mdb_range_tree_t", ms.ms_tree, 0) == -1)
2147 return (WALK_ERR);
2148 sd->ms_tree += rt.rt_space;
2149
2150 if (ms.ms_sm != NULL &&
2151 mdb_ctf_vread(&sm, "space_map_t",
2152 "mdb_space_map_t", ms.ms_sm, 0) == -1)
2153 return (WALK_ERR);
2154
2155 if (sm.sm_phys != NULL) {
2156 (void) mdb_ctf_vread(&smp, "space_map_phys_t",
2157 "mdb_space_map_phys_t", sm.sm_phys, 0);
2158 }
2159
2160 sd->ms_deferspace += ms.ms_deferspace;
2161 sd->avail += sm.sm_size - sm.sm_alloc;
2162 sd->nowavail += sm.sm_size - smp.smp_alloc;
2163
2164 return (WALK_NEXT);
2165 }
2166
2167 /*
2168 * ::spa_space [-b]
2169 *
2170 * Given a spa_t, print out it's on-disk space usage and in-core
2171 * estimates of future usage. If -b is given, print space in bytes.
2172 * Otherwise print in megabytes.
2173 */
2174 /* ARGSUSED */
2175 static int
2176 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2177 {
2178 mdb_spa_t spa;
2179 mdb_dsl_pool_t dp;
2180 mdb_dsl_dir_t dd;
2181 mdb_dmu_buf_impl_t db;
2182 mdb_dsl_dir_phys_t dsp;
2183 space_data_t sd;
2184 int shift = 20;
2185 char *suffix = "M";
2186 int bytes = B_FALSE;
2187
2188 if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) !=
2189 argc)
2190 return (DCMD_USAGE);
2191 if (!(flags & DCMD_ADDRSPEC))
2192 return (DCMD_USAGE);
2193
2194 if (bytes) {
2195 shift = 0;
2196 suffix = "";
2197 }
2198
2199 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_t",
2200 addr, 0) == -1 ||
2201 mdb_ctf_vread(&dp, ZFS_STRUCT "dsl_pool", "mdb_dsl_pool_t",
2202 spa.spa_dsl_pool, 0) == -1 ||
2203 mdb_ctf_vread(&dd, ZFS_STRUCT "dsl_dir", "mdb_dsl_dir_t",
2204 dp.dp_root_dir, 0) == -1 ||
2205 mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t",
2206 dd.dd_dbuf, 0) == -1 ||
2207 mdb_ctf_vread(&dsp, ZFS_STRUCT "dsl_dir_phys",
2208 "mdb_dsl_dir_phys_t", db.db.db_data, 0) == -1) {
2209 return (DCMD_ERR);
2210 }
2211
2212 mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
2213 dd.dd_space_towrite[0] >> shift, suffix,
2214 dd.dd_space_towrite[1] >> shift, suffix,
2215 dd.dd_space_towrite[2] >> shift, suffix,
2216 dd.dd_space_towrite[3] >> shift, suffix);
2217
2218 mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
2219 dsp.dd_used_bytes >> shift, suffix);
2220 mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
2221 dsp.dd_compressed_bytes >> shift, suffix);
2222 mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
2223 dsp.dd_uncompressed_bytes >> shift, suffix);
2224
2225 bzero(&sd, sizeof (sd));
2226 if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
2227 mdb_warn("can't walk metaslabs");
2228 return (DCMD_ERR);
2229 }
2230
2231 mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
2232 sd.ms_alloctree[0] >> shift, suffix,
2233 sd.ms_alloctree[1] >> shift, suffix,
2234 sd.ms_alloctree[2] >> shift, suffix,
2235 sd.ms_alloctree[3] >> shift, suffix);
2236 mdb_printf("ms_freeingtree = %llu%s\n",
2237 sd.ms_freeingtree >> shift, suffix);
2238 mdb_printf("ms_freedtree = %llu%s\n",
2239 sd.ms_freedtree >> shift, suffix);
2240 mdb_printf("ms_tree = %llu%s\n", sd.ms_tree >> shift, suffix);
2241 mdb_printf("ms_deferspace = %llu%s\n",
2242 sd.ms_deferspace >> shift, suffix);
2243 mdb_printf("last synced avail = %llu%s\n", sd.avail >> shift, suffix);
2244 mdb_printf("current syncing avail = %llu%s\n",
2245 sd.nowavail >> shift, suffix);
2246
2247 return (DCMD_OK);
2248 }
2249
/*
 * Mirror of an auxiliary vdev list embedded in spa_t, as read by
 * spa_vdevs() and printed by spa_print_aux().
 */
typedef struct mdb_spa_aux_vdev {
	/* Number of vdev pointers in the array at sav_vdevs. */
	int sav_count;
	/* Target address of the array of vdev pointers. */
	uintptr_t sav_vdevs;
} mdb_spa_aux_vdev_t;

/*
 * The members of spa_t that spa_vdevs() reads with mdb_ctf_vread().
 */
typedef struct mdb_spa_vdevs {
	/* Target address of the root vdev; zero when there is none. */
	uintptr_t spa_root_vdev;
	mdb_spa_aux_vdev_t spa_l2cache;
	mdb_spa_aux_vdev_t spa_spares;
} mdb_spa_vdevs_t;
2260
2261 static int
2262 spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
2263 const char *name)
2264 {
2265 uintptr_t *aux;
2266 size_t len;
2267 int ret, i;
2268
2269 /*
2270 * Iterate over aux vdevs and print those out as well. This is a
2271 * little annoying because we don't have a root vdev to pass to ::vdev.
2272 * Instead, we print a single line and then call it for each child
2273 * vdev.
2274 */
2275 if (sav->sav_count != 0) {
2276 v[1].a_type = MDB_TYPE_STRING;
2277 v[1].a_un.a_str = "-d";
2278 v[2].a_type = MDB_TYPE_IMMEDIATE;
2279 v[2].a_un.a_val = 2;
2280
2281 len = sav->sav_count * sizeof (uintptr_t);
2282 aux = mdb_alloc(len, UM_SLEEP);
2283 if (mdb_vread(aux, len, sav->sav_vdevs) == -1) {
2284 mdb_free(aux, len);
2285 mdb_warn("failed to read l2cache vdevs at %p",
2286 sav->sav_vdevs);
2287 return (DCMD_ERR);
2288 }
2289
2290 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
2291
2292 for (i = 0; i < sav->sav_count; i++) {
2293 ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
2294 if (ret != DCMD_OK) {
2295 mdb_free(aux, len);
2296 return (ret);
2297 }
2298 }
2299
2300 mdb_free(aux, len);
2301 }
2302
2303 return (0);
2304 }
2305
2306 /*
2307 * ::spa_vdevs
2308 *
2309 * -e Include error stats
2310 * -m Include metaslab information
2311 * -M Include metaslab group information
2312 * -h Include histogram information (requires -m or -M)
2313 *
2314 * Print out a summarized list of vdevs for the given spa_t.
2315 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as
2316 * iterating over the cache devices.
2317 */
2318 /* ARGSUSED */
2319 static int
2320 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2321 {
2322 mdb_arg_t v[3];
2323 int ret;
2324 char opts[100] = "-r";
2325 int spa_flags = 0;
2326
2327 if (mdb_getopts(argc, argv,
2328 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags,
2329 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags,
2330 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags,
2331 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags,
2332 NULL) != argc)
2333 return (DCMD_USAGE);
2334
2335 if (!(flags & DCMD_ADDRSPEC))
2336 return (DCMD_USAGE);
2337
2338 mdb_spa_vdevs_t spa;
2339 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1)
2340 return (DCMD_ERR);
2341
2342 /*
2343 * Unitialized spa_t structures can have a NULL root vdev.
2344 */
2345 if (spa.spa_root_vdev == NULL) {
2346 mdb_printf("no associated vdevs\n");
2347 return (DCMD_OK);
2348 }
2349
2350 if (spa_flags & SPA_FLAG_ERRORS)
2351 strcat(opts, "e");
2352 if (spa_flags & SPA_FLAG_METASLABS)
2353 strcat(opts, "m");
2354 if (spa_flags & SPA_FLAG_METASLAB_GROUPS)
2355 strcat(opts, "M");
2356 if (spa_flags & SPA_FLAG_HISTOGRAMS)
2357 strcat(opts, "h");
2358
2359 v[0].a_type = MDB_TYPE_STRING;
2360 v[0].a_un.a_str = opts;
2361
2362 ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
2363 flags, 1, v);
2364 if (ret != DCMD_OK)
2365 return (ret);
2366
2367 if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
2368 spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
2369 return (DCMD_ERR);
2370
2371 return (DCMD_OK);
2372 }
2373
2374 /*
2375 * ::zio
2376 *
2377 * Print a summary of zio_t and all its children. This is intended to display a
2378 * zio tree, and hence we only pick the most important pieces of information for
2379 * the main summary. More detailed information can always be found by doing a
2380 * '::print zio' on the underlying zio_t. The columns we display are:
2381 *
2382 * ADDRESS TYPE STAGE WAITER TIME_ELAPSED
2383 *
2384 * The 'address' column is indented by one space for each depth level as we
2385 * descend down the tree.
2386 */
2387
2388 #define ZIO_MAXINDENT 7
2389 #define ZIO_MAXWIDTH (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
2390 #define ZIO_WALK_SELF 0
2391 #define ZIO_WALK_CHILD 1
2392 #define ZIO_WALK_PARENT 2
2393
/*
 * State shared between ::zio and its recursive print callbacks.
 */
typedef struct zio_print_args {
	/* depth of the zio being printed; bumped around each link walk */
	int zpa_current_depth;
	/* rows are printed only once the current depth reaches this value */
	int zpa_min_depth;
	/* recursion stops once the current depth reaches this value */
	int zpa_max_depth;
	/* one of ZIO_WALK_SELF, ZIO_WALK_CHILD or ZIO_WALK_PARENT */
	int zpa_type;
	/* dcmd flags of the ::zio invocation; checked for DCMD_PIPE_OUT */
	uint_t zpa_flags;
} zio_print_args_t;
2401
/*
 * Debugger-side view of the zio fields used in this file; filled in with
 * mdb_ctf_vread() from the target's zio structure.
 */
typedef struct mdb_zio {
	enum zio_type io_type;
	enum zio_stage io_stage;
	uintptr_t io_waiter;
	/* compared against the pool address by the zio walkers */
	uintptr_t io_spa;
	/*
	 * Only the first link of the parent list is needed: the root walker
	 * compares it with the address of the list head itself.
	 */
	struct {
		struct {
			uintptr_t list_next;
		} list_head;
	} io_parent_list;
	/* non-zero makes ::zio report the stage as "FAILED" */
	int io_error;
} mdb_zio_t;
2414
/*
 * Kept apart from mdb_zio_t so that it can be read on its own with
 * MDB_CTF_VREAD_QUIET and the result of that read ignored.
 */
typedef struct mdb_zio_timestamp {
	hrtime_t io_timestamp;
} mdb_zio_timestamp_t;
2418
2419 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
2420
2421 static int
2422 zio_print_cb(uintptr_t addr, zio_print_args_t *zpa)
2423 {
2424 mdb_ctf_id_t type_enum, stage_enum;
2425 int indent = zpa->zpa_current_depth;
2426 const char *type, *stage;
2427 uintptr_t laddr;
2428 mdb_zio_t zio;
2429 mdb_zio_timestamp_t zio_timestamp = { 0 };
2430
2431 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1)
2432 return (WALK_ERR);
2433 (void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio",
2434 "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET);
2435
2436 if (indent > ZIO_MAXINDENT)
2437 indent = ZIO_MAXINDENT;
2438
2439 if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
2440 mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
2441 mdb_warn("failed to lookup zio enums");
2442 return (WALK_ERR);
2443 }
2444
2445 if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL)
2446 type += sizeof ("ZIO_TYPE_") - 1;
2447 else
2448 type = "?";
2449
2450 if (zio.io_error == 0) {
2451 stage = mdb_ctf_enum_name(stage_enum, zio.io_stage);
2452 if (stage != NULL)
2453 stage += sizeof ("ZIO_STAGE_") - 1;
2454 else
2455 stage = "?";
2456 } else {
2457 stage = "FAILED";
2458 }
2459
2460 if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
2461 if (zpa->zpa_flags & DCMD_PIPE_OUT) {
2462 mdb_printf("%?p\n", addr);
2463 } else {
2464 mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
2465 ZIO_MAXWIDTH - indent, addr, type, stage);
2466 if (zio.io_waiter != 0)
2467 mdb_printf("%-16lx ", zio.io_waiter);
2468 else
2469 mdb_printf("%-16s ", "-");
2470 #ifdef _KERNEL
2471 if (zio_timestamp.io_timestamp != 0) {
2472 mdb_printf("%llums", (mdb_gethrtime() -
2473 zio_timestamp.io_timestamp) /
2474 1000000);
2475 } else {
2476 mdb_printf("%-12s ", "-");
2477 }
2478 #else
2479 mdb_printf("%-12s ", "-");
2480 #endif
2481 mdb_printf("\n");
2482 }
2483 }
2484
2485 if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
2486 return (WALK_NEXT);
2487
2488 if (zpa->zpa_type == ZIO_WALK_PARENT)
2489 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2490 "io_parent_list");
2491 else
2492 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio",
2493 "io_child_list");
2494
2495 zpa->zpa_current_depth++;
2496 if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
2497 mdb_warn("failed to walk zio_t children at %p\n", laddr);
2498 return (WALK_ERR);
2499 }
2500 zpa->zpa_current_depth--;
2501
2502 return (WALK_NEXT);
2503 }
2504
2505 /* ARGSUSED */
2506 static int
2507 zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
2508 {
2509 zio_link_t zl;
2510 uintptr_t ziop;
2511 zio_print_args_t *zpa = arg;
2512
2513 if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
2514 mdb_warn("failed to read zio_link_t at %p", addr);
2515 return (WALK_ERR);
2516 }
2517
2518 if (zpa->zpa_type == ZIO_WALK_PARENT)
2519 ziop = (uintptr_t)zl.zl_parent;
2520 else
2521 ziop = (uintptr_t)zl.zl_child;
2522
2523 return (zio_print_cb(ziop, zpa));
2524 }
2525
2526 /* ARGSUSED */
2527 static int
2528 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2529 {
2530 zio_print_args_t zpa = { 0 };
2531
2532 if (!(flags & DCMD_ADDRSPEC))
2533 return (DCMD_USAGE);
2534
2535 if (mdb_getopts(argc, argv,
2536 'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
2537 'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
2538 'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
2539 NULL) != argc)
2540 return (DCMD_USAGE);
2541
2542 zpa.zpa_flags = flags;
2543 if (zpa.zpa_max_depth != 0) {
2544 if (zpa.zpa_type == ZIO_WALK_SELF)
2545 zpa.zpa_type = ZIO_WALK_CHILD;
2546 } else if (zpa.zpa_type != ZIO_WALK_SELF) {
2547 zpa.zpa_min_depth = 1;
2548 zpa.zpa_max_depth = 1;
2549 }
2550
2551 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
2552 mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n",
2553 ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER",
2554 "TIME_ELAPSED");
2555 }
2556
2557 if (zio_print_cb(addr, &zpa) != WALK_NEXT)
2558 return (DCMD_ERR);
2559
2560 return (DCMD_OK);
2561 }
2562
2563 /*
2564 * [addr]::zio_state
2565 *
2566 * Print a summary of all zio_t structures on the system, or for a particular
2567 * pool. This is equivalent to '::walk zio_root | ::zio'.
2568 */
2569 /*ARGSUSED*/
2570 static int
2571 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2572 {
2573 /*
2574 * MDB will remember the last address of the pipeline, so if we don't
2575 * zero this we'll end up trying to walk zio structures for a
2576 * non-existent spa_t.
2577 */
2578 if (!(flags & DCMD_ADDRSPEC))
2579 addr = 0;
2580
2581 return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
2582 }
2583
/*
 * Debugger-side view of the multilist_t fields the multilist walker needs;
 * filled in with mdb_ctf_vread().
 */
typedef struct mdb_multilist {
	/* number of sublists; the walker visits indices below this value */
	uint64_t ml_num_sublists;
	/* target address the walker indexes by multilist_sublist_t size */
	uintptr_t ml_sublists;
} mdb_multilist_t;
2588
/*
 * Per-walk state of the multilist walker.
 */
typedef struct multilist_walk_data {
	/* index of the next sublist to visit */
	uint64_t mwd_idx;
	/* copy of the multilist header read in multilist_walk_init() */
	mdb_multilist_t mwd_ml;
} multilist_walk_data_t;
2593
2594 /* ARGSUSED */
2595 static int
2596 multilist_print_cb(uintptr_t addr, const void *unknown, void *arg)
2597 {
2598 mdb_printf("%#lr\n", addr);
2599 return (WALK_NEXT);
2600 }
2601
2602 static int
2603 multilist_walk_step(mdb_walk_state_t *wsp)
2604 {
2605 multilist_walk_data_t *mwd = wsp->walk_data;
2606
2607 if (mwd->mwd_idx >= mwd->mwd_ml.ml_num_sublists)
2608 return (WALK_DONE);
2609
2610 wsp->walk_addr = mwd->mwd_ml.ml_sublists +
2611 mdb_ctf_sizeof_by_name("multilist_sublist_t") * mwd->mwd_idx +
2612 mdb_ctf_offsetof_by_name("multilist_sublist_t", "mls_list");
2613
2614 mdb_pwalk("list", multilist_print_cb, (void*)NULL, wsp->walk_addr);
2615 mwd->mwd_idx++;
2616
2617 return (WALK_NEXT);
2618 }
2619
2620 static int
2621 multilist_walk_init(mdb_walk_state_t *wsp)
2622 {
2623 multilist_walk_data_t *mwd;
2624
2625 if (wsp->walk_addr == NULL) {
2626 mdb_warn("must supply address of multilist_t\n");
2627 return (WALK_ERR);
2628 }
2629
2630 mwd = mdb_zalloc(sizeof (multilist_walk_data_t), UM_SLEEP | UM_GC);
2631 if (mdb_ctf_vread(&mwd->mwd_ml, "multilist_t", "mdb_multilist_t",
2632 wsp->walk_addr, 0) == -1) {
2633 return (WALK_ERR);
2634 }
2635
2636 if (mwd->mwd_ml.ml_num_sublists == 0 ||
2637 mwd->mwd_ml.ml_sublists == NULL) {
2638 mdb_warn("invalid or uninitialized multilist at %#lx\n",
2639 wsp->walk_addr);
2640 return (WALK_ERR);
2641 }
2642
2643 wsp->walk_data = mwd;
2644 return (WALK_NEXT);
2645 }
2646
/*
 * Debugger-side view of txg_list_t; filled in with mdb_ctf_vread().
 */
typedef struct mdb_txg_list {
	/* subtracted from a node address to get the containing object */
	size_t tl_offset;
	/* one list head per txg slot */
	uintptr_t tl_head[TXG_SIZE];
} mdb_txg_list_t;
2651
/*
 * Per-walk state of the txg_list walkers.
 */
typedef struct txg_list_walk_data {
	/* copy of the list heads read from the target */
	uintptr_t lw_head[TXG_SIZE];
	/* slot currently being walked */
	int lw_txgoff;
	/* last slot this walk may advance to */
	int lw_maxoff;
	/* copy of tl_offset */
	size_t lw_offset;
	/* buffer of lw_offset + sizeof (txg_node_t) bytes for each element */
	void *lw_obj;
} txg_list_walk_data_t;
2659
2660 static int
2661 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
2662 {
2663 txg_list_walk_data_t *lwd;
2664 mdb_txg_list_t list;
2665 int i;
2666
2667 lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
2668 if (mdb_ctf_vread(&list, "txg_list_t", "mdb_txg_list_t", wsp->walk_addr,
2669 0) == -1) {
2670 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
2671 return (WALK_ERR);
2672 }
2673
2674 for (i = 0; i < TXG_SIZE; i++)
2675 lwd->lw_head[i] = list.tl_head[i];
2676 lwd->lw_offset = list.tl_offset;
2677 lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
2678 UM_SLEEP | UM_GC);
2679 lwd->lw_txgoff = txg;
2680 lwd->lw_maxoff = maxoff;
2681
2682 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2683 wsp->walk_data = lwd;
2684
2685 return (WALK_NEXT);
2686 }
2687
2688 static int
2689 txg_list_walk_init(mdb_walk_state_t *wsp)
2690 {
2691 return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
2692 }
2693
2694 static int
2695 txg_list0_walk_init(mdb_walk_state_t *wsp)
2696 {
2697 return (txg_list_walk_init_common(wsp, 0, 0));
2698 }
2699
2700 static int
2701 txg_list1_walk_init(mdb_walk_state_t *wsp)
2702 {
2703 return (txg_list_walk_init_common(wsp, 1, 1));
2704 }
2705
2706 static int
2707 txg_list2_walk_init(mdb_walk_state_t *wsp)
2708 {
2709 return (txg_list_walk_init_common(wsp, 2, 2));
2710 }
2711
2712 static int
2713 txg_list3_walk_init(mdb_walk_state_t *wsp)
2714 {
2715 return (txg_list_walk_init_common(wsp, 3, 3));
2716 }
2717
2718 static int
2719 txg_list_walk_step(mdb_walk_state_t *wsp)
2720 {
2721 txg_list_walk_data_t *lwd = wsp->walk_data;
2722 uintptr_t addr;
2723 txg_node_t *node;
2724 int status;
2725
2726 while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
2727 lwd->lw_txgoff++;
2728 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
2729 }
2730
2731 if (wsp->walk_addr == NULL)
2732 return (WALK_DONE);
2733
2734 addr = wsp->walk_addr - lwd->lw_offset;
2735
2736 if (mdb_vread(lwd->lw_obj,
2737 lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
2738 mdb_warn("failed to read list element at %#lx", addr);
2739 return (WALK_ERR);
2740 }
2741
2742 status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
2743 node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
2744 wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
2745
2746 return (status);
2747 }
2748
2749 /*
2750 * ::walk spa
2751 *
2752 * Walk all named spa_t structures in the namespace. This is nothing more than
2753 * a layered avl walk.
2754 */
2755 static int
2756 spa_walk_init(mdb_walk_state_t *wsp)
2757 {
2758 GElf_Sym sym;
2759
2760 if (wsp->walk_addr != NULL) {
2761 mdb_warn("spa walk only supports global walks\n");
2762 return (WALK_ERR);
2763 }
2764
2765 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
2766 mdb_warn("failed to find symbol 'spa_namespace_avl'");
2767 return (WALK_ERR);
2768 }
2769
2770 wsp->walk_addr = (uintptr_t)sym.st_value;
2771
2772 if (mdb_layered_walk("avl", wsp) == -1) {
2773 mdb_warn("failed to walk 'avl'\n");
2774 return (WALK_ERR);
2775 }
2776
2777 return (WALK_NEXT);
2778 }
2779
2780 static int
2781 spa_walk_step(mdb_walk_state_t *wsp)
2782 {
2783 return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata));
2784 }
2785
2786 /*
2787 * [addr]::walk zio
2788 *
2789 * Walk all active zio_t structures on the system. This is simply a layered
2790 * walk on top of ::walk zio_cache, with the optional ability to limit the
2791 * structures to a particular pool.
2792 */
2793 static int
2794 zio_walk_init(mdb_walk_state_t *wsp)
2795 {
2796 wsp->walk_data = (void *)wsp->walk_addr;
2797
2798 if (mdb_layered_walk("zio_cache", wsp) == -1) {
2799 mdb_warn("failed to walk 'zio_cache'\n");
2800 return (WALK_ERR);
2801 }
2802
2803 return (WALK_NEXT);
2804 }
2805
2806 static int
2807 zio_walk_step(mdb_walk_state_t *wsp)
2808 {
2809 mdb_zio_t zio;
2810 uintptr_t spa = (uintptr_t)wsp->walk_data;
2811
2812 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
2813 wsp->walk_addr, 0) == -1)
2814 return (WALK_ERR);
2815
2816 if (spa != 0 && spa != zio.io_spa)
2817 return (WALK_NEXT);
2818
2819 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
2820 }
2821
2822 /*
2823 * [addr]::walk zio_root
2824 *
2825 * Walk only root zio_t structures, optionally for a particular spa_t.
2826 */
2827 static int
2828 zio_walk_root_step(mdb_walk_state_t *wsp)
2829 {
2830 mdb_zio_t zio;
2831 uintptr_t spa = (uintptr_t)wsp->walk_data;
2832
2833 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t",
2834 wsp->walk_addr, 0) == -1)
2835 return (WALK_ERR);
2836
2837 if (spa != 0 && spa != zio.io_spa)
2838 return (WALK_NEXT);
2839
2840 /* If the parent list is not empty, ignore */
2841 if (zio.io_parent_list.list_head.list_next !=
2842 wsp->walk_addr +
2843 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") +
2844 mdb_ctf_offsetof_by_name("struct list", "list_head"))
2845 return (WALK_NEXT);
2846
2847 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
2848 }
2849
2850 /*
2851 * ::zfs_blkstats
2852 *
2853 * -v print verbose per-level information
2854 *
2855 */
2856 static int
2857 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2858 {
2859 boolean_t verbose = B_FALSE;
2860 zfs_all_blkstats_t stats;
2861 dmu_object_type_t t;
2862 zfs_blkstat_t *tzb;
2863 uint64_t ditto;
2864 dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10];
2865 /* +10 in case it grew */
2866
2867 if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) {
2868 mdb_warn("failed to read 'dmu_ot'");
2869 return (DCMD_ERR);
2870 }
2871
2872 if (mdb_getopts(argc, argv,
2873 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2874 NULL) != argc)
2875 return (DCMD_USAGE);
2876
2877 if (!(flags & DCMD_ADDRSPEC))
2878 return (DCMD_USAGE);
2879
2880 if (GETMEMB(addr, "spa", spa_dsl_pool, addr) ||
2881 GETMEMB(addr, "dsl_pool", dp_blkstats, addr) ||
2882 mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
2883 mdb_warn("failed to read data at %p;", addr);
2884 mdb_printf("maybe no stats? run \"zpool scrub\" first.");
2885 return (DCMD_ERR);
2886 }
2887
2888 tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL];
2889 if (tzb->zb_gangs != 0) {
2890 mdb_printf("Ganged blocks: %llu\n",
2891 (longlong_t)tzb->zb_gangs);
2892 }
2893
2894 ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
2895 tzb->zb_ditto_3_of_3_samevdev;
2896 if (ditto != 0) {
2897 mdb_printf("Dittoed blocks on same vdev: %llu\n",
2898 (longlong_t)ditto);
2899 }
2900
2901 mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2902 "\t avg\t comp\t%%Total\tType\n");
2903
2904 for (t = 0; t <= DMU_OT_TOTAL; t++) {
2905 char csize[NICENUM_BUFLEN], lsize[NICENUM_BUFLEN];
2906 char psize[NICENUM_BUFLEN], asize[NICENUM_BUFLEN];
2907 char avg[NICENUM_BUFLEN];
2908 char comp[NICENUM_BUFLEN], pct[NICENUM_BUFLEN];
2909 char typename[64];
2910 int l;
2911
2912
2913 if (t == DMU_OT_DEFERRED)
2914 strcpy(typename, "deferred free");
2915 else if (t == DMU_OT_OTHER)
2916 strcpy(typename, "other");
2917 else if (t == DMU_OT_TOTAL)
2918 strcpy(typename, "Total");
2919 else if (mdb_readstr(typename, sizeof (typename),
2920 (uintptr_t)dmu_ot[t].ot_name) == -1) {
2921 mdb_warn("failed to read type name");
2922 return (DCMD_ERR);
2923 }
2924
2925 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
2926 continue;
2927
2928 for (l = -1; l < DN_MAX_LEVELS; l++) {
2929 int level = (l == -1 ? DN_MAX_LEVELS : l);
2930 zfs_blkstat_t *zb = &stats.zab_type[level][t];
2931
2932 if (zb->zb_asize == 0)
2933 continue;
2934
2935 /*
2936 * Don't print each level unless requested.
2937 */
2938 if (!verbose && level != DN_MAX_LEVELS)
2939 continue;
2940
2941 /*
2942 * If all the space is level 0, don't print the
2943 * level 0 separately.
2944 */
2945 if (level == 0 && zb->zb_asize ==
2946 stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
2947 continue;
2948
2949 mdb_nicenum(zb->zb_count, csize);
2950 mdb_nicenum(zb->zb_lsize, lsize);
2951 mdb_nicenum(zb->zb_psize, psize);
2952 mdb_nicenum(zb->zb_asize, asize);
2953 mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2954 (void) snprintfrac(comp, NICENUM_BUFLEN,
2955 zb->zb_lsize, zb->zb_psize, 2);
2956 (void) snprintfrac(pct, NICENUM_BUFLEN,
2957 100 * zb->zb_asize, tzb->zb_asize, 2);
2958
2959 mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
2960 "\t%5s\t%6s\t",
2961 csize, lsize, psize, asize, avg, comp, pct);
2962
2963 if (level == DN_MAX_LEVELS)
2964 mdb_printf("%s\n", typename);
2965 else
2966 mdb_printf(" L%d %s\n",
2967 level, typename);
2968 }
2969 }
2970
2971 return (DCMD_OK);
2972 }
2973
/*
 * Debugger-side view of reference_t; filled in with mdb_ctf_vread().
 */
typedef struct mdb_reference {
	/* the holder tag; printed in hex and, when printable, as a string */
	uintptr_t ref_holder;
	/* handed to ::whatis when a removed reference is displayed */
	uintptr_t ref_removed;
	/* hold count; printed only when it is not 1 */
	uint64_t ref_number;
} mdb_reference_t;
2979
2980 /* ARGSUSED */
2981 static int
2982 reference_cb(uintptr_t addr, const void *ignored, void *arg)
2983 {
2984 mdb_reference_t ref;
2985 boolean_t holder_is_str = B_FALSE;
2986 char holder_str[128];
2987 boolean_t removed = (boolean_t)arg;
2988
2989 if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr,
2990 0) == -1)
2991 return (DCMD_ERR);
2992
2993 if (mdb_readstr(holder_str, sizeof (holder_str),
2994 ref.ref_holder) != -1)
2995 holder_is_str = strisprint(holder_str);
2996
2997 if (removed)
2998 mdb_printf("removed ");
2999 mdb_printf("reference ");
3000 if (ref.ref_number != 1)
3001 mdb_printf("with count=%llu ", ref.ref_number);
3002 mdb_printf("with tag %lx", ref.ref_holder);
3003 if (holder_is_str)
3004 mdb_printf(" \"%s\"", holder_str);
3005 mdb_printf(", held at:\n");
3006
3007 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
3008
3009 if (removed) {
3010 mdb_printf("removed at:\n");
3011 (void) mdb_call_dcmd("whatis", ref.ref_removed,
3012 DCMD_ADDRSPEC, 0, NULL);
3013 }
3014
3015 mdb_printf("\n");
3016
3017 return (WALK_NEXT);
3018 }
3019
/*
 * The one refcount_t member that ::refcount insists on being able to read.
 */
typedef struct mdb_refcount {
	uint64_t rc_count;
} mdb_refcount_t;
3023
/*
 * Read separately with MDB_CTF_VREAD_QUIET; when that read fails ::refcount
 * reports the refcount as untracked.
 */
typedef struct mdb_refcount_removed {
	uint64_t rc_removed_count;
} mdb_refcount_removed_t;
3027
/*
 * Read separately with MDB_CTF_VREAD_QUIET; when that read fails ::refcount
 * assumes the refcount is tracked.
 */
typedef struct mdb_refcount_tracked {
	boolean_t rc_tracked;
} mdb_refcount_tracked_t;
3031
3032 /* ARGSUSED */
3033 static int
3034 refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3035 {
3036 mdb_refcount_t rc;
3037 mdb_refcount_removed_t rcr;
3038 mdb_refcount_tracked_t rct;
3039 int off;
3040 boolean_t released = B_FALSE;
3041
3042 if (!(flags & DCMD_ADDRSPEC))
3043 return (DCMD_USAGE);
3044
3045 if (mdb_getopts(argc, argv,
3046 'r', MDB_OPT_SETBITS, B_TRUE, &released,
3047 NULL) != argc)
3048 return (DCMD_USAGE);
3049
3050 if (mdb_ctf_vread(&rc, "refcount_t", "mdb_refcount_t", addr,
3051 0) == -1)
3052 return (DCMD_ERR);
3053
3054 if (mdb_ctf_vread(&rcr, "refcount_t", "mdb_refcount_removed_t", addr,
3055 MDB_CTF_VREAD_QUIET) == -1) {
3056 mdb_printf("refcount_t at %p has %llu holds (untracked)\n",
3057 addr, (longlong_t)rc.rc_count);
3058 return (DCMD_OK);
3059 }
3060
3061 if (mdb_ctf_vread(&rct, "refcount_t", "mdb_refcount_tracked_t", addr,
3062 MDB_CTF_VREAD_QUIET) == -1) {
3063 /* If this is an old target, it might be tracked. */
3064 rct.rc_tracked = B_TRUE;
3065 }
3066
3067 mdb_printf("refcount_t at %p has %llu current holds, "
3068 "%llu recently released holds\n",
3069 addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count);
3070
3071 if (rct.rc_tracked && rc.rc_count > 0)
3072 mdb_printf("current holds:\n");
3073 off = mdb_ctf_offsetof_by_name("refcount_t", "rc_list");
3074 if (off == -1)
3075 return (DCMD_ERR);
3076 mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off);
3077
3078 if (released && rcr.rc_removed_count > 0) {
3079 mdb_printf("released holds:\n");
3080
3081 off = mdb_ctf_offsetof_by_name("refcount_t", "rc_removed");
3082 if (off == -1)
3083 return (DCMD_ERR);
3084 mdb_pwalk("list", reference_cb, (void*)B_TRUE, addr + off);
3085 }
3086
3087 return (DCMD_OK);
3088 }
3089
3090 /* ARGSUSED */
3091 static int
3092 sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3093 {
3094 sa_attr_table_t *table;
3095 sa_os_t sa_os;
3096 char *name;
3097 int i;
3098
3099 if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) {
3100 mdb_warn("failed to read sa_os at %p", addr);
3101 return (DCMD_ERR);
3102 }
3103
3104 table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
3105 UM_SLEEP | UM_GC);
3106 name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC);
3107
3108 if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
3109 (uintptr_t)sa_os.sa_attr_table) == -1) {
3110 mdb_warn("failed to read sa_os at %p", addr);
3111 return (DCMD_ERR);
3112 }
3113
3114 mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n",
3115 "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME");
3116 for (i = 0; i != sa_os.sa_num_attrs; i++) {
3117 mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name);
3118 mdb_printf("%5x %8x %8x %8x %-s\n",
3119 (int)table[i].sa_attr, (int)table[i].sa_registered,
3120 (int)table[i].sa_length, table[i].sa_byteswap, name);
3121 }
3122
3123 return (DCMD_OK);
3124 }
3125
3126 static int
3127 sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count)
3128 {
3129 uintptr_t idx_table;
3130
3131 if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) {
3132 mdb_printf("can't find offset table in sa_idx_tab\n");
3133 return (-1);
3134 }
3135
3136 *off_tab = mdb_alloc(attr_count * sizeof (uint32_t),
3137 UM_SLEEP | UM_GC);
3138
3139 if (mdb_vread(*off_tab,
3140 attr_count * sizeof (uint32_t), idx_table) == -1) {
3141 mdb_warn("failed to attribute offset table %p", idx_table);
3142 return (-1);
3143 }
3144
3145 return (DCMD_OK);
3146 }
3147
3148 /*ARGSUSED*/
3149 static int
3150 sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3151 {
3152 uint32_t *offset_tab;
3153 int attr_count;
3154 uint64_t attr_id;
3155 uintptr_t attr_addr;
3156 uintptr_t bonus_tab, spill_tab;
3157 uintptr_t db_bonus, db_spill;
3158 uintptr_t os, os_sa;
3159 uintptr_t db_data;
3160
3161 if (argc != 1)
3162 return (DCMD_USAGE);
3163
3164 if (argv[0].a_type == MDB_TYPE_STRING)
3165 attr_id = mdb_strtoull(argv[0].a_un.a_str);
3166 else
3167 return (DCMD_USAGE);
3168
3169 if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) ||
3170 GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) ||
3171 GETMEMB(addr, "sa_handle", sa_os, os) ||
3172 GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) ||
3173 GETMEMB(addr, "sa_handle", sa_spill, db_spill)) {
3174 mdb_printf("Can't find necessary information in sa_handle "
3175 "in sa_handle\n");
3176 return (DCMD_ERR);
3177 }
3178
3179 if (GETMEMB(os, "objset", os_sa, os_sa)) {
3180 mdb_printf("Can't find os_sa in objset\n");
3181 return (DCMD_ERR);
3182 }
3183
3184 if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) {
3185 mdb_printf("Can't find sa_num_attrs\n");
3186 return (DCMD_ERR);
3187 }
3188
3189 if (attr_id > attr_count) {
3190 mdb_printf("attribute id number is out of range\n");
3191 return (DCMD_ERR);
3192 }
3193
3194 if (bonus_tab) {
3195 if (sa_get_off_table(bonus_tab, &offset_tab,
3196 attr_count) == -1) {
3197 return (DCMD_ERR);
3198 }
3199
3200 if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) {
3201 mdb_printf("can't find db_data in bonus dbuf\n");
3202 return (DCMD_ERR);
3203 }
3204 }
3205
3206 if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) &&
3207 spill_tab == NULL) {
3208 mdb_printf("Attribute does not exist\n");
3209 return (DCMD_ERR);
3210 } else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) {
3211 if (sa_get_off_table(spill_tab, &offset_tab,
3212 attr_count) == -1) {
3213 return (DCMD_ERR);
3214 }
3215 if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) {
3216 mdb_printf("can't find db_data in spill dbuf\n");
3217 return (DCMD_ERR);
3218 }
3219 if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) {
3220 mdb_printf("Attribute does not exist\n");
3221 return (DCMD_ERR);
3222 }
3223 }
3224 attr_addr = db_data + TOC_OFF(offset_tab[attr_id]);
3225 mdb_printf("%p\n", attr_addr);
3226 return (DCMD_OK);
3227 }
3228
3229 /* ARGSUSED */
3230 static int
3231 zfs_ace_print_common(uintptr_t addr, uint_t flags,
3232 uint64_t id, uint32_t access_mask, uint16_t ace_flags,
3233 uint16_t ace_type, int verbose)
3234 {
3235 if (DCMD_HDRSPEC(flags) && !verbose)
3236 mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n",
3237 "ADDR", "FLAGS", "MASK", "TYPE", "ID");
3238
3239 if (!verbose) {
3240 mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr,
3241 ace_flags, access_mask, ace_type, id);
3242 return (DCMD_OK);
3243 }
3244
3245 switch (ace_flags & ACE_TYPE_FLAGS) {
3246 case ACE_OWNER:
3247 mdb_printf("owner@:");
3248 break;
3249 case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
3250 mdb_printf("group@:");
3251 break;
3252 case ACE_EVERYONE:
3253 mdb_printf("everyone@:");
3254 break;
3255 case ACE_IDENTIFIER_GROUP:
3256 mdb_printf("group:%llx:", (u_longlong_t)id);
3257 break;
3258 case 0: /* User entry */
3259 mdb_printf("user:%llx:", (u_longlong_t)id);
3260 break;
3261 }
3262
3263 /* print out permission mask */
3264 if (access_mask & ACE_READ_DATA)
3265 mdb_printf("r");
3266 else
3267 mdb_printf("-");
3268 if (access_mask & ACE_WRITE_DATA)
3269 mdb_printf("w");
3270 else
3271 mdb_printf("-");
3272 if (access_mask & ACE_EXECUTE)
3273 mdb_printf("x");
3274 else
3275 mdb_printf("-");
3276 if (access_mask & ACE_APPEND_DATA)
3277 mdb_printf("p");
3278 else
3279 mdb_printf("-");
3280 if (access_mask & ACE_DELETE)
3281 mdb_printf("d");
3282 else
3283 mdb_printf("-");
3284 if (access_mask & ACE_DELETE_CHILD)
3285 mdb_printf("D");
3286 else
3287 mdb_printf("-");
3288 if (access_mask & ACE_READ_ATTRIBUTES)
3289 mdb_printf("a");
3290 else
3291 mdb_printf("-");
3292 if (access_mask & ACE_WRITE_ATTRIBUTES)
3293 mdb_printf("A");
3294 else
3295 mdb_printf("-");
3296 if (access_mask & ACE_READ_NAMED_ATTRS)
3297 mdb_printf("R");
3298 else
3299 mdb_printf("-");
3300 if (access_mask & ACE_WRITE_NAMED_ATTRS)
3301 mdb_printf("W");
3302 else
3303 mdb_printf("-");
3304 if (access_mask & ACE_READ_ACL)
3305 mdb_printf("c");
3306 else
3307 mdb_printf("-");
3308 if (access_mask & ACE_WRITE_ACL)
3309 mdb_printf("C");
3310 else
3311 mdb_printf("-");
3312 if (access_mask & ACE_WRITE_OWNER)
3313 mdb_printf("o");
3314 else
3315 mdb_printf("-");
3316 if (access_mask & ACE_SYNCHRONIZE)
3317 mdb_printf("s");
3318 else
3319 mdb_printf("-");
3320
3321 mdb_printf(":");
3322
3323 /* Print out inheritance flags */
3324 if (ace_flags & ACE_FILE_INHERIT_ACE)
3325 mdb_printf("f");
3326 else
3327 mdb_printf("-");
3328 if (ace_flags & ACE_DIRECTORY_INHERIT_ACE)
3329 mdb_printf("d");
3330 else
3331 mdb_printf("-");
3332 if (ace_flags & ACE_INHERIT_ONLY_ACE)
3333 mdb_printf("i");
3334 else
3335 mdb_printf("-");
3336 if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE)
3337 mdb_printf("n");
3338 else
3339 mdb_printf("-");
3340 if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG)
3341 mdb_printf("S");
3342 else
3343 mdb_printf("-");
3344 if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG)
3345 mdb_printf("F");
3346 else
3347 mdb_printf("-");
3348 if (ace_flags & ACE_INHERITED_ACE)
3349 mdb_printf("I");
3350 else
3351 mdb_printf("-");
3352
3353 switch (ace_type) {
3354 case ACE_ACCESS_ALLOWED_ACE_TYPE:
3355 mdb_printf(":allow\n");
3356 break;
3357 case ACE_ACCESS_DENIED_ACE_TYPE:
3358 mdb_printf(":deny\n");
3359 break;
3360 case ACE_SYSTEM_AUDIT_ACE_TYPE:
3361 mdb_printf(":audit\n");
3362 break;
3363 case ACE_SYSTEM_ALARM_ACE_TYPE:
3364 mdb_printf(":alarm\n");
3365 break;
3366 default:
3367 mdb_printf(":?\n");
3368 }
3369 return (DCMD_OK);
3370 }
3371
3372 /* ARGSUSED */
3373 static int
3374 zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3375 {
3376 zfs_ace_t zace;
3377 int verbose = FALSE;
3378 uint64_t id;
3379
3380 if (!(flags & DCMD_ADDRSPEC))
3381 return (DCMD_USAGE);
3382
3383 if (mdb_getopts(argc, argv,
3384 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
3385 return (DCMD_USAGE);
3386
3387 if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) {
3388 mdb_warn("failed to read zfs_ace_t");
3389 return (DCMD_ERR);
3390 }
3391
3392 if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 ||
3393 (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
3394 id = zace.z_fuid;
3395 else
3396 id = -1;
3397
3398 return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask,
3399 zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose));
3400 }
3401
3402 /* ARGSUSED */
3403 static int
3404 zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3405 {
3406 ace_t ace;
3407 uint64_t id;
3408 int verbose = FALSE;
3409
3410 if (!(flags & DCMD_ADDRSPEC))
3411 return (DCMD_USAGE);
3412
3413 if (mdb_getopts(argc, argv,
3414 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
3415 return (DCMD_USAGE);
3416
3417 if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) {
3418 mdb_warn("failed to read ace_t");
3419 return (DCMD_ERR);
3420 }
3421
3422 if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 ||
3423 (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
3424 id = ace.a_who;
3425 else
3426 id = -1;
3427
3428 return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask,
3429 ace.a_flags, ace.a_type, verbose));
3430 }
3431
/*
 * Arguments threaded from zfs_acl_dump() through the ACL walk callbacks.
 */
typedef struct acl_dump_args {
	/* argument vector of the dump dcmd, forwarded to each ACE dcmd */
	int a_argc;
	const mdb_arg_t *a_argv;
	/* z_version of the ACL; 1 selects "zfs_ace", anything else "zfs_ace0" */
	uint16_t a_version;
	/* DCMD_LOOPFIRST for the first ACE printed, DCMD_LOOP afterwards */
	int a_flags;
} acl_dump_args_t;
3438
3439 /* ARGSUSED */
3440 static int
3441 acl_aces_cb(uintptr_t addr, const void *unknown, void *arg)
3442 {
3443 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
3444
3445 if (acl_args->a_version == 1) {
3446 if (mdb_call_dcmd("zfs_ace", addr,
3447 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
3448 acl_args->a_argv) != DCMD_OK) {
3449 return (WALK_ERR);
3450 }
3451 } else {
3452 if (mdb_call_dcmd("zfs_ace0", addr,
3453 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
3454 acl_args->a_argv) != DCMD_OK) {
3455 return (WALK_ERR);
3456 }
3457 }
3458 acl_args->a_flags = DCMD_LOOP;
3459 return (WALK_NEXT);
3460 }
3461
3462 /* ARGSUSED */
3463 static int
3464 acl_cb(uintptr_t addr, const void *unknown, void *arg)
3465 {
3466 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
3467
3468 if (acl_args->a_version == 1) {
3469 if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb,
3470 arg, addr) != 0) {
3471 mdb_warn("can't walk ACEs");
3472 return (DCMD_ERR);
3473 }
3474 } else {
3475 if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb,
3476 arg, addr) != 0) {
3477 mdb_warn("can't walk ACEs");
3478 return (DCMD_ERR);
3479 }
3480 }
3481 return (WALK_NEXT);
3482 }
3483
3484 /* ARGSUSED */
3485 static int
3486 zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3487 {
3488 zfs_acl_t zacl;
3489 int verbose = FALSE;
3490 acl_dump_args_t acl_args;
3491
3492 if (!(flags & DCMD_ADDRSPEC))
3493 return (DCMD_USAGE);
3494
3495 if (mdb_getopts(argc, argv,
3496 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
3497 return (DCMD_USAGE);
3498
3499 if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) {
3500 mdb_warn("failed to read zfs_acl_t");
3501 return (DCMD_ERR);
3502 }
3503
3504 acl_args.a_argc = argc;
3505 acl_args.a_argv = argv;
3506 acl_args.a_version = zacl.z_version;
3507 acl_args.a_flags = DCMD_LOOPFIRST;
3508
3509 if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) {
3510 mdb_warn("can't walk ACL");
3511 return (DCMD_ERR);
3512 }
3513
3514 return (DCMD_OK);
3515 }
3516
3517 /* ARGSUSED */
3518 static int
3519 zfs_acl_node_walk_init(mdb_walk_state_t *wsp)
3520 {
3521 if (wsp->walk_addr == NULL) {
3522 mdb_warn("must supply address of zfs_acl_node_t\n");
3523 return (WALK_ERR);
3524 }
3525
3526 wsp->walk_addr +=
3527 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl");
3528
3529 if (mdb_layered_walk("list", wsp) == -1) {
3530 mdb_warn("failed to walk 'list'\n");
3531 return (WALK_ERR);
3532 }
3533
3534 return (WALK_NEXT);
3535 }
3536
3537 static int
3538 zfs_acl_node_walk_step(mdb_walk_state_t *wsp)
3539 {
3540 zfs_acl_node_t aclnode;
3541
3542 if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t),
3543 wsp->walk_addr) == -1) {
3544 mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr);
3545 return (WALK_ERR);
3546 }
3547
3548 return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata));
3549 }
3550
/*
 * Private state of the ACE walkers, allocated by
 * zfs_aces_walk_init_common() and consumed by zfs_aces_walk_step().
 */
typedef struct ace_walk_data {
	/* ACEs still to visit; the step routine stops when this hits 0 */
	int ace_count;
	/* 0: entries are read as ace_t, 1: entries are read as zfs_ace_t */
	int ace_version;
} ace_walk_data_t;
3555
3556 static int
3557 zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version,
3558 int ace_count, uintptr_t ace_data)
3559 {
3560 ace_walk_data_t *ace_walk_data;
3561
3562 if (wsp->walk_addr == NULL) {
3563 mdb_warn("must supply address of zfs_acl_node_t\n");
3564 return (WALK_ERR);
3565 }
3566
3567 ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC);
3568
3569 ace_walk_data->ace_count = ace_count;
3570 ace_walk_data->ace_version = version;
3571
3572 wsp->walk_addr = ace_data;
3573 wsp->walk_data = ace_walk_data;
3574
3575 return (WALK_NEXT);
3576 }
3577
3578 static int
3579 zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version)
3580 {
3581 static int gotid;
3582 static mdb_ctf_id_t acl_id;
3583 int z_ace_count;
3584 uintptr_t z_acldata;
3585
3586 if (!gotid) {
3587 if (mdb_ctf_lookup_by_name("struct zfs_acl_node",
3588 &acl_id) == -1) {
3589 mdb_warn("couldn't find struct zfs_acl_node");
3590 return (DCMD_ERR);
3591 }
3592 gotid = TRUE;
3593 }
3594
3595 if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) {
3596 return (DCMD_ERR);
3597 }
3598 if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) {
3599 return (DCMD_ERR);
3600 }
3601
3602 return (zfs_aces_walk_init_common(wsp, version,
3603 z_ace_count, z_acldata));
3604 }
3605
3606 /* ARGSUSED */
3607 static int
3608 zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp)
3609 {
3610 return (zfs_acl_node_aces_walk_init_common(wsp, 1));
3611 }
3612
3613 /* ARGSUSED */
3614 static int
3615 zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp)
3616 {
3617 return (zfs_acl_node_aces_walk_init_common(wsp, 0));
3618 }
3619
3620 static int
3621 zfs_aces_walk_step(mdb_walk_state_t *wsp)
3622 {
3623 ace_walk_data_t *ace_data = wsp->walk_data;
3624 zfs_ace_t zace;
3625 ace_t *acep;
3626 int status;
3627 int entry_type;
3628 int allow_type;
3629 uintptr_t ptr;
3630
3631 if (ace_data->ace_count == 0)
3632 return (WALK_DONE);
3633
3634 if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) {
3635 mdb_warn("failed to read zfs_ace_t at %#lx",
3636 wsp->walk_addr);
3637 return (WALK_ERR);
3638 }
3639
3640 switch (ace_data->ace_version) {
3641 case 0:
3642 acep = (ace_t *)&zace;
3643 entry_type = acep->a_flags & ACE_TYPE_FLAGS;
3644 allow_type = acep->a_type;
3645 break;
3646 case 1:
3647 entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS;
3648 allow_type = zace.z_hdr.z_type;
3649 break;
3650 default:
3651 return (WALK_ERR);
3652 }
3653
3654 ptr = (uintptr_t)wsp->walk_addr;
3655 switch (entry_type) {
3656 case ACE_OWNER:
3657 case ACE_EVERYONE:
3658 case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
3659 ptr += ace_data->ace_version == 0 ?
3660 sizeof (ace_t) : sizeof (zfs_ace_hdr_t);
3661 break;
3662 case ACE_IDENTIFIER_GROUP:
3663 default:
3664 switch (allow_type) {
3665 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
3666 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
3667 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
3668 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
3669 ptr += ace_data->ace_version == 0 ?
3670 sizeof (ace_t) : sizeof (zfs_object_ace_t);
3671 break;
3672 default:
3673 ptr += ace_data->ace_version == 0 ?
3674 sizeof (ace_t) : sizeof (zfs_ace_t);
3675 break;
3676 }
3677 }
3678
3679 ace_data->ace_count--;
3680 status = wsp->walk_callback(wsp->walk_addr,
3681 (void *)(uintptr_t)&zace, wsp->walk_cbdata);
3682
3683 wsp->walk_addr = ptr;
3684 return (status);
3685 }
3686
/*
 * Debugger-side view of the target's rrwlock_t.  rrwlock() fills it in
 * with mdb_ctf_vread(), naming "rrwlock_t" as the target type and this
 * type as the local one, so the member names here must not be changed.
 */
typedef struct mdb_zfs_rrwlock {
	/* non-zero when the write lock is held; printed as the owner thread */
	uintptr_t rr_writer;
	/* non-zero when a writer is waiting for the lock */
	boolean_t rr_writer_wanted;
} mdb_zfs_rrwlock_t;

/*
 * Cached value of the target's rrw_tsd_key; 0 means it has not been read
 * yet (see rrwlock()).
 */
static uint_t rrw_key;
3693
3694 /* ARGSUSED */
3695 static int
3696 rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3697 {
3698 mdb_zfs_rrwlock_t rrw;
3699
3700 if (rrw_key == 0) {
3701 if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1)
3702 return (DCMD_ERR);
3703 }
3704
3705 if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr,
3706 0) == -1)
3707 return (DCMD_ERR);
3708
3709 if (rrw.rr_writer != 0) {
3710 mdb_printf("write lock held by thread %lx\n", rrw.rr_writer);
3711 return (DCMD_OK);
3712 }
3713
3714 if (rrw.rr_writer_wanted) {
3715 mdb_printf("writer wanted\n");
3716 }
3717
3718 mdb_printf("anonymous references:\n");
3719 (void) mdb_call_dcmd("refcount", addr +
3720 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"),
3721 DCMD_ADDRSPEC, 0, NULL);
3722
3723 mdb_printf("linked references:\n");
3724 (void) mdb_call_dcmd("refcount", addr +
3725 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"),
3726 DCMD_ADDRSPEC, 0, NULL);
3727
3728 /*
3729 * XXX This should find references from
3730 * "::walk thread | ::tsd -v <rrw_key>", but there is no support
3731 * for programmatic consumption of dcmds, so this would be
3732 * difficult, potentially requiring reimplementing ::tsd (both
3733 * user and kernel versions) in this MDB module.
3734 */
3735
3736 return (DCMD_OK);
3737 }
3738
/*
 * Debugger-side view of the target's arc_buf_hdr_t, filled in by
 * arc_compression_stats_cb() with mdb_ctf_vread().  The target type is
 * named "arc_buf_hdr_t" and this type is the local one, so the member
 * names here must not be changed.
 */
typedef struct mdb_arc_buf_hdr_t {
	/* physical (compressed) size, in units of SPA_MINBLOCKSIZE */
	uint16_t b_psize;
	/* logical (uncompressed) size, in units of SPA_MINBLOCKSIZE */
	uint16_t b_lsize;
	struct {
		/* number of buffers; feeds the buffer-count histograms */
		uint32_t b_bufcnt;
		/* compared against the addresses of the ARC_* state symbols */
		uintptr_t b_state;
	} b_l1hdr;
} mdb_arc_buf_hdr_t;
3747
/*
 * Option bits for ::arc_compression_stats; each is set by the command
 * line switch noted next to it.
 */
enum arc_cflags {
	ARC_CFLAG_VERBOSE	= 0x01,	/* -v: linear instead of log2 scale */
	ARC_CFLAG_ANON		= 0x02,	/* -a: print anon state histograms */
	ARC_CFLAG_MRU		= 0x04,	/* -r: print mru state histograms */
	ARC_CFLAG_MFU		= 0x08,	/* -f: print mfu state histograms */
	ARC_CFLAG_BUFS		= 0x10,	/* -b: print buffer-count histograms */
};
3755
/*
 * Working state of ::arc_compression_stats.  The symbols identify the ARC
 * states a header can be in; every histogram is an array of hist_nbuckets
 * counters that arc_compression_stats_cb() increments per header.
 */
typedef struct arc_compression_stats_data {
	GElf_Sym anon_sym;	/* ARC_anon symbol */
	GElf_Sym mru_sym;	/* ARC_mru symbol */
	GElf_Sym mrug_sym;	/* ARC_mru_ghost symbol */
	GElf_Sym mfu_sym;	/* ARC_mfu symbol */
	GElf_Sym mfug_sym;	/* ARC_mfu_ghost symbol */
	GElf_Sym l2c_sym;	/* ARC_l2c_only symbol */
	uint64_t *anon_c_hist;	/* histogram of compressed sizes in anon */
	uint64_t *anon_u_hist;	/* histogram of uncompressed sizes in anon */
	uint64_t *anon_bufs;	/* histogram of buffer counts in anon state */
	uint64_t *mru_c_hist;	/* histogram of compressed sizes in mru */
	uint64_t *mru_u_hist;	/* histogram of uncompressed sizes in mru */
	uint64_t *mru_bufs;	/* histogram of buffer counts in mru */
	uint64_t *mfu_c_hist;	/* histogram of compressed sizes in mfu */
	uint64_t *mfu_u_hist;	/* histogram of uncompressed sizes in mfu */
	uint64_t *mfu_bufs;	/* histogram of buffer counts in mfu */
	uint64_t *all_c_hist;	/* histogram of compressed anon + mru + mfu */
	uint64_t *all_u_hist;	/* histogram of uncompressed anon + mru + mfu */
	uint64_t *all_bufs;	/* histogram of buffer counts in all states */
	int arc_cflags;		/* arc compression flags, specified by user */
	int hist_nbuckets;	/* number of buckets in each histogram */
} arc_compression_stats_data_t;
3778
/*
 * Return the 1-based position of the most significant set bit of 'i'
 * (1 for the least significant bit, 64 for the most significant), or 0
 * when no bit is set.
 */
int
highbit64(uint64_t i)
{
	int pos;
	int width;

	if (i == 0)
		return (0);

	/*
	 * Binary search: whenever anything is set above the low 'width'
	 * bits, discard those low bits and account for them in 'pos'.
	 */
	pos = 1;
	for (width = 32; width > 0; width >>= 1) {
		if ((i >> width) != 0) {
			pos += width;
			i >>= width;
		}
	}

	return (pos);
}
3806
3807 /* ARGSUSED */
3808 static int
3809 arc_compression_stats_cb(uintptr_t addr, const void *unknown, void *arg)
3810 {
3811 arc_compression_stats_data_t *data = arg;
3812 mdb_arc_buf_hdr_t hdr;
3813 int cbucket, ubucket, bufcnt;
3814
3815 if (mdb_ctf_vread(&hdr, "arc_buf_hdr_t", "mdb_arc_buf_hdr_t",
3816 addr, 0) == -1) {
3817 return (WALK_ERR);
3818 }
3819
3820 /*
3821 * Headers in the ghost states, or the l2c_only state don't have
3822 * arc buffers linked off of them. Thus, their compressed size
3823 * is meaningless, so we skip these from the stats.
3824 */
3825 if (hdr.b_l1hdr.b_state == data->mrug_sym.st_value ||
3826 hdr.b_l1hdr.b_state == data->mfug_sym.st_value ||
3827 hdr.b_l1hdr.b_state == data->l2c_sym.st_value) {
3828 return (WALK_NEXT);
3829 }
3830
3831 /*
3832 * The physical size (compressed) and logical size
3833 * (uncompressed) are in units of SPA_MINBLOCKSIZE. By default,
3834 * we use the log2 of this value (rounded down to the nearest
3835 * integer) to determine the bucket to assign this header to.
3836 * Thus, the histogram is logarithmic with respect to the size
3837 * of the header. For example, the following is a mapping of the
3838 * bucket numbers and the range of header sizes they correspond to:
3839 *
3840 * 0: 0 byte headers
3841 * 1: 512 byte headers
3842 * 2: [1024 - 2048) byte headers
3843 * 3: [2048 - 4096) byte headers
3844 * 4: [4096 - 8192) byte headers
3845 * 5: [8192 - 16394) byte headers
3846 * 6: [16384 - 32768) byte headers
3847 * 7: [32768 - 65536) byte headers
3848 * 8: [65536 - 131072) byte headers
3849 * 9: 131072 byte headers
3850 *
3851 * If the ARC_CFLAG_VERBOSE flag was specified, we use the
3852 * physical and logical sizes directly. Thus, the histogram will
3853 * no longer be logarithmic; instead it will be linear with
3854 * respect to the size of the header. The following is a mapping
3855 * of the first many bucket numbers and the header size they
3856 * correspond to:
3857 *
3858 * 0: 0 byte headers
3859 * 1: 512 byte headers
3860 * 2: 1024 byte headers
3861 * 3: 1536 byte headers
3862 * 4: 2048 byte headers
3863 * 5: 2560 byte headers
3864 * 6: 3072 byte headers
3865 *
3866 * And so on. Keep in mind that a range of sizes isn't used in
3867 * the case of linear scale because the headers can only
3868 * increment or decrement in sizes of 512 bytes. So, it's not
3869 * possible for a header to be sized in between whats listed
3870 * above.
3871 *
3872 * Also, the above mapping values were calculated assuming a
3873 * SPA_MINBLOCKSHIFT of 512 bytes and a SPA_MAXBLOCKSIZE of 128K.
3874 */
3875
3876 if (data->arc_cflags & ARC_CFLAG_VERBOSE) {
3877 cbucket = hdr.b_psize;
3878 ubucket = hdr.b_lsize;
3879 } else {
3880 cbucket = highbit64(hdr.b_psize);
3881 ubucket = highbit64(hdr.b_lsize);
3882 }
3883
3884 bufcnt = hdr.b_l1hdr.b_bufcnt;
3885 if (bufcnt >= data->hist_nbuckets)
3886 bufcnt = data->hist_nbuckets - 1;
3887
3888 /* Ensure we stay within the bounds of the histogram array */
3889 ASSERT3U(cbucket, <, data->hist_nbuckets);
3890 ASSERT3U(ubucket, <, data->hist_nbuckets);
3891
3892 if (hdr.b_l1hdr.b_state == data->anon_sym.st_value) {
3893 data->anon_c_hist[cbucket]++;
3894 data->anon_u_hist[ubucket]++;
3895 data->anon_bufs[bufcnt]++;
3896 } else if (hdr.b_l1hdr.b_state == data->mru_sym.st_value) {
3897 data->mru_c_hist[cbucket]++;
3898 data->mru_u_hist[ubucket]++;
3899 data->mru_bufs[bufcnt]++;
3900 } else if (hdr.b_l1hdr.b_state == data->mfu_sym.st_value) {
3901 data->mfu_c_hist[cbucket]++;
3902 data->mfu_u_hist[ubucket]++;
3903 data->mfu_bufs[bufcnt]++;
3904 }
3905
3906 data->all_c_hist[cbucket]++;
3907 data->all_u_hist[ubucket]++;
3908 data->all_bufs[bufcnt]++;
3909
3910 return (WALK_NEXT);
3911 }
3912
3913 /* ARGSUSED */
3914 static int
3915 arc_compression_stats(uintptr_t addr, uint_t flags, int argc,
3916 const mdb_arg_t *argv)
3917 {
3918 arc_compression_stats_data_t data = { 0 };
3919 unsigned int max_shifted = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT;
3920 unsigned int hist_size;
3921 char range[32];
3922 int rc = DCMD_OK;
3923
3924 if (mdb_getopts(argc, argv,
3925 'v', MDB_OPT_SETBITS, ARC_CFLAG_VERBOSE, &data.arc_cflags,
3926 'a', MDB_OPT_SETBITS, ARC_CFLAG_ANON, &data.arc_cflags,
3927 'b', MDB_OPT_SETBITS, ARC_CFLAG_BUFS, &data.arc_cflags,
3928 'r', MDB_OPT_SETBITS, ARC_CFLAG_MRU, &data.arc_cflags,
3929 'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags) != argc)
3930 return (DCMD_USAGE);
3931
3932 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_anon", &data.anon_sym) ||
3933 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru", &data.mru_sym) ||
3934 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru_ghost", &data.mrug_sym) ||
3935 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu", &data.mfu_sym) ||
3936 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu_ghost", &data.mfug_sym) ||
3937 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_l2c_only", &data.l2c_sym)) {
3938 mdb_warn("can't find arc state symbol");
3939 return (DCMD_ERR);
3940 }
3941
3942 /*
3943 * Determine the maximum expected size for any header, and use
3944 * this to determine the number of buckets needed for each
3945 * histogram. If ARC_CFLAG_VERBOSE is specified, this value is
3946 * used directly; otherwise the log2 of the maximum size is
3947 * used. Thus, if using a log2 scale there's a maximum of 10
3948 * possible buckets, while the linear scale (when using
3949 * ARC_CFLAG_VERBOSE) has a maximum of 257 buckets.
3950 */
3951 if (data.arc_cflags & ARC_CFLAG_VERBOSE)
3952 data.hist_nbuckets = max_shifted + 1;
3953 else
3954 data.hist_nbuckets = highbit64(max_shifted) + 1;
3955
3956 hist_size = sizeof (uint64_t) * data.hist_nbuckets;
3957
3958 data.anon_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
3959 data.anon_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
3960 data.anon_bufs = mdb_zalloc(hist_size, UM_SLEEP);
3961
3962 data.mru_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
3963 data.mru_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
3964 data.mru_bufs = mdb_zalloc(hist_size, UM_SLEEP);
3965
3966 data.mfu_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
3967 data.mfu_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
3968 data.mfu_bufs = mdb_zalloc(hist_size, UM_SLEEP);
3969
3970 data.all_c_hist = mdb_zalloc(hist_size, UM_SLEEP);
3971 data.all_u_hist = mdb_zalloc(hist_size, UM_SLEEP);
3972 data.all_bufs = mdb_zalloc(hist_size, UM_SLEEP);
3973
3974 if (mdb_walk("arc_buf_hdr_t_full", arc_compression_stats_cb,
3975 &data) != 0) {
3976 mdb_warn("can't walk arc_buf_hdr's");
3977 rc = DCMD_ERR;
3978 goto out;
3979 }
3980
3981 if (data.arc_cflags & ARC_CFLAG_VERBOSE) {
3982 rc = mdb_snprintf(range, sizeof (range),
3983 "[n*%llu, (n+1)*%llu)", SPA_MINBLOCKSIZE,
3984 SPA_MINBLOCKSIZE);
3985 } else {
3986 rc = mdb_snprintf(range, sizeof (range),
3987 "[2^(n-1)*%llu, 2^n*%llu)", SPA_MINBLOCKSIZE,
3988 SPA_MINBLOCKSIZE);
3989 }
3990
3991 if (rc < 0) {
3992 /* snprintf failed, abort the dcmd */
3993 rc = DCMD_ERR;
3994 goto out;
3995 } else {
3996 /* snprintf succeeded above, reset return code */
3997 rc = DCMD_OK;
3998 }
3999
4000 if (data.arc_cflags & ARC_CFLAG_ANON) {
4001 if (data.arc_cflags & ARC_CFLAG_BUFS) {
4002 mdb_printf("Histogram of the number of anon buffers "
4003 "that are associated with an arc hdr.\n");
4004 dump_histogram(data.anon_bufs, data.hist_nbuckets, 0);
4005 mdb_printf("\n");
4006 }
4007 mdb_printf("Histogram of compressed anon buffers.\n"
4008 "Each bucket represents buffers of size: %s.\n", range);
4009 dump_histogram(data.anon_c_hist, data.hist_nbuckets, 0);
4010 mdb_printf("\n");
4011
4012 mdb_printf("Histogram of uncompressed anon buffers.\n"
4013 "Each bucket represents buffers of size: %s.\n", range);
4014 dump_histogram(data.anon_u_hist, data.hist_nbuckets, 0);
4015 mdb_printf("\n");
4016 }
4017
4018 if (data.arc_cflags & ARC_CFLAG_MRU) {
4019 if (data.arc_cflags & ARC_CFLAG_BUFS) {
4020 mdb_printf("Histogram of the number of mru buffers "
4021 "that are associated with an arc hdr.\n");
4022 dump_histogram(data.mru_bufs, data.hist_nbuckets, 0);
4023 mdb_printf("\n");
4024 }
4025 mdb_printf("Histogram of compressed mru buffers.\n"
4026 "Each bucket represents buffers of size: %s.\n", range);
4027 dump_histogram(data.mru_c_hist, data.hist_nbuckets, 0);
4028 mdb_printf("\n");
4029
4030 mdb_printf("Histogram of uncompressed mru buffers.\n"
4031 "Each bucket represents buffers of size: %s.\n", range);
4032 dump_histogram(data.mru_u_hist, data.hist_nbuckets, 0);
4033 mdb_printf("\n");
4034 }
4035
4036 if (data.arc_cflags & ARC_CFLAG_MFU) {
4037 if (data.arc_cflags & ARC_CFLAG_BUFS) {
4038 mdb_printf("Histogram of the number of mfu buffers "
4039 "that are associated with an arc hdr.\n");
4040 dump_histogram(data.mfu_bufs, data.hist_nbuckets, 0);
4041 mdb_printf("\n");
4042 }
4043
4044 mdb_printf("Histogram of compressed mfu buffers.\n"
4045 "Each bucket represents buffers of size: %s.\n", range);
4046 dump_histogram(data.mfu_c_hist, data.hist_nbuckets, 0);
4047 mdb_printf("\n");
4048
4049 mdb_printf("Histogram of uncompressed mfu buffers.\n"
4050 "Each bucket represents buffers of size: %s.\n", range);
4051 dump_histogram(data.mfu_u_hist, data.hist_nbuckets, 0);
4052 mdb_printf("\n");
4053 }
4054
4055 if (data.arc_cflags & ARC_CFLAG_BUFS) {
4056 mdb_printf("Histogram of all buffers that "
4057 "are associated with an arc hdr.\n");
4058 dump_histogram(data.all_bufs, data.hist_nbuckets, 0);
4059 mdb_printf("\n");
4060 }
4061
4062 mdb_printf("Histogram of all compressed buffers.\n"
4063 "Each bucket represents buffers of size: %s.\n", range);
4064 dump_histogram(data.all_c_hist, data.hist_nbuckets, 0);
4065 mdb_printf("\n");
4066
4067 mdb_printf("Histogram of all uncompressed buffers.\n"
4068 "Each bucket represents buffers of size: %s.\n", range);
4069 dump_histogram(data.all_u_hist, data.hist_nbuckets, 0);
4070
4071 out:
4072 mdb_free(data.anon_c_hist, hist_size);
4073 mdb_free(data.anon_u_hist, hist_size);
4074 mdb_free(data.anon_bufs, hist_size);
4075
4076 mdb_free(data.mru_c_hist, hist_size);
4077 mdb_free(data.mru_u_hist, hist_size);
4078 mdb_free(data.mru_bufs, hist_size);
4079
4080 mdb_free(data.mfu_c_hist, hist_size);
4081 mdb_free(data.mfu_u_hist, hist_size);
4082 mdb_free(data.mfu_bufs, hist_size);
4083
4084 mdb_free(data.all_c_hist, hist_size);
4085 mdb_free(data.all_u_hist, hist_size);
4086 mdb_free(data.all_bufs, hist_size);
4087
4088 return (rc);
4089 }
4090
/*
 * MDB module linkage information:
 *
 * We declare a list of structures describing our dcmds, a list of
 * structures describing our walkers, and a function named _mdb_init to
 * return a pointer to our module information.
 */
4097
4098 static const mdb_dcmd_t dcmds[] = {
4099 { "arc", "[-bkmg]", "print ARC variables", arc_print },
4100 { "blkptr", ":", "print blkptr_t", blkptr },
4101 { "dbuf", ":", "print dmu_buf_impl_t", dbuf },
4102 { "dbuf_stats", ":", "dbuf stats", dbuf_stats },
4103 { "dbufs",
4104 "\t[-O objset_t*] [-n objset_name | \"mos\"] "
4105 "[-o object | \"mdn\"] \n"
4106 "\t[-l level] [-b blkid | \"bonus\"]",
4107 "find dmu_buf_impl_t's that match specified criteria", dbufs },
4108 { "abuf_find", "dva_word[0] dva_word[1]",
4109 "find arc_buf_hdr_t of a specified DVA",
4110 abuf_find },
4111 { "spa", "?[-cevmMh]\n"
4112 "\t-c display spa config\n"
4113 "\t-e display vdev statistics\n"
4114 "\t-v display vdev information\n"
4115 "\t-m display metaslab statistics\n"
4116 "\t-M display metaslab group statistics\n"
4117 "\t-h display histogram (requires -m or -M)\n",
4118 "spa_t summary", spa_print },
4119 { "spa_config", ":", "print spa_t configuration", spa_print_config },
4120 { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
4121 { "spa_vdevs", ":[-emMh]\n"
4122 "\t-e display vdev statistics\n"
4123 "\t-m dispaly metaslab statistics\n"
4124 "\t-M display metaslab group statistic\n"
4125 "\t-h display histogram (requires -m or -M)\n",
4126 "given a spa_t, print vdev summary", spa_vdevs },
4127 { "vdev", ":[-remMh]\n"
4128 "\t-r display recursively\n"
4129 "\t-e display statistics\n"
4130 "\t-m display metaslab statistics (top level vdev only)\n"
4131 "\t-M display metaslab group statistics (top level vdev only)\n"
4132 "\t-h display histogram (requires -m or -M)\n",
4133 "vdev_t summary", vdev_print },
4134 { "zio", ":[-cpr]\n"
4135 "\t-c display children\n"
4136 "\t-p display parents\n"
4137 "\t-r display recursively",
4138 "zio_t summary", zio_print },
4139 { "zio_state", "?", "print out all zio_t structures on system or "
4140 "for a particular pool", zio_state },
4141 { "zfs_blkstats", ":[-v]",
4142 "given a spa_t, print block type stats from last scrub",
4143 zfs_blkstats },
4144 { "zfs_params", "", "print zfs tunable parameters", zfs_params },
4145 { "refcount", ":[-r]\n"
4146 "\t-r display recently removed references",
4147 "print refcount_t holders", refcount },
4148 { "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf },
4149 { "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t",
4150 zfs_acl_dump },
4151 { "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print },
4152 { "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print },
4153 { "sa_attr_table", ":", "print SA attribute table from sa_os_t",
4154 sa_attr_table},
4155 { "sa_attr", ": attr_id",
4156 "print SA attribute address when given sa_handle_t", sa_attr_print},
4157 { "zfs_dbgmsg", ":[-va]",
4158 "print zfs debug log", dbgmsg},
4159 { "rrwlock", ":",
4160 "print rrwlock_t, including readers", rrwlock},
4161 { "metaslab_weight", "weight",
4162 "print metaslab weight", metaslab_weight},
4163 { "metaslab_trace", ":",
4164 "print metaslab allocation trace records", metaslab_trace},
4165 { "arc_compression_stats", ":[-vabrf]\n"
4166 "\t-v verbose, display a linearly scaled histogram\n"
4167 "\t-a display ARC_anon state statistics individually\n"
4168 "\t-r display ARC_mru state statistics individually\n"
4169 "\t-f display ARC_mfu state statistics individually\n"
4170 "\t-b display histogram of buffer counts\n",
4171 "print a histogram of compressed arc buffer sizes",
4172 arc_compression_stats},
4173 { NULL }
4174 };
4175
/*
 * Table of walkers exported by this module.  Each entry lists the walker
 * name, a one-line description, and its init and step routines; the last
 * slot of every entry is NULL here (presumably the optional teardown
 * routine, which none of these walkers supplies).  The table ends with a
 * { NULL } entry.
 */
static const mdb_walker_t walkers[] = {
	{ "zms_freelist", "walk ZFS metaslab freelist",
	    freelist_walk_init, freelist_walk_step, NULL },
	{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
	    txg_list_walk_init, txg_list_walk_step, NULL },
	{ "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
	    txg_list0_walk_init, txg_list_walk_step, NULL },
	{ "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
	    txg_list1_walk_init, txg_list_walk_step, NULL },
	{ "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
	    txg_list2_walk_init, txg_list_walk_step, NULL },
	{ "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
	    txg_list3_walk_init, txg_list_walk_step, NULL },
	{ "zio", "walk all zio structures, optionally for a particular spa_t",
	    zio_walk_init, zio_walk_step, NULL },
	{ "zio_root",
	    "walk all root zio_t structures, optionally for a particular spa_t",
	    zio_walk_init, zio_walk_root_step, NULL },
	{ "spa", "walk all spa_t entries in the namespace",
	    spa_walk_init, spa_walk_step, NULL },
	{ "metaslab", "given a spa_t *, walk all metaslab_t structures",
	    metaslab_walk_init, metaslab_walk_step, NULL },
	{ "multilist", "given a multilist_t *, walk all list_t structures",
	    multilist_walk_init, multilist_walk_step, NULL },
	/* The three ACL walkers below back the ::zfs_aces dcmd. */
	{ "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes",
	    zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL },
	{ "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs",
	    zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL },
	{ "zfs_acl_node_aces0",
	    "given a zfs_acl_node_t, walk all ACEs as ace_t",
	    zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL },
	{ NULL }
};
4209
/*
 * Module description returned by _mdb_init(): the API version this module
 * was built against plus its dcmd and walker tables.
 */
static const mdb_modinfo_t modinfo = {
	MDB_API_VERSION, dcmds, walkers
};
4213
4214 const mdb_modinfo_t *
4215 _mdb_init(void)
4216 {
4217 return (&modinfo);
4218 }