11927 Log, or optionally panic, on zero-length kmem allocations
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Jason King <jason.brian.king@gmail.com>
    
      
    
    
          --- old/usr/src/cmd/mdb/common/modules/genunix/kmem.c
          +++ new/usr/src/cmd/mdb/common/modules/genunix/kmem.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26   26  /*
  27   27   * Copyright 2018 Joyent, Inc.  All rights reserved.
  28   28   * Copyright (c) 2012 by Delphix. All rights reserved.
  29   29   */
  30   30  
  31   31  #include <mdb/mdb_param.h>
  32   32  #include <mdb/mdb_modapi.h>
  33   33  #include <mdb/mdb_ctf.h>
  34   34  #include <mdb/mdb_whatis.h>
  35   35  #include <sys/cpuvar.h>
  36   36  #include <sys/kmem_impl.h>
  37   37  #include <sys/vmem_impl.h>
  38   38  #include <sys/machelf.h>
  39   39  #include <sys/modctl.h>
  40   40  #include <sys/kobj.h>
  41   41  #include <sys/panic.h>
  42   42  #include <sys/stack.h>
  43   43  #include <sys/sysmacros.h>
  44   44  #include <vm/page.h>
  45   45  
  46   46  #include "avl.h"
  47   47  #include "combined.h"
  48   48  #include "dist.h"
  49   49  #include "kmem.h"
  50   50  #include "list.h"
  51   51  
  52   52  #define dprintf(x) if (mdb_debug_level) { \
  53   53          mdb_printf("kmem debug: ");  \
  54   54          /*CSTYLED*/\
  55   55          mdb_printf x ;\
  56   56  }
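           /*
            * The doubled parentheses let a full mdb_printf() argument list
            * pass through the macro's single parameter, for example:
            *
            *	dprintf(("reading %d loaded rounds\n", rounds));
            */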
  57   57  
  58   58  #define KM_ALLOCATED            0x01
  59   59  #define KM_FREE                 0x02
  60   60  #define KM_BUFCTL               0x04
  61   61  #define KM_CONSTRUCTED          0x08    /* only constructed free buffers */
  62   62  #define KM_HASH                 0x10
  63   63  
  64   64  static int mdb_debug_level = 0;
  65   65  
  66   66  /*ARGSUSED*/
  67   67  static int
  68   68  kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
  69   69  {
  70   70          mdb_walker_t w;
  71   71          char descr[64];
  72   72  
  73   73          (void) mdb_snprintf(descr, sizeof (descr),
  74   74              "walk the %s cache", c->cache_name);
  75   75  
  76   76          w.walk_name = c->cache_name;
  77   77          w.walk_descr = descr;
  78   78          w.walk_init = kmem_walk_init;
  79   79          w.walk_step = kmem_walk_step;
  80   80          w.walk_fini = kmem_walk_fini;
  81   81          w.walk_init_arg = (void *)addr;
  82   82  
  83   83          if (mdb_add_walker(&w) == -1)
  84   84                  mdb_warn("failed to add %s walker", c->cache_name);
  85   85  
  86   86          return (WALK_NEXT);
  87   87  }
  88   88  
  89   89  /*ARGSUSED*/
  90   90  int
  91   91  kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
  92   92  {
  93   93          mdb_debug_level ^= 1;
  94   94  
  95   95          mdb_printf("kmem: debugging is now %s\n",
  96   96              mdb_debug_level ? "on" : "off");
  97   97  
  98   98          return (DCMD_OK);
  99   99  }
 100  100  
 101  101  int
 102  102  kmem_cache_walk_init(mdb_walk_state_t *wsp)
 103  103  {
 104  104          GElf_Sym sym;
 105  105  
 106  106          if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
 107  107                  mdb_warn("couldn't find kmem_caches");
 108  108                  return (WALK_ERR);
 109  109          }
 110  110  
 111  111          wsp->walk_addr = (uintptr_t)sym.st_value;
 112  112  
 113  113          return (list_walk_init_named(wsp, "cache list", "cache"));
 114  114  }
 115  115  
 116  116  int
 117  117  kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
 118  118  {
 119  119          if (wsp->walk_addr == 0) {
 120  120                  mdb_warn("kmem_cpu_cache doesn't support global walks");
 121  121                  return (WALK_ERR);
 122  122          }
 123  123  
 124  124          if (mdb_layered_walk("cpu", wsp) == -1) {
 125  125                  mdb_warn("couldn't walk 'cpu'");
 126  126                  return (WALK_ERR);
 127  127          }
 128  128  
 129  129          wsp->walk_data = (void *)wsp->walk_addr;
 130  130  
 131  131          return (WALK_NEXT);
 132  132  }
 133  133  
 134  134  int
 135  135  kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
 136  136  {
 137  137          uintptr_t caddr = (uintptr_t)wsp->walk_data;
 138  138          const cpu_t *cpu = wsp->walk_layer;
 139  139          kmem_cpu_cache_t cc;
 140  140  
 141  141          caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
 142  142  
 143  143          if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
 144  144                  mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
 145  145                  return (WALK_ERR);
 146  146          }
 147  147  
 148  148          return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
 149  149  }
 150  150  
 151  151  static int
 152  152  kmem_slab_check(void *p, uintptr_t saddr, void *arg)
 153  153  {
 154  154          kmem_slab_t *sp = p;
 155  155          uintptr_t caddr = (uintptr_t)arg;
 156  156          if ((uintptr_t)sp->slab_cache != caddr) {
 157  157                  mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
 158  158                      saddr, caddr, sp->slab_cache);
 159  159                  return (-1);
 160  160          }
 161  161  
 162  162          return (0);
 163  163  }
 164  164  
 165  165  static int
 166  166  kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
 167  167  {
 168  168          kmem_slab_t *sp = p;
 169  169  
 170  170          int rc = kmem_slab_check(p, saddr, arg);
 171  171          if (rc != 0) {
 172  172                  return (rc);
 173  173          }
 174  174  
 175  175          if (!KMEM_SLAB_IS_PARTIAL(sp)) {
 176  176                  mdb_warn("slab %p is not a partial slab\n", saddr);
 177  177                  return (-1);
 178  178          }
 179  179  
 180  180          return (0);
 181  181  }
 182  182  
 183  183  static int
 184  184  kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
 185  185  {
 186  186          kmem_slab_t *sp = p;
 187  187  
 188  188          int rc = kmem_slab_check(p, saddr, arg);
 189  189          if (rc != 0) {
 190  190                  return (rc);
 191  191          }
 192  192  
 193  193          if (!KMEM_SLAB_IS_ALL_USED(sp)) {
 194  194                  mdb_warn("slab %p is not completely allocated\n", saddr);
 195  195                  return (-1);
 196  196          }
 197  197  
 198  198          return (0);
 199  199  }
 200  200  
 201  201  typedef struct {
 202  202          uintptr_t kns_cache_addr;
 203  203          int kns_nslabs;
 204  204  } kmem_nth_slab_t;
 205  205  
 206  206  static int
 207  207  kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
 208  208  {
 209  209          kmem_nth_slab_t *chkp = arg;
 210  210  
 211  211          int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
 212  212          if (rc != 0) {
 213  213                  return (rc);
 214  214          }
 215  215  
 216  216          return (chkp->kns_nslabs-- == 0 ? 1 : 0);
 217  217  }
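           /*
            * The post-decrement means this check returns nonzero, ending the
            * checked walk, only once kns_nslabs slabs have already passed;
            * kmem_first_complete_slab_walk_init() below sets kns_nslabs to 1
            * so that at most the first complete slab is reported.
            */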
 218  218  
 219  219  static int
 220  220  kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
 221  221  {
 222  222          uintptr_t caddr = wsp->walk_addr;
 223  223  
 224  224          wsp->walk_addr = (uintptr_t)(caddr +
 225  225              offsetof(kmem_cache_t, cache_complete_slabs));
 226  226  
 227  227          return (list_walk_init_checked(wsp, "slab list", "slab",
 228  228              kmem_complete_slab_check, (void *)caddr));
 229  229  }
 230  230  
 231  231  static int
 232  232  kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
 233  233  {
 234  234          uintptr_t caddr = wsp->walk_addr;
 235  235  
 236  236          wsp->walk_addr = (uintptr_t)(caddr +
 237  237              offsetof(kmem_cache_t, cache_partial_slabs));
 238  238  
 239  239          return (avl_walk_init_checked(wsp, "slab list", "slab",
 240  240              kmem_partial_slab_check, (void *)caddr));
 241  241  }
 242  242  
 243  243  int
 244  244  kmem_slab_walk_init(mdb_walk_state_t *wsp)
 245  245  {
 246  246          uintptr_t caddr = wsp->walk_addr;
 247  247  
 248  248          if (caddr == 0) {
 249  249                  mdb_warn("kmem_slab doesn't support global walks\n");
 250  250                  return (WALK_ERR);
 251  251          }
 252  252  
 253  253          combined_walk_init(wsp);
 254  254          combined_walk_add(wsp,
 255  255              kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
 256  256          combined_walk_add(wsp,
 257  257              kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
 258  258  
 259  259          return (WALK_NEXT);
 260  260  }
 261  261  
 262  262  static int
 263  263  kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
 264  264  {
 265  265          uintptr_t caddr = wsp->walk_addr;
 266  266          kmem_nth_slab_t *chk;
 267  267  
 268  268          chk = mdb_alloc(sizeof (kmem_nth_slab_t),
 269  269              UM_SLEEP | UM_GC);
 270  270          chk->kns_cache_addr = caddr;
 271  271          chk->kns_nslabs = 1;
 272  272          wsp->walk_addr = (uintptr_t)(caddr +
 273  273              offsetof(kmem_cache_t, cache_complete_slabs));
 274  274  
 275  275          return (list_walk_init_checked(wsp, "slab list", "slab",
 276  276              kmem_nth_slab_check, chk));
 277  277  }
 278  278  
 279  279  int
 280  280  kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
 281  281  {
 282  282          uintptr_t caddr = wsp->walk_addr;
 283  283          kmem_cache_t c;
 284  284  
 285  285          if (caddr == 0) {
 286  286                  mdb_warn("kmem_slab_partial doesn't support global walks\n");
 287  287                  return (WALK_ERR);
 288  288          }
 289  289  
 290  290          if (mdb_vread(&c, sizeof (c), caddr) == -1) {
 291  291                  mdb_warn("couldn't read kmem_cache at %p", caddr);
 292  292                  return (WALK_ERR);
 293  293          }
 294  294  
 295  295          combined_walk_init(wsp);
 296  296  
 297  297          /*
 298  298           * Some consumers (umem_walk_step(), in particular) require at
 299  299           * least one callback if there are any buffers in the cache.  So
 300  300           * if there are *no* partial slabs, report the first full slab, if
 301  301           * any.
 302  302           *
 303  303           * Yes, this is ugly, but it's cleaner than the other possibilities.
 304  304           */
 305  305          if (c.cache_partial_slabs.avl_numnodes == 0) {
 306  306                  combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
 307  307                      list_walk_step, list_walk_fini);
 308  308          } else {
 309  309                  combined_walk_add(wsp, kmem_partial_slab_walk_init,
 310  310                      avl_walk_step, avl_walk_fini);
 311  311          }
 312  312  
 313  313          return (WALK_NEXT);
 314  314  }
 315  315  
 316  316  int
 317  317  kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
 318  318  {
 319  319          kmem_cache_t c;
 320  320          const char *filter = NULL;
 321  321  
 322  322          if (mdb_getopts(ac, argv,
 323  323              'n', MDB_OPT_STR, &filter,
 324  324              NULL) != ac) {
 325  325                  return (DCMD_USAGE);
 326  326          }
 327  327  
 328  328          if (!(flags & DCMD_ADDRSPEC)) {
 329  329                  if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
 330  330                          mdb_warn("can't walk kmem_cache");
 331  331                          return (DCMD_ERR);
 332  332                  }
 333  333                  return (DCMD_OK);
 334  334          }
 335  335  
 336  336          if (DCMD_HDRSPEC(flags))
 337  337                  mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
 338  338                      "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
 339  339  
 340  340          if (mdb_vread(&c, sizeof (c), addr) == -1) {
 341  341                  mdb_warn("couldn't read kmem_cache at %p", addr);
 342  342                  return (DCMD_ERR);
 343  343          }
 344  344  
 345  345          if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
 346  346                  return (DCMD_OK);
 347  347  
 348  348          mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
 349  349              c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
 350  350  
 351  351          return (DCMD_OK);
 352  352  }
 353  353  
 354  354  void
 355  355  kmem_cache_help(void)
 356  356  {
 357  357          mdb_printf("%s", "Print kernel memory caches.\n\n");
 358  358          mdb_dec_indent(2);
 359  359          mdb_printf("%<b>OPTIONS%</b>\n");
 360  360          mdb_inc_indent(2);
 361  361          mdb_printf("%s",
 362  362  "  -n name\n"
 363  363  "        name of kmem cache (or matching partial name)\n"
 364  364  "\n"
 365  365  "Column\tDescription\n"
 366  366  "\n"
 367  367  "ADDR\t\taddress of kmem cache\n"
 368  368  "NAME\t\tname of kmem cache\n"
 369  369  "FLAG\t\tvarious cache state flags\n"
 370  370  "CFLAG\t\tcache creation flags\n"
 371  371  "BUFSIZE\tobject size in bytes\n"
 372  372  "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
 373  373  }
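           /*
            * Illustrative usage (kmem_alloc_32 is one of the standard kmem
            * caches; the columns are those printed by kmem_cache() above):
            *
            *	> ::kmem_cache -n kmem_alloc_32
            */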
 374  374  
 375  375  #define LABEL_WIDTH     11
 376  376  static void
 377  377  kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
 378  378      size_t maxbuckets, size_t minbucketsize)
 379  379  {
 380  380          uint64_t total;
 381  381          int buckets;
 382  382          int i;
 383  383          const int *distarray;
 384  384          int complete[2];
 385  385  
 386  386          buckets = buffers_per_slab;
 387  387  
 388  388          total = 0;
 389  389          for (i = 0; i <= buffers_per_slab; i++)
 390  390                  total += ks_bucket[i];
 391  391  
 392  392          if (maxbuckets > 1)
 393  393                  buckets = MIN(buckets, maxbuckets);
 394  394  
 395  395          if (minbucketsize > 1) {
 396  396                  /*
 397  397                   * minbucketsize does not apply to the first bucket reserved
 398  398                   * for completely allocated slabs
 399  399                   */
 400  400                  buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
 401  401                      minbucketsize));
 402  402                  if ((buckets < 2) && (buffers_per_slab > 1)) {
 403  403                          buckets = 2;
 404  404                          minbucketsize = (buffers_per_slab - 1);
 405  405                  }
 406  406          }
 407  407  
 408  408          /*
 409  409           * The first printed bucket is reserved for completely allocated slabs.
 410  410           * Passing (buckets - 1) excludes that bucket from the generated
 411  411           * distribution, since we're handling it as a special case.
 412  412           */
 413  413          complete[0] = buffers_per_slab;
 414  414          complete[1] = buffers_per_slab + 1;
 415  415          distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
 416  416  
 417  417          mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
 418  418          dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
 419  419  
 420  420          dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
 421  421          /*
 422  422           * Print bucket ranges in descending order after the first bucket for
 423  423           * completely allocated slabs, so a person can see immediately whether
 424  424           * or not there is fragmentation without having to scan possibly
 425  425           * multiple screens of output. Starting at (buckets - 2) excludes the
 426  426           * extra terminating bucket.
 427  427           */
 428  428          for (i = buckets - 2; i >= 0; i--) {
 429  429                  dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
 430  430          }
 431  431          mdb_printf("\n");
 432  432  }
 433  433  #undef LABEL_WIDTH
 434  434  
 435  435  /*ARGSUSED*/
 436  436  static int
 437  437  kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
 438  438  {
 439  439          *is_slab = B_TRUE;
 440  440          return (WALK_DONE);
 441  441  }
 442  442  
 443  443  /*ARGSUSED*/
 444  444  static int
 445  445  kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
 446  446      boolean_t *is_slab)
 447  447  {
 448  448          /*
 449  449           * The "kmem_partial_slab" walker reports the first full slab if there
 450  450           * are no partial slabs (for the sake of consumers that require at least
 451  451           * one callback if there are any buffers in the cache).
 452  452           */
 453  453          *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
 454  454          return (WALK_DONE);
 455  455  }
 456  456  
 457  457  typedef struct kmem_slab_usage {
 458  458          int ksu_refcnt;                 /* count of allocated buffers on slab */
 459  459          boolean_t ksu_nomove;           /* slab marked non-reclaimable */
 460  460  } kmem_slab_usage_t;
 461  461  
 462  462  typedef struct kmem_slab_stats {
 463  463          const kmem_cache_t *ks_cp;
 464  464          int ks_slabs;                   /* slabs in cache */
 465  465          int ks_partial_slabs;           /* partially allocated slabs in cache */
 466  466          uint64_t ks_unused_buffers;     /* total unused buffers in cache */
 467  467          int ks_max_buffers_per_slab;    /* max buffers per slab */
 468  468          int ks_usage_len;               /* ks_usage array length */
 469  469          kmem_slab_usage_t *ks_usage;    /* partial slab usage */
 470  470          uint_t *ks_bucket;              /* slab usage distribution */
 471  471  } kmem_slab_stats_t;
 472  472  
 473  473  /*ARGSUSED*/
 474  474  static int
 475  475  kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
 476  476      kmem_slab_stats_t *ks)
 477  477  {
 478  478          kmem_slab_usage_t *ksu;
 479  479          long unused;
 480  480  
 481  481          ks->ks_slabs++;
 482  482          ks->ks_bucket[sp->slab_refcnt]++;
 483  483  
 484  484          unused = (sp->slab_chunks - sp->slab_refcnt);
 485  485          if (unused == 0) {
 486  486                  return (WALK_NEXT);
 487  487          }
 488  488  
 489  489          ks->ks_partial_slabs++;
 490  490          ks->ks_unused_buffers += unused;
 491  491  
 492  492          if (ks->ks_partial_slabs > ks->ks_usage_len) {
 493  493                  kmem_slab_usage_t *usage;
 494  494                  int len = ks->ks_usage_len;
 495  495  
 496  496                  len = (len == 0 ? 16 : len * 2);
 497  497                  usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
 498  498                  if (ks->ks_usage != NULL) {
 499  499                          bcopy(ks->ks_usage, usage,
 500  500                              ks->ks_usage_len * sizeof (kmem_slab_usage_t));
 501  501                          mdb_free(ks->ks_usage,
 502  502                              ks->ks_usage_len * sizeof (kmem_slab_usage_t));
 503  503                  }
 504  504                  ks->ks_usage = usage;
 505  505                  ks->ks_usage_len = len;
 506  506          }
 507  507  
 508  508          ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
 509  509          ksu->ksu_refcnt = sp->slab_refcnt;
 510  510          ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
 511  511          return (WALK_NEXT);
 512  512  }
 513  513  
 514  514  static void
 515  515  kmem_slabs_header(void)
 516  516  {
 517  517          mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
 518  518              "", "", "Partial", "", "Unused", "");
 519  519          mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
 520  520              "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
 521  521          mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
 522  522              "-------------------------", "--------", "--------", "---------",
 523  523              "---------", "------");
 524  524  }
 525  525  
 526  526  int
 527  527  kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 528  528  {
 529  529          kmem_cache_t c;
 530  530          kmem_slab_stats_t stats;
 531  531          mdb_walk_cb_t cb;
 532  532          int pct;
 533  533          int tenths_pct;
 534  534          size_t maxbuckets = 1;
 535  535          size_t minbucketsize = 0;
 536  536          const char *filter = NULL;
 537  537          const char *name = NULL;
 538  538          uint_t opt_v = FALSE;
 539  539          boolean_t buckets = B_FALSE;
 540  540          boolean_t skip = B_FALSE;
 541  541  
 542  542          if (mdb_getopts(argc, argv,
 543  543              'B', MDB_OPT_UINTPTR, &minbucketsize,
 544  544              'b', MDB_OPT_UINTPTR, &maxbuckets,
 545  545              'n', MDB_OPT_STR, &filter,
 546  546              'N', MDB_OPT_STR, &name,
 547  547              'v', MDB_OPT_SETBITS, TRUE, &opt_v,
 548  548              NULL) != argc) {
 549  549                  return (DCMD_USAGE);
 550  550          }
 551  551  
 552  552          if ((maxbuckets != 1) || (minbucketsize != 0)) {
 553  553                  buckets = B_TRUE;
 554  554          }
 555  555  
 556  556          if (!(flags & DCMD_ADDRSPEC)) {
 557  557                  if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
 558  558                      argv) == -1) {
 559  559                          mdb_warn("can't walk kmem_cache");
 560  560                          return (DCMD_ERR);
 561  561                  }
 562  562                  return (DCMD_OK);
 563  563          }
 564  564  
 565  565          if (mdb_vread(&c, sizeof (c), addr) == -1) {
 566  566                  mdb_warn("couldn't read kmem_cache at %p", addr);
 567  567                  return (DCMD_ERR);
 568  568          }
 569  569  
 570  570          if (name == NULL) {
 571  571                  skip = ((filter != NULL) &&
 572  572                      (strstr(c.cache_name, filter) == NULL));
 573  573          } else if (filter == NULL) {
 574  574                  skip = (strcmp(c.cache_name, name) != 0);
 575  575          } else {
 576  576                  /* match either -n or -N */
 577  577                  skip = ((strcmp(c.cache_name, name) != 0) &&
 578  578                      (strstr(c.cache_name, filter) == NULL));
 579  579          }
 580  580  
 581  581          if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
 582  582                  kmem_slabs_header();
 583  583          } else if ((opt_v || buckets) && !skip) {
 584  584                  if (DCMD_HDRSPEC(flags)) {
 585  585                          kmem_slabs_header();
 586  586                  } else {
 587  587                          boolean_t is_slab = B_FALSE;
 588  588                          const char *walker_name;
 589  589                          if (opt_v) {
 590  590                                  cb = (mdb_walk_cb_t)kmem_first_partial_slab;
 591  591                                  walker_name = "kmem_slab_partial";
 592  592                          } else {
 593  593                                  cb = (mdb_walk_cb_t)kmem_first_slab;
 594  594                                  walker_name = "kmem_slab";
 595  595                          }
 596  596                          (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
 597  597                          if (is_slab) {
 598  598                                  kmem_slabs_header();
 599  599                          }
 600  600                  }
 601  601          }
 602  602  
 603  603          if (skip) {
 604  604                  return (DCMD_OK);
 605  605          }
 606  606  
 607  607          bzero(&stats, sizeof (kmem_slab_stats_t));
 608  608          stats.ks_cp = &c;
 609  609          stats.ks_max_buffers_per_slab = c.cache_maxchunks;
 610  610          /* +1 to include a zero bucket */
 611  611          stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
 612  612              sizeof (*stats.ks_bucket), UM_SLEEP);
 613  613          cb = (mdb_walk_cb_t)kmem_slablist_stat;
 614  614          (void) mdb_pwalk("kmem_slab", cb, &stats, addr);
 615  615  
 616  616          if (c.cache_buftotal == 0) {
 617  617                  pct = 0;
 618  618                  tenths_pct = 0;
 619  619          } else {
 620  620                  uint64_t n = stats.ks_unused_buffers * 10000;
 621  621                  pct = (int)(n / c.cache_buftotal);
 622  622                  tenths_pct = pct - ((pct / 100) * 100);
 623  623                  tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
 624  624                  if (tenths_pct == 10) {
 625  625                          pct += 100;
 626  626                          tenths_pct = 0;
 627  627                  }
 628  628          }
 629  629  
 630  630          pct /= 100;
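                   /*
                    * For example, 25 unused buffers out of 200 total gives
                    * n = 250000 and pct = 1250 (hundredths of a percent);
                    * tenths_pct = (50 + 5) / 10 = 5 and pct / 100 = 12, so
                    * the line below prints "12.5%".
                    */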
 631  631          mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
 632  632              stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
 633  633              stats.ks_unused_buffers, pct, tenths_pct);
 634  634  
 635  635          if (maxbuckets == 0) {
 636  636                  maxbuckets = stats.ks_max_buffers_per_slab;
 637  637          }
 638  638  
 639  639          if (((maxbuckets > 1) || (minbucketsize > 0)) &&
 640  640              (stats.ks_slabs > 0)) {
 641  641                  mdb_printf("\n");
 642  642                  kmem_slabs_print_dist(stats.ks_bucket,
 643  643                      stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
 644  644          }
 645  645  
 646  646          mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
 647  647              sizeof (*stats.ks_bucket));
 648  648  
 649  649          if (!opt_v) {
 650  650                  return (DCMD_OK);
 651  651          }
 652  652  
 653  653          if (opt_v && (stats.ks_partial_slabs > 0)) {
 654  654                  int i;
 655  655                  kmem_slab_usage_t *ksu;
 656  656  
 657  657                  mdb_printf("  %d complete (%d), %d partial:",
 658  658                      (stats.ks_slabs - stats.ks_partial_slabs),
 659  659                      stats.ks_max_buffers_per_slab,
 660  660                      stats.ks_partial_slabs);
 661  661  
 662  662                  for (i = 0; i < stats.ks_partial_slabs; i++) {
 663  663                          ksu = &stats.ks_usage[i];
 664  664                          mdb_printf(" %d%s", ksu->ksu_refcnt,
 665  665                              (ksu->ksu_nomove ? "*" : ""));
 666  666                  }
 667  667                  mdb_printf("\n\n");
 668  668          }
 669  669  
 670  670          if (stats.ks_usage_len > 0) {
 671  671                  mdb_free(stats.ks_usage,
 672  672                      stats.ks_usage_len * sizeof (kmem_slab_usage_t));
 673  673          }
 674  674  
 675  675          return (DCMD_OK);
 676  676  }
 677  677  
 678  678  void
 679  679  kmem_slabs_help(void)
 680  680  {
 681  681          mdb_printf("%s",
 682  682  "Display slab usage per kmem cache.\n\n");
 683  683          mdb_dec_indent(2);
 684  684          mdb_printf("%<b>OPTIONS%</b>\n");
 685  685          mdb_inc_indent(2);
 686  686          mdb_printf("%s",
 687  687  "  -n name\n"
 688  688  "        name of kmem cache (or matching partial name)\n"
 689  689  "  -N name\n"
 690  690  "        exact name of kmem cache\n"
 691  691  "  -b maxbins\n"
 692  692  "        Print a distribution of allocated buffers per slab using at\n"
 693  693  "        most maxbins bins. The first bin is reserved for completely\n"
 694  694  "        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
 695  695  "        effect as specifying the maximum allocated buffers per slab\n"
 696  696  "        or setting minbinsize to 1 (-B 1).\n"
 697  697  "  -B minbinsize\n"
 698  698  "        Print a distribution of allocated buffers per slab, making\n"
 699  699  "        all bins (except the first, reserved for completely allocated\n"
 700  700  "        slabs) at least minbinsize buffers apart.\n"
 701  701  "  -v    verbose output: List the allocated buffer count of each partial\n"
 702  702  "        slab on the free list in order from front to back to show how\n"
 703  703  "        closely the slabs are ordered by usage. For example\n"
 704  704  "\n"
 705  705  "          10 complete, 3 partial (8): 7 3 1\n"
 706  706  "\n"
 707  707  "        means there are thirteen slabs with eight buffers each, including\n"
 708  708  "        three partially allocated slabs with less than all eight buffers\n"
 709  709  "        allocated.\n"
 710  710  "\n"
 711  711  "        Buffer allocations are always from the front of the partial slab\n"
 712  712  "        list. When a buffer is freed from a completely used slab, that\n"
 713  713  "        slab is added to the front of the partial slab list. Assuming\n"
 714  714  "        that all buffers are equally likely to be freed soon, the\n"
 715  715  "        desired order of partial slabs is most-used at the front of the\n"
 716  716  "        list and least-used at the back (as in the example above).\n"
 717  717  "        However, if a slab contains an allocated buffer that will not\n"
 718  718  "        soon be freed, it would be better for that slab to be at the\n"
 719  719  "        front where all of its buffers can be allocated. Taking a slab\n"
 720  720  "        off the partial slab list (either with all buffers freed or all\n"
 721  721  "        buffers allocated) reduces cache fragmentation.\n"
 722  722  "\n"
 723  723  "        A slab's allocated buffer count representing a partial slab (9 in\n"
 724  724  "        the example below) may be marked as follows:\n"
 725  725  "\n"
 726  726  "        9*   An asterisk indicates that kmem has marked the slab non-\n"
 727  727  "        reclaimable because the kmem client refused to move one of the\n"
 728  728  "        slab's buffers. Since kmem does not expect to completely free the\n"
 729  729  "        slab, it moves it to the front of the list in the hope of\n"
 730  730  "        completely allocating it instead. A slab marked with an asterisk\n"
 731  731  "        stays marked for as long as it remains on the partial slab list.\n"
 732  732  "\n"
 733  733  "Column\t\tDescription\n"
 734  734  "\n"
 735  735  "Cache Name\t\tname of kmem cache\n"
 736  736  "Slabs\t\t\ttotal slab count\n"
 737  737  "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
 738  738  "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
 739  739  "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
 740  740  "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
 741  741  "\t\t\t  for accounting structures (debug mode), slab\n"
 742  742  "\t\t\t  coloring (incremental small offsets to stagger\n"
 743  743  "\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
 744  744  }
 745  745  
 746  746  static int
 747  747  addrcmp(const void *lhs, const void *rhs)
 748  748  {
 749  749          uintptr_t p1 = *((uintptr_t *)lhs);
 750  750          uintptr_t p2 = *((uintptr_t *)rhs);
 751  751  
 752  752          if (p1 < p2)
 753  753                  return (-1);
 754  754          if (p1 > p2)
 755  755                  return (1);
 756  756          return (0);
 757  757  }
 758  758  
 759  759  static int
 760  760  bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
 761  761  {
 762  762          const kmem_bufctl_audit_t *bcp1 = *lhs;
 763  763          const kmem_bufctl_audit_t *bcp2 = *rhs;
 764  764  
 765  765          if (bcp1->bc_timestamp > bcp2->bc_timestamp)
 766  766                  return (-1);
 767  767  
 768  768          if (bcp1->bc_timestamp < bcp2->bc_timestamp)
 769  769                  return (1);
 770  770  
 771  771          return (0);
 772  772  }
 773  773  
 774  774  typedef struct kmem_hash_walk {
 775  775          uintptr_t *kmhw_table;
 776  776          size_t kmhw_nelems;
 777  777          size_t kmhw_pos;
 778  778          kmem_bufctl_t kmhw_cur;
 779  779  } kmem_hash_walk_t;
 780  780  
 781  781  int
 782  782  kmem_hash_walk_init(mdb_walk_state_t *wsp)
 783  783  {
 784  784          kmem_hash_walk_t *kmhw;
 785  785          uintptr_t *hash;
 786  786          kmem_cache_t c;
 787  787          uintptr_t haddr, addr = wsp->walk_addr;
 788  788          size_t nelems;
 789  789          size_t hsize;
 790  790  
 791  791          if (addr == 0) {
 792  792                  mdb_warn("kmem_hash doesn't support global walks\n");
 793  793                  return (WALK_ERR);
 794  794          }
 795  795  
 796  796          if (mdb_vread(&c, sizeof (c), addr) == -1) {
 797  797                  mdb_warn("couldn't read cache at addr %p", addr);
 798  798                  return (WALK_ERR);
 799  799          }
 800  800  
 801  801          if (!(c.cache_flags & KMF_HASH)) {
 802  802                  mdb_warn("cache %p doesn't have a hash table\n", addr);
 803  803                  return (WALK_DONE);             /* nothing to do */
 804  804          }
 805  805  
 806  806          kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
 807  807          kmhw->kmhw_cur.bc_next = NULL;
 808  808          kmhw->kmhw_pos = 0;
 809  809  
 810  810          kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
 811  811          hsize = nelems * sizeof (uintptr_t);
 812  812          haddr = (uintptr_t)c.cache_hash_table;
 813  813  
 814  814          kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
 815  815          if (mdb_vread(hash, hsize, haddr) == -1) {
 816  816                  mdb_warn("failed to read hash table at %p", haddr);
 817  817                  mdb_free(hash, hsize);
 818  818                  mdb_free(kmhw, sizeof (kmem_hash_walk_t));
 819  819                  return (WALK_ERR);
 820  820          }
 821  821  
 822  822          wsp->walk_data = kmhw;
 823  823  
 824  824          return (WALK_NEXT);
 825  825  }
 826  826  
 827  827  int
 828  828  kmem_hash_walk_step(mdb_walk_state_t *wsp)
 829  829  {
 830  830          kmem_hash_walk_t *kmhw = wsp->walk_data;
 831  831          uintptr_t addr = 0;
 832  832  
 833  833          if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == 0) {
 834  834                  while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
 835  835                          if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != 0)
 836  836                                  break;
 837  837                  }
 838  838          }
 839  839          if (addr == 0)
 840  840                  return (WALK_DONE);
 841  841  
 842  842          if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
 843  843                  mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
 844  844                  return (WALK_ERR);
 845  845          }
 846  846  
 847  847          return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
 848  848  }
 849  849  
 850  850  void
 851  851  kmem_hash_walk_fini(mdb_walk_state_t *wsp)
 852  852  {
 853  853          kmem_hash_walk_t *kmhw = wsp->walk_data;
 854  854  
 855  855          if (kmhw == NULL)
 856  856                  return;
 857  857  
 858  858          mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
 859  859          mdb_free(kmhw, sizeof (kmem_hash_walk_t));
 860  860  }
 861  861  
 862  862  /*
 863  863   * Find the address of the bufctl structure for the address 'buf' in cache
 864  864   * 'cp', which is at address caddr, and place it in *out.
 865  865   */
 866  866  static int
 867  867  kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
 868  868  {
 869  869          uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
 870  870          kmem_bufctl_t *bcp;
 871  871          kmem_bufctl_t bc;
 872  872  
 873  873          if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
 874  874                  mdb_warn("unable to read hash bucket for %p in cache %p",
 875  875                      buf, caddr);
 876  876                  return (-1);
 877  877          }
 878  878  
 879  879          while (bcp != NULL) {
 880  880                  if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
 881  881                      (uintptr_t)bcp) == -1) {
 882  882                          mdb_warn("unable to read bufctl at %p", bcp);
 883  883                          return (-1);
 884  884                  }
 885  885                  if (bc.bc_addr == buf) {
 886  886                          *out = (uintptr_t)bcp;
 887  887                          return (0);
 888  888                  }
 889  889                  bcp = bc.bc_next;
 890  890          }
 891  891  
 892  892          mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
 893  893          return (-1);
 894  894  }
 895  895  
 896  896  int
 897  897  kmem_get_magsize(const kmem_cache_t *cp)
 898  898  {
 899  899          uintptr_t addr = (uintptr_t)cp->cache_magtype;
 900  900          GElf_Sym mt_sym;
 901  901          kmem_magtype_t mt;
 902  902          int res;
 903  903  
 904  904          /*
 905  905           * if cpu 0 has a non-zero magsize, it must be correct.  caches
 906  906           * with KMF_NOMAGAZINE have disabled their magazine layers, so
 907  907           * it is okay to return 0 for them.
 908  908           */
 909  909          if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
 910  910              (cp->cache_flags & KMF_NOMAGAZINE))
 911  911                  return (res);
 912  912  
 913  913          if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
 914  914                  mdb_warn("unable to read 'kmem_magtype'");
 915  915          } else if (addr < mt_sym.st_value ||
 916  916              addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
 917  917              ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
 918  918                  mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
 919  919                      cp->cache_name, addr);
 920  920                  return (0);
 921  921          }
 922  922          if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
 923  923                  mdb_warn("unable to read magtype at %a", addr);
 924  924                  return (0);
 925  925          }
 926  926          return (mt.mt_magsize);
 927  927  }
 928  928  
 929  929  /*ARGSUSED*/
 930  930  static int
 931  931  kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
 932  932  {
 933  933          *est -= (sp->slab_chunks - sp->slab_refcnt);
 934  934  
 935  935          return (WALK_NEXT);
 936  936  }
 937  937  
 938  938  /*
 939  939   * Returns an upper bound on the number of allocated buffers in a given
 940  940   * cache.
 941  941   */
 942  942  size_t
 943  943  kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
 944  944  {
 945  945          int magsize;
 946  946          size_t cache_est;
 947  947  
 948  948          cache_est = cp->cache_buftotal;
 949  949  
 950  950          (void) mdb_pwalk("kmem_slab_partial",
 951  951              (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
 952  952  
 953  953          if ((magsize = kmem_get_magsize(cp)) != 0) {
 954  954                  size_t mag_est = cp->cache_full.ml_total * magsize;
 955  955  
 956  956                  if (cache_est >= mag_est) {
 957  957                          cache_est -= mag_est;
 958  958                  } else {
 959  959                          mdb_warn("cache %p's magazine layer holds more buffers "
 960  960                              "than the slab layer.\n", addr);
 961  961                  }
 962  962          }
 963  963          return (cache_est);
 964  964  }
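           /*
            * For example, a cache with cache_buftotal == 100, 20 unused
            * buffers across its partial slabs, and 3 full depot magazines
            * of 15 rounds each is estimated at 100 - 20 - 45 = 35 allocated
            * buffers; this remains an upper bound because rounds loaded in
            * the per-CPU magazines are not subtracted.
            */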
 965  965  
 966  966  #define READMAG_ROUNDS(rounds) { \
 967  967          if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
 968  968                  mdb_warn("couldn't read magazine at %p", kmp); \
 969  969                  goto fail; \
 970  970          } \
 971  971          for (i = 0; i < rounds; i++) { \
 972  972                  maglist[magcnt++] = mp->mag_round[i]; \
 973  973                  if (magcnt == magmax) { \
 974  974                          mdb_warn("%d magazines exceeds fudge factor\n", \
 975  975                              magcnt); \
 976  976                          goto fail; \
 977  977                  } \
 978  978          } \
 979  979  }
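           /*
            * READMAG_ROUNDS() assumes that mp, kmp, magbsize, maglist,
            * magcnt, magmax, and i are in scope, and that a "fail:" label
            * is available for error exits; kmem_read_magazines() below
            * provides all of these.
            */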
 980  980  
 981  981  int
 982  982  kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
 983  983      void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
 984  984  {
 985  985          kmem_magazine_t *kmp, *mp;
 986  986          void **maglist = NULL;
 987  987          int i, cpu;
 988  988          size_t magsize, magmax, magbsize;
 989  989          size_t magcnt = 0;
 990  990  
 991  991          /*
 992  992           * Read the magtype out of the cache, after verifying the pointer's
 993  993           * correctness.
 994  994           */
 995  995          magsize = kmem_get_magsize(cp);
 996  996          if (magsize == 0) {
 997  997                  *maglistp = NULL;
 998  998                  *magcntp = 0;
 999  999                  *magmaxp = 0;
1000 1000                  return (WALK_NEXT);
1001 1001          }
1002 1002  
1003 1003          /*
1004 1004           * There are several places where we need to go buffer hunting:
1005 1005           * the per-CPU loaded magazine, the per-CPU spare full magazine,
1006 1006           * and the full magazine list in the depot.
1007 1007           *
1008 1008           * For an upper bound on the number of buffers in the magazine
1009 1009           * layer, we have the number of magazines on the cache_full
1010 1010           * list plus at most two magazines per CPU (the loaded and the
1011 1011           * spare).  Toss in 100 magazines as a fudge factor in case this
1012 1012           * is live (the number "100" comes from the same fudge factor in
1013 1013           * crash(1M)).
1014 1014           */
1015 1015          magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1016 1016          magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
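                   /*
                    * For example, 10 full magazines in the depot on a 4-CPU
                    * system with 15-round magazines gives magmax =
                    * (10 + 2 * 4 + 100) * 15 = 1770 buffer slots.
                    */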
1017 1017  
1018 1018          if (magbsize >= PAGESIZE / 2) {
1019 1019                  mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1020 1020                      addr, magbsize);
1021 1021                  return (WALK_ERR);
1022 1022          }
1023 1023  
1024 1024          maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1025 1025          mp = mdb_alloc(magbsize, alloc_flags);
1026 1026          if (mp == NULL || maglist == NULL)
1027 1027                  goto fail;
1028 1028  
1029 1029          /*
1030 1030           * First up: the magazines in the depot (i.e. on the cache_full list).
1031 1031           */
1032 1032          for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1033 1033                  READMAG_ROUNDS(magsize);
1034 1034                  kmp = mp->mag_next;
1035 1035  
1036 1036                  if (kmp == cp->cache_full.ml_list)
1037 1037                          break; /* cache_full list loop detected */
1038 1038          }
1039 1039  
1040 1040          dprintf(("cache_full list done\n"));
1041 1041  
1042 1042          /*
1043 1043           * Now whip through the CPUs, snagging the loaded magazines
1044 1044           * and full spares.
1045 1045           *
1046 1046           * In order to prevent inconsistent dumps, rounds and prounds
1047 1047           * are copied aside before dumping begins.
1048 1048           */
1049 1049          for (cpu = 0; cpu < ncpus; cpu++) {
1050 1050                  kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1051 1051                  short rounds, prounds;
1052 1052  
1053 1053                  if (KMEM_DUMPCC(ccp)) {
1054 1054                          rounds = ccp->cc_dump_rounds;
1055 1055                          prounds = ccp->cc_dump_prounds;
1056 1056                  } else {
1057 1057                          rounds = ccp->cc_rounds;
1058 1058                          prounds = ccp->cc_prounds;
1059 1059                  }
1060 1060  
1061 1061                  dprintf(("reading cpu cache %p\n",
1062 1062                      (uintptr_t)ccp - (uintptr_t)cp + addr));
1063 1063  
1064 1064                  if (rounds > 0 &&
1065 1065                      (kmp = ccp->cc_loaded) != NULL) {
1066 1066                          dprintf(("reading %d loaded rounds\n", rounds));
1067 1067                          READMAG_ROUNDS(rounds);
1068 1068                  }
1069 1069  
1070 1070                  if (prounds > 0 &&
1071 1071                      (kmp = ccp->cc_ploaded) != NULL) {
1072 1072                          dprintf(("reading %d previously loaded rounds\n",
1073 1073                              prounds));
1074 1074                          READMAG_ROUNDS(prounds);
1075 1075                  }
1076 1076          }
1077 1077  
1078 1078          dprintf(("magazine layer: %d buffers\n", magcnt));
1079 1079  
1080 1080          if (!(alloc_flags & UM_GC))
1081 1081                  mdb_free(mp, magbsize);
1082 1082  
1083 1083          *maglistp = maglist;
1084 1084          *magcntp = magcnt;
1085 1085          *magmaxp = magmax;
1086 1086  
1087 1087          return (WALK_NEXT);
1088 1088  
1089 1089  fail:
1090 1090          if (!(alloc_flags & UM_GC)) {
1091 1091                  if (mp)
1092 1092                          mdb_free(mp, magbsize);
1093 1093                  if (maglist)
1094 1094                          mdb_free(maglist, magmax * sizeof (void *));
1095 1095          }
1096 1096          return (WALK_ERR);
1097 1097  }
1098 1098  
1099 1099  static int
1100 1100  kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1101 1101  {
1102 1102          return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1103 1103  }
1104 1104  
1105 1105  static int
1106 1106  bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1107 1107  {
1108 1108          kmem_bufctl_audit_t b;
1109 1109  
1110 1110          /*
1111 1111           * if KMF_AUDIT is not set, we know that we're looking at a
1112 1112           * kmem_bufctl_t.
1113 1113           */
1114 1114          if (!(cp->cache_flags & KMF_AUDIT) ||
1115 1115              mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1116 1116                  (void) memset(&b, 0, sizeof (b));
1117 1117                  if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1118 1118                          mdb_warn("unable to read bufctl at %p", buf);
1119 1119                          return (WALK_ERR);
1120 1120                  }
1121 1121          }
1122 1122  
1123 1123          return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1124 1124  }
1125 1125  
1126 1126  typedef struct kmem_walk {
1127 1127          int kmw_type;
1128 1128  
1129 1129          uintptr_t kmw_addr;             /* cache address */
1130 1130          kmem_cache_t *kmw_cp;
1131 1131          size_t kmw_csize;
1132 1132  
1133 1133          /*
1134 1134           * magazine layer
1135 1135           */
1136 1136          void **kmw_maglist;
1137 1137          size_t kmw_max;
1138 1138          size_t kmw_count;
1139 1139          size_t kmw_pos;
1140 1140  
1141 1141          /*
1142 1142           * slab layer
1143 1143           */
1144 1144          char *kmw_valid;        /* to keep track of freed buffers */
1145 1145          char *kmw_ubase;        /* buffer for slab data */
1146 1146  } kmem_walk_t;
1147 1147  
1148 1148  static int
1149 1149  kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1150 1150  {
1151 1151          kmem_walk_t *kmw;
1152 1152          int ncpus, csize;
1153 1153          kmem_cache_t *cp;
1154 1154          size_t vm_quantum;
1155 1155  
1156 1156          size_t magmax, magcnt;
1157 1157          void **maglist = NULL;
1158 1158          uint_t chunksize, slabsize;
1159 1159          int status = WALK_ERR;
1160 1160          uintptr_t addr = wsp->walk_addr;
1161 1161          const char *layered;
1162 1162  
1163 1163          type &= ~KM_HASH;
1164 1164  
1165 1165          if (addr == 0) {
1166 1166                  mdb_warn("kmem walk doesn't support global walks\n");
1167 1167                  return (WALK_ERR);
1168 1168          }
1169 1169  
1170 1170          dprintf(("walking %p\n", addr));
1171 1171  
1172 1172          /*
1173 1173           * First we need to figure out how many CPUs are configured in the
1174 1174           * system to know how much to slurp out.
1175 1175           */
1176 1176          mdb_readvar(&ncpus, "max_ncpus");
1177 1177  
1178 1178          csize = KMEM_CACHE_SIZE(ncpus);
1179 1179          cp = mdb_alloc(csize, UM_SLEEP);
1180 1180  
1181 1181          if (mdb_vread(cp, csize, addr) == -1) {
1182 1182                  mdb_warn("couldn't read cache at addr %p", addr);
1183 1183                  goto out2;
1184 1184          }
1185 1185  
1186 1186          /*
1187 1187           * It's easy for someone to hand us an invalid cache address.
1188 1188           * Unfortunately, it is hard for this walker to survive an
1189 1189           * invalid cache cleanly.  So we make sure that:
1190 1190           *
1191 1191           *      1. the vmem arena for the cache is readable,
1192 1192           *      2. the vmem arena's quantum is a power of 2,
1193 1193           *      3. our slabsize is a multiple of the quantum, and
1194 1194           *      4. our chunksize is >0 and less than our slabsize.
1195 1195           */
1196 1196          if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1197 1197              (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1198 1198              vm_quantum == 0 ||
1199 1199              (vm_quantum & (vm_quantum - 1)) != 0 ||
1200 1200              cp->cache_slabsize < vm_quantum ||
1201 1201              P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1202 1202              cp->cache_chunksize == 0 ||
1203 1203              cp->cache_chunksize > cp->cache_slabsize) {
1204 1204                  mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1205 1205                  goto out2;
1206 1206          }
1207 1207  
1208 1208          dprintf(("buf total is %d\n", cp->cache_buftotal));
1209 1209  
1210 1210          if (cp->cache_buftotal == 0) {
1211 1211                  mdb_free(cp, csize);
1212 1212                  return (WALK_DONE);
1213 1213          }
1214 1214  
1215 1215          /*
1216 1216           * If they ask for bufctls, but it's a small-slab cache,
1217 1217           * there is nothing to report.
1218 1218           */
1219 1219          if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1220 1220                  dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1221 1221                      cp->cache_flags));
1222 1222                  mdb_free(cp, csize);
1223 1223                  return (WALK_DONE);
1224 1224          }
1225 1225  
1226 1226          /*
1227 1227           * If they want constructed buffers, but there's no constructor or
1228 1228           * the cache has DEADBEEF checking enabled, there is nothing to report.
1229 1229           */
1230 1230          if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1231 1231              cp->cache_constructor == NULL ||
1232 1232              (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1233 1233                  mdb_free(cp, csize);
1234 1234                  return (WALK_DONE);
1235 1235          }
1236 1236  
1237 1237          /*
1238 1238           * Read in the contents of the magazine layer
1239 1239           */
1240 1240          if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1241 1241              &magmax, UM_SLEEP) == WALK_ERR)
1242 1242                  goto out2;
1243 1243  
1244 1244          /*
1245 1245           * We have all of the buffers from the magazines;  if we are walking
1246 1246           * allocated buffers, sort them so we can bsearch them later.
1247 1247           */
1248 1248          if (type & KM_ALLOCATED)
1249 1249                  qsort(maglist, magcnt, sizeof (void *), addrcmp);
1250 1250  
1251 1251          wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1252 1252  
1253 1253          kmw->kmw_type = type;
1254 1254          kmw->kmw_addr = addr;
1255 1255          kmw->kmw_cp = cp;
1256 1256          kmw->kmw_csize = csize;
1257 1257          kmw->kmw_maglist = maglist;
1258 1258          kmw->kmw_max = magmax;
1259 1259          kmw->kmw_count = magcnt;
1260 1260          kmw->kmw_pos = 0;
1261 1261  
1262 1262          /*
1263 1263           * When walking allocated buffers in a KMF_HASH cache, we walk the
1264 1264           * hash table instead of the slab layer.
1265 1265           */
1266 1266          if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1267 1267                  layered = "kmem_hash";
1268 1268  
1269 1269                  kmw->kmw_type |= KM_HASH;
1270 1270          } else {
1271 1271                  /*
1272 1272                   * If we are walking freed buffers, we only need the
1273 1273                   * magazine layer plus the partially allocated slabs.
1274 1274                   * To walk allocated buffers, we need all of the slabs.
1275 1275                   */
1276 1276                  if (type & KM_ALLOCATED)
1277 1277                          layered = "kmem_slab";
1278 1278                  else
1279 1279                          layered = "kmem_slab_partial";
1280 1280  
1281 1281                  /*
1282 1282                   * for small-slab caches, we read in the entire slab.  For
1283 1283                   * freed buffers, we can just walk the freelist.  For
1284 1284                   * allocated buffers, we use a 'valid' array to track
1285 1285                   * the freed buffers.
1286 1286                   */
1287 1287                  if (!(cp->cache_flags & KMF_HASH)) {
1288 1288                          chunksize = cp->cache_chunksize;
1289 1289                          slabsize = cp->cache_slabsize;
1290 1290  
1291 1291                          kmw->kmw_ubase = mdb_alloc(slabsize +
1292 1292                              sizeof (kmem_bufctl_t), UM_SLEEP);
1293 1293  
1294 1294                          if (type & KM_ALLOCATED)
1295 1295                                  kmw->kmw_valid =
1296 1296                                      mdb_alloc(slabsize / chunksize, UM_SLEEP);
1297 1297                  }
1298 1298          }
1299 1299  
1300 1300          status = WALK_NEXT;
1301 1301  
1302 1302          if (mdb_layered_walk(layered, wsp) == -1) {
1303 1303                  mdb_warn("unable to start layered '%s' walk", layered);
1304 1304                  status = WALK_ERR;
1305 1305          }
1306 1306  
1307 1307  out1:
1308 1308          if (status == WALK_ERR) {
1309 1309                  if (kmw->kmw_valid)
1310 1310                          mdb_free(kmw->kmw_valid, slabsize / chunksize);
1311 1311  
1312 1312                  if (kmw->kmw_ubase)
1313 1313                          mdb_free(kmw->kmw_ubase, slabsize +
1314 1314                              sizeof (kmem_bufctl_t));
1315 1315  
1316 1316                  if (kmw->kmw_maglist)
1317 1317                          mdb_free(kmw->kmw_maglist,
1318 1318                              kmw->kmw_max * sizeof (uintptr_t));
1319 1319  
1320 1320                  mdb_free(kmw, sizeof (kmem_walk_t));
1321 1321                  wsp->walk_data = NULL;
1322 1322          }
1323 1323  
1324 1324  out2:
1325 1325          if (status == WALK_ERR)
1326 1326                  mdb_free(cp, csize);
1327 1327  
1328 1328          return (status);
1329 1329  }
1330 1330  
1331 1331  int
1332 1332  kmem_walk_step(mdb_walk_state_t *wsp)
1333 1333  {
1334 1334          kmem_walk_t *kmw = wsp->walk_data;
1335 1335          int type = kmw->kmw_type;
1336 1336          kmem_cache_t *cp = kmw->kmw_cp;
1337 1337  
1338 1338          void **maglist = kmw->kmw_maglist;
1339 1339          int magcnt = kmw->kmw_count;
1340 1340  
1341 1341          uintptr_t chunksize, slabsize;
1342 1342          uintptr_t addr;
1343 1343          const kmem_slab_t *sp;
1344 1344          const kmem_bufctl_t *bcp;
1345 1345          kmem_bufctl_t bc;
1346 1346  
1347 1347          int chunks;
1348 1348          char *kbase;
1349 1349          void *buf;
1350 1350          int i, ret;
1351 1351  
1352 1352          char *valid, *ubase;
1353 1353  
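                   /*
                    * A sketch of the phases below: for a KM_HASH walk we
                    * filter each hash-table bufctl against the magazine
                    * list; otherwise we report the magazine layer once (for
                    * KM_FREE walks), walk the current slab's freelist, and
                    * finally report whatever chunks remain marked allocated
                    * (for small-slab KM_ALLOCATED walks).
                    */
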
1354 1354          /*
1355 1355           * first, handle the 'kmem_hash' layered walk case
1356 1356           */
1357 1357          if (type & KM_HASH) {
1358 1358                  /*
1359 1359                   * We have a buffer which has been allocated out of the
1360 1360                   * global layer. We need to make sure that it's not
1361 1361                   * actually sitting in a magazine before we report it as
1362 1362                   * an allocated buffer.
1363 1363                   */
1364 1364                  buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1365 1365  
1366 1366                  if (magcnt > 0 &&
1367 1367                      bsearch(&buf, maglist, magcnt, sizeof (void *),
1368 1368                      addrcmp) != NULL)
1369 1369                          return (WALK_NEXT);
1370 1370  
1371 1371                  if (type & KM_BUFCTL)
1372 1372                          return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1373 1373  
1374 1374                  return (kmem_walk_callback(wsp, (uintptr_t)buf));
1375 1375          }
1376 1376  
1377 1377          ret = WALK_NEXT;
1378 1378  
1379 1379          addr = kmw->kmw_addr;
1380 1380  
1381 1381          /*
1382 1382           * If we're walking freed buffers, report everything in the
1383 1383           * magazine layer before processing the first slab.
1384 1384           */
1385 1385          if ((type & KM_FREE) && magcnt != 0) {
1386 1386                  kmw->kmw_count = 0;             /* only do this once */
1387 1387                  for (i = 0; i < magcnt; i++) {
1388 1388                          buf = maglist[i];
1389 1389  
1390 1390                          if (type & KM_BUFCTL) {
1391 1391                                  uintptr_t out;
1392 1392  
1393 1393                                  if (cp->cache_flags & KMF_BUFTAG) {
1394 1394                                          kmem_buftag_t *btp;
1395 1395                                          kmem_buftag_t tag;
1396 1396  
1397 1397                                          /* LINTED - alignment */
1398 1398                                          btp = KMEM_BUFTAG(cp, buf);
1399 1399                                          if (mdb_vread(&tag, sizeof (tag),
1400 1400                                              (uintptr_t)btp) == -1) {
1401 1401                                                  mdb_warn("reading buftag for "
1402 1402                                                      "%p at %p", buf, btp);
1403 1403                                                  continue;
1404 1404                                          }
1405 1405                                          out = (uintptr_t)tag.bt_bufctl;
1406 1406                                  } else {
1407 1407                                          if (kmem_hash_lookup(cp, addr, buf,
1408 1408                                              &out) == -1)
1409 1409                                                  continue;
1410 1410                                  }
1411 1411                                  ret = bufctl_walk_callback(cp, wsp, out);
1412 1412                          } else {
1413 1413                                  ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1414 1414                          }
1415 1415  
1416 1416                          if (ret != WALK_NEXT)
1417 1417                                  return (ret);
1418 1418                  }
1419 1419          }
1420 1420  
1421 1421          /*
1422 1422           * If they want constructed buffers, we're finished, since the
1423 1423           * magazine layer holds them all.
1424 1424           */
1425 1425          if (type & KM_CONSTRUCTED)
1426 1426                  return (WALK_DONE);
1427 1427  
1428 1428          /*
1429 1429           * Handle the buffers in the current slab
1430 1430           */
1431 1431          chunksize = cp->cache_chunksize;
1432 1432          slabsize = cp->cache_slabsize;
1433 1433  
1434 1434          sp = wsp->walk_layer;
1435 1435          chunks = sp->slab_chunks;
1436 1436          kbase = sp->slab_base;
1437 1437  
1438 1438          dprintf(("kbase is %p\n", kbase));
1439 1439  
1440 1440          if (!(cp->cache_flags & KMF_HASH)) {
1441 1441                  valid = kmw->kmw_valid;
1442 1442                  ubase = kmw->kmw_ubase;
1443 1443  
1444 1444                  if (mdb_vread(ubase, chunks * chunksize,
1445 1445                      (uintptr_t)kbase) == -1) {
1446 1446                          mdb_warn("failed to read slab contents at %p", kbase);
1447 1447                          return (WALK_ERR);
1448 1448                  }
1449 1449  
1450 1450                  /*
1451 1451                   * Set up the valid map as fully allocated -- we'll punch
1452 1452                   * out the freelist.
1453 1453                   */
1454 1454                  if (type & KM_ALLOCATED)
1455 1455                          (void) memset(valid, 1, chunks);
1456 1456          } else {
1457 1457                  valid = NULL;
1458 1458                  ubase = NULL;
1459 1459          }
1460 1460  
1461 1461          /*
1462 1462           * walk the slab's freelist
1463 1463           */
1464 1464          bcp = sp->slab_head;
1465 1465  
1466 1466          dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1467 1467  
1468 1468          /*
1469 1469           * Since we could be in the middle of allocating a buffer,
1470 1470           * our refcnt could be one higher than it ought to be.  So we
1471 1471           * check one entry further along the freelist than the count allows.
1472 1472           */
1473 1473          for (i = sp->slab_refcnt; i <= chunks; i++) {
1474 1474                  uint_t ndx;
1475 1475  
1476 1476                  dprintf(("bcp is %p\n", bcp));
1477 1477  
1478 1478                  if (bcp == NULL) {
1479 1479                          if (i == chunks)
1480 1480                                  break;
1481 1481                          mdb_warn(
1482 1482                              "slab %p in cache %p freelist too short by %d\n",
1483 1483                              sp, addr, chunks - i);
1484 1484                          break;
1485 1485                  }
1486 1486  
1487 1487                  if (cp->cache_flags & KMF_HASH) {
1488 1488                          if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1489 1489                                  mdb_warn("failed to read bufctl ptr at %p",
1490 1490                                      bcp);
1491 1491                                  break;
1492 1492                          }
1493 1493                          buf = bc.bc_addr;
1494 1494                  } else {
1495 1495                          /*
1496 1496                           * Otherwise the buffer is (or should be) in the slab
1497 1497                           * that we've read in; determine its offset in the
1498 1498                           * slab, validate that it's not corrupt, and add to
1499 1499                           * our base address to find the kmem_bufctl_t.  (Note
1500 1500                           * that we don't need to add the size of the bufctl
1501 1501                           * to our offset calculation because of the slop that's
1502 1502                           * allocated for the buffer at ubase.)
1503 1503                           */
1504 1504                          uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1505 1505  
1506 1506                          if (offs > chunks * chunksize) {
1507 1507                                  mdb_warn("found corrupt bufctl ptr %p"
1508 1508                                      " in slab %p in cache %p\n", bcp,
1509 1509                                      wsp->walk_addr, addr);
1510 1510                                  break;
1511 1511                          }
1512 1512  
1513 1513                          bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
1514 1514                          buf = KMEM_BUF(cp, bcp);
1515 1515                  }
1516 1516  
1517 1517                  ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1518 1518  
1519 1519                  if (ndx > slabsize / cp->cache_bufsize) {
1520 1520                          /*
1521 1521                           * This is very wrong; we have managed to find
1522 1522                           * a buffer in the slab which shouldn't
1523 1523                           * actually be here.  Emit a warning, and
1524 1524                           * try to continue.
1525 1525                           */
1526 1526                          mdb_warn("buf %p is out of range for "
1527 1527                              "slab %p, cache %p\n", buf, sp, addr);
1528 1528                  } else if (type & KM_ALLOCATED) {
1529 1529                          /*
1530 1530                           * we have found a buffer on the slab's freelist;
1531 1531                           * clear its entry
1532 1532                           */
1533 1533                          valid[ndx] = 0;
1534 1534                  } else {
1535 1535                          /*
1536 1536                           * Report this freed buffer
1537 1537                           */
1538 1538                          if (type & KM_BUFCTL) {
1539 1539                                  ret = bufctl_walk_callback(cp, wsp,
1540 1540                                      (uintptr_t)bcp);
1541 1541                          } else {
1542 1542                                  ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1543 1543                          }
1544 1544                          if (ret != WALK_NEXT)
1545 1545                                  return (ret);
1546 1546                  }
1547 1547  
1548 1548                  bcp = bc.bc_next;
1549 1549          }
1550 1550  
1551 1551          if (bcp != NULL) {
1552 1552                  dprintf(("slab %p in cache %p freelist too long (%p)\n",
1553 1553                      sp, addr, bcp));
1554 1554          }
1555 1555  
1556 1556          /*
1557 1557           * If we are walking freed buffers, the loop above handled reporting
1558 1558           * them.
1559 1559           */
1560 1560          if (type & KM_FREE)
1561 1561                  return (WALK_NEXT);
1562 1562  
1563 1563          if (type & KM_BUFCTL) {
1564 1564                  mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1565 1565                      "cache %p\n", addr);
1566 1566                  return (WALK_ERR);
1567 1567          }
1568 1568  
1569 1569          /*
1570 1570           * Report allocated buffers, skipping buffers in the magazine layer.
1571 1571           * We only get this far for small-slab caches.
1572 1572           */
1573 1573          for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1574 1574                  buf = (char *)kbase + i * chunksize;
1575 1575  
1576 1576                  if (!valid[i])
1577 1577                          continue;               /* on slab freelist */
1578 1578  
1579 1579                  if (magcnt > 0 &&
1580 1580                      bsearch(&buf, maglist, magcnt, sizeof (void *),
1581 1581                      addrcmp) != NULL)
1582 1582                          continue;               /* in magazine layer */
1583 1583  
1584 1584                  ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1585 1585          }
1586 1586          return (ret);
1587 1587  }
1588 1588  
1589 1589  void
1590 1590  kmem_walk_fini(mdb_walk_state_t *wsp)
1591 1591  {
1592 1592          kmem_walk_t *kmw = wsp->walk_data;
1593 1593          uintptr_t chunksize;
1594 1594          uintptr_t slabsize;
1595 1595  
1596 1596          if (kmw == NULL)
1597 1597                  return;
1598 1598  
1599 1599          if (kmw->kmw_maglist != NULL)
1600 1600                  mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1601 1601  
1602 1602          chunksize = kmw->kmw_cp->cache_chunksize;
1603 1603          slabsize = kmw->kmw_cp->cache_slabsize;
1604 1604  
1605 1605          if (kmw->kmw_valid != NULL)
1606 1606                  mdb_free(kmw->kmw_valid, slabsize / chunksize);
1607 1607          if (kmw->kmw_ubase != NULL)
1608 1608                  mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1609 1609  
1610 1610          mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1611 1611          mdb_free(kmw, sizeof (kmem_walk_t));
1612 1612  }
1613 1613  
1614 1614  /*ARGSUSED*/
1615 1615  static int
1616 1616  kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1617 1617  {
1618 1618          /*
1619 1619           * Buffers allocated from NOTOUCH caches can also show up as freed
1620 1620           * memory in other caches.  This can be a little confusing, so we
1621 1621           * don't walk NOTOUCH caches when walking all caches (thereby assuring
1622 1622           * that "::walk kmem" and "::walk freemem" yield disjoint output).
1623 1623           */
1624 1624          if (c->cache_cflags & KMC_NOTOUCH)
1625 1625                  return (WALK_NEXT);
1626 1626  
1627 1627          if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1628 1628              wsp->walk_cbdata, addr) == -1)
1629 1629                  return (WALK_DONE);
1630 1630  
1631 1631          return (WALK_NEXT);
1632 1632  }
1633 1633  
1634 1634  #define KMEM_WALK_ALL(name, wsp) { \
1635 1635          wsp->walk_data = (name); \
1636 1636          if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1637 1637                  return (WALK_ERR); \
1638 1638          return (WALK_DONE); \
1639 1639  }
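
           /*
            * Note that KMEM_WALK_ALL() returns from its caller: a global
            * walk is implemented by re-invoking the named per-cache walker
            * once for each cache via the "kmem_cache" walker, rather than
            * by stepping a walker directly.
            */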
1640 1640  
1641 1641  int
1642 1642  kmem_walk_init(mdb_walk_state_t *wsp)
1643 1643  {
1644 1644          if (wsp->walk_arg != NULL)
1645 1645                  wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1646 1646  
1647 1647          if (wsp->walk_addr == 0)
1648 1648                  KMEM_WALK_ALL("kmem", wsp);
1649 1649          return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1650 1650  }
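
           /*
            * For example, "::walk kmem" (no address) visits every allocated
            * buffer in all walkable caches, while "addr::walk kmem" visits
            * only the buffers of the kmem_cache_t at addr.
            */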
1651 1651  
1652 1652  int
1653 1653  bufctl_walk_init(mdb_walk_state_t *wsp)
1654 1654  {
1655 1655          if (wsp->walk_addr == 0)
1656 1656                  KMEM_WALK_ALL("bufctl", wsp);
1657 1657          return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1658 1658  }
1659 1659  
1660 1660  int
1661 1661  freemem_walk_init(mdb_walk_state_t *wsp)
1662 1662  {
1663 1663          if (wsp->walk_addr == 0)
1664 1664                  KMEM_WALK_ALL("freemem", wsp);
1665 1665          return (kmem_walk_init_common(wsp, KM_FREE));
1666 1666  }
1667 1667  
1668 1668  int
1669 1669  freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1670 1670  {
1671 1671          if (wsp->walk_addr == 0)
1672 1672                  KMEM_WALK_ALL("freemem_constructed", wsp);
1673 1673          return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1674 1674  }
1675 1675  
1676 1676  int
1677 1677  freectl_walk_init(mdb_walk_state_t *wsp)
1678 1678  {
1679 1679          if (wsp->walk_addr == 0)
1680 1680                  KMEM_WALK_ALL("freectl", wsp);
1681 1681          return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1682 1682  }
1683 1683  
1684 1684  int
1685 1685  freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1686 1686  {
1687 1687          if (wsp->walk_addr == 0)
1688 1688                  KMEM_WALK_ALL("freectl_constructed", wsp);
1689 1689          return (kmem_walk_init_common(wsp,
1690 1690              KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1691 1691  }
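
           /*
            * To recap, the walker entry points above map onto type flags
            * as follows:
            *
            *   kmem                    KM_ALLOCATED
            *   bufctl                  KM_ALLOCATED | KM_BUFCTL
            *   freemem                 KM_FREE
            *   freemem_constructed     KM_FREE | KM_CONSTRUCTED
            *   freectl                 KM_FREE | KM_BUFCTL
            *   freectl_constructed     KM_FREE | KM_BUFCTL | KM_CONSTRUCTED
            */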
1692 1692  
1693 1693  typedef struct bufctl_history_walk {
1694 1694          void            *bhw_next;
1695 1695          kmem_cache_t    *bhw_cache;
1696 1696          kmem_slab_t     *bhw_slab;
1697 1697          hrtime_t        bhw_timestamp;
1698 1698  } bufctl_history_walk_t;
1699 1699  
1700 1700  int
1701 1701  bufctl_history_walk_init(mdb_walk_state_t *wsp)
1702 1702  {
1703 1703          bufctl_history_walk_t *bhw;
1704 1704          kmem_bufctl_audit_t bc;
1705 1705          kmem_bufctl_audit_t bcn;
1706 1706  
1707 1707          if (wsp->walk_addr == 0) {
1708 1708                  mdb_warn("bufctl_history walk doesn't support global walks\n");
1709 1709                  return (WALK_ERR);
1710 1710          }
1711 1711  
1712 1712          if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1713 1713                  mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1714 1714                  return (WALK_ERR);
1715 1715          }
1716 1716  
1717 1717          bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1718 1718          bhw->bhw_timestamp = 0;
1719 1719          bhw->bhw_cache = bc.bc_cache;
1720 1720          bhw->bhw_slab = bc.bc_slab;
1721 1721  
1722 1722          /*
1723 1723           * Sometimes the first log entry matches the base bufctl; in that
1724 1724           * case, skip the base bufctl.
1725 1725           */
1726 1726          if (bc.bc_lastlog != NULL &&
1727 1727              mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1728 1728              bc.bc_addr == bcn.bc_addr &&
1729 1729              bc.bc_cache == bcn.bc_cache &&
1730 1730              bc.bc_slab == bcn.bc_slab &&
1731 1731              bc.bc_timestamp == bcn.bc_timestamp &&
1732 1732              bc.bc_thread == bcn.bc_thread)
1733 1733                  bhw->bhw_next = bc.bc_lastlog;
1734 1734          else
1735 1735                  bhw->bhw_next = (void *)wsp->walk_addr;
1736 1736  
1737 1737          wsp->walk_addr = (uintptr_t)bc.bc_addr;
1738 1738          wsp->walk_data = bhw;
1739 1739  
1740 1740          return (WALK_NEXT);
1741 1741  }
1742 1742  
1743 1743  int
1744 1744  bufctl_history_walk_step(mdb_walk_state_t *wsp)
1745 1745  {
1746 1746          bufctl_history_walk_t *bhw = wsp->walk_data;
1747 1747          uintptr_t addr = (uintptr_t)bhw->bhw_next;
1748 1748          uintptr_t baseaddr = wsp->walk_addr;
1749 1749          kmem_bufctl_audit_t bc;
1750 1750  
1751 1751          if (addr == 0)
1752 1752                  return (WALK_DONE);
1753 1753  
1754 1754          if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1755 1755                  mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1756 1756                  return (WALK_ERR);
1757 1757          }
1758 1758  
1759 1759          /*
1760 1760           * The bufctl is only valid if the address, cache, and slab are
1761 1761           * correct.  We also check that the timestamp is decreasing, to
1762 1762           * prevent infinite loops.
1763 1763           */
1764 1764          if ((uintptr_t)bc.bc_addr != baseaddr ||
1765 1765              bc.bc_cache != bhw->bhw_cache ||
1766 1766              bc.bc_slab != bhw->bhw_slab ||
1767 1767              (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1768 1768                  return (WALK_DONE);
1769 1769  
1770 1770          bhw->bhw_next = bc.bc_lastlog;
1771 1771          bhw->bhw_timestamp = bc.bc_timestamp;
1772 1772  
1773 1773          return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1774 1774  }
1775 1775  
1776 1776  void
1777 1777  bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1778 1778  {
1779 1779          bufctl_history_walk_t *bhw = wsp->walk_data;
1780 1780  
1781 1781          mdb_free(bhw, sizeof (*bhw));
1782 1782  }
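
           /*
            * The bufctl_history walk is local-only: e.g., given a bufctl
            * address, "addr::walk bufctl_history" follows bc_lastlog back
            * through the transaction log, yielding progressively older
            * entries for the same buffer until the address, cache, slab,
            * or timestamp check in bufctl_history_walk_step() fails.
            */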
1783 1783  
1784 1784  typedef struct kmem_log_walk {
1785 1785          kmem_bufctl_audit_t *klw_base;
1786 1786          kmem_bufctl_audit_t **klw_sorted;
1787 1787          kmem_log_header_t klw_lh;
1788 1788          size_t klw_size;
1789 1789          size_t klw_maxndx;
1790 1790          size_t klw_ndx;
1791 1791  } kmem_log_walk_t;
1792 1792  
1793 1793  int
1794 1794  kmem_log_walk_init(mdb_walk_state_t *wsp)
1795 1795  {
1796 1796          uintptr_t lp = wsp->walk_addr;
1797 1797          kmem_log_walk_t *klw;
1798 1798          kmem_log_header_t *lhp;
1799 1799          int maxndx, i, j, k;
1800 1800  
1801 1801          /*
1802 1802           * By default (global walk), walk the kmem_transaction_log.  Otherwise
1803 1803           * read the log whose kmem_log_header_t is stored at walk_addr.
1804 1804           */
1805 1805          if (lp == 0 && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1806 1806                  mdb_warn("failed to read 'kmem_transaction_log'");
1807 1807                  return (WALK_ERR);
1808 1808          }
1809 1809  
1810 1810          if (lp == 0) {
1811 1811                  mdb_warn("log is disabled\n");
1812 1812                  return (WALK_ERR);
1813 1813          }
1814 1814  
1815 1815          klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1816 1816          lhp = &klw->klw_lh;
1817 1817  
1818 1818          if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1819 1819                  mdb_warn("failed to read log header at %p", lp);
1820 1820                  mdb_free(klw, sizeof (kmem_log_walk_t));
1821 1821                  return (WALK_ERR);
1822 1822          }
1823 1823  
1824 1824          klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1825 1825          klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1826 1826          maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1827 1827  
1828 1828          if (mdb_vread(klw->klw_base, klw->klw_size,
1829 1829              (uintptr_t)lhp->lh_base) == -1) {
1830 1830                  mdb_warn("failed to read log at base %p", lhp->lh_base);
1831 1831                  mdb_free(klw->klw_base, klw->klw_size);
1832 1832                  mdb_free(klw, sizeof (kmem_log_walk_t));
1833 1833                  return (WALK_ERR);
1834 1834          }
1835 1835  
1836 1836          klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1837 1837              sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1838 1838  
1839 1839          for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1840 1840                  kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1841 1841                      ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1842 1842  
1843 1843                  for (j = 0; j < maxndx; j++)
1844 1844                          klw->klw_sorted[k++] = &chunk[j];
1845 1845          }
1846 1846  
1847 1847          qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1848 1848              (int(*)(const void *, const void *))bufctlcmp);
1849 1849  
1850 1850          klw->klw_maxndx = k;
1851 1851          wsp->walk_data = klw;
1852 1852  
1853 1853          return (WALK_NEXT);
1854 1854  }
1855 1855  
1856 1856  int
1857 1857  kmem_log_walk_step(mdb_walk_state_t *wsp)
1858 1858  {
1859 1859          kmem_log_walk_t *klw = wsp->walk_data;
1860 1860          kmem_bufctl_audit_t *bcp;
1861 1861  
1862 1862          if (klw->klw_ndx == klw->klw_maxndx)
1863 1863                  return (WALK_DONE);
1864 1864  
1865 1865          bcp = klw->klw_sorted[klw->klw_ndx++];
1866 1866  
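                   /*
                    * The log was copied wholesale into klw_base, so rebase
                    * our local pointer against lh_base to recover the
                    * record's address in the target's address space.
                    */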
1867 1867          return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1868 1868              (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1869 1869  }
1870 1870  
1871 1871  void
1872 1872  kmem_log_walk_fini(mdb_walk_state_t *wsp)
1873 1873  {
1874 1874          kmem_log_walk_t *klw = wsp->walk_data;
1875 1875  
1876 1876          mdb_free(klw->klw_base, klw->klw_size);
1877 1877          mdb_free(klw->klw_sorted, klw->klw_maxndx *
1878 1878              sizeof (kmem_bufctl_audit_t *));
1879 1879          mdb_free(klw, sizeof (kmem_log_walk_t));
1880 1880  }
1881 1881  
1882 1882  typedef struct allocdby_bufctl {
1883 1883          uintptr_t abb_addr;
1884 1884          hrtime_t abb_ts;
1885 1885  } allocdby_bufctl_t;
1886 1886  
1887 1887  typedef struct allocdby_walk {
1888 1888          const char *abw_walk;
1889 1889          uintptr_t abw_thread;
1890 1890          size_t abw_nbufs;
1891 1891          size_t abw_size;
1892 1892          allocdby_bufctl_t *abw_buf;
1893 1893          size_t abw_ndx;
1894 1894  } allocdby_walk_t;
1895 1895  
1896 1896  int
1897 1897  allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1898 1898      allocdby_walk_t *abw)
1899 1899  {
1900 1900          if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1901 1901                  return (WALK_NEXT);
1902 1902  
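                   /*
                    * Grow the array by doubling when it fills, copying the
                    * old contents forward; mdb_free() needs the original
                    * size, so we track abw_size alongside.
                    */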
1903 1903          if (abw->abw_nbufs == abw->abw_size) {
1904 1904                  allocdby_bufctl_t *buf;
1905 1905                  size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1906 1906  
1907 1907                  buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1908 1908  
1909 1909                  bcopy(abw->abw_buf, buf, oldsize);
1910 1910                  mdb_free(abw->abw_buf, oldsize);
1911 1911  
1912 1912                  abw->abw_size <<= 1;
1913 1913                  abw->abw_buf = buf;
1914 1914          }
1915 1915  
1916 1916          abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1917 1917          abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1918 1918          abw->abw_nbufs++;
1919 1919  
1920 1920          return (WALK_NEXT);
1921 1921  }
1922 1922  
1923 1923  /*ARGSUSED*/
1924 1924  int
1925 1925  allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1926 1926  {
1927 1927          if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1928 1928              abw, addr) == -1) {
1929 1929                  mdb_warn("couldn't walk bufctl for cache %p", addr);
1930 1930                  return (WALK_DONE);
1931 1931          }
1932 1932  
1933 1933          return (WALK_NEXT);
1934 1934  }
1935 1935  
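           /*
            * Sort bufctls in descending timestamp order, so that the most
            * recent allocations appear first in the walk.
            */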
1936 1936  static int
1937 1937  allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1938 1938  {
1939 1939          if (lhs->abb_ts < rhs->abb_ts)
1940 1940                  return (1);
1941 1941          if (lhs->abb_ts > rhs->abb_ts)
1942 1942                  return (-1);
1943 1943          return (0);
1944 1944  }
1945 1945  
1946 1946  static int
1947 1947  allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1948 1948  {
1949 1949          allocdby_walk_t *abw;
1950 1950  
1951 1951          if (wsp->walk_addr == 0) {
1952 1952                  mdb_warn("allocdby walk doesn't support global walks\n");
1953 1953                  return (WALK_ERR);
1954 1954          }
1955 1955  
1956 1956          abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1957 1957  
1958 1958          abw->abw_thread = wsp->walk_addr;
1959 1959          abw->abw_walk = walk;
1960 1960          abw->abw_size = 128;    /* something reasonable */
1961 1961          abw->abw_buf =
1962 1962              mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1963 1963  
1964 1964          wsp->walk_data = abw;
1965 1965  
1966 1966          if (mdb_walk("kmem_cache",
1967 1967              (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1968 1968                  mdb_warn("couldn't walk kmem_cache");
1969 1969                  allocdby_walk_fini(wsp);
1970 1970                  return (WALK_ERR);
1971 1971          }
1972 1972  
1973 1973          qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1974 1974              (int(*)(const void *, const void *))allocdby_cmp);
1975 1975  
1976 1976          return (WALK_NEXT);
1977 1977  }
1978 1978  
1979 1979  int
1980 1980  allocdby_walk_init(mdb_walk_state_t *wsp)
1981 1981  {
1982 1982          return (allocdby_walk_init_common(wsp, "bufctl"));
1983 1983  }
1984 1984  
1985 1985  int
1986 1986  freedby_walk_init(mdb_walk_state_t *wsp)
1987 1987  {
1988 1988          return (allocdby_walk_init_common(wsp, "freectl"));
1989 1989  }
1990 1990  
1991 1991  int
1992 1992  allocdby_walk_step(mdb_walk_state_t *wsp)
1993 1993  {
1994 1994          allocdby_walk_t *abw = wsp->walk_data;
1995 1995          kmem_bufctl_audit_t bc;
1996 1996          uintptr_t addr;
1997 1997  
1998 1998          if (abw->abw_ndx == abw->abw_nbufs)
1999 1999                  return (WALK_DONE);
2000 2000  
2001 2001          addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2002 2002  
2003 2003          if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2004 2004                  mdb_warn("couldn't read bufctl at %p", addr);
2005 2005                  return (WALK_DONE);
2006 2006          }
2007 2007  
2008 2008          return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2009 2009  }
2010 2010  
2011 2011  void
2012 2012  allocdby_walk_fini(mdb_walk_state_t *wsp)
2013 2013  {
2014 2014          allocdby_walk_t *abw = wsp->walk_data;
2015 2015  
2016 2016          mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2017 2017          mdb_free(abw, sizeof (allocdby_walk_t));
2018 2018  }
2019 2019  
2020 2020  /*ARGSUSED*/
2021 2021  int
2022 2022  allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2023 2023  {
2024 2024          char c[MDB_SYM_NAMLEN];
2025 2025          GElf_Sym sym;
2026 2026          int i;
2027 2027  
2028 2028          mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2029 2029          for (i = 0; i < bcp->bc_depth; i++) {
2030 2030                  if (mdb_lookup_by_addr(bcp->bc_stack[i],
2031 2031                      MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2032 2032                          continue;
2033 2033                  if (strncmp(c, "kmem_", 5) == 0)
2034 2034                          continue;
2035 2035                  mdb_printf("%s+0x%lx",
2036 2036                      c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2037 2037                  break;
2038 2038          }
2039 2039          mdb_printf("\n");
2040 2040  
2041 2041          return (WALK_NEXT);
2042 2042  }
2043 2043  
2044 2044  static int
2045 2045  allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2046 2046  {
2047 2047          if (!(flags & DCMD_ADDRSPEC))
2048 2048                  return (DCMD_USAGE);
2049 2049  
2050 2050          mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2051 2051  
2052 2052          if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2053 2053                  mdb_warn("can't walk '%s' for %p", w, addr);
2054 2054                  return (DCMD_ERR);
2055 2055          }
2056 2056  
2057 2057          return (DCMD_OK);
2058 2058  }
2059 2059  
2060 2060  /*ARGSUSED*/
2061 2061  int
2062 2062  allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2063 2063  {
2064 2064          return (allocdby_common(addr, flags, "allocdby"));
2065 2065  }
2066 2066  
2067 2067  /*ARGSUSED*/
2068 2068  int
2069 2069  freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2070 2070  {
2071 2071          return (allocdby_common(addr, flags, "freedby"));
2072 2072  }
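
           /*
            * Example usage: "thread::allocdby" lists the bufctls most
            * recently allocated by the given thread, one line per bufctl
            * with its timestamp and first non-kmem caller, as printed by
            * allocdby_walk() above; "thread::freedby" does the same for
            * frees.  Both are only meaningful for caches with audit
            * (KMF_AUDIT) logging enabled.
            */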
2073 2073  
2074 2074  /*
2075 2075   * Return a string describing the address in relation to the given thread's
2076 2076   * stack.
2077 2077   *
2078 2078   * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2079 2079   *
2080 2080   * - If the address is above the stack pointer, return an empty string
2081 2081   *   signifying that the address is active.
2082 2082   *
2083 2083   * - If the address is below the stack pointer, and the thread is not on proc,
2084 2084   *   return " (below sp)".
2085 2085   *
2086 2086   * - If the address is below the stack pointer, and the thread is on proc,
2087 2087   *   return " (possibly below sp)".  Depending on context, we may or may not
2088 2088   *   have an accurate t_sp.
2089 2089   */
2090 2090  static const char *
2091 2091  stack_active(const kthread_t *t, uintptr_t addr)
2092 2092  {
2093 2093          uintptr_t panicstk;
2094 2094          GElf_Sym sym;
2095 2095  
2096 2096          if (t->t_state == TS_FREE)
2097 2097                  return (" (inactive interrupt thread)");
2098 2098  
2099 2099          /*
2100 2100           * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2101 2101           * no longer relates to the thread's real stack.
2102 2102           */
2103 2103          if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2104 2104                  panicstk = (uintptr_t)sym.st_value;
2105 2105  
2106 2106                  if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2107 2107                          return ("");
2108 2108          }
2109 2109  
2110 2110          if (addr >= t->t_sp + STACK_BIAS)
2111 2111                  return ("");
2112 2112  
2113 2113          if (t->t_state == TS_ONPROC)
2114 2114                  return (" (possibly below sp)");
2115 2115  
2116 2116          return (" (below sp)");
2117 2117  }
2118 2118  
2119 2119  /*
2120 2120   * Additional state for the kmem and vmem ::whatis handlers
2121 2121   */
2122 2122  typedef struct whatis_info {
2123 2123          mdb_whatis_t *wi_w;
2124 2124          const kmem_cache_t *wi_cache;
2125 2125          const vmem_t *wi_vmem;
2126 2126          vmem_t *wi_msb_arena;
2127 2127          size_t wi_slab_size;
2128 2128          uint_t wi_slab_found;
2129 2129          uint_t wi_kmem_lite_count;
2130 2130          uint_t wi_freemem;
2131 2131  } whatis_info_t;
2132 2132  
2133 2133  /* call one of our dcmd functions with "-v" and the provided address */
2134 2134  static void
2135 2135  whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2136 2136  {
2137 2137          mdb_arg_t a;
2138 2138          a.a_type = MDB_TYPE_STRING;
2139 2139          a.a_un.a_str = "-v";
2140 2140  
2141 2141          mdb_printf(":\n");
2142 2142          (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2143 2143  }
2144 2144  
2145 2145  static void
2146 2146  whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2147 2147  {
2148 2148  #define KMEM_LITE_MAX   16
2149 2149          pc_t callers[KMEM_LITE_MAX];
2150 2150          pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2151 2151  
2152 2152          kmem_buftag_t bt;
2153 2153          intptr_t stat;
2154 2154          const char *plural = "";
2155 2155          int i;
2156 2156  
2157 2157          /* validate our arguments and read in the buftag */
2158 2158          if (count == 0 || count > KMEM_LITE_MAX ||
2159 2159              mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2160 2160                  return;
2161 2161  
2162 2162          /* validate the buffer state and read in the callers */
2163 2163          stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2164 2164  
2165 2165          if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2166 2166                  return;
2167 2167  
2168 2168          if (mdb_vread(callers, count * sizeof (pc_t),
2169 2169              btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2170 2170                  return;
2171 2171  
2172 2172          /* If there aren't any filled in callers, bail */
2173 2173          if (callers[0] == uninit)
2174 2174                  return;
2175 2175  
2176 2176          plural = (callers[1] == uninit) ? "" : "s";
2177 2177  
2178 2178          /* Everything's done and checked; print them out */
2179 2179          mdb_printf(":\n");
2180 2180  
2181 2181          mdb_inc_indent(8);
2182 2182          mdb_printf("recent caller%s: %a", plural, callers[0]);
2183 2183          for (i = 1; i < count; i++) {
2184 2184                  if (callers[i] == uninit)
2185 2185                          break;
2186 2186                  mdb_printf(", %a", callers[i]);
2187 2187          }
2188 2188          mdb_dec_indent(8);
2189 2189  }
2190 2190  
2191 2191  static void
2192 2192  whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2193 2193      uintptr_t baddr)
2194 2194  {
2195 2195          mdb_whatis_t *w = wi->wi_w;
2196 2196  
2197 2197          const kmem_cache_t *cp = wi->wi_cache;
2198 2198          /* LINTED pointer cast may result in improper alignment */
2199 2199          uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2200 2200          int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2201 2201          int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2202 2202  
2203 2203          mdb_whatis_report_object(w, maddr, addr, "");
2204 2204  
2205 2205          if (baddr != 0 && !call_printer)
2206 2206                  mdb_printf("bufctl %p ", baddr);
2207 2207  
2208 2208          mdb_printf("%s from %s",
2209 2209              (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2210 2210  
2211 2211          if (baddr != 0 && call_printer) {
2212 2212                  whatis_call_printer(bufctl, baddr);
2213 2213                  return;
2214 2214          }
2215 2215  
2216 2216          /* for KMF_LITE caches, try to print out the previous callers */
2217 2217          if (!quiet && (cp->cache_flags & KMF_LITE))
2218 2218                  whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2219 2219  
2220 2220          mdb_printf("\n");
2221 2221  }
2222 2222  
2223 2223  /*ARGSUSED*/
2224 2224  static int
2225 2225  whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2226 2226  {
2227 2227          mdb_whatis_t *w = wi->wi_w;
2228 2228  
2229 2229          uintptr_t cur;
2230 2230          size_t size = wi->wi_cache->cache_bufsize;
2231 2231  
2232 2232          while (mdb_whatis_match(w, addr, size, &cur))
2233 2233                  whatis_print_kmem(wi, cur, addr, 0);
2234 2234  
2235 2235          return (WHATIS_WALKRET(w));
2236 2236  }
2237 2237  
2238 2238  /*ARGSUSED*/
2239 2239  static int
2240 2240  whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2241 2241  {
2242 2242          mdb_whatis_t *w = wi->wi_w;
2243 2243  
2244 2244          uintptr_t cur;
2245 2245          uintptr_t addr = (uintptr_t)bcp->bc_addr;
2246 2246          size_t size = wi->wi_cache->cache_bufsize;
2247 2247  
2248 2248          while (mdb_whatis_match(w, addr, size, &cur))
2249 2249                  whatis_print_kmem(wi, cur, addr, baddr);
2250 2250  
2251 2251          return (WHATIS_WALKRET(w));
2252 2252  }
2253 2253  
2254 2254  static int
2255 2255  whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2256 2256  {
2257 2257          mdb_whatis_t *w = wi->wi_w;
2258 2258  
2259 2259          size_t size = vs->vs_end - vs->vs_start;
2260 2260          uintptr_t cur;
2261 2261  
2262 2262          /* We're not interested in anything but alloc and free segments */
2263 2263          if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2264 2264                  return (WALK_NEXT);
2265 2265  
2266 2266          while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2267 2267                  mdb_whatis_report_object(w, cur, vs->vs_start, "");
2268 2268  
2269 2269                  /*
2270 2270                   * If we're not printing it separately, provide the vmem_seg
2271 2271                   * pointer if it has a stack trace.
2272 2272                   */
2273 2273                  if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2274 2274                      (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2275 2275                      (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2276 2276                          mdb_printf("vmem_seg %p ", addr);
2277 2277                  }
2278 2278  
2279 2279                  mdb_printf("%s from the %s vmem arena",
2280 2280                      (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2281 2281                      wi->wi_vmem->vm_name);
2282 2282  
2283 2283                  if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2284 2284                          whatis_call_printer(vmem_seg, addr);
2285 2285                  else
2286 2286                          mdb_printf("\n");
2287 2287          }
2288 2288  
2289 2289          return (WHATIS_WALKRET(w));
2290 2290  }
2291 2291  
2292 2292  static int
2293 2293  whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2294 2294  {
2295 2295          mdb_whatis_t *w = wi->wi_w;
2296 2296          const char *nm = vmem->vm_name;
2297 2297  
2298 2298          int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2299 2299          int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2300 2300  
2301 2301          if (identifier != idspace)
2302 2302                  return (WALK_NEXT);
2303 2303  
2304 2304          wi->wi_vmem = vmem;
2305 2305  
2306 2306          if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2307 2307                  mdb_printf("Searching vmem arena %s...\n", nm);
2308 2308  
2309 2309          if (mdb_pwalk("vmem_seg",
2310 2310              (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2311 2311                  mdb_warn("can't walk vmem_seg for %p", addr);
2312 2312                  return (WALK_NEXT);
2313 2313          }
2314 2314  
2315 2315          return (WHATIS_WALKRET(w));
2316 2316  }
2317 2317  
2318 2318  /*ARGSUSED*/
2319 2319  static int
2320 2320  whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2321 2321  {
2322 2322          mdb_whatis_t *w = wi->wi_w;
2323 2323  
2324 2324          /* It must overlap with the slab data, or it's not interesting */
2325 2325          if (mdb_whatis_overlaps(w,
2326 2326              (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2327 2327                  wi->wi_slab_found++;
2328 2328                  return (WALK_DONE);
2329 2329          }
2330 2330          return (WALK_NEXT);
2331 2331  }
2332 2332  
2333 2333  static int
2334 2334  whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2335 2335  {
2336 2336          mdb_whatis_t *w = wi->wi_w;
2337 2337  
2338 2338          char *walk, *freewalk;
2339 2339          mdb_walk_cb_t func;
2340 2340          int do_bufctl;
2341 2341  
2342 2342          int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2343 2343          int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2344 2344  
2345 2345          if (identifier != idspace)
2346 2346                  return (WALK_NEXT);
2347 2347  
2348 2348          /* Override the '-b' flag as necessary */
2349 2349          if (!(c->cache_flags & KMF_HASH))
2350 2350                  do_bufctl = FALSE;      /* no bufctls to walk */
2351 2351          else if (c->cache_flags & KMF_AUDIT)
2352 2352                  do_bufctl = TRUE;       /* we always want debugging info */
2353 2353          else
2354 2354                  do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2355 2355  
2356 2356          if (do_bufctl) {
2357 2357                  walk = "bufctl";
2358 2358                  freewalk = "freectl";
2359 2359                  func = (mdb_walk_cb_t)whatis_walk_bufctl;
2360 2360          } else {
2361 2361                  walk = "kmem";
2362 2362                  freewalk = "freemem";
2363 2363                  func = (mdb_walk_cb_t)whatis_walk_kmem;
2364 2364          }
2365 2365  
2366 2366          wi->wi_cache = c;
2367 2367  
2368 2368          if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2369 2369                  mdb_printf("Searching %s...\n", c->cache_name);
2370 2370  
2371 2371          /*
2372 2372           * If more than two buffers live on each slab, figure out if we're
2373 2373           * interested in anything in any slab before doing the more expensive
2374 2374           * kmem/freemem (bufctl/freectl) walkers.
2375 2375           */
2376 2376          wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2377 2377          if (!(c->cache_flags & KMF_HASH))
2378 2378                  wi->wi_slab_size -= sizeof (kmem_slab_t);
2379 2379  
2380 2380          if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2381 2381                  wi->wi_slab_found = 0;
2382 2382                  if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2383 2383                      addr) == -1) {
2384 2384                          mdb_warn("can't find kmem_slab walker");
2385 2385                          return (WALK_DONE);
2386 2386                  }
2387 2387                  if (wi->wi_slab_found == 0)
2388 2388                          return (WALK_NEXT);
2389 2389          }
2390 2390  
2391 2391          wi->wi_freemem = FALSE;
2392 2392          if (mdb_pwalk(walk, func, wi, addr) == -1) {
2393 2393                  mdb_warn("can't find %s walker", walk);
2394 2394                  return (WALK_DONE);
2395 2395          }
2396 2396  
2397 2397          if (mdb_whatis_done(w))
2398 2398                  return (WALK_DONE);
2399 2399  
2400 2400          /*
2401 2401           * We have searched for allocated memory; now search for freed memory.
2402 2402           */
2403 2403          if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2404 2404                  mdb_printf("Searching %s for free memory...\n", c->cache_name);
2405 2405  
2406 2406          wi->wi_freemem = TRUE;
2407 2407          if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2408 2408                  mdb_warn("can't find %s walker", freewalk);
2409 2409                  return (WALK_DONE);
2410 2410          }
2411 2411  
2412 2412          return (WHATIS_WALKRET(w));
2413 2413  }
2414 2414  
2415 2415  static int
2416 2416  whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2417 2417  {
2418 2418          if (c->cache_arena == wi->wi_msb_arena ||
2419 2419              (c->cache_cflags & KMC_NOTOUCH))
2420 2420                  return (WALK_NEXT);
2421 2421  
2422 2422          return (whatis_walk_cache(addr, c, wi));
2423 2423  }
2424 2424  
2425 2425  static int
2426 2426  whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2427 2427  {
2428 2428          if (c->cache_arena != wi->wi_msb_arena)
2429 2429                  return (WALK_NEXT);
2430 2430  
2431 2431          return (whatis_walk_cache(addr, c, wi));
2432 2432  }
2433 2433  
2434 2434  static int
2435 2435  whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2436 2436  {
2437 2437          if (c->cache_arena == wi->wi_msb_arena ||
2438 2438              !(c->cache_cflags & KMC_NOTOUCH))
2439 2439                  return (WALK_NEXT);
2440 2440  
2441 2441          return (whatis_walk_cache(addr, c, wi));
2442 2442  }
2443 2443  
2444 2444  static int
2445 2445  whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2446 2446  {
2447 2447          uintptr_t cur;
2448 2448          uintptr_t saddr;
2449 2449          size_t size;
2450 2450  
2451 2451          /*
2452 2452           * Often, one calls ::whatis on an address from a thread structure.
2453 2453           * We use this opportunity to short-circuit this case...
2454 2454           */
2455 2455          while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2456 2456                  mdb_whatis_report_object(w, cur, addr,
2457 2457                      "allocated as a thread structure\n");
2458 2458  
2459 2459          /*
2460 2460           * Now check the stack
2461 2461           */
2462 2462          if (t->t_stkbase == NULL)
2463 2463                  return (WALK_NEXT);
2464 2464  
2465 2465          /*
2466 2466           * This assumes that t_stk is the end of the stack, but it's really
2467 2467           * only the initial stack pointer for the thread.  Arguments to the
2468 2468           * initial procedure, SA(MINFRAME), etc. are all after t_stk.  So
2469 2469           * that 't->t_stk::whatis' reports "part of t's stack", we include
2470 2470           * t_stk in the range (the "+ 1", below), but the kernel should
2471 2471           * really include the full stack bounds where we can find it.
2472 2472           */
2473 2473          saddr = (uintptr_t)t->t_stkbase;
2474 2474          size = (uintptr_t)t->t_stk - saddr + 1;
2475 2475          while (mdb_whatis_match(w, saddr, size, &cur))
2476 2476                  mdb_whatis_report_object(w, cur, cur,
2477 2477                      "in thread %p's stack%s\n", addr, stack_active(t, cur));
2478 2478  
2479 2479          return (WHATIS_WALKRET(w));
2480 2480  }
2481 2481  
2482 2482  static void
2483 2483  whatis_modctl_match(mdb_whatis_t *w, const char *name,
2484 2484      uintptr_t base, size_t size, const char *where)
2485 2485  {
2486 2486          uintptr_t cur;
2487 2487  
2488 2488          /*
2489 2489           * Since we're searching for addresses inside a module, we report
2490 2490           * them as symbols.
2491 2491           */
2492 2492          while (mdb_whatis_match(w, base, size, &cur))
2493 2493                  mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2494 2494  }
2495 2495  
2496 2496  static int
2497 2497  whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2498 2498  {
2499 2499          char name[MODMAXNAMELEN];
2500 2500          struct module mod;
2501 2501          Shdr shdr;
2502 2502  
2503 2503          if (m->mod_mp == NULL)
2504 2504                  return (WALK_NEXT);
2505 2505  
2506 2506          if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2507 2507                  mdb_warn("couldn't read modctl %p's module", addr);
2508 2508                  return (WALK_NEXT);
2509 2509          }
2510 2510  
2511 2511          if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2512 2512                  (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2513 2513  
2514 2514          whatis_modctl_match(w, name,
2515 2515              (uintptr_t)mod.text, mod.text_size, "text segment");
2516 2516          whatis_modctl_match(w, name,
2517 2517              (uintptr_t)mod.data, mod.data_size, "data segment");
2518 2518          whatis_modctl_match(w, name,
2519 2519              (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2520 2520  
2521 2521          if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2522 2522                  mdb_warn("couldn't read symbol header for %p's module", addr);
2523 2523                  return (WALK_NEXT);
2524 2524          }
2525 2525  
2526 2526          whatis_modctl_match(w, name,
2527 2527              (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2528 2528          whatis_modctl_match(w, name,
2529 2529              (uintptr_t)mod.symspace, mod.symsize, "symtab");
2530 2530  
2531 2531          return (WHATIS_WALKRET(w));
2532 2532  }
2533 2533  
2534 2534  /*ARGSUSED*/
2535 2535  static int
2536 2536  whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2537 2537  {
2538 2538          uintptr_t cur;
2539 2539  
2540 2540          uintptr_t base = (uintptr_t)seg->pages;
2541 2541          size_t size = (uintptr_t)seg->epages - base;
2542 2542  
2543 2543          while (mdb_whatis_match(w, base, size, &cur)) {
2544 2544                  /* round our found pointer down to the page_t base. */
2545 2545                  size_t offset = (cur - base) % sizeof (page_t);
2546 2546  
2547 2547                  mdb_whatis_report_object(w, cur, cur - offset,
2548 2548                      "allocated as a page structure\n");
2549 2549          }
2550 2550  
2551 2551          return (WHATIS_WALKRET(w));
2552 2552  }
2553 2553  
2554 2554  /*ARGSUSED*/
2555 2555  static int
2556 2556  whatis_run_modules(mdb_whatis_t *w, void *arg)
2557 2557  {
2558 2558          if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2559 2559                  mdb_warn("couldn't find modctl walker");
2560 2560                  return (1);
2561 2561          }
2562 2562          return (0);
2563 2563  }
2564 2564  
2565 2565  /*ARGSUSED*/
2566 2566  static int
2567 2567  whatis_run_threads(mdb_whatis_t *w, void *ignored)
2568 2568  {
2569 2569          /*
2570 2570           * Now search all thread stacks.  Yes, this is a little weak; we
2571 2571           * can save a lot of work by first checking to see if the
2572 2572           * address is in segkp vs. segkmem.  But hey, computers are
2573 2573           * fast.
2574 2574           */
2575 2575          if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2576 2576                  mdb_warn("couldn't find thread walker");
2577 2577                  return (1);
2578 2578          }
2579 2579          return (0);
2580 2580  }
2581 2581  
2582 2582  /*ARGSUSED*/
2583 2583  static int
2584 2584  whatis_run_pages(mdb_whatis_t *w, void *ignored)
2585 2585  {
2586 2586          if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2587 2587                  mdb_warn("couldn't find memseg walker");
2588 2588                  return (1);
2589 2589          }
2590 2590          return (0);
2591 2591  }
2592 2592  
2593 2593  /*ARGSUSED*/
2594 2594  static int
2595 2595  whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2596 2596  {
2597 2597          whatis_info_t wi;
2598 2598  
2599 2599          bzero(&wi, sizeof (wi));
2600 2600          wi.wi_w = w;
2601 2601  
2602 2602          if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2603 2603                  mdb_warn("unable to readvar \"kmem_msb_arena\"");
2604 2604  
2605 2605          if (mdb_readvar(&wi.wi_kmem_lite_count,
2606 2606              "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2607 2607                  wi.wi_kmem_lite_count = 0;
2608 2608  
2609 2609          /*
2610 2610           * We process kmem caches in the following order:
2611 2611           *
2612 2612           *      non-KMC_NOTOUCH, non-metadata   (typically the most interesting)
2613 2613           *      metadata                        (can be huge with KMF_AUDIT)
2614 2614           *      KMC_NOTOUCH, non-metadata       (see kmem_walk_all())
2615 2615           */
2616 2616          if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2617 2617              &wi) == -1 ||
2618 2618              mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2619 2619              &wi) == -1 ||
2620 2620              mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2621 2621              &wi) == -1) {
2622 2622                  mdb_warn("couldn't find kmem_cache walker");
2623 2623                  return (1);
2624 2624          }
2625 2625          return (0);
2626 2626  }
2627 2627  
2628 2628  /*ARGSUSED*/
2629 2629  static int
2630 2630  whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2631 2631  {
2632 2632          whatis_info_t wi;
2633 2633  
2634 2634          bzero(&wi, sizeof (wi));
2635 2635          wi.wi_w = w;
2636 2636  
2637 2637          if (mdb_walk("vmem_postfix",
2638 2638              (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2639 2639                  mdb_warn("couldn't find vmem_postfix walker");
2640 2640                  return (1);
2641 2641          }
2642 2642          return (0);
2643 2643  }
2644 2644  
2645 2645  typedef struct kmem_log_cpu {
2646 2646          uintptr_t kmc_low;
2647 2647          uintptr_t kmc_high;
2648 2648  } kmem_log_cpu_t;
2649 2649  
2650 2650  typedef struct kmem_log_data {
2651 2651          uintptr_t kmd_addr;
2652 2652          kmem_log_cpu_t *kmd_cpu;
2653 2653  } kmem_log_data_t;
2654 2654  
2655 2655  int
2656 2656  kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2657 2657      kmem_log_data_t *kmd)
2658 2658  {
2659 2659          int i;
2660 2660          kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2661 2661          size_t bufsize;
2662 2662  
2663 2663          for (i = 0; i < NCPU; i++) {
2664 2664                  if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2665 2665                          break;
2666 2666          }
2667 2667  
2668 2668          if (kmd->kmd_addr) {
2669 2669                  if (b->bc_cache == NULL)
2670 2670                          return (WALK_NEXT);
2671 2671  
2672 2672                  if (mdb_vread(&bufsize, sizeof (bufsize),
2673 2673                      (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2674 2674                          mdb_warn(
2675 2675                              "failed to read cache_bufsize for cache at %p",
2676 2676                              b->bc_cache);
2677 2677                          return (WALK_ERR);
2678 2678                  }
2679 2679  
2680 2680                  if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2681 2681                      kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2682 2682                          return (WALK_NEXT);
2683 2683          }
2684 2684  
2685 2685          if (i == NCPU)
2686 2686                  mdb_printf("   ");
2687 2687          else
2688 2688                  mdb_printf("%3d", i);
2689 2689  
2690 2690          mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2691 2691              b->bc_timestamp, b->bc_thread);
2692 2692  
2693 2693          return (WALK_NEXT);
2694 2694  }
2695 2695  
2696 2696  /*ARGSUSED*/
2697 2697  int
2698 2698  kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2699 2699  {
2700 2700          kmem_log_header_t lh;
2701 2701          kmem_cpu_log_header_t clh;
2702 2702          uintptr_t lhp, clhp;
2703 2703          int ncpus;
2704 2704          uintptr_t *cpu;
2705 2705          GElf_Sym sym;
2706 2706          kmem_log_cpu_t *kmc;
2707 2707          int i;
2708 2708          kmem_log_data_t kmd;
2709 2709          uint_t opt_b = FALSE;
2710 2710  
2711 2711          if (mdb_getopts(argc, argv,
2712 2712              'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2713 2713                  return (DCMD_USAGE);
2714 2714  
2715 2715          if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2716 2716                  mdb_warn("failed to read 'kmem_transaction_log'");
2717 2717                  return (DCMD_ERR);
2718 2718          }
2719 2719  
2720 2720          if (lhp == 0) {
2721 2721                  mdb_warn("no kmem transaction log\n");
2722 2722                  return (DCMD_ERR);
2723 2723          }
2724 2724  
2725 2725          mdb_readvar(&ncpus, "ncpus");
2726 2726  
2727 2727          if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2728 2728                  mdb_warn("failed to read log header at %p", lhp);
2729 2729                  return (DCMD_ERR);
2730 2730          }
2731 2731  
2732 2732          clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2733 2733  
2734 2734          cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2735 2735  
2736 2736          if (mdb_lookup_by_name("cpu", &sym) == -1) {
2737 2737                  mdb_warn("couldn't find 'cpu' array");
2738 2738                  return (DCMD_ERR);
2739 2739          }
2740 2740  
2741 2741          if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2742 2742                  mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2743 2743                      NCPU * sizeof (uintptr_t), sym.st_size);
2744 2744                  return (DCMD_ERR);
2745 2745          }
2746 2746  
2747 2747          if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2748 2748                  mdb_warn("failed to read cpu array at %p", sym.st_value);
2749 2749                  return (DCMD_ERR);
2750 2750          }
2751 2751  
2752 2752          kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2753 2753          kmd.kmd_addr = 0;
2754 2754          kmd.kmd_cpu = kmc;
2755 2755  
2756 2756          for (i = 0; i < NCPU; i++) {
2757 2757  
2758 2758                  if (cpu[i] == 0)
2759 2759                          continue;
2760 2760  
2761 2761                  if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2762 2762                          mdb_warn("cannot read cpu %d's log header at %p",
2763 2763                              i, clhp);
2764 2764                          return (DCMD_ERR);
2765 2765                  }
2766 2766  
2767 2767                  kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2768 2768                      (uintptr_t)lh.lh_base;
2769 2769                  kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2770 2770  
2771 2771                  clhp += sizeof (kmem_cpu_log_header_t);
2772 2772          }
2773 2773  
2774 2774          mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2775 2775              "TIMESTAMP", "THREAD");
2776 2776  
2777 2777          /*
2778 2778           * If we have been passed an address, print only the log entries for
2779 2779           * that address: with -b, the address is matched against each entry's
2780 2780           * buffer; otherwise it is read as a bufctl and that entry is printed.
2781 2781           */
2782 2782          if (flags & DCMD_ADDRSPEC) {
2783 2783                  kmem_bufctl_audit_t b;
2784 2784  
2785 2785                  if (opt_b) {
2786 2786                          kmd.kmd_addr = addr;
2787 2787                  } else {
2788 2788                          if (mdb_vread(&b,
2789 2789                              sizeof (kmem_bufctl_audit_t), addr) == -1) {
2790 2790                                  mdb_warn("failed to read bufctl at %p", addr);
2791 2791                                  return (DCMD_ERR);
2792 2792                          }
2793 2793  
2794 2794                          (void) kmem_log_walk(addr, &b, &kmd);
2795 2795  
2796 2796                          return (DCMD_OK);
2797 2797                  }
2798 2798          }
2799 2799  
2800 2800          if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2801 2801                  mdb_warn("can't find kmem log walker");
2802 2802                  return (DCMD_ERR);
2803 2803          }
2804 2804  
2805 2805          return (DCMD_OK);
2806 2806  }
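/*
 * Example invocations (a sketch; the addresses are hypothetical):
 *
 *	> ::kmem_log			! dump every transaction log entry
 *	> bufctl::kmem_log		! print the entry for a single bufctl
 *	> addr::kmem_log -b		! entries whose buffer contains addr
 */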
2807 2807  
2808 2808  typedef struct bufctl_history_cb {
2809 2809          int             bhc_flags;
2810 2810          int             bhc_argc;
2811 2811          const mdb_arg_t *bhc_argv;
2812 2812          int             bhc_ret;
2813 2813  } bufctl_history_cb_t;
2814 2814  
2815 2815  /*ARGSUSED*/
2816 2816  static int
2817 2817  bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2818 2818  {
2819 2819          bufctl_history_cb_t *bhc = arg;
2820 2820  
2821 2821          bhc->bhc_ret =
2822 2822              bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2823 2823  
2824 2824          bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2825 2825  
2826 2826          return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2827 2827  }
2828 2828  
2829 2829  void
2830 2830  bufctl_help(void)
2831 2831  {
2832 2832          mdb_printf("%s",
2833 2833  "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2834 2834          mdb_dec_indent(2);
2835 2835          mdb_printf("%<b>OPTIONS%</b>\n");
2836 2836          mdb_inc_indent(2);
2837 2837          mdb_printf("%s",
2838 2838  "  -v    Display the full content of the bufctl, including its stack trace\n"
2839 2839  "  -h    retrieve the bufctl's transaction history, if available\n"
2840 2840  "  -a addr\n"
2841 2841  "        filter out bufctls not involving the buffer at addr\n"
2842 2842  "  -c caller\n"
2843 2843  "        filter out bufctls without the function/PC in their stack trace\n"
2844 2844  "  -e earliest\n"
2845 2845  "        filter out bufctls timestamped before earliest\n"
2846 2846  "  -l latest\n"
2847 2847  "        filter out bufctls timestamped after latest\n"
2848 2848  "  -t thread\n"
2849 2849  "        filter out bufctls not involving thread\n");
2850 2850  }
2851 2851  
2852 2852  int
2853 2853  bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2854 2854  {
2855 2855          kmem_bufctl_audit_t bc;
2856 2856          uint_t verbose = FALSE;
2857 2857          uint_t history = FALSE;
2858 2858          uint_t in_history = FALSE;
2859 2859          uintptr_t caller = 0, thread = 0;
2860 2860          uintptr_t laddr, haddr, baddr = 0;
2861 2861          hrtime_t earliest = 0, latest = 0;
2862 2862          int i, depth;
2863 2863          char c[MDB_SYM_NAMLEN];
2864 2864          GElf_Sym sym;
2865 2865  
2866 2866          if (mdb_getopts(argc, argv,
2867 2867              'v', MDB_OPT_SETBITS, TRUE, &verbose,
2868 2868              'h', MDB_OPT_SETBITS, TRUE, &history,
2869 2869              'H', MDB_OPT_SETBITS, TRUE, &in_history,            /* internal */
2870 2870              'c', MDB_OPT_UINTPTR, &caller,
2871 2871              't', MDB_OPT_UINTPTR, &thread,
2872 2872              'e', MDB_OPT_UINT64, &earliest,
2873 2873              'l', MDB_OPT_UINT64, &latest,
2874 2874              'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2875 2875                  return (DCMD_USAGE);
2876 2876  
2877 2877          if (!(flags & DCMD_ADDRSPEC))
2878 2878                  return (DCMD_USAGE);
2879 2879  
2880 2880          if (in_history && !history)
2881 2881                  return (DCMD_USAGE);
2882 2882  
2883 2883          if (history && !in_history) {
2884 2884                  mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2885 2885                      UM_SLEEP | UM_GC);
2886 2886                  bufctl_history_cb_t bhc;
2887 2887  
2888 2888                  nargv[0].a_type = MDB_TYPE_STRING;
2889 2889                  nargv[0].a_un.a_str = "-H";             /* prevent recursion */
2890 2890  
2891 2891                  for (i = 0; i < argc; i++)
2892 2892                          nargv[i + 1] = argv[i];
2893 2893  
2894 2894                  /*
2895 2895                   * When in history mode, we treat each element as if it
2896 2896                   * were in a separate loop, so that the headers group
2897 2897                   * bufctls with similar histories.
2898 2898                   */
2899 2899                  bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2900 2900                  bhc.bhc_argc = argc + 1;
2901 2901                  bhc.bhc_argv = nargv;
2902 2902                  bhc.bhc_ret = DCMD_OK;
2903 2903  
2904 2904                  if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2905 2905                      addr) == -1) {
2906 2906                          mdb_warn("unable to walk bufctl_history");
2907 2907                          return (DCMD_ERR);
2908 2908                  }
2909 2909  
2910 2910                  if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2911 2911                          mdb_printf("\n");
2912 2912  
2913 2913                  return (bhc.bhc_ret);
2914 2914          }
2915 2915  
2916 2916          if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2917 2917                  if (verbose) {
2918 2918                          mdb_printf("%16s %16s %16s %16s\n"
2919 2919                              "%<u>%16s %16s %16s %16s%</u>\n",
2920 2920                              "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2921 2921                              "", "CACHE", "LASTLOG", "CONTENTS");
2922 2922                  } else {
2923 2923                          mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2924 2924                              "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2925 2925                  }
2926 2926          }
2927 2927  
2928 2928          if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2929 2929                  mdb_warn("couldn't read bufctl at %p", addr);
2930 2930                  return (DCMD_ERR);
2931 2931          }
2932 2932  
2933 2933          /*
2934 2934           * Guard against bogus bc_depth in case the bufctl is corrupt or
2935 2935           * the address does not really refer to a bufctl.
2936 2936           */
2937 2937          depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2938 2938  
2939 2939          if (caller != 0) {
2940 2940                  laddr = caller;
2941 2941                  haddr = caller + sizeof (caller);
2942 2942  
2943 2943                  if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2944 2944                      &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2945 2945                          /*
2946 2946                           * We were provided an exact symbol value; any
2947 2947                           * address in the function is valid.
2948 2948                           */
2949 2949                          laddr = (uintptr_t)sym.st_value;
2950 2950                          haddr = (uintptr_t)sym.st_value + sym.st_size;
2951 2951                  }
2952 2952  
2953 2953                  for (i = 0; i < depth; i++)
2954 2954                          if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2955 2955                                  break;
2956 2956  
2957 2957                  if (i == depth)
2958 2958                          return (DCMD_OK);
2959 2959          }
2960 2960  
2961 2961          if (thread != 0 && (uintptr_t)bc.bc_thread != thread)
2962 2962                  return (DCMD_OK);
2963 2963  
2964 2964          if (earliest != 0 && bc.bc_timestamp < earliest)
2965 2965                  return (DCMD_OK);
2966 2966  
2967 2967          if (latest != 0 && bc.bc_timestamp > latest)
2968 2968                  return (DCMD_OK);
2969 2969  
2970 2970          if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2971 2971                  return (DCMD_OK);
2972 2972  
2973 2973          if (flags & DCMD_PIPE_OUT) {
2974 2974                  mdb_printf("%#lr\n", addr);
2975 2975                  return (DCMD_OK);
2976 2976          }
2977 2977  
2978 2978          if (verbose) {
2979 2979                  mdb_printf(
2980 2980                      "%<b>%16p%</b> %16p %16llx %16p\n"
2981 2981                      "%16s %16p %16p %16p\n",
2982 2982                      addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2983 2983                      "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2984 2984  
2985 2985                  mdb_inc_indent(17);
2986 2986                  for (i = 0; i < depth; i++)
2987 2987                          mdb_printf("%a\n", bc.bc_stack[i]);
2988 2988                  mdb_dec_indent(17);
2989 2989                  mdb_printf("\n");
2990 2990          } else {
2991 2991                  mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2992 2992                      bc.bc_timestamp, bc.bc_thread);
2993 2993  
2994 2994                  for (i = 0; i < depth; i++) {
2995 2995                          if (mdb_lookup_by_addr(bc.bc_stack[i],
2996 2996                              MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2997 2997                                  continue;
2998 2998                          if (strncmp(c, "kmem_", 5) == 0)
2999 2999                                  continue;
3000 3000                          mdb_printf(" %a\n", bc.bc_stack[i]);
3001 3001                          break;
3002 3002                  }
3003 3003  
3004 3004                  if (i >= depth)
3005 3005                          mdb_printf("\n");
3006 3006          }
3007 3007  
3008 3008          return (DCMD_OK);
3009 3009  }
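/*
 * Example pipelines (a sketch; the cache address, buffer address, and
 * symbol name are hypothetical):
 *
 *	> cachep::walk bufctl | ::bufctl -c kmem_zalloc
 *	> cachep::walk bufctl | ::bufctl -a bufaddr -v
 */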
3010 3010  
3011 3011  typedef struct kmem_verify {
3012 3012          uint64_t *kmv_buf;              /* buffer to read cache contents into */
3013 3013          size_t kmv_size;                /* number of bytes in kmv_buf */
3014 3014          int kmv_corruption;             /* > 0 if corruption found. */
3015 3015          uint_t kmv_flags;               /* dcmd flags */
3016 3016          struct kmem_cache kmv_cache;    /* the cache we're operating on */
3017 3017  } kmem_verify_t;
3018 3018  
3019 3019  /*
3020 3020   * verify_pattern()
3021 3021   *      verify buf is filled with pat; return first mismatch offset or -1.
3022 3022   */
3023 3023  static int64_t
3024 3024  verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3025 3025  {
3026 3026          /*LINTED*/
3027 3027          uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3028 3028          uint64_t *buf;
3029 3029  
3030 3030          for (buf = buf_arg; buf < bufend; buf++)
3031 3031                  if (*buf != pat)
3032 3032                          return ((uintptr_t)buf - (uintptr_t)buf_arg);
3033 3033          return (-1);
3034 3034  }
3035 3035  
3036 3036  /*
3037 3037   * verify_buftag()
3038 3038   *      verify that btp->bt_bxstat == (btp->bt_bufctl ^ pat)
3039 3039   */
3040 3040  static int
3041 3041  verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3042 3042  {
3043 3043          return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3044 3044  }
3045 3045  
3046 3046  /*
3047 3047   * verify_free()
3048 3048   *      verify the integrity of a free block of memory by checking
3049 3049   *      that it is filled with 0xdeadbeef and that its buftag is sane.
3050 3050   */
3051 3051  /*ARGSUSED1*/
3052 3052  static int
3053 3053  verify_free(uintptr_t addr, const void *data, void *private)
3054 3054  {
3055 3055          kmem_verify_t *kmv = (kmem_verify_t *)private;
3056 3056          uint64_t *buf = kmv->kmv_buf;   /* buf to validate */
3057 3057          int64_t corrupt;                /* corruption offset */
3058 3058          kmem_buftag_t *buftagp;         /* ptr to buftag */
3059 3059          kmem_cache_t *cp = &kmv->kmv_cache;
3060 3060          boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3061 3061  
3062 3062          /*LINTED*/
3063 3063          buftagp = KMEM_BUFTAG(cp, buf);
3064 3064  
3065 3065          /*
3066 3066           * Read the buffer to check.
3067 3067           */
3068 3068          if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3069 3069                  if (!besilent)
3070 3070                          mdb_warn("couldn't read %p", addr);
3071 3071                  return (WALK_NEXT);
3072 3072          }
3073 3073  
3074 3074          if ((corrupt = verify_pattern(buf, cp->cache_verify,
3075 3075              KMEM_FREE_PATTERN)) >= 0) {
3076 3076                  if (!besilent)
3077 3077                          mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3078 3078                              addr, (uintptr_t)addr + corrupt);
3079 3079                  goto corrupt;
3080 3080          }
3081 3081          /*
3082 3082           * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3083 3083           * the first bytes of the buffer, hence we cannot check for red
3084 3084           * zone corruption.
3085 3085           */
3086 3086          if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3087 3087              buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3088 3088                  if (!besilent)
3089 3089                          mdb_printf("buffer %p (free) seems to "
3090 3090                              "have a corrupt redzone pattern\n", addr);
3091 3091                  goto corrupt;
3092 3092          }
3093 3093  
3094 3094          /*
3095 3095           * confirm bufctl pointer integrity.
3096 3096           */
3097 3097          if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3098 3098                  if (!besilent)
3099 3099                          mdb_printf("buffer %p (free) has a corrupt "
3100 3100                              "buftag\n", addr);
3101 3101                  goto corrupt;
3102 3102          }
3103 3103  
3104 3104          return (WALK_NEXT);
3105 3105  corrupt:
3106 3106          if (kmv->kmv_flags & DCMD_PIPE_OUT)
3107 3107                  mdb_printf("%p\n", addr);
3108 3108          kmv->kmv_corruption++;
3109 3109          return (WALK_NEXT);
3110 3110  }
3111 3111  
3112 3112  /*
3113 3113   * verify_alloc()
3114 3114   *      Verify that the buftag of an allocated buffer makes sense with respect
3115 3115   *      to the buffer.
3116 3116   */
3117 3117  /*ARGSUSED1*/
3118 3118  static int
3119 3119  verify_alloc(uintptr_t addr, const void *data, void *private)
3120 3120  {
3121 3121          kmem_verify_t *kmv = (kmem_verify_t *)private;
3122 3122          kmem_cache_t *cp = &kmv->kmv_cache;
3123 3123          uint64_t *buf = kmv->kmv_buf;   /* buf to validate */
3124 3124          /*LINTED*/
3125 3125          kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3126 3126          uint32_t *ip = (uint32_t *)buftagp;
3127 3127          uint8_t *bp = (uint8_t *)buf;
3128 3128          int looks_ok = 0, size_ok = 1;  /* flags for finding corruption */
3129 3129          boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3130 3130  
3131 3131          /*
3132 3132           * Read the buffer to check.
3133 3133           */
3134 3134          if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3135 3135                  if (!besilent)
3136 3136                          mdb_warn("couldn't read %p", addr);
3137 3137                  return (WALK_NEXT);
3138 3138          }
3139 3139  
3140 3140          /*
3141 3141           * There are two cases to handle:
3142 3142           * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3143 3143           *    0xfeedfacefeedface at the end of it
3144 3144           * 2. If the buf was alloc'd using kmem_alloc, it will have
3145 3145           *    0xbb just past the end of the region in use.  At the buftag,
3146 3146           *    it will have 0xfeedface (or, if the whole buffer is in use,
3147 3147           *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3148 3148           *    endianness), followed by 32 bits containing the offset of the
3149 3149           *    0xbb byte in the buffer.
3150 3150           *
3151 3151           * Finally, the two 32-bit words that comprise the second half of the
3152 3152           * buftag should xor to KMEM_BUFTAG_ALLOC
3153 3153           */
3154 3154  
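	/*
	 * A sketch of the checks below, assuming the encoding macros from
	 * kmem_impl.h: ip[1] is the second 32-bit word of bt_redzone; when
	 * it holds a valid size encoding, KMEM_SIZE_DECODE(ip[1]) gives the
	 * byte offset at which the 0xbb redzone byte must appear:
	 *
	 *	if (KMEM_SIZE_VALID(ip[1]) &&
	 *	    bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
	 *		... the kmem_alloc() case is intact ...
	 */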
3155 3155          if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3156 3156                  looks_ok = 1;
3157 3157          else if (!KMEM_SIZE_VALID(ip[1]))
3158 3158                  size_ok = 0;
3159 3159          else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3160 3160                  looks_ok = 1;
3161 3161          else
3162 3162                  size_ok = 0;
3163 3163  
3164 3164          if (!size_ok) {
3165 3165                  if (!besilent)
3166 3166                          mdb_printf("buffer %p (allocated) has a corrupt "
3167 3167                              "redzone size encoding\n", addr);
3168 3168                  goto corrupt;
3169 3169          }
3170 3170  
3171 3171          if (!looks_ok) {
3172 3172                  if (!besilent)
3173 3173                          mdb_printf("buffer %p (allocated) has a corrupt "
3174 3174                              "redzone signature\n", addr);
3175 3175                  goto corrupt;
3176 3176          }
3177 3177  
3178 3178          if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3179 3179                  if (!besilent)
3180 3180                          mdb_printf("buffer %p (allocated) has a "
3181 3181                              "corrupt buftag\n", addr);
3182 3182                  goto corrupt;
3183 3183          }
3184 3184  
3185 3185          return (WALK_NEXT);
3186 3186  corrupt:
3187 3187          if (kmv->kmv_flags & DCMD_PIPE_OUT)
3188 3188                  mdb_printf("%p\n", addr);
3189 3189  
3190 3190          kmv->kmv_corruption++;
3191 3191          return (WALK_NEXT);
3192 3192  }
3193 3193  
3194 3194  /*ARGSUSED2*/
3195 3195  int
3196 3196  kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3197 3197  {
3198 3198          if (flags & DCMD_ADDRSPEC) {
3199 3199                  int check_alloc = 0, check_free = 0;
3200 3200                  kmem_verify_t kmv;
3201 3201  
3202 3202                  if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3203 3203                      addr) == -1) {
3204 3204                          mdb_warn("couldn't read kmem_cache %p", addr);
3205 3205                          return (DCMD_ERR);
3206 3206                  }
3207 3207  
3208 3208                  if ((kmv.kmv_cache.cache_dump.kd_unsafe ||
3209 3209                      kmv.kmv_cache.cache_dump.kd_alloc_fails) &&
3210 3210                      !(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3211 3211                          mdb_warn("WARNING: cache was used during dump: "
3212 3212                              "corruption may be incorrectly reported\n");
3213 3213                  }
3214 3214  
3215 3215                  kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3216 3216                      sizeof (kmem_buftag_t);
3217 3217                  kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3218 3218                  kmv.kmv_corruption = 0;
3219 3219                  kmv.kmv_flags = flags;
3220 3220  
3221 3221                  if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3222 3222                          check_alloc = 1;
3223 3223                          if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3224 3224                                  check_free = 1;
3225 3225                  } else {
3226 3226                          if (!(flags & DCMD_LOOP)) {
3227 3227                                  mdb_warn("cache %p (%s) does not have "
3228 3228                                      "redzone checking enabled\n", addr,
3229 3229                                      kmv.kmv_cache.cache_name);
3230 3230                          }
3231 3231                          return (DCMD_ERR);
3232 3232                  }
3233 3233  
3234 3234                  if (!(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3235 3235                          mdb_printf("Summary for cache '%s'\n",
3236 3236                              kmv.kmv_cache.cache_name);
3237 3237                          mdb_inc_indent(2);
3238 3238                  }
3239 3239  
3240 3240                  if (check_alloc)
3241 3241                          (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3242 3242                  if (check_free)
3243 3243                          (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3244 3244  
3245 3245                  if (!(flags & DCMD_PIPE_OUT)) {
3246 3246                          if (flags & DCMD_LOOP) {
3247 3247                                  if (kmv.kmv_corruption == 0) {
3248 3248                                          mdb_printf("%-*s %?p clean\n",
3249 3249                                              KMEM_CACHE_NAMELEN,
3250 3250                                              kmv.kmv_cache.cache_name, addr);
3251 3251                                  } else {
3252 3252                                          mdb_printf("%-*s %?p %d corrupt "
3253 3253                                              "buffer%s\n", KMEM_CACHE_NAMELEN,
3254 3254                                              kmv.kmv_cache.cache_name, addr,
3255 3255                                              kmv.kmv_corruption,
3256 3256                                              kmv.kmv_corruption > 1 ? "s" : "");
3257 3257                                  }
3258 3258                          } else {
3259 3259                                  /*
3260 3260                                   * This is the more verbose mode, when the user
3261 3261                                   * typed addr::kmem_verify.  If the cache was
3262 3262                                   * clean, nothing will have yet been printed. So
3263 3263                                   * say something.
3264 3264                                   */
3265 3265                                  if (kmv.kmv_corruption == 0)
3266 3266                                          mdb_printf("clean\n");
3267 3267  
3268 3268                                  mdb_dec_indent(2);
3269 3269                          }
3270 3270                  }
3271 3271          } else {
3272 3272                  /*
3273 3273                   * If the user didn't specify a cache to verify, we'll walk all
3274 3274                   * kmem_cache's, specifying ourself as a callback for each...
3275 3275                   * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3276 3276                   */
3277 3277  
3278 3278                  if (!(flags & DCMD_PIPE_OUT)) {
3279 3279                          uintptr_t dump_curr;
3280 3280                          uintptr_t dump_end;
3281 3281  
3282 3282                          if (mdb_readvar(&dump_curr, "kmem_dump_curr") != -1 &&
3283 3283                              mdb_readvar(&dump_end, "kmem_dump_end") != -1 &&
3284 3284                              dump_curr == dump_end) {
3285 3285                                  mdb_warn("WARNING: exceeded kmem_dump_size; "
3286 3286                                      "corruption may be incorrectly reported\n");
3287 3287                          }
3288 3288  
3289 3289                          mdb_printf("%<u>%-*s %-?s %-20s%</u>\n",
3290 3290                              KMEM_CACHE_NAMELEN, "Cache Name", "Addr",
3291 3291                              "Cache Integrity");
3292 3292                  }
3293 3293  
3294 3294                  (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3295 3295          }
3296 3296  
3297 3297          return (DCMD_OK);
3298 3298  }
3299 3299  
3300 3300  typedef struct vmem_node {
3301 3301          struct vmem_node *vn_next;
3302 3302          struct vmem_node *vn_parent;
3303 3303          struct vmem_node *vn_sibling;
3304 3304          struct vmem_node *vn_children;
3305 3305          uintptr_t vn_addr;
3306 3306          int vn_marked;
3307 3307          vmem_t vn_vmem;
3308 3308  } vmem_node_t;
3309 3309  
3310 3310  typedef struct vmem_walk {
3311 3311          vmem_node_t *vw_root;
3312 3312          vmem_node_t *vw_current;
3313 3313  } vmem_walk_t;
3314 3314  
3315 3315  int
3316 3316  vmem_walk_init(mdb_walk_state_t *wsp)
3317 3317  {
3318 3318          uintptr_t vaddr, paddr;
3319 3319          vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3320 3320          vmem_walk_t *vw;
3321 3321  
3322 3322          if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3323 3323                  mdb_warn("couldn't read 'vmem_list'");
3324 3324                  return (WALK_ERR);
3325 3325          }
3326 3326  
3327 3327          while (vaddr != 0) {
3328 3328                  vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3329 3329                  vp->vn_addr = vaddr;
3330 3330                  vp->vn_next = head;
3331 3331                  head = vp;
3332 3332  
3333 3333                  if (vaddr == wsp->walk_addr)
3334 3334                          current = vp;
3335 3335  
3336 3336                  if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3337 3337                          mdb_warn("couldn't read vmem_t at %p", vaddr);
3338 3338                          goto err;
3339 3339                  }
3340 3340  
3341 3341                  vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3342 3342          }
3343 3343  
3344 3344          for (vp = head; vp != NULL; vp = vp->vn_next) {
3345 3345  
3346 3346                  if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == 0) {
3347 3347                          vp->vn_sibling = root;
3348 3348                          root = vp;
3349 3349                          continue;
3350 3350                  }
3351 3351  
3352 3352                  for (parent = head; parent != NULL; parent = parent->vn_next) {
3353 3353                          if (parent->vn_addr != paddr)
3354 3354                                  continue;
3355 3355                          vp->vn_sibling = parent->vn_children;
3356 3356                          parent->vn_children = vp;
3357 3357                          vp->vn_parent = parent;
3358 3358                          break;
3359 3359                  }
3360 3360  
3361 3361                  if (parent == NULL) {
3362 3362                          mdb_warn("couldn't find %p's parent (%p)\n",
3363 3363                              vp->vn_addr, paddr);
3364 3364                          goto err;
3365 3365                  }
3366 3366          }
3367 3367  
3368 3368          vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3369 3369          vw->vw_root = root;
3370 3370  
3371 3371          if (current != NULL)
3372 3372                  vw->vw_current = current;
3373 3373          else
3374 3374                  vw->vw_current = root;
3375 3375  
3376 3376          wsp->walk_data = vw;
3377 3377          return (WALK_NEXT);
3378 3378  err:
3379 3379          for (vp = head; head != NULL; vp = head) {
3380 3380                  head = vp->vn_next;
3381 3381                  mdb_free(vp, sizeof (vmem_node_t));
3382 3382          }
3383 3383  
3384 3384          return (WALK_ERR);
3385 3385  }
3386 3386  
3387 3387  int
3388 3388  vmem_walk_step(mdb_walk_state_t *wsp)
3389 3389  {
3390 3390          vmem_walk_t *vw = wsp->walk_data;
3391 3391          vmem_node_t *vp;
3392 3392          int rval;
3393 3393  
3394 3394          if ((vp = vw->vw_current) == NULL)
3395 3395                  return (WALK_DONE);
3396 3396  
3397 3397          rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3398 3398  
3399 3399          if (vp->vn_children != NULL) {
3400 3400                  vw->vw_current = vp->vn_children;
3401 3401                  return (rval);
3402 3402          }
3403 3403  
3404 3404          do {
3405 3405                  vw->vw_current = vp->vn_sibling;
3406 3406                  vp = vp->vn_parent;
3407 3407          } while (vw->vw_current == NULL && vp != NULL);
3408 3408  
3409 3409          return (rval);
3410 3410  }
3411 3411  
3412 3412  /*
3413 3413   * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3414 3414   * children are visited before their parent.  We perform the postfix walk
3415 3415   * iteratively (rather than recursively) to allow mdb to regain control
3416 3416   * after each callback.
3417 3417   */
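/*
 * For example, given a hypothetical arena hierarchy
 *
 *	heap
 *	+-- kmem_metadata
 *	|   +-- kmem_msb
 *	+-- kmem_default
 *
 * the postfix order is kmem_msb, kmem_metadata, kmem_default, heap:
 * every child before its parent.
 */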
3418 3418  int
3419 3419  vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3420 3420  {
3421 3421          vmem_walk_t *vw = wsp->walk_data;
3422 3422          vmem_node_t *vp = vw->vw_current;
3423 3423          int rval;
3424 3424  
3425 3425          /*
3426 3426           * If this node is marked, then we know that we have already visited
3427 3427           * all of its children.  If the node has any siblings, they need to
3428 3428           * be visited next; otherwise, we need to visit the parent.  Note
3429 3429           * that vp->vn_marked will only be zero on the first invocation of
3430 3430           * the step function.
3431 3431           */
3432 3432          if (vp->vn_marked) {
3433 3433                  if (vp->vn_sibling != NULL)
3434 3434                          vp = vp->vn_sibling;
3435 3435                  else if (vp->vn_parent != NULL)
3436 3436                          vp = vp->vn_parent;
3437 3437                  else {
3438 3438                          /*
3439 3439                           * We have neither a parent, nor a sibling, and we
3440 3440                           * have already been visited; we're done.
3441 3441                           */
3442 3442                          return (WALK_DONE);
3443 3443                  }
3444 3444          }
3445 3445  
3446 3446          /*
3447 3447           * Before we visit this node, visit its children.
3448 3448           */
3449 3449          while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3450 3450                  vp = vp->vn_children;
3451 3451  
3452 3452          vp->vn_marked = 1;
3453 3453          vw->vw_current = vp;
3454 3454          rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3455 3455  
3456 3456          return (rval);
3457 3457  }
3458 3458  
3459 3459  void
3460 3460  vmem_walk_fini(mdb_walk_state_t *wsp)
3461 3461  {
3462 3462          vmem_walk_t *vw = wsp->walk_data;
3463 3463          vmem_node_t *root = vw->vw_root;
3464 3464          int done;
3465 3465  
3466 3466          if (root == NULL)
3467 3467                  return;
3468 3468  
3469 3469          if ((vw->vw_root = root->vn_children) != NULL)
3470 3470                  vmem_walk_fini(wsp);
3471 3471  
3472 3472          vw->vw_root = root->vn_sibling;
3473 3473          done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3474 3474          mdb_free(root, sizeof (vmem_node_t));
3475 3475  
3476 3476          if (done) {
3477 3477                  mdb_free(vw, sizeof (vmem_walk_t));
3478 3478          } else {
3479 3479                  vmem_walk_fini(wsp);
3480 3480          }
3481 3481  }
3482 3482  
3483 3483  typedef struct vmem_seg_walk {
3484 3484          uint8_t vsw_type;
3485 3485          uintptr_t vsw_start;
3486 3486          uintptr_t vsw_current;
3487 3487  } vmem_seg_walk_t;
3488 3488  
3489 3489  /*ARGSUSED*/
3490 3490  int
3491 3491  vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3492 3492  {
3493 3493          vmem_seg_walk_t *vsw;
3494 3494  
3495 3495          if (wsp->walk_addr == 0) {
3496 3496                  mdb_warn("vmem_%s does not support global walks\n", name);
3497 3497                  return (WALK_ERR);
3498 3498          }
3499 3499  
3500 3500          wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3501 3501  
3502 3502          vsw->vsw_type = type;
3503 3503          vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3504 3504          vsw->vsw_current = vsw->vsw_start;
3505 3505  
3506 3506          return (WALK_NEXT);
3507 3507  }
3508 3508  
3509 3509  /*
3510 3510   * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3511 3511   */
3512 3512  #define VMEM_NONE       0
3513 3513  
3514 3514  int
3515 3515  vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3516 3516  {
3517 3517          return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3518 3518  }
3519 3519  
3520 3520  int
3521 3521  vmem_free_walk_init(mdb_walk_state_t *wsp)
3522 3522  {
3523 3523          return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3524 3524  }
3525 3525  
3526 3526  int
3527 3527  vmem_span_walk_init(mdb_walk_state_t *wsp)
3528 3528  {
3529 3529          return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3530 3530  }
3531 3531  
3532 3532  int
3533 3533  vmem_seg_walk_init(mdb_walk_state_t *wsp)
3534 3534  {
3535 3535          return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3536 3536  }
3537 3537  
3538 3538  int
3539 3539  vmem_seg_walk_step(mdb_walk_state_t *wsp)
3540 3540  {
3541 3541          vmem_seg_t seg;
3542 3542          vmem_seg_walk_t *vsw = wsp->walk_data;
3543 3543          uintptr_t addr = vsw->vsw_current;
3544 3544          static size_t seg_size = 0;
3545 3545          int rval;
3546 3546  
3547 3547          if (!seg_size) {
3548 3548                  if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3549 3549                          mdb_warn("failed to read 'vmem_seg_size'");
3550 3550                          seg_size = sizeof (vmem_seg_t);
3551 3551                  }
3552 3552          }
3553 3553  
3554 3554          if (seg_size < sizeof (seg))
3555 3555                  bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3556 3556  
3557 3557          if (mdb_vread(&seg, seg_size, addr) == -1) {
3558 3558                  mdb_warn("couldn't read vmem_seg at %p", addr);
3559 3559                  return (WALK_ERR);
3560 3560          }
3561 3561  
3562 3562          vsw->vsw_current = (uintptr_t)seg.vs_anext;
3563 3563          if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3564 3564                  rval = WALK_NEXT;
3565 3565          } else {
3566 3566                  rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3567 3567          }
3568 3568  
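	/*
	 * The segment list is circular and threaded through the arena's
	 * static vm_seg0 (our starting point), so arriving back at
	 * vsw_start means every segment has been visited.
	 */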
3569 3569          if (vsw->vsw_current == vsw->vsw_start)
3570 3570                  return (WALK_DONE);
3571 3571  
3572 3572          return (rval);
3573 3573  }
3574 3574  
3575 3575  void
3576 3576  vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3577 3577  {
3578 3578          vmem_seg_walk_t *vsw = wsp->walk_data;
3579 3579  
3580 3580          mdb_free(vsw, sizeof (vmem_seg_walk_t));
3581 3581  }
3582 3582  
3583 3583  #define VMEM_NAMEWIDTH  22
3584 3584  
3585 3585  int
3586 3586  vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3587 3587  {
3588 3588          vmem_t v, parent;
3589 3589          vmem_kstat_t *vkp = &v.vm_kstat;
3590 3590          uintptr_t paddr;
3591 3591          int ident = 0;
3592 3592          char c[VMEM_NAMEWIDTH];
3593 3593  
3594 3594          if (!(flags & DCMD_ADDRSPEC)) {
3595 3595                  if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3596 3596                          mdb_warn("can't walk vmem");
3597 3597                          return (DCMD_ERR);
3598 3598                  }
3599 3599                  return (DCMD_OK);
3600 3600          }
3601 3601  
3602 3602          if (DCMD_HDRSPEC(flags))
3603 3603                  mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3604 3604                      "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3605 3605                      "TOTAL", "SUCCEED", "FAIL");
3606 3606  
3607 3607          if (mdb_vread(&v, sizeof (v), addr) == -1) {
3608 3608                  mdb_warn("couldn't read vmem at %p", addr);
3609 3609                  return (DCMD_ERR);
3610 3610          }
3611 3611  
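	/*
	 * Derive the indentation for the arena's name from its depth in the
	 * source hierarchy: two spaces per ancestor.  If the ancestry cannot
	 * be traced, fall back to no indentation.
	 */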
3612 3612          for (paddr = (uintptr_t)v.vm_source; paddr != 0; ident += 2) {
3613 3613                  if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3614 3614                          mdb_warn("couldn't trace %p's ancestry", addr);
3615 3615                          ident = 0;
3616 3616                          break;
3617 3617                  }
3618 3618                  paddr = (uintptr_t)parent.vm_source;
3619 3619          }
3620 3620  
3621 3621          (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3622 3622  
3623 3623          mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3624 3624              addr, VMEM_NAMEWIDTH, c,
3625 3625              vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3626 3626              vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3627 3627  
3628 3628          return (DCMD_OK);
3629 3629  }
3630 3630  
3631 3631  void
3632 3632  vmem_seg_help(void)
3633 3633  {
3634 3634          mdb_printf("%s",
3635 3635  "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3636 3636  "\n"
3637 3637  "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3638 3638  "representing a single chunk of data.  Only ALLOC segments have debugging\n"
3639 3639  "information.\n");
3640 3640          mdb_dec_indent(2);
3641 3641          mdb_printf("%<b>OPTIONS%</b>\n");
3642 3642          mdb_inc_indent(2);
3643 3643          mdb_printf("%s",
3644 3644  "  -v    Display the full content of the vmem_seg, including its stack trace\n"
3645 3645  "  -s    report the size of the segment, instead of the end address\n"
3646 3646  "  -c caller\n"
3647 3647  "        filter out segments without the function/PC in their stack trace\n"
3648 3648  "  -e earliest\n"
3649 3649  "        filter out segments timestamped before earliest\n"
3650 3650  "  -l latest\n"
3651 3651  "        filter out segments timestamped after latest\n"
3652 3652  "  -m minsize\n"
3653 3653  "        filter out segments smaller than minsize\n"
3654 3654  "  -M maxsize\n"
3655 3655  "        filter out segments larger than maxsize\n"
3656 3656  "  -t thread\n"
3657 3657  "        filter out segments not involving thread\n"
3658 3658  "  -T type\n"
3659 3659  "        filter out segments not of type 'type'\n"
3660 3660  "        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3661 3661  }
3662 3662  
3663 3663  /*ARGSUSED*/
3664 3664  int
3665 3665  vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3666 3666  {
3667 3667          vmem_seg_t vs;
3668 3668          pc_t *stk = vs.vs_stack;
3669 3669          uintptr_t sz;
3670 3670          uint8_t t;
3671 3671          const char *type = NULL;
3672 3672          GElf_Sym sym;
3673 3673          char c[MDB_SYM_NAMLEN];
3674 3674          int no_debug;
3675 3675          int i;
3676 3676          int depth;
3677 3677          uintptr_t laddr, haddr;
3678 3678  
3679 3679          uintptr_t caller = 0, thread = 0;
3680 3680          uintptr_t minsize = 0, maxsize = 0;
3681 3681  
3682 3682          hrtime_t earliest = 0, latest = 0;
3683 3683  
3684 3684          uint_t size = 0;
3685 3685          uint_t verbose = 0;
3686 3686  
3687 3687          if (!(flags & DCMD_ADDRSPEC))
3688 3688                  return (DCMD_USAGE);
3689 3689  
3690 3690          if (mdb_getopts(argc, argv,
3691 3691              'c', MDB_OPT_UINTPTR, &caller,
3692 3692              'e', MDB_OPT_UINT64, &earliest,
3693 3693              'l', MDB_OPT_UINT64, &latest,
3694 3694              's', MDB_OPT_SETBITS, TRUE, &size,
3695 3695              'm', MDB_OPT_UINTPTR, &minsize,
3696 3696              'M', MDB_OPT_UINTPTR, &maxsize,
3697 3697              't', MDB_OPT_UINTPTR, &thread,
3698 3698              'T', MDB_OPT_STR, &type,
3699 3699              'v', MDB_OPT_SETBITS, TRUE, &verbose,
3700 3700              NULL) != argc)
3701 3701                  return (DCMD_USAGE);
3702 3702  
3703 3703          if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3704 3704                  if (verbose) {
3705 3705                          mdb_printf("%16s %4s %16s %16s %16s\n"
3706 3706                              "%<u>%16s %4s %16s %16s %16s%</u>\n",
3707 3707                              "ADDR", "TYPE", "START", "END", "SIZE",
3708 3708                              "", "", "THREAD", "TIMESTAMP", "");
3709 3709                  } else {
3710 3710                          mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3711 3711                              "START", size? "SIZE" : "END", "WHO");
3712 3712                  }
3713 3713          }
3714 3714  
3715 3715          if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3716 3716                  mdb_warn("couldn't read vmem_seg at %p", addr);
3717 3717                  return (DCMD_ERR);
3718 3718          }
3719 3719  
3720 3720          if (type != NULL) {
3721 3721                  if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3722 3722                          t = VMEM_ALLOC;
3723 3723                  else if (strcmp(type, "FREE") == 0)
3724 3724                          t = VMEM_FREE;
3725 3725                  else if (strcmp(type, "SPAN") == 0)
3726 3726                          t = VMEM_SPAN;
3727 3727                  else if (strcmp(type, "ROTR") == 0 ||
3728 3728                      strcmp(type, "ROTOR") == 0)
3729 3729                          t = VMEM_ROTOR;
3730 3730                  else if (strcmp(type, "WLKR") == 0 ||
3731 3731                      strcmp(type, "WALKER") == 0)
3732 3732                          t = VMEM_WALKER;
3733 3733                  else {
3734 3734                          mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3735 3735                              type);
3736 3736                          return (DCMD_ERR);
3737 3737                  }
3738 3738  
3739 3739                  if (vs.vs_type != t)
3740 3740                          return (DCMD_OK);
3741 3741          }
3742 3742  
3743 3743          sz = vs.vs_end - vs.vs_start;
3744 3744  
3745 3745          if (minsize != 0 && sz < minsize)
3746 3746                  return (DCMD_OK);
3747 3747  
3748 3748          if (maxsize != 0 && sz > maxsize)
3749 3749                  return (DCMD_OK);
3750 3750  
3751 3751          t = vs.vs_type;
3752 3752          depth = vs.vs_depth;
3753 3753  
3754 3754          /*
3755 3755           * debug info, when present, is only accurate for VMEM_ALLOC segments
3756 3756           */
3757 3757          no_debug = (t != VMEM_ALLOC) ||
3758 3758              (depth == 0 || depth > VMEM_STACK_DEPTH);
3759 3759  
3760 3760          if (no_debug) {
3761 3761                  if (caller != 0 || thread != 0 || earliest != 0 || latest != 0)
3762 3762                          return (DCMD_OK);               /* not enough info */
3763 3763          } else {
3764 3764                  if (caller != 0) {
3765 3765                          laddr = caller;
3766 3766                          haddr = caller + sizeof (caller);
3767 3767  
3768 3768                          if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3769 3769                              sizeof (c), &sym) != -1 &&
3770 3770                              caller == (uintptr_t)sym.st_value) {
3771 3771                                  /*
3772 3772                                   * We were provided an exact symbol value; any
3773 3773                                   * address in the function is valid.
3774 3774                                   */
3775 3775                                  laddr = (uintptr_t)sym.st_value;
3776 3776                                  haddr = (uintptr_t)sym.st_value + sym.st_size;
3777 3777                          }
3778 3778  
3779 3779                          for (i = 0; i < depth; i++)
3780 3780                                  if (vs.vs_stack[i] >= laddr &&
3781 3781                                      vs.vs_stack[i] < haddr)
3782 3782                                          break;
3783 3783  
3784 3784                          if (i == depth)
3785 3785                                  return (DCMD_OK);
3786 3786                  }
3787 3787  
3788 3788                  if (thread != 0 && (uintptr_t)vs.vs_thread != thread)
3789 3789                          return (DCMD_OK);
3790 3790  
3791 3791                  if (earliest != 0 && vs.vs_timestamp < earliest)
3792 3792                          return (DCMD_OK);
3793 3793  
3794 3794                  if (latest != 0 && vs.vs_timestamp > latest)
3795 3795                          return (DCMD_OK);
3796 3796          }
3797 3797  
3798 3798          type = (t == VMEM_ALLOC ? "ALLC" :
3799 3799              t == VMEM_FREE ? "FREE" :
3800 3800              t == VMEM_SPAN ? "SPAN" :
3801 3801              t == VMEM_ROTOR ? "ROTR" :
3802 3802              t == VMEM_WALKER ? "WLKR" :
3803 3803              "????");
3804 3804  
3805 3805          if (flags & DCMD_PIPE_OUT) {
3806 3806                  mdb_printf("%#lr\n", addr);
3807 3807                  return (DCMD_OK);
3808 3808          }
3809 3809  
3810 3810          if (verbose) {
3811 3811                  mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3812 3812                      addr, type, vs.vs_start, vs.vs_end, sz);
3813 3813  
3814 3814                  if (no_debug)
3815 3815                          return (DCMD_OK);
3816 3816  
3817 3817                  mdb_printf("%16s %4s %16p %16llx\n",
3818 3818                      "", "", vs.vs_thread, vs.vs_timestamp);
3819 3819  
3820 3820                  mdb_inc_indent(17);
3821 3821                  for (i = 0; i < depth; i++) {
3822 3822                          mdb_printf("%a\n", stk[i]);
3823 3823                  }
3824 3824                  mdb_dec_indent(17);
3825 3825                  mdb_printf("\n");
3826 3826          } else {
3827 3827                  mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3828 3828                      vs.vs_start, size? sz : vs.vs_end);
3829 3829  
3830 3830                  if (no_debug) {
3831 3831                          mdb_printf("\n");
3832 3832                          return (DCMD_OK);
3833 3833                  }
3834 3834  
3835 3835                  for (i = 0; i < depth; i++) {
3836 3836                          if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3837 3837                              c, sizeof (c), &sym) == -1)
3838 3838                                  continue;
3839 3839                          if (strncmp(c, "vmem_", 5) == 0)
3840 3840                                  continue;
3841 3841                          mdb_printf(" %a", stk[i]);
3842 3842                          break;
3843 3843                  }
                        mdb_printf("\n");
3844 3844          }
3845 3845          return (DCMD_OK);
3846 3846  }
3847 3847  
3848 3848  typedef struct kmalog_data {
3849 3849          uintptr_t       kma_addr;
3850 3850          hrtime_t        kma_newest;
3851 3851  } kmalog_data_t;
3852 3852  
3853 3853  /*ARGSUSED*/
3854 3854  static int
3855 3855  showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3856 3856  {
3857 3857          char name[KMEM_CACHE_NAMELEN + 1];
3858 3858          hrtime_t delta;
3859 3859          int i, depth;
3860 3860          size_t bufsize;
3861 3861  
3862 3862          if (bcp->bc_timestamp == 0)
3863 3863                  return (WALK_DONE);
3864 3864  
3865 3865          if (kma->kma_newest == 0)
3866 3866                  kma->kma_newest = bcp->bc_timestamp;
3867 3867  
3868 3868          if (kma->kma_addr) {
3869 3869                  if (mdb_vread(&bufsize, sizeof (bufsize),
3870 3870                      (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3871 3871                          mdb_warn(
3872 3872                              "failed to read cache_bufsize for cache at %p",
3873 3873                              bcp->bc_cache);
3874 3874                          return (WALK_ERR);
3875 3875                  }
3876 3876  
3877 3877                  if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3878 3878                      kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3879 3879                          return (WALK_NEXT);
3880 3880          }
3881 3881  
3882 3882          delta = kma->kma_newest - bcp->bc_timestamp;
3883 3883          depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3884 3884  
3885 3885          if (mdb_readstr(name, sizeof (name), (uintptr_t)
3886 3886              &bcp->bc_cache->cache_name) <= 0)
3887 3887                  (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3888 3888  
3889 3889          mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3890 3890              delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3891 3891  
3892 3892          for (i = 0; i < depth; i++)
3893 3893                  mdb_printf("\t %a\n", bcp->bc_stack[i]);
3894 3894  
3895 3895          return (WALK_NEXT);
3896 3896  }
3897 3897  
3898 3898  int
3899 3899  kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3900 3900  {
3901 3901          const char *logname = "kmem_transaction_log";
3902 3902          kmalog_data_t kma;
3903 3903  
3904 3904          if (argc > 1)
3905 3905                  return (DCMD_USAGE);
3906 3906  
3907 3907          kma.kma_newest = 0;
3908 3908          if (flags & DCMD_ADDRSPEC)
3909 3909                  kma.kma_addr = addr;
3910 3910          else
3911 3911                  kma.kma_addr = 0;
3912 3912  
3913 3913          if (argc > 0) {
3914 3914                  if (argv->a_type != MDB_TYPE_STRING)
3915 3915                          return (DCMD_USAGE);
3916 3916                  if (strcmp(argv->a_un.a_str, "fail") == 0)
3917 3917                          logname = "kmem_failure_log";
3918 3918                  else if (strcmp(argv->a_un.a_str, "slab") == 0)
3919 3919                          logname = "kmem_slab_log";
     3920 +                else if (strcmp(argv->a_un.a_str, "zerosized") == 0)
     3921 +                        logname = "kmem_zerosized_log";
3920 3922                  else
3921 3923                          return (DCMD_USAGE);
3922 3924          }
3923 3925  
3924 3926          if (mdb_readvar(&addr, logname) == -1) {
3925 3927                  mdb_warn("failed to read %s log header pointer", logname);
3926 3928                  return (DCMD_ERR);
3927 3929          }
3928 3930  
3929 3931          if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3930 3932                  mdb_warn("failed to walk kmem log");
3931 3933                  return (DCMD_ERR);
3932 3934          }
3933 3935  
3934 3936          return (DCMD_OK);
3935 3937  }
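/*
 * Example invocations (a sketch):
 *
 *	> ::kmalog			! walk kmem_transaction_log
 *	> ::kmalog fail			! walk kmem_failure_log
 *	> ::kmalog slab			! walk kmem_slab_log
 *	> ::kmalog zerosized		! walk kmem_zerosized_log
 *	> addr::kmalog			! entries whose buffer contains addr
 */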
3936 3938  
3937 3939  /*
3938 3940   * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3939 3941   * The first piece is a structure which we use to accumulate kmem_cache_t
3940 3942   * addresses of interest.  kmc_add is used as a callback for the kmem_cache
3941 3943   * walker; we add either all caches, or only those named as arguments.
3942 3944   */
3943 3945  
3944 3946  typedef struct kmclist {
3945 3947          const char *kmc_name;                   /* Name to match (or NULL) */
3946 3948          uintptr_t *kmc_caches;                  /* List of kmem_cache_t addrs */
3947 3949          int kmc_nelems;                         /* Num entries in kmc_caches */
3948 3950          int kmc_size;                           /* Size of kmc_caches array */
3949 3951  } kmclist_t;
3950 3952  
3951 3953  static int
3952 3954  kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3953 3955  {
3954 3956          void *p;
3955 3957          int s;
3956 3958  
3957 3959          if (kmc->kmc_name == NULL ||
3958 3960              strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3959 3961                  /*
3960 3962                   * If we have a match, grow our array (if necessary), and then
3961 3963                   * add the virtual address of the matching cache to our list.
3962 3964                   */
3963 3965                  if (kmc->kmc_nelems >= kmc->kmc_size) {
3964 3966                          s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3965 3967                          p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3966 3968  
3967 3969                          bcopy(kmc->kmc_caches, p,
3968 3970                              sizeof (uintptr_t) * kmc->kmc_size);
3969 3971  
3970 3972                          kmc->kmc_caches = p;
3971 3973                          kmc->kmc_size = s;
3972 3974                  }
3973 3975  
3974 3976                  kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3975 3977                  return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3976 3978          }
3977 3979  
3978 3980          return (WALK_NEXT);
3979 3981  }
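
A minimal sketch of how a caller drives kmc_add, which is exactly the pattern ::kmausers uses below (the cache name here is only an example):

    kmclist_t kmc;

    bzero(&kmc, sizeof (kmc));
    kmc.kmc_name = "kmem_alloc_32";     /* example; NULL matches all */
    if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc) == -1)
            mdb_warn("failed to walk kmem_cache");

Note that the outgrown kmc_caches array is never explicitly freed: UM_GC allocations are garbage-collected by mdb itself once the dcmd completes.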
3980 3982  
3981 3983  /*
3982 3984   * The second piece of ::kmausers is a hash table of allocations.  Each
3983 3985   * allocation owner is identified by its stack trace and data_size.  We then
3984 3986   * track the total bytes of all such allocations, and the number of allocations
3985 3987   * to report at the end.  Once we have a list of caches, we walk through the
3986 3988   * allocated bufctls of each, and update our hash table accordingly.
3987 3989   */
3988 3990  
3989 3991  typedef struct kmowner {
3990 3992          struct kmowner *kmo_head;               /* First hash elt in bucket */
3991 3993          struct kmowner *kmo_next;               /* Next hash elt in chain */
3992 3994          size_t kmo_signature;                   /* Hash table signature */
3993 3995          uint_t kmo_num;                         /* Number of allocations */
3994 3996          size_t kmo_data_size;                   /* Size of each allocation */
3995 3997          size_t kmo_total_size;                  /* Total bytes of allocation */
3996 3998          int kmo_depth;                          /* Depth of stack trace */
3997 3999          uintptr_t kmo_stack[KMEM_STACK_DEPTH];  /* Stack trace */
3998 4000  } kmowner_t;
3999 4001  
4000 4002  typedef struct kmusers {
4001 4003          uintptr_t kmu_addr;                     /* address of interest */
4002 4004          const kmem_cache_t *kmu_cache;          /* Current kmem cache */
4003 4005          kmowner_t *kmu_hash;                    /* Hash table of owners */
4004 4006          int kmu_nelems;                         /* Number of entries in use */
4005 4007          int kmu_size;                           /* Total number of entries */
4006 4008  } kmusers_t;
4007 4009  
4008 4010  static void
4009 4011  kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
4010 4012      size_t size, size_t data_size)
4011 4013  {
4012 4014          int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4013 4015          size_t bucket, signature = data_size;
4014 4016          kmowner_t *kmo, *kmoend;
4015 4017  
4016 4018          /*
4017 4019           * If the hash table is full, double its size and rehash everything.
4018 4020           */
4019 4021          if (kmu->kmu_nelems >= kmu->kmu_size) {
4020 4022                  int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4021 4023  
4022 4024                  kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4023 4025                  bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4024 4026                  kmu->kmu_hash = kmo;
4025 4027                  kmu->kmu_size = s;
4026 4028  
4027 4029                  kmoend = kmu->kmu_hash + kmu->kmu_size;
4028 4030                  for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4029 4031                          kmo->kmo_head = NULL;
4030 4032  
4031 4033                  kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4032 4034                  for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4033 4035                          bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4034 4036                          kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4035 4037                          kmu->kmu_hash[bucket].kmo_head = kmo;
4036 4038                  }
4037 4039          }
4038 4040  
4039 4041          /*
4040 4042           * Finish computing the hash signature from the stack trace, and then
4041 4043           * see if the owner is in the hash table.  If so, update our stats.
4042 4044           */
4043 4045          for (i = 0; i < depth; i++)
4044 4046                  signature += bcp->bc_stack[i];
4045 4047  
4046 4048          bucket = signature & (kmu->kmu_size - 1);
4047 4049  
4048 4050          for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4049 4051                  if (kmo->kmo_signature == signature) {
4050 4052                          size_t difference = 0;
4051 4053  
4052 4054                          difference |= kmo->kmo_data_size - data_size;
4053 4055                          difference |= kmo->kmo_depth - depth;
4054 4056  
4055 4057                          for (i = 0; i < depth; i++) {
4056 4058                                  difference |= kmo->kmo_stack[i] -
4057 4059                                      bcp->bc_stack[i];
4058 4060                          }
4059 4061  
4060 4062                          if (difference == 0) {
4061 4063                                  kmo->kmo_total_size += size;
4062 4064                                  kmo->kmo_num++;
4063 4065                                  return;
4064 4066                          }
4065 4067                  }
4066 4068          }
4067 4069  
4068 4070          /*
4069 4071           * If the owner is not yet hashed, grab the next element and fill it
4070 4072           * in based on the allocation information.
4071 4073           */
4072 4074          kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4073 4075          kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4074 4076          kmu->kmu_hash[bucket].kmo_head = kmo;
4075 4077  
4076 4078          kmo->kmo_signature = signature;
4077 4079          kmo->kmo_num = 1;
4078 4080          kmo->kmo_data_size = data_size;
4079 4081          kmo->kmo_total_size = size;
4080 4082          kmo->kmo_depth = depth;
4081 4083  
4082 4084          for (i = 0; i < depth; i++)
4083 4085                  kmo->kmo_stack[i] = bcp->bc_stack[i];
4084 4086  }
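
Two details of kmu_add are worth making explicit. First, because kmu_size is always a power of two (1024 initially, doubling thereafter), the bucket mask is just a cheap modulus; a sketch:

    /* for any power-of-two kmu_size, these compute the same bucket */
    bucket = signature % kmu->kmu_size;
    bucket = signature & (kmu->kmu_size - 1);

Second, the difference accumulator relies on the fact that x - y is zero exactly when x == y (unsigned wraparound preserves this), so OR-ing the per-field differences yields zero only when data_size, depth, and every stack frame all match.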
4085 4087  
4086 4088  /*
4087 4089   * When ::kmausers is invoked without the -f flag, we simply update our hash
4088 4090   * table with the information from each allocated bufctl.
4089 4091   */
4090 4092  /*ARGSUSED*/
4091 4093  static int
4092 4094  kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4093 4095  {
4094 4096          const kmem_cache_t *cp = kmu->kmu_cache;
4095 4097  
4096 4098          kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4097 4099          return (WALK_NEXT);
4098 4100  }
4099 4101  
4100 4102  /*
4101 4103   * When ::kmausers is invoked with the -f flag, we print out the information
4102 4104   * for each bufctl as well as updating the hash table.
4103 4105   */
4104 4106  static int
4105 4107  kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4106 4108  {
4107 4109          int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4108 4110          const kmem_cache_t *cp = kmu->kmu_cache;
4109 4111          kmem_bufctl_t bufctl;
4110 4112  
4111 4113          if (kmu->kmu_addr) {
4112 4114                  if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4113 4115                          mdb_warn("couldn't read bufctl at %p", addr);
4114 4116                  else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4115 4117                      kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4116 4118                      cp->cache_bufsize)
4117 4119                          return (WALK_NEXT);
4118 4120          }
4119 4121  
4120 4122          mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4121 4123              cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4122 4124  
4123 4125          for (i = 0; i < depth; i++)
4124 4126                  mdb_printf("\t %a\n", bcp->bc_stack[i]);
4125 4127  
4126 4128          kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4127 4129          return (WALK_NEXT);
4128 4130  }
4129 4131  
4130 4132  /*
4131 4133   * We sort our results by total allocated size (largest first) before printing.
4132 4134   */
4133 4135  static int
4134 4136  kmownercmp(const void *lp, const void *rp)
4135 4137  {
4136 4138          const kmowner_t *lhs = lp;
4137 4139          const kmowner_t *rhs = rp;
4138 4140  
4139 4141          return ((rhs->kmo_total_size > lhs->kmo_total_size) - (rhs->kmo_total_size < lhs->kmo_total_size));
4140 4142  }
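
The comparator uses the (a > b) - (a < b) idiom rather than plain subtraction because kmo_total_size is a size_t: truncating an unsigned difference into the comparator's int return value could misorder owners whose totals differ widely. With rhs tested first, the result is -1, 0, or 1, and larger totals sort to the front.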
4141 4143  
4142 4144  /*
4143 4145   * The main engine of ::kmausers is relatively straightforward: First we
4144 4146   * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4145 4147   * iterate over the allocated bufctls of each cache in the list.  Finally,
4146 4148   * we sort and print our results.
4147 4149   */
4148 4150  /*ARGSUSED*/
4149 4151  int
4150 4152  kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4151 4153  {
4152 4154          int mem_threshold = 8192;       /* Minimum # bytes for printing */
4153 4155          int cnt_threshold = 100;        /* Minimum # blocks for printing */
4154 4156          int audited_caches = 0;         /* Number of KMF_AUDIT caches found */
4155 4157          int do_all_caches = 1;          /* Do all caches (no arguments) */
4156 4158          int opt_e = FALSE;              /* Include "small" users */
4157 4159          int opt_f = FALSE;              /* Print stack traces */
4158 4160  
4159 4161          mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4160 4162          kmowner_t *kmo, *kmoend;
4161 4163          int i, oelems;
4162 4164  
4163 4165          kmclist_t kmc;
4164 4166          kmusers_t kmu;
4165 4167  
4166 4168          bzero(&kmc, sizeof (kmc));
4167 4169          bzero(&kmu, sizeof (kmu));
4168 4170  
4169 4171          while ((i = mdb_getopts(argc, argv,
4170 4172              'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4171 4173              'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4172 4174  
4173 4175                  argv += i;      /* skip past options we just processed */
4174 4176                  argc -= i;      /* adjust argc */
4175 4177  
4176 4178                  if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4177 4179                          return (DCMD_USAGE);
4178 4180  
4179 4181                  oelems = kmc.kmc_nelems;
4180 4182                  kmc.kmc_name = argv->a_un.a_str;
4181 4183                  (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4182 4184  
4183 4185                  if (kmc.kmc_nelems == oelems) {
4184 4186                          mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4185 4187                          return (DCMD_ERR);
4186 4188                  }
4187 4189  
4188 4190                  do_all_caches = 0;
4189 4191                  argv++;
4190 4192                  argc--;
4191 4193          }
4192 4194  
4193 4195          if (flags & DCMD_ADDRSPEC) {
4194 4196                  opt_f = TRUE;
4195 4197                  kmu.kmu_addr = addr;
4196 4198          } else {
4197 4199                  kmu.kmu_addr = 0;
4198 4200          }
4199 4201  
4200 4202          if (opt_e)
4201 4203                  mem_threshold = cnt_threshold = 0;
4202 4204  
4203 4205          if (opt_f)
4204 4206                  callback = (mdb_walk_cb_t)kmause2;
4205 4207  
4206 4208          if (do_all_caches) {
4207 4209                  kmc.kmc_name = NULL; /* match all cache names */
4208 4210                  (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4209 4211          }
4210 4212  
4211 4213          for (i = 0; i < kmc.kmc_nelems; i++) {
4212 4214                  uintptr_t cp = kmc.kmc_caches[i];
4213 4215                  kmem_cache_t c;
4214 4216  
4215 4217                  if (mdb_vread(&c, sizeof (c), cp) == -1) {
4216 4218                          mdb_warn("failed to read cache at %p", cp);
4217 4219                          continue;
4218 4220                  }
4219 4221  
4220 4222                  if (!(c.cache_flags & KMF_AUDIT)) {
4221 4223                          if (!do_all_caches) {
4222 4224                                  mdb_warn("KMF_AUDIT is not enabled for %s\n",
4223 4225                                      c.cache_name);
4224 4226                          }
4225 4227                          continue;
4226 4228                  }
4227 4229  
4228 4230                  kmu.kmu_cache = &c;
4229 4231                  (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4230 4232                  audited_caches++;
4231 4233          }
4232 4234  
4233 4235          if (audited_caches == 0 && do_all_caches) {
4234 4236                  mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4235 4237                  return (DCMD_ERR);
4236 4238          }
4237 4239  
4238 4240          qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4239 4241          kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4240 4242  
4241 4243          for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4242 4244                  if (kmo->kmo_total_size < mem_threshold &&
4243 4245                      kmo->kmo_num < cnt_threshold)
4244 4246                          continue;
4245 4247                  mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4246 4248                      kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4247 4249                  for (i = 0; i < kmo->kmo_depth; i++)
4248 4250                          mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4249 4251          }
4250 4252  
4251 4253          return (DCMD_OK);
4252 4254  }
4253 4255  
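A usage sketch (the cache name and address are illustrative):

    > ::kmausers -e kmem_alloc_256
    > fffffe0bc4bf2a40::kmausers

The first form reports every owner in the named cache, small ones included; the second searches all audited caches for allocations containing the given address and, per the DCMD_ADDRSPEC handling above, implies -f.
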
4254 4256  void
4255 4257  kmausers_help(void)
4256 4258  {
4257 4259          mdb_printf(
4258 4260              "Displays the largest users of the kmem allocator, grouped by\n"
4259 4261              "stack trace and sorted by total allocated size.  If one or more\n"
4260 4262              "caches are specified, only those caches are searched; by default,\n"
4261 4263              "all caches are searched.  If an address is specified, only those\n"
4262 4264              "allocations which include the given address are displayed, and\n"
4263 4265              "-f is implied.\n"
4264 4266              "\n"
4265 4267              "\t-e\tInclude all users, not just the largest\n"
4266 4268              "\t-f\tDisplay individual allocations.  By default, users are\n"
4267 4269              "\t\tgrouped by stack trace\n");
4268 4270  }
4269 4271  
4270 4272  static int
4271 4273  kmem_ready_check(void)
4272 4274  {
4273 4275          int ready;
4274 4276  
4275 4277          if (mdb_readvar(&ready, "kmem_ready") < 0)
4276 4278                  return (-1); /* errno is set for us */
4277 4279  
4278 4280          return (ready);
4279 4281  }
4280 4282  
4281 4283  void
4282 4284  kmem_statechange(void)
4283 4285  {
4284 4286          static int been_ready = 0;
4285 4287  
4286 4288          if (been_ready)
4287 4289                  return;
4288 4290  
4289 4291          if (kmem_ready_check() <= 0)
4290 4292                  return;
4291 4293  
4292 4294          been_ready = 1;
4293 4295          (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4294 4296  }
4295 4297  
4296 4298  void
4297 4299  kmem_init(void)
4298 4300  {
4299 4301          mdb_walker_t w = {
4300 4302                  "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4301 4303                  list_walk_step, list_walk_fini
4302 4304          };
4303 4305  
4304 4306          /*
4305 4307           * If kmem is ready, we'll need to invoke the kmem_cache walker
4306 4308           * immediately.  Walkers in the linkage structure won't be ready until
4307 4309           * _mdb_init returns, so we'll need to add this one manually.  If kmem
4308 4310           * is ready, we'll use the walker to initialize the caches.  If kmem
4309 4311           * isn't ready, we'll register a callback that will allow us to defer
4310 4312           * cache walking until it is.
4311 4313           */
4312 4314          if (mdb_add_walker(&w) != 0) {
4313 4315                  mdb_warn("failed to add kmem_cache walker");
4314 4316                  return;
4315 4317          }
4316 4318  
4317 4319          kmem_statechange();
4318 4320  
4319 4321          /* register our ::whatis handlers */
4320 4322          mdb_whatis_register("modules", whatis_run_modules, NULL,
4321 4323              WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4322 4324          mdb_whatis_register("threads", whatis_run_threads, NULL,
4323 4325              WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4324 4326          mdb_whatis_register("pages", whatis_run_pages, NULL,
4325 4327              WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4326 4328          mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4327 4329              WHATIS_PRIO_ALLOCATOR, 0);
4328 4330          mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4329 4331              WHATIS_PRIO_ALLOCATOR, 0);
4330 4332  }
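
For context, walkers normally reach mdb through the module linkage table, which only takes effect once _mdb_init returns; a hypothetical sketch of that path shows why kmem_init must call mdb_add_walker() by hand:

    /* hypothetical module: walkers declared here are unavailable
     * until after _mdb_init has returned */
    static const mdb_walker_t walkers[] = {
            { "kmem_cache", "walk list of kmem caches",
                kmem_cache_walk_init, list_walk_step, list_walk_fini },
            { NULL }
    };

    static const mdb_modinfo_t modinfo = {
            MDB_API_VERSION, NULL, walkers
    };

    const mdb_modinfo_t *
    _mdb_init(void)
    {
            /* too early to mdb_walk("kmem_cache", ...) via the table */
            return (&modinfo);
    }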
4331 4333  
4332 4334  typedef struct whatthread {
4333 4335          uintptr_t       wt_target;
4334 4336          int             wt_verbose;
4335 4337  } whatthread_t;
4336 4338  
4337 4339  static int
4338 4340  whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4339 4341  {
4340 4342          uintptr_t current, data;
4341 4343  
4342 4344          if (t->t_stkbase == NULL)
4343 4345                  return (WALK_NEXT);
4344 4346  
4345 4347          /*
4346 4348           * Warn about swapped out threads, but drive on anyway
4347 4349           */
4348 4350          if (!(t->t_schedflag & TS_LOAD)) {
4349 4351                  mdb_warn("thread %p's stack swapped out\n", addr);
4350 4352                  return (WALK_NEXT);
4351 4353          }
4352 4354  
4353 4355          /*
4354 4356           * Search the thread's stack for the given pointer.  Note that it would
4355 4357           * be more efficient to follow ::kgrep's lead and read in page-sized
4356 4358           * chunks, but this routine is already fast and simple.
4357 4359           */
4358 4360          for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4359 4361              current += sizeof (uintptr_t)) {
4360 4362                  if (mdb_vread(&data, sizeof (data), current) == -1) {
4361 4363                          mdb_warn("couldn't read thread %p's stack at %p",
4362 4364                              addr, current);
4363 4365                          return (WALK_ERR);
4364 4366                  }
4365 4367  
4366 4368                  if (data == w->wt_target) {
4367 4369                          if (w->wt_verbose) {
4368 4370                                  mdb_printf("%p in thread %p's stack%s\n",
4369 4371                                      current, addr, stack_active(t, current));
4370 4372                          } else {
4371 4373                                  mdb_printf("%#lr\n", addr);
4372 4374                                  return (WALK_NEXT);
4373 4375                          }
4374 4376                  }
4375 4377          }
4376 4378  
4377 4379          return (WALK_NEXT);
4378 4380  }
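
A sketch of the page-at-a-time variant the comment above alludes to (WT_CHUNK and whatthread_scan_chunked are made-up names; untested):

    #define WT_CHUNK        1024            /* bytes per target read */

    static int
    whatthread_scan_chunked(uintptr_t base, uintptr_t limit,
        const whatthread_t *w)
    {
            uintptr_t buf[WT_CHUNK / sizeof (uintptr_t)];
            size_t i, n, nbytes;

            while (base < limit) {
                    nbytes = MIN(sizeof (buf), limit - base);
                    if (mdb_vread(buf, nbytes, base) == -1)
                            return (WALK_ERR);
                    n = nbytes / sizeof (uintptr_t);
                    for (i = 0; i < n; i++) {
                            if (buf[i] == w->wt_target)
                                    mdb_printf("%p\n",
                                        base + i * sizeof (uintptr_t));
                    }
                    base += nbytes;
            }
            return (WALK_NEXT);
    }

One mdb_vread per chunk replaces one per word, at the cost of a little bookkeeping for the partial chunk at the top of the stack.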
4379 4381  
4380 4382  int
4381 4383  whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4382 4384  {
4383 4385          whatthread_t w;
4384 4386  
4385 4387          if (!(flags & DCMD_ADDRSPEC))
4386 4388                  return (DCMD_USAGE);
4387 4389  
4388 4390          w.wt_verbose = FALSE;
4389 4391          w.wt_target = addr;
4390 4392  
4391 4393          if (mdb_getopts(argc, argv,
4392 4394              'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4393 4395                  return (DCMD_USAGE);
4394 4396  
4395 4397          if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4396 4398              == -1) {
4397 4399                  mdb_warn("couldn't walk threads");
4398 4400                  return (DCMD_ERR);
4399 4401          }
4400 4402  
4401 4403          return (DCMD_OK);
4402 4404  }
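
A usage sketch (the address is an example buffer pointer):

    > fffffe0bc4bf2a40::whatthread -v

Without -v, each thread whose stack holds the value is named once and the scan of that thread stops at the first hit; with -v, every matching stack word is reported, along with whether it lies in the active portion of the stack.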