Print this page
    
9525 kmem_dump_size is a corrupting influence
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Dan McDonald <danmcd@joyent.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/os/dumpsubr.c
          +++ new/usr/src/uts/common/os/dumpsubr.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright 2016 Joyent, Inc.
       24 + * Copyright 2018 Joyent, Inc.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/param.h>
  29   29  #include <sys/systm.h>
  30   30  #include <sys/vm.h>
  31   31  #include <sys/proc.h>
  32   32  #include <sys/file.h>
  33   33  #include <sys/conf.h>
  34   34  #include <sys/kmem.h>
  35   35  #include <sys/mem.h>
  36   36  #include <sys/mman.h>
  37   37  #include <sys/vnode.h>
  38   38  #include <sys/errno.h>
  39   39  #include <sys/memlist.h>
  40   40  #include <sys/dumphdr.h>
  41   41  #include <sys/dumpadm.h>
  42   42  #include <sys/ksyms.h>
  43   43  #include <sys/compress.h>
  44   44  #include <sys/stream.h>
  45   45  #include <sys/strsun.h>
  46   46  #include <sys/cmn_err.h>
  47   47  #include <sys/bitmap.h>
  48   48  #include <sys/modctl.h>
  49   49  #include <sys/utsname.h>
  50   50  #include <sys/systeminfo.h>
  51   51  #include <sys/vmem.h>
  52   52  #include <sys/log.h>
  53   53  #include <sys/var.h>
  54   54  #include <sys/debug.h>
  55   55  #include <sys/sunddi.h>
  56   56  #include <fs/fs_subr.h>
  57   57  #include <sys/fs/snode.h>
  58   58  #include <sys/ontrap.h>
  59   59  #include <sys/panic.h>
  60   60  #include <sys/dkio.h>
  61   61  #include <sys/vtoc.h>
  62   62  #include <sys/errorq.h>
  63   63  #include <sys/fm/util.h>
  64   64  #include <sys/fs/zfs.h>
  65   65  
  66   66  #include <vm/hat.h>
  
    | 
      ↓ open down ↓ | 
    32 lines elided | 
    
      ↑ open up ↑ | 
  
  67   67  #include <vm/as.h>
  68   68  #include <vm/page.h>
  69   69  #include <vm/pvn.h>
  70   70  #include <vm/seg.h>
  71   71  #include <vm/seg_kmem.h>
  72   72  #include <sys/clock_impl.h>
  73   73  #include <sys/hold_page.h>
  74   74  
  75   75  #include <bzip2/bzlib.h>
  76   76  
       77 +#define ONE_GIG (1024 * 1024 * 1024UL)
       78 +
  77   79  /*
  78   80   * Crash dump time is dominated by disk write time.  To reduce this,
  79   81   * the stronger compression method bzip2 is applied to reduce the dump
  80   82   * size and hence reduce I/O time.  However, bzip2 is much more
  81   83   * computationally expensive than the existing lzjb algorithm, so to
  82   84   * avoid increasing compression time, CPUs that are otherwise idle
  83   85   * during panic are employed to parallelize the compression task.
  84   86   * Many helper CPUs are needed to prevent bzip2 from being a
  85   87   * bottleneck, and on systems with too few CPUs, the lzjb algorithm is
  86   88   * parallelized instead. Lastly, I/O and compression are performed by
  87   89   * different CPUs, and are hence overlapped in time, unlike the older
  88   90   * serial code.
  89   91   *
  90   92   * Another important consideration is the speed of the dump
  91   93   * device. Faster disks need less CPUs in order to benefit from
  92   94   * parallel lzjb versus parallel bzip2. Therefore, the CPU count
  93   95   * threshold for switching from parallel lzjb to parallel bzip2 is
  94   96   * elevated for faster disks. The dump device speed is adduced from
  95   97   * the setting for dumpbuf.iosize, see dump_update_clevel.
  96   98   */
  97   99  
  98  100  /*
  99  101   * exported vars
 100  102   */
 101  103  kmutex_t        dump_lock;              /* lock for dump configuration */
 102  104  dumphdr_t       *dumphdr;               /* dump header */
 103  105  int             dump_conflags = DUMP_KERNEL; /* dump configuration flags */
 104  106  vnode_t         *dumpvp;                /* dump device vnode pointer */
 105  107  u_offset_t      dumpvp_size;            /* size of dump device, in bytes */
 106  108  char            *dumppath;              /* pathname of dump device */
 107  109  int             dump_timeout = 120;     /* timeout for dumping pages */
 108  110  int             dump_timeleft;          /* portion of dump_timeout remaining */
 109  111  int             dump_ioerr;             /* dump i/o error */
 110  112  int             dump_check_used;        /* enable check for used pages */
 111  113  char        *dump_stack_scratch; /* scratch area for saving stack summary */
 112  114  
 113  115  /*
 114  116   * Tunables for dump compression and parallelism. These can be set via
 115  117   * /etc/system.
 116  118   *
 117  119   * dump_ncpu_low        number of helpers for parallel lzjb
 118  120   *      This is also the minimum configuration.
 119  121   *
 120  122   * dump_bzip2_level     bzip2 compression level: 1-9
 121  123   *      Higher numbers give greater compression, but take more memory
 122  124   *      and time. Memory used per helper is ~(dump_bzip2_level * 1MB).
 123  125   *
 124  126   * dump_plat_mincpu     the cross-over limit for using bzip2 (per platform):
 125  127   *      if dump_plat_mincpu == 0, then always do single threaded dump
 126  128   *      if ncpu >= dump_plat_mincpu then try to use bzip2
 127  129   *
 128  130   * dump_metrics_on      if set, metrics are collected in the kernel, passed
 129  131   *      to savecore via the dump file, and recorded by savecore in
 130  132   *      METRICS.txt.
  
    | 
      ↓ open down ↓ | 
    44 lines elided | 
    
      ↑ open up ↑ | 
  
 131  133   */
 132  134  uint_t dump_ncpu_low = 4;       /* minimum config for parallel lzjb */
 133  135  uint_t dump_bzip2_level = 1;    /* bzip2 level (1-9) */
 134  136  
 135  137  /* Use dump_plat_mincpu_default unless this variable is set by /etc/system */
 136  138  #define MINCPU_NOT_SET  ((uint_t)-1)
 137  139  uint_t dump_plat_mincpu = MINCPU_NOT_SET;
 138  140  
 139  141  /* tunables for pre-reserved heap */
 140  142  uint_t dump_kmem_permap = 1024;
 141      -uint_t dump_kmem_pages = 8;
      143 +uint_t dump_kmem_pages = 0;
 142  144  
 143  145  /* Define multiple buffers per helper to avoid stalling */
 144  146  #define NCBUF_PER_HELPER        2
 145  147  #define NCMAP_PER_HELPER        4
 146  148  
 147  149  /* minimum number of helpers configured */
 148  150  #define MINHELPERS      (dump_ncpu_low)
 149  151  #define MINCBUFS        (MINHELPERS * NCBUF_PER_HELPER)
 150  152  
 151  153  /*
 152  154   * Define constant parameters.
 153  155   *
 154  156   * CBUF_SIZE            size of an output buffer
 155  157   *
 156  158   * CBUF_MAPSIZE         size of virtual range for mapping pages
 157  159   *
 158  160   * CBUF_MAPNP           size of virtual range in pages
 159  161   *
 160  162   */
 161  163  #define DUMP_1KB        ((size_t)1 << 10)
 162  164  #define DUMP_1MB        ((size_t)1 << 20)
 163  165  #define CBUF_SIZE       ((size_t)1 << 17)
 164  166  #define CBUF_MAPSHIFT   (22)
 165  167  #define CBUF_MAPSIZE    ((size_t)1 << CBUF_MAPSHIFT)
 166  168  #define CBUF_MAPNP      ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT))
 167  169  
 168  170  /*
 169  171   * Compression metrics are accumulated nano-second subtotals. The
 170  172   * results are normalized by the number of pages dumped. A report is
 171  173   * generated when dumpsys() completes and is saved in the dump image
 172  174   * after the trailing dump header.
 173  175   *
 174  176   * Metrics are always collected. Set the variable dump_metrics_on to
 175  177   * cause metrics to be saved in the crash file, where savecore will
 176  178   * save it in the file METRICS.txt.
 177  179   */
 178  180  #define PERPAGES \
 179  181          PERPAGE(bitmap) PERPAGE(map) PERPAGE(unmap) \
 180  182          PERPAGE(copy) PERPAGE(compress) \
 181  183          PERPAGE(write) \
 182  184          PERPAGE(inwait) PERPAGE(outwait)
 183  185  
 184  186  typedef struct perpage {
 185  187  #define PERPAGE(x) hrtime_t x;
 186  188          PERPAGES
 187  189  #undef PERPAGE
 188  190  } perpage_t;
 189  191  
 190  192  /*
 191  193   * This macro controls the code generation for collecting dump
 192  194   * performance information. By default, the code is generated, but
 193  195   * automatic saving of the information is disabled. If dump_metrics_on
 194  196   * is set to 1, the timing information is passed to savecore via the
 195  197   * crash file, where it is appended to the file dump-dir/METRICS.txt.
 196  198   */
 197  199  #define COLLECT_METRICS
 198  200  
 199  201  #ifdef COLLECT_METRICS
 200  202  uint_t dump_metrics_on = 0;     /* set to 1 to enable recording metrics */
 201  203  
 202  204  #define HRSTART(v, m)           v##ts.m = gethrtime()
 203  205  #define HRSTOP(v, m)            v.m += gethrtime() - v##ts.m
 204  206  #define HRBEGIN(v, m, s)        v##ts.m = gethrtime(); v.size += s
 205  207  #define HREND(v, m)             v.m += gethrtime() - v##ts.m
 206  208  #define HRNORM(v, m, n)         v.m /= (n)
 207  209  
 208  210  #else
 209  211  #define HRSTART(v, m)
 210  212  #define HRSTOP(v, m)
 211  213  #define HRBEGIN(v, m, s)
 212  214  #define HREND(v, m)
 213  215  #define HRNORM(v, m, n)
 214  216  #endif  /* COLLECT_METRICS */
 215  217  
 216  218  /*
 217  219   * Buffers for copying and compressing memory pages.
 218  220   *
 219  221   * cbuf_t buffer controllers: used for both input and output.
 220  222   *
 221  223   * The buffer state indicates how it is being used:
 222  224   *
 223  225   * CBUF_FREEMAP: CBUF_MAPSIZE virtual address range is available for
 224  226   * mapping input pages.
 225  227   *
 226  228   * CBUF_INREADY: input pages are mapped and ready for compression by a
 227  229   * helper.
 228  230   *
 229  231   * CBUF_USEDMAP: mapping has been consumed by a helper. Needs unmap.
 230  232   *
 231  233   * CBUF_FREEBUF: CBUF_SIZE output buffer, which is available.
 232  234   *
 233  235   * CBUF_WRITE: CBUF_SIZE block of compressed pages from a helper,
 234  236   * ready to write out.
 235  237   *
 236  238   * CBUF_ERRMSG: CBUF_SIZE block of error messages from a helper
 237  239   * (reports UE errors.)
 238  240   */
 239  241  
 240  242  typedef enum cbufstate {
 241  243          CBUF_FREEMAP,
 242  244          CBUF_INREADY,
 243  245          CBUF_USEDMAP,
 244  246          CBUF_FREEBUF,
 245  247          CBUF_WRITE,
 246  248          CBUF_ERRMSG
 247  249  } cbufstate_t;
 248  250  
 249  251  typedef struct cbuf cbuf_t;
 250  252  
 251  253  struct cbuf {
 252  254          cbuf_t *next;                   /* next in list */
 253  255          cbufstate_t state;              /* processing state */
 254  256          size_t used;                    /* amount used */
 255  257          size_t size;                    /* mem size */
 256  258          char *buf;                      /* kmem or vmem */
 257  259          pgcnt_t pagenum;                /* index to pfn map */
 258  260          pgcnt_t bitnum;                 /* first set bitnum */
 259  261          pfn_t pfn;                      /* first pfn in mapped range */
 260  262          int off;                        /* byte offset to first pfn */
 261  263  };
 262  264  
 263  265  static char dump_osimage_uuid[36 + 1];
 264  266  
 265  267  #define isdigit(ch)     ((ch) >= '0' && (ch) <= '9')
 266  268  #define isxdigit(ch)    (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
 267  269                          ((ch) >= 'A' && (ch) <= 'F'))
 268  270  
 269  271  /*
 270  272   * cqueue_t queues: a uni-directional channel for communication
 271  273   * from the master to helper tasks or vice-versa using put and
 272  274   * get primitives. Both mappings and data buffers are passed via
 273  275   * queues. Producers close a queue when done. The number of
 274  276   * active producers is reference counted so the consumer can
 275  277   * detect end of data. Concurrent access is mediated by atomic
 276  278   * operations for panic dump, or mutex/cv for live dump.
 277  279   *
  278  280   * There are four queues, used as follows:
 279  281   *
 280  282   * Queue                Dataflow                NewState
 281  283   * --------------------------------------------------
 282  284   * mainq                master -> master        FREEMAP
 283  285   * master has initialized or unmapped an input buffer
 284  286   * --------------------------------------------------
 285  287   * helperq              master -> helper        INREADY
 286  288   * master has mapped input for use by helper
 287  289   * --------------------------------------------------
 288  290   * mainq                master <- helper        USEDMAP
 289  291   * helper is done with input
 290  292   * --------------------------------------------------
 291  293   * freebufq             master -> helper        FREEBUF
 292  294   * master has initialized or written an output buffer
 293  295   * --------------------------------------------------
 294  296   * mainq                master <- helper        WRITE
 295  297   * block of compressed pages from a helper
 296  298   * --------------------------------------------------
 297  299   * mainq                master <- helper        ERRMSG
 298  300   * error messages from a helper (memory error case)
 299  301   * --------------------------------------------------
 300  302   * writerq              master <- master        WRITE
 301  303   * non-blocking queue of blocks to write
 302  304   * --------------------------------------------------
 303  305   */
 304  306  typedef struct cqueue {
 305  307          cbuf_t *volatile first;         /* first in list */
 306  308          cbuf_t *last;                   /* last in list */
 307  309          hrtime_t ts;                    /* timestamp */
 308  310          hrtime_t empty;                 /* total time empty */
 309  311          kmutex_t mutex;                 /* live state lock */
 310  312          kcondvar_t cv;                  /* live wait var */
 311  313          lock_t spinlock;                /* panic mode spin lock */
 312  314          volatile uint_t open;           /* producer ref count */
 313  315  } cqueue_t;
 314  316  
 315  317  /*
 316  318   * Convenience macros for using the cqueue functions
 317  319   * Note that the caller must have defined "dumpsync_t *ds"
 318  320   */
 319  321  #define CQ_IS_EMPTY(q)                                  \
 320  322          (ds->q.first == NULL)
 321  323  
 322  324  #define CQ_OPEN(q)                                      \
 323  325          atomic_inc_uint(&ds->q.open)
 324  326  
 325  327  #define CQ_CLOSE(q)                                     \
 326  328          dumpsys_close_cq(&ds->q, ds->live)
 327  329  
 328  330  #define CQ_PUT(q, cp, st)                               \
 329  331          dumpsys_put_cq(&ds->q, cp, st, ds->live)
 330  332  
 331  333  #define CQ_GET(q)                                       \
 332  334          dumpsys_get_cq(&ds->q, ds->live)
 333  335  
 334  336  /*
 335  337   * Dynamic state when dumpsys() is running.
 336  338   */
 337  339  typedef struct dumpsync {
 338  340          pgcnt_t npages;                 /* subtotal of pages dumped */
 339  341          pgcnt_t pages_mapped;           /* subtotal of pages mapped */
 340  342          pgcnt_t pages_used;             /* subtotal of pages used per map */
 341  343          size_t nwrite;                  /* subtotal of bytes written */
 342  344          uint_t live;                    /* running live dump */
 343  345          uint_t neednl;                  /* will need to print a newline */
 344  346          uint_t percent;                 /* dump progress */
 345  347          uint_t percent_done;            /* dump progress reported */
 346  348          int sec_done;                   /* dump progress last report time */
 347  349          cqueue_t freebufq;              /* free kmem bufs for writing */
 348  350          cqueue_t mainq;                 /* input for main task */
 349  351          cqueue_t helperq;               /* input for helpers */
 350  352          cqueue_t writerq;               /* input for writer */
 351  353          hrtime_t start;                 /* start time */
 352  354          hrtime_t elapsed;               /* elapsed time when completed */
 353  355          hrtime_t iotime;                /* time spent writing nwrite bytes */
 354  356          hrtime_t iowait;                /* time spent waiting for output */
 355  357          hrtime_t iowaitts;              /* iowait timestamp */
 356  358          perpage_t perpage;              /* metrics */
 357  359          perpage_t perpagets;
 358  360          int dumpcpu;                    /* master cpu */
 359  361  } dumpsync_t;
 360  362  
 361  363  static dumpsync_t dumpsync;             /* synchronization vars */
 362  364  
 363  365  /*
 364  366   * helper_t helpers: contains the context for a stream. CPUs run in
 365  367   * parallel at dump time; each CPU creates a single stream of
 366  368   * compression data.  Stream data is divided into CBUF_SIZE blocks.
 367  369   * The blocks are written in order within a stream. But, blocks from
 368  370   * multiple streams can be interleaved. Each stream is identified by a
 369  371   * unique tag.
 370  372   */
 371  373  typedef struct helper {
 372  374          int helper;                     /* bound helper id */
 373  375          int tag;                        /* compression stream tag */
 374  376          perpage_t perpage;              /* per page metrics */
 375  377          perpage_t perpagets;            /* per page metrics (timestamps) */
 376  378          taskqid_t taskqid;              /* live dump task ptr */
 377  379          int in, out;                    /* buffer offsets */
 378  380          cbuf_t *cpin, *cpout, *cperr;   /* cbuf objects in process */
 379  381          dumpsync_t *ds;                 /* pointer to sync vars */
 380  382          size_t used;                    /* counts input consumed */
 381  383          char *page;                     /* buffer for page copy */
 382  384          char *lzbuf;                    /* lzjb output */
 383  385          bz_stream bzstream;             /* bzip2 state */
 384  386  } helper_t;
 385  387  
 386  388  #define MAINHELPER      (-1)            /* helper is also the main task */
 387  389  #define FREEHELPER      (-2)            /* unbound helper */
 388  390  #define DONEHELPER      (-3)            /* helper finished */
 389  391  
 390  392  /*
 391  393   * configuration vars for dumpsys
 392  394   */
 393  395  typedef struct dumpcfg {
 394  396          int     threshold;      /* ncpu threshold for bzip2 */
 395  397          int     nhelper;        /* number of helpers */
 396  398          int     nhelper_used;   /* actual number of helpers used */
 397  399          int     ncmap;          /* number VA pages for compression */
 398  400          int     ncbuf;          /* number of bufs for compression */
 399  401          int     ncbuf_used;     /* number of bufs in use */
 400  402          uint_t  clevel;         /* dump compression level */
 401  403          helper_t *helper;       /* array of helpers */
 402  404          cbuf_t  *cmap;          /* array of input (map) buffers */
 403  405          cbuf_t  *cbuf;          /* array of output  buffers */
 404  406          ulong_t *helpermap;     /* set of dumpsys helper CPU ids */
 405  407          ulong_t *bitmap;        /* bitmap for marking pages to dump */
 406  408          ulong_t *rbitmap;       /* bitmap for used CBUF_MAPSIZE ranges */
 407  409          pgcnt_t bitmapsize;     /* size of bitmap */
 408  410          pgcnt_t rbitmapsize;    /* size of bitmap for ranges */
 409  411          pgcnt_t found4m;        /* number ranges allocated by dump */
 410  412          pgcnt_t foundsm;        /* number small pages allocated by dump */
 411  413          pid_t   *pids;          /* list of process IDs at dump time */
 412  414          size_t  maxsize;        /* memory size needed at dump time */
 413  415          size_t  maxvmsize;      /* size of reserved VM */
 414  416          char    *maxvm;         /* reserved VM for spare pages */
 415  417          lock_t  helper_lock;    /* protect helper state */
 416  418          char    helpers_wanted; /* flag to enable parallelism */
 417  419  } dumpcfg_t;
 418  420  
 419  421  static dumpcfg_t dumpcfg;       /* config vars */
 420  422  
 421  423  /*
 422  424   * The dump I/O buffer.
 423  425   *
  424  426   * There is one I/O buffer used by dumpvp_write and dumpvp_flush. It is
 425  427   * sized according to the optimum device transfer speed.
 426  428   */
 427  429  typedef struct dumpbuf {
 428  430          vnode_t *cdev_vp;       /* VCHR open of the dump device */
 429  431          len_t   vp_limit;       /* maximum write offset */
 430  432          offset_t vp_off;        /* current dump device offset */
 431  433          char    *cur;           /* dump write pointer */
 432  434          char    *start;         /* dump buffer address */
 433  435          char    *end;           /* dump buffer end */
 434  436          size_t  size;           /* size of dumpbuf in bytes */
 435  437          size_t  iosize;         /* best transfer size for device */
 436  438  } dumpbuf_t;
 437  439  
 438  440  dumpbuf_t dumpbuf;              /* I/O buffer */
 439  441  
 440  442  /*
 441  443   * The dump I/O buffer must be at least one page, at most xfer_size
 442  444   * bytes, and should scale with physmem in between.  The transfer size
 443  445   * passed in will either represent a global default (maxphys) or the
 444  446   * best size for the device.  The size of the dumpbuf I/O buffer is
 445  447   * limited by dumpbuf_limit (8MB by default) because the dump
 446  448   * performance saturates beyond a certain size.  The default is to
 447  449   * select 1/4096 of the memory.
 448  450   */
 449  451  static int      dumpbuf_fraction = 12;  /* memory size scale factor */
 450  452  static size_t   dumpbuf_limit = 8 * DUMP_1MB;   /* max I/O buf size */
 451  453  
 452  454  static size_t
 453  455  dumpbuf_iosize(size_t xfer_size)
 454  456  {
 455  457          size_t iosize = ptob(physmem >> dumpbuf_fraction);
 456  458  
 457  459          if (iosize < PAGESIZE)
 458  460                  iosize = PAGESIZE;
 459  461          else if (iosize > xfer_size)
 460  462                  iosize = xfer_size;
 461  463          if (iosize > dumpbuf_limit)
 462  464                  iosize = dumpbuf_limit;
 463  465          return (iosize & PAGEMASK);
 464  466  }
 465  467  
 466  468  /*
 467  469   * resize the I/O buffer
 468  470   */
 469  471  static void
 470  472  dumpbuf_resize(void)
 471  473  {
 472  474          char *old_buf = dumpbuf.start;
 473  475          size_t old_size = dumpbuf.size;
 474  476          char *new_buf;
 475  477          size_t new_size;
 476  478  
 477  479          ASSERT(MUTEX_HELD(&dump_lock));
 478  480  
 479  481          new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys));
 480  482          if (new_size <= old_size)
 481  483                  return; /* no need to reallocate buffer */
 482  484  
 483  485          new_buf = kmem_alloc(new_size, KM_SLEEP);
 484  486          dumpbuf.size = new_size;
 485  487          dumpbuf.start = new_buf;
 486  488          dumpbuf.end = new_buf + new_size;
 487  489          kmem_free(old_buf, old_size);
 488  490  }
 489  491  
 490  492  /*
 491  493   * dump_update_clevel is called when dumpadm configures the dump device.
 492  494   *      Calculate number of helpers and buffers.
 493  495   *      Allocate the minimum configuration for now.
 494  496   *
 495  497   * When the dump file is configured we reserve a minimum amount of
 496  498   * memory for use at crash time. But we reserve VA for all the memory
 497  499   * we really want in order to do the fastest dump possible. The VA is
 498  500   * backed by pages not being dumped, according to the bitmap. If
 499  501   * there is insufficient spare memory, however, we fall back to the
 500  502   * minimum.
 501  503   *
 502  504   * Live dump (savecore -L) always uses the minimum config.
 503  505   *
 504  506   * clevel 0 is single threaded lzjb
 505  507   * clevel 1 is parallel lzjb
 506  508   * clevel 2 is parallel bzip2
 507  509   *
 508  510   * The ncpu threshold is selected with dump_plat_mincpu.
 509  511   * On OPL, set_platform_defaults() overrides the sun4u setting.
 510  512   * The actual values are defined via DUMP_PLAT_*_MINCPU macros.
 511  513   *
 512  514   * Architecture         Threshold       Algorithm
 513  515   * sun4u                <  51           parallel lzjb
 514  516   * sun4u                >= 51           parallel bzip2(*)
 515  517   * sun4u OPL            <  8            parallel lzjb
 516  518   * sun4u OPL            >= 8            parallel bzip2(*)
 517  519   * sun4v                <  128          parallel lzjb
 518  520   * sun4v                >= 128          parallel bzip2(*)
 519  521   * x86                  < 11            parallel lzjb
 520  522   * x86                  >= 11           parallel bzip2(*)
 521  523   * 32-bit               N/A             single-threaded lzjb
 522  524   *
 523  525   * (*) bzip2 is only chosen if there is sufficient available
 524  526   * memory for buffers at dump time. See dumpsys_get_maxmem().
 525  527   *
 526  528   * Faster dump devices have larger I/O buffers. The threshold value is
 527  529   * increased according to the size of the dump I/O buffer, because
 528  530   * parallel lzjb performs better with faster disks. For buffers >= 1MB
 529  531   * the threshold is 3X; for buffers >= 256K threshold is 2X.
 530  532   *
 531  533   * For parallel dumps, the number of helpers is ncpu-1. The CPU
 532  534   * running panic runs the main task. For single-threaded dumps, the
 533  535   * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
 534  536   *
 535  537   * Need multiple buffers per helper so that they do not block waiting
 536  538   * for the main task.
 537  539   *                              parallel        single-threaded
 538  540   * Number of output buffers:    nhelper*2               1
 539  541   * Number of mapping buffers:   nhelper*4               1
 540  542   *
 541  543   */
 542  544  static void
 543  545  dump_update_clevel()
 544  546  {
 545  547          int tag;
 546  548          size_t bz2size;
 547  549          helper_t *hp, *hpend;
 548  550          cbuf_t *cp, *cpend;
 549  551          dumpcfg_t *old = &dumpcfg;
 550  552          dumpcfg_t newcfg = *old;
 551  553          dumpcfg_t *new = &newcfg;
 552  554  
 553  555          ASSERT(MUTEX_HELD(&dump_lock));
 554  556  
 555  557          /*
 556  558           * Free the previously allocated bufs and VM.
 557  559           */
 558  560          if (old->helper != NULL) {
 559  561  
 560  562                  /* helpers */
 561  563                  hpend = &old->helper[old->nhelper];
 562  564                  for (hp = old->helper; hp != hpend; hp++) {
 563  565                          if (hp->lzbuf != NULL)
 564  566                                  kmem_free(hp->lzbuf, PAGESIZE);
 565  567                          if (hp->page != NULL)
 566  568                                  kmem_free(hp->page, PAGESIZE);
 567  569                  }
 568  570                  kmem_free(old->helper, old->nhelper * sizeof (helper_t));
 569  571  
 570  572                  /* VM space for mapping pages */
 571  573                  cpend = &old->cmap[old->ncmap];
 572  574                  for (cp = old->cmap; cp != cpend; cp++)
 573  575                          vmem_xfree(heap_arena, cp->buf, CBUF_MAPSIZE);
 574  576                  kmem_free(old->cmap, old->ncmap * sizeof (cbuf_t));
 575  577  
 576  578                  /* output bufs */
 577  579                  cpend = &old->cbuf[old->ncbuf];
 578  580                  for (cp = old->cbuf; cp != cpend; cp++)
 579  581                          if (cp->buf != NULL)
 580  582                                  kmem_free(cp->buf, cp->size);
 581  583                  kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t));
 582  584  
 583  585                  /* reserved VM for dumpsys_get_maxmem */
 584  586                  if (old->maxvmsize > 0)
 585  587                          vmem_xfree(heap_arena, old->maxvm, old->maxvmsize);
 586  588          }
 587  589  
 588  590          /*
 589  591           * Allocate memory and VM.
 590  592           * One CPU runs dumpsys, the rest are helpers.
 591  593           */
 592  594          new->nhelper = ncpus - 1;
 593  595          if (new->nhelper < 1)
 594  596                  new->nhelper = 1;
 595  597  
 596  598          if (new->nhelper > DUMP_MAX_NHELPER)
 597  599                  new->nhelper = DUMP_MAX_NHELPER;
 598  600  
 599  601          /* use platform default, unless /etc/system overrides */
 600  602          if (dump_plat_mincpu == MINCPU_NOT_SET)
 601  603                  dump_plat_mincpu = dump_plat_mincpu_default;
 602  604  
 603  605          /* increase threshold for faster disks */
 604  606          new->threshold = dump_plat_mincpu;
 605  607          if (dumpbuf.iosize >= DUMP_1MB)
 606  608                  new->threshold *= 3;
 607  609          else if (dumpbuf.iosize >= (256 * DUMP_1KB))
 608  610                  new->threshold *= 2;
 609  611  
 610  612          /* figure compression level based upon the computed threshold. */
 611  613          if (dump_plat_mincpu == 0 || new->nhelper < 2) {
 612  614                  new->clevel = 0;
 613  615                  new->nhelper = 1;
 614  616          } else if ((new->nhelper + 1) >= new->threshold) {
 615  617                  new->clevel = DUMP_CLEVEL_BZIP2;
 616  618          } else {
 617  619                  new->clevel = DUMP_CLEVEL_LZJB;
 618  620          }
 619  621  
 620  622          if (new->clevel == 0) {
 621  623                  new->ncbuf = 1;
 622  624                  new->ncmap = 1;
 623  625          } else {
 624  626                  new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
 625  627                  new->ncmap = NCMAP_PER_HELPER * new->nhelper;
 626  628          }
 627  629  
 628  630          /*
 629  631           * Allocate new data structures and buffers for MINHELPERS,
 630  632           * and also figure the max desired size.
 631  633           */
 632  634          bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
 633  635          new->maxsize = 0;
 634  636          new->maxvmsize = 0;
 635  637          new->maxvm = NULL;
 636  638          tag = 1;
 637  639          new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
 638  640          hpend = &new->helper[new->nhelper];
 639  641          for (hp = new->helper; hp != hpend; hp++) {
 640  642                  hp->tag = tag++;
 641  643                  if (hp < &new->helper[MINHELPERS]) {
 642  644                          hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
 643  645                          hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);
 644  646                  } else if (new->clevel < DUMP_CLEVEL_BZIP2) {
 645  647                          new->maxsize += 2 * PAGESIZE;
 646  648                  } else {
 647  649                          new->maxsize += PAGESIZE;
 648  650                  }
 649  651                  if (new->clevel >= DUMP_CLEVEL_BZIP2)
 650  652                          new->maxsize += bz2size;
 651  653          }
 652  654  
 653  655          new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
 654  656          cpend = &new->cbuf[new->ncbuf];
 655  657          for (cp = new->cbuf; cp != cpend; cp++) {
 656  658                  cp->state = CBUF_FREEBUF;
 657  659                  cp->size = CBUF_SIZE;
 658  660                  if (cp < &new->cbuf[MINCBUFS])
 659  661                          cp->buf = kmem_alloc(cp->size, KM_SLEEP);
 660  662                  else
 661  663                          new->maxsize += cp->size;
 662  664          }
 663  665  
 664  666          new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP);
 665  667          cpend = &new->cmap[new->ncmap];
 666  668          for (cp = new->cmap; cp != cpend; cp++) {
 667  669                  cp->state = CBUF_FREEMAP;
 668  670                  cp->size = CBUF_MAPSIZE;
 669  671                  cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE,
 670  672                      0, 0, NULL, NULL, VM_SLEEP);
 671  673          }
  
    | 
      ↓ open down ↓ | 
    520 lines elided | 
    
      ↑ open up ↑ | 
  
 672  674  
 673  675          /* reserve VA to be backed with spare pages at crash time */
 674  676          if (new->maxsize > 0) {
 675  677                  new->maxsize = P2ROUNDUP(new->maxsize, PAGESIZE);
 676  678                  new->maxvmsize = P2ROUNDUP(new->maxsize, CBUF_MAPSIZE);
 677  679                  new->maxvm = vmem_xalloc(heap_arena, new->maxvmsize,
 678  680                      CBUF_MAPSIZE, 0, 0, NULL, NULL, VM_SLEEP);
 679  681          }
 680  682  
 681  683          /*
 682      -         * Reserve memory for kmem allocation calls made during crash
 683      -         * dump.  The hat layer allocates memory for each mapping
 684      -         * created, and the I/O path allocates buffers and data structs.
 685      -         * Add a few pages for safety.
      684 +         * Reserve memory for kmem allocation calls made during crash dump.  The
      685 +         * hat layer allocates memory for each mapping created, and the I/O path
      686 +         * allocates buffers and data structs.
      687 +         *
      688 +         * On larger systems, we easily exceed the lower amount, so we need some
      689 +         * more space; the cut-over point is relatively arbitrary.  If we run
      690 +         * out, the only impact is that kmem state in the dump becomes
      691 +         * inconsistent.
 686  692           */
      693 +
      694 +        if (dump_kmem_pages == 0) {
      695 +                if (physmem > (16 * ONE_GIG) / PAGESIZE)
      696 +                        dump_kmem_pages = 20;
      697 +                else
      698 +                        dump_kmem_pages = 8;
      699 +        }
      700 +
 687  701          kmem_dump_init((new->ncmap * dump_kmem_permap) +
 688  702              (dump_kmem_pages * PAGESIZE));
 689  703  
 690  704          /* set new config pointers */
 691  705          *old = *new;
 692  706  }
 693  707  
 694  708  /*
 695  709   * Define a struct memlist walker to optimize bitnum to pfn
 696  710   * lookup. The walker maintains the state of the list traversal.
 697  711   */
 698  712  typedef struct dumpmlw {
 699  713          struct memlist  *mp;            /* current memlist */
 700  714          pgcnt_t         basenum;        /* bitnum base offset */
 701  715          pgcnt_t         mppages;        /* current memlist size */
 702  716          pgcnt_t         mpleft;         /* size to end of current memlist */
 703  717          pfn_t           mpaddr;         /* first pfn in memlist */
 704  718  } dumpmlw_t;
 705  719  
 706  720  /* initialize the walker */
 707  721  static inline void
 708  722  dump_init_memlist_walker(dumpmlw_t *pw)
 709  723  {
 710  724          pw->mp = phys_install;
 711  725          pw->basenum = 0;
 712  726          pw->mppages = pw->mp->ml_size >> PAGESHIFT;
 713  727          pw->mpleft = pw->mppages;
 714  728          pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
 715  729  }
 716  730  
 717  731  /*
 718  732   * Lookup pfn given bitnum. The memlist can be quite long on some
 719  733   * systems (e.g.: one per board). To optimize sequential lookups, the
 720  734   * caller initializes and presents a memlist walker.
 721  735   */
 722  736  static pfn_t
 723  737  dump_bitnum_to_pfn(pgcnt_t bitnum, dumpmlw_t *pw)
 724  738  {
 725  739          bitnum -= pw->basenum;
 726  740          while (pw->mp != NULL) {
 727  741                  if (bitnum < pw->mppages) {
 728  742                          pw->mpleft = pw->mppages - bitnum;
 729  743                          return (pw->mpaddr + bitnum);
 730  744                  }
 731  745                  bitnum -= pw->mppages;
 732  746                  pw->basenum += pw->mppages;
 733  747                  pw->mp = pw->mp->ml_next;
 734  748                  if (pw->mp != NULL) {
 735  749                          pw->mppages = pw->mp->ml_size >> PAGESHIFT;
 736  750                          pw->mpleft = pw->mppages;
 737  751                          pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
 738  752                  }
 739  753          }
 740  754          return (PFN_INVALID);
 741  755  }
 742  756  
 743  757  static pgcnt_t
 744  758  dump_pfn_to_bitnum(pfn_t pfn)
 745  759  {
 746  760          struct memlist *mp;
 747  761          pgcnt_t bitnum = 0;
 748  762  
 749  763          for (mp = phys_install; mp != NULL; mp = mp->ml_next) {
 750  764                  if (pfn >= (mp->ml_address >> PAGESHIFT) &&
 751  765                      pfn < ((mp->ml_address + mp->ml_size) >> PAGESHIFT))
 752  766                          return (bitnum + pfn - (mp->ml_address >> PAGESHIFT));
 753  767                  bitnum += mp->ml_size >> PAGESHIFT;
 754  768          }
 755  769          return ((pgcnt_t)-1);
 756  770  }
 757  771  
 758  772  /*
 759  773   * Set/test bitmap for a CBUF_MAPSIZE range which includes pfn. The
 760  774   * mapping of pfn to range index is imperfect because pfn and bitnum
 761  775   * do not have the same phase. To make sure a CBUF_MAPSIZE range is
 762  776   * covered, call this for both ends:
 763  777   *      dump_set_used(base)
 764  778   *      dump_set_used(base+CBUF_MAPNP-1)
 765  779   *
 766  780   * This is used during a panic dump to mark pages allocated by
 767  781   * dumpsys_get_maxmem(). The macro IS_DUMP_PAGE(pp) is used by
 768  782   * page_get_mnode_freelist() to make sure pages used by dump are never
 769  783   * allocated.
 770  784   */
 771  785  #define CBUF_MAPP2R(pfn)        ((pfn) >> (CBUF_MAPSHIFT - PAGESHIFT))
 772  786  
 773  787  static void
 774  788  dump_set_used(pfn_t pfn)
 775  789  {
 776  790  
 777  791          pgcnt_t bitnum, rbitnum;
 778  792  
 779  793          bitnum = dump_pfn_to_bitnum(pfn);
 780  794          ASSERT(bitnum != (pgcnt_t)-1);
 781  795  
 782  796          rbitnum = CBUF_MAPP2R(bitnum);
 783  797          ASSERT(rbitnum < dumpcfg.rbitmapsize);
 784  798  
 785  799          BT_SET(dumpcfg.rbitmap, rbitnum);
 786  800  }
 787  801  
 788  802  int
 789  803  dump_test_used(pfn_t pfn)
 790  804  {
 791  805          pgcnt_t bitnum, rbitnum;
 792  806  
 793  807          bitnum = dump_pfn_to_bitnum(pfn);
 794  808          ASSERT(bitnum != (pgcnt_t)-1);
 795  809  
 796  810          rbitnum = CBUF_MAPP2R(bitnum);
 797  811          ASSERT(rbitnum < dumpcfg.rbitmapsize);
 798  812  
 799  813          return (BT_TEST(dumpcfg.rbitmap, rbitnum));
 800  814  }
 801  815  
 802  816  /*
 803  817   * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library.
 804  818   * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit().
 805  819   */
 806  820  static void *
 807  821  dumpbzalloc(void *opaque, int items, int size)
 808  822  {
 809  823          size_t *sz;
 810  824          char *ret;
 811  825  
 812  826          ASSERT(opaque != NULL);
 813  827          sz = opaque;
 814  828          ret = dumpcfg.maxvm + *sz;
 815  829          *sz += items * size;
 816  830          *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN);
 817  831          ASSERT(*sz <= dumpcfg.maxvmsize);
 818  832          return (ret);
 819  833  }
 820  834  
 821  835  /*ARGSUSED*/
 822  836  static void
 823  837  dumpbzfree(void *opaque, void *addr)
 824  838  {
 825  839  }
 826  840  
 827  841  /*
 828  842   * Perform additional checks on the page to see if we can really use
 829  843   * it. The kernel (kas) pages are always set in the bitmap. However,
 830  844   * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
 831  845   * bitmap. So we check for them.
 832  846   */
 833  847  static inline int
 834  848  dump_pfn_check(pfn_t pfn)
 835  849  {
 836  850          page_t *pp = page_numtopp_nolock(pfn);
 837  851          if (pp == NULL || pp->p_pagenum != pfn ||
 838  852  #if defined(__sparc)
 839  853              pp->p_vnode == &promvp ||
 840  854  #else
 841  855              PP_ISBOOTPAGES(pp) ||
 842  856  #endif
 843  857              pp->p_toxic != 0)
 844  858                  return (0);
 845  859          return (1);
 846  860  }
 847  861  
 848  862  /*
 849  863   * Check a range to see if all contained pages are available and
 850  864   * return non-zero if the range can be used.
 851  865   */
 852  866  static inline int
 853  867  dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn)
 854  868  {
 855  869          for (; start < end; start++, pfn++) {
 856  870                  if (BT_TEST(dumpcfg.bitmap, start))
 857  871                          return (0);
 858  872                  if (!dump_pfn_check(pfn))
 859  873                          return (0);
 860  874          }
 861  875          return (1);
 862  876  }
 863  877  
 864  878  /*
 865  879   * dumpsys_get_maxmem() is called during panic. Find unused ranges
 866  880   * and use them for buffers. If we find enough memory switch to
 867  881   * parallel bzip2, otherwise use parallel lzjb.
 868  882   *
 869  883   * It searches the dump bitmap in 2 passes. The first time it looks
 870  884   * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
 871  885   */
static void
dumpsys_get_maxmem()
{
        dumpcfg_t *cfg = &dumpcfg;
        cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
        helper_t *endhp = &cfg->helper[cfg->nhelper];
        pgcnt_t bitnum, end;
        size_t sz, endsz, bz2size;
        pfn_t pfn, off;
        cbuf_t *cp;
        helper_t *hp, *ohp;
        dumpmlw_t mlw;
        int k;

        /*
         * Setting dump_plat_mincpu to 0 at any time forces a serial
         * dump.
         */
        if (dump_plat_mincpu == 0) {
                cfg->clevel = 0;
                return;
        }

        /*
         * There may be no point in looking for spare memory. If
         * dumping all memory, then none is spare. If doing a serial
         * dump, then already have buffers.
         */
        if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB ||
            (dump_conflags & DUMP_ALL) != 0) {
                if (cfg->clevel > DUMP_CLEVEL_LZJB)
                        cfg->clevel = DUMP_CLEVEL_LZJB;
                return;
        }

        /* sz tracks the total bytes of spare memory mapped so far */
        sz = 0;
        cfg->found4m = 0;
        cfg->foundsm = 0;

        /* bitmap of ranges used to estimate which pfns are being used */
        bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));

        /*
         * Pass 1: find whole CBUF_MAPSIZE-aligned ranges that are not
         * being dumped and map them at increasing offsets in maxvm.
         */
        dump_init_memlist_walker(&mlw);
        for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
                dump_timeleft = dump_timeout;
                end = bitnum + CBUF_MAPNP;
                pfn = dump_bitnum_to_pfn(bitnum, &mlw);
                ASSERT(pfn != PFN_INVALID);

                /* skip partial range at end of mem segment */
                if (mlw.mpleft < CBUF_MAPNP) {
                        end = bitnum + mlw.mpleft;
                        continue;
                }

                /* skip non aligned pages */
                off = P2PHASE(pfn, CBUF_MAPNP);
                if (off != 0) {
                        end -= off;
                        continue;
                }

                if (!dump_range_check(bitnum, end, pfn))
                        continue;

                ASSERT((sz + CBUF_MAPSIZE) <= cfg->maxvmsize);
                hat_devload(kas.a_hat, cfg->maxvm + sz, CBUF_MAPSIZE, pfn,
                    PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
                sz += CBUF_MAPSIZE;
                cfg->found4m++;

                /* set the bitmap for both ends to be sure to cover the range */
                dump_set_used(pfn);
                dump_set_used(pfn + CBUF_MAPNP - 1);

                if (sz >= cfg->maxsize)
                        goto foundmax;
        }

        /* Pass 2: add small pages if we can't find enough large pages. */
        dump_init_memlist_walker(&mlw);
        for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
                dump_timeleft = dump_timeout;
                end = bitnum + CBUF_MAPNP;
                pfn = dump_bitnum_to_pfn(bitnum, &mlw);
                ASSERT(pfn != PFN_INVALID);

                /* Find any non-aligned pages at start and end of segment. */
                off = P2PHASE(pfn, CBUF_MAPNP);
                if (mlw.mpleft < CBUF_MAPNP) {
                        end = bitnum + mlw.mpleft;
                } else if (off != 0) {
                        end -= off;
                } else if (cfg->found4m && dump_test_used(pfn)) {
                        /* range already taken in pass 1 */
                        continue;
                }

                for (; bitnum < end; bitnum++, pfn++) {
                        dump_timeleft = dump_timeout;
                        if (BT_TEST(dumpcfg.bitmap, bitnum))
                                continue;
                        if (!dump_pfn_check(pfn))
                                continue;
                        ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
                        hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
                            PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
                        sz += PAGESIZE;
                        cfg->foundsm++;
                        dump_set_used(pfn);
                        if (sz >= cfg->maxsize)
                                goto foundmax;
                }
        }

        /* Fall back to lzjb if we did not get enough memory for bzip2. */
        endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper;
        if (sz < endsz) {
                cfg->clevel = DUMP_CLEVEL_LZJB;
        }

        /* Allocate memory for as many helpers as we can. */
foundmax:

        /*
         * Byte offsets into memory found and mapped above.  sz is
         * re-used as the carve-out cursor; endsz is the total mapped.
         */
        endsz = sz;
        sz = 0;

        /* Set the size for bzip2 state. Only bzip2 needs it. */
        bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);

        /* Skip the preallocated output buffers. */
        cp = &cfg->cbuf[MINCBUFS];

        /* Use this to move memory up from the preallocated helpers. */
        ohp = cfg->helper;

        /* Loop over all helpers and allocate memory. */
        for (hp = cfg->helper; hp < endhp; hp++) {

                /* Skip preallocated helpers by checking hp->page. */
                if (hp->page == NULL) {
                        if (cfg->clevel <= DUMP_CLEVEL_LZJB) {
                                /* lzjb needs 2 1-page buffers */
                                if ((sz + (2 * PAGESIZE)) > endsz)
                                        break;
                                hp->page = cfg->maxvm + sz;
                                sz += PAGESIZE;
                                hp->lzbuf = cfg->maxvm + sz;
                                sz += PAGESIZE;

                        } else if (ohp->lzbuf != NULL) {
                                /* re-use a preallocated lzjb page for bzip2 */
                                hp->page = ohp->lzbuf;
                                ohp->lzbuf = NULL;
                                ++ohp;

                        } else {
                                /* bzip2 needs a 1-page buffer */
                                if ((sz + PAGESIZE) > endsz)
                                        break;
                                hp->page = cfg->maxvm + sz;
                                sz += PAGESIZE;
                        }
                }

                /*
                 * Add output buffers per helper. The number of
                 * buffers per helper is determined by the ratio of
                 * ncbuf to nhelper.
                 */
                for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
                    k < NCBUF_PER_HELPER; k++) {
                        cp->state = CBUF_FREEBUF;
                        cp->size = CBUF_SIZE;
                        cp->buf = cfg->maxvm + sz;
                        sz += CBUF_SIZE;
                        ++cp;
                }

                /*
                 * bzip2 needs compression state. Use the dumpbzalloc
                 * and dumpbzfree callbacks to allocate the memory.
                 * bzip2 does allocation only at init time.
                 */
                if (cfg->clevel >= DUMP_CLEVEL_BZIP2) {
                        if ((sz + bz2size) > endsz) {
                                /* ran out: this helper will not be used */
                                hp->page = NULL;
                                break;
                        } else {
                                hp->bzstream.opaque = &sz;
                                hp->bzstream.bzalloc = dumpbzalloc;
                                hp->bzstream.bzfree = dumpbzfree;
                                (void) BZ2_bzCompressInit(&hp->bzstream,
                                    dump_bzip2_level, 0, 0);
                                hp->bzstream.opaque = NULL;
                        }
                }
        }

        /* Finish allocating output buffers */
        for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
                cp->state = CBUF_FREEBUF;
                cp->size = CBUF_SIZE;
                cp->buf = cfg->maxvm + sz;
                sz += CBUF_SIZE;
        }

        /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
        if (cfg->found4m || cfg->foundsm)
                dump_check_used = 1;

        ASSERT(sz <= endsz);
}
1086 1100  
1087 1101  static void
1088 1102  dumphdr_init(void)
1089 1103  {
1090 1104          pgcnt_t npages = 0;
1091 1105  
1092 1106          ASSERT(MUTEX_HELD(&dump_lock));
1093 1107  
1094 1108          if (dumphdr == NULL) {
1095 1109                  dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP);
1096 1110                  dumphdr->dump_magic = DUMP_MAGIC;
1097 1111                  dumphdr->dump_version = DUMP_VERSION;
1098 1112                  dumphdr->dump_wordsize = DUMP_WORDSIZE;
1099 1113                  dumphdr->dump_pageshift = PAGESHIFT;
1100 1114                  dumphdr->dump_pagesize = PAGESIZE;
1101 1115                  dumphdr->dump_utsname = utsname;
1102 1116                  (void) strcpy(dumphdr->dump_platform, platform);
1103 1117                  dumpbuf.size = dumpbuf_iosize(maxphys);
1104 1118                  dumpbuf.start = kmem_alloc(dumpbuf.size, KM_SLEEP);
1105 1119                  dumpbuf.end = dumpbuf.start + dumpbuf.size;
1106 1120                  dumpcfg.pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP);
1107 1121                  dumpcfg.helpermap = kmem_zalloc(BT_SIZEOFMAP(NCPU), KM_SLEEP);
1108 1122                  LOCK_INIT_HELD(&dumpcfg.helper_lock);
1109 1123                  dump_stack_scratch = kmem_alloc(STACK_BUF_SIZE, KM_SLEEP);
1110 1124                  (void) strncpy(dumphdr->dump_uuid, dump_get_uuid(),
1111 1125                      sizeof (dumphdr->dump_uuid));
1112 1126          }
1113 1127  
1114 1128          npages = num_phys_pages();
1115 1129  
1116 1130          if (dumpcfg.bitmapsize != npages) {
1117 1131                  size_t rlen = CBUF_MAPP2R(P2ROUNDUP(npages, CBUF_MAPNP));
1118 1132                  void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP);
1119 1133                  void *rmap = kmem_alloc(BT_SIZEOFMAP(rlen), KM_SLEEP);
1120 1134  
1121 1135                  if (dumpcfg.bitmap != NULL)
1122 1136                          kmem_free(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.
1123 1137                              bitmapsize));
1124 1138                  if (dumpcfg.rbitmap != NULL)
1125 1139                          kmem_free(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.
1126 1140                              rbitmapsize));
1127 1141                  dumpcfg.bitmap = map;
1128 1142                  dumpcfg.bitmapsize = npages;
1129 1143                  dumpcfg.rbitmap = rmap;
1130 1144                  dumpcfg.rbitmapsize = rlen;
1131 1145          }
1132 1146  }
1133 1147  
1134 1148  /*
1135 1149   * Establish a new dump device.
1136 1150   */
1137 1151  int
1138 1152  dumpinit(vnode_t *vp, char *name, int justchecking)
1139 1153  {
1140 1154          vnode_t *cvp;
1141 1155          vattr_t vattr;
1142 1156          vnode_t *cdev_vp;
1143 1157          int error = 0;
1144 1158  
1145 1159          ASSERT(MUTEX_HELD(&dump_lock));
1146 1160  
1147 1161          dumphdr_init();
1148 1162  
1149 1163          cvp = common_specvp(vp);
1150 1164          if (cvp == dumpvp)
1151 1165                  return (0);
1152 1166  
1153 1167          /*
1154 1168           * Determine whether this is a plausible dump device.  We want either:
1155 1169           * (1) a real device that's not mounted and has a cb_dump routine, or
1156 1170           * (2) a swapfile on some filesystem that has a vop_dump routine.
1157 1171           */
1158 1172          if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0)
1159 1173                  return (error);
1160 1174  
1161 1175          vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV;
1162 1176          if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) {
1163 1177                  if (vattr.va_type == VBLK || vattr.va_type == VCHR) {
1164 1178                          if (devopsp[getmajor(vattr.va_rdev)]->
1165 1179                              devo_cb_ops->cb_dump == nodev)
1166 1180                                  error = ENOTSUP;
1167 1181                          else if (vfs_devismounted(vattr.va_rdev))
1168 1182                                  error = EBUSY;
1169 1183                          if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip),
1170 1184                              ZFS_DRIVER) == 0 &&
1171 1185                              IS_SWAPVP(common_specvp(cvp)))
1172 1186                                          error = EBUSY;
1173 1187                  } else {
1174 1188                          if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) ||
1175 1189                              !IS_SWAPVP(cvp))
1176 1190                                  error = ENOTSUP;
1177 1191                  }
1178 1192          }
1179 1193  
1180 1194          if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE)
1181 1195                  error = ENOSPC;
1182 1196  
1183 1197          if (error || justchecking) {
1184 1198                  (void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0,
1185 1199                      kcred, NULL);
1186 1200                  return (error);
1187 1201          }
1188 1202  
1189 1203          VN_HOLD(cvp);
1190 1204  
1191 1205          if (dumpvp != NULL)
1192 1206                  dumpfini();     /* unconfigure the old dump device */
1193 1207  
1194 1208          dumpvp = cvp;
1195 1209          dumpvp_size = vattr.va_size & -DUMP_OFFSET;
1196 1210          dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1197 1211          (void) strcpy(dumppath, name);
1198 1212          dumpbuf.iosize = 0;
1199 1213  
1200 1214          /*
1201 1215           * If the dump device is a block device, attempt to open up the
1202 1216           * corresponding character device and determine its maximum transfer
1203 1217           * size.  We use this information to potentially resize dumpbuf to a
1204 1218           * larger and more optimal size for performing i/o to the dump device.
1205 1219           */
1206 1220          if (cvp->v_type == VBLK &&
1207 1221              (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) {
1208 1222                  if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
1209 1223                          size_t blk_size;
1210 1224                          struct dk_cinfo dki;
1211 1225                          struct dk_minfo minf;
1212 1226  
1213 1227                          if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO,
1214 1228                              (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL)
1215 1229                              == 0 && minf.dki_lbsize != 0)
1216 1230                                  blk_size = minf.dki_lbsize;
1217 1231                          else
1218 1232                                  blk_size = DEV_BSIZE;
1219 1233  
1220 1234                          if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki,
1221 1235                              FKIOCTL, kcred, NULL, NULL) == 0) {
1222 1236                                  dumpbuf.iosize = dki.dki_maxtransfer * blk_size;
1223 1237                                  dumpbuf_resize();
1224 1238                          }
1225 1239                          /*
1226 1240                           * If we are working with a zvol then dumpify it
1227 1241                           * if it's not being used as swap.
1228 1242                           */
1229 1243                          if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) {
1230 1244                                  if (IS_SWAPVP(common_specvp(cvp)))
1231 1245                                          error = EBUSY;
1232 1246                                  else if ((error = VOP_IOCTL(cdev_vp,
1233 1247                                      DKIOCDUMPINIT, NULL, FKIOCTL, kcred,
1234 1248                                      NULL, NULL)) != 0)
1235 1249                                          dumpfini();
1236 1250                          }
1237 1251  
1238 1252                          (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
1239 1253                              kcred, NULL);
1240 1254                  }
1241 1255  
1242 1256                  VN_RELE(cdev_vp);
1243 1257          }
1244 1258  
1245 1259          cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20);
1246 1260  
1247 1261          dump_update_clevel();
1248 1262  
1249 1263          return (error);
1250 1264  }
1251 1265  
/*
 * Tear down the configured dump device: free the saved pathname, give a
 * zvol-backed device a chance to clean up (DKIOCDUMPFINI), then close
 * and release the dump vnode and reset the global dump state.
 * Caller must hold dump_lock.
 */
void
dumpfini(void)
{
	vattr_t vattr;
	boolean_t is_zfs = B_FALSE;
	vnode_t *cdev_vp;
	ASSERT(MUTEX_HELD(&dump_lock));

	kmem_free(dumppath, strlen(dumppath) + 1);

	/*
	 * Determine if we are using zvols for our dump device
	 */
	vattr.va_mask = AT_RDEV;
	if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) {
		is_zfs = (getmajor(vattr.va_rdev) ==
		    ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE;
	}

	/*
	 * If we have a zvol dump device then we call into zfs so
	 * that it may have a chance to cleanup.
	 */
	if (is_zfs &&
	    (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) {
		if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
			(void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL,
			    kcred, NULL, NULL);
			(void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
			    kcred, NULL);
		}
		VN_RELE(cdev_vp);
	}

	(void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL);

	VN_RELE(dumpvp);

	dumpvp = NULL;
	dumpvp_size = 0;
	dumppath = NULL;
}
1294 1308  
/*
 * Flush the contents of dumpbuf (rounded up to a whole page) to the
 * dump device and reset the buffer. At panic time the write goes
 * through VOP_DUMP(); otherwise through vn_rdwr(). Running past
 * vp_limit records ENOSPC in dump_ioerr. I/O wait and service times
 * are accumulated in dumpsync. Returns the updated write offset.
 */
static offset_t
dumpvp_flush(void)
{
	size_t size = P2ROUNDUP(dumpbuf.cur - dumpbuf.start, PAGESIZE);
	hrtime_t iotime;
	int err;

	if (dumpbuf.vp_off + size > dumpbuf.vp_limit) {
		dump_ioerr = ENOSPC;
		dumpbuf.vp_off = dumpbuf.vp_limit;
	} else if (size != 0) {
		iotime = gethrtime();
		dumpsync.iowait += iotime - dumpsync.iowaitts;
		if (panicstr)
			err = VOP_DUMP(dumpvp, dumpbuf.start,
			    lbtodb(dumpbuf.vp_off), btod(size), NULL);
		else
			err = vn_rdwr(UIO_WRITE, dumpbuf.cdev_vp != NULL ?
			    dumpbuf.cdev_vp : dumpvp, dumpbuf.start, size,
			    dumpbuf.vp_off, UIO_SYSSPACE, 0, dumpbuf.vp_limit,
			    kcred, 0);
		/* remember only the first I/O error */
		if (err && dump_ioerr == 0)
			dump_ioerr = err;
		dumpsync.iowaitts = gethrtime();
		dumpsync.iotime += dumpsync.iowaitts - iotime;
		dumpsync.nwrite += size;
		dumpbuf.vp_off += size;
	}
	dumpbuf.cur = dumpbuf.start;
	dump_timeleft = dump_timeout;
	return (dumpbuf.vp_off);
}
1327 1341  
/* maximize write speed by keeping seek offset aligned with size */
void
dumpvp_write(const void *va, size_t size)
{
	size_t len, off, sz;

	while (size != 0) {
		len = MIN(size, dumpbuf.end - dumpbuf.cur);
		if (len == 0) {
			/*
			 * Buffer is full. If the device offset is not
			 * aligned to the (power-of-two) buffer size,
			 * flush only up to the next alignment boundary
			 * and slide the unwritten tail ('off' bytes)
			 * back to the front of the buffer, so every
			 * subsequent flush is size-aligned.
			 */
			off = P2PHASE(dumpbuf.vp_off, dumpbuf.size);
			if (off == 0 || !ISP2(dumpbuf.size)) {
				(void) dumpvp_flush();
			} else {
				sz = dumpbuf.size - off;
				dumpbuf.cur = dumpbuf.start + sz;
				(void) dumpvp_flush();
				ovbcopy(dumpbuf.start + sz, dumpbuf.start, off);
				dumpbuf.cur += off;
			}
		} else {
			bcopy(va, dumpbuf.cur, len);
			va = (char *)va + len;
			dumpbuf.cur += len;
			size -= len;
		}
	}
}
1355 1369  
/*
 * Write a chunk of data to the dump device, ignoring the 'dst'
 * argument. NOTE(review): the (src, dst, size) shape suggests this is
 * a ksyms_snapshot()-style callback -- confirm against the caller.
 */
/*ARGSUSED*/
static void
dumpvp_ksyms_write(const void *src, void *dst, size_t size)
{
	dumpvp_write(src, size);
}
1362 1376  
/*
 * Mark 'pfn' in the bitmap and dump its translation table entry.
 * dump_npages is bumped only the first time a given pfn is seen;
 * an <as, va, pfn> record is written (and dump_nvtop counted) for
 * every mapping. Invalid pfns (no bitnum) are silently ignored.
 */
void
dump_addpage(struct as *as, void *va, pfn_t pfn)
{
	mem_vtop_t mem_vtop;
	pgcnt_t bitnum;

	if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
		if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
			dumphdr->dump_npages++;
			BT_SET(dumpcfg.bitmap, bitnum);
		}
		dumphdr->dump_nvtop++;
		mem_vtop.m_as = as;
		mem_vtop.m_va = va;
		mem_vtop.m_pfn = pfn;
		dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
	}
	/* reset the watchdog for the dump */
	dump_timeleft = dump_timeout;
}
1385 1399  
1386 1400  /*
1387 1401   * Mark 'pfn' in the bitmap
1388 1402   */
1389 1403  void
1390 1404  dump_page(pfn_t pfn)
1391 1405  {
1392 1406          pgcnt_t bitnum;
1393 1407  
1394 1408          if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
1395 1409                  if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
1396 1410                          dumphdr->dump_npages++;
1397 1411                          BT_SET(dumpcfg.bitmap, bitnum);
1398 1412                  }
1399 1413          }
1400 1414          dump_timeleft = dump_timeout;
1401 1415  }
1402 1416  
/*
 * Dump the <as, va, pfn> information for a given address space.
 * SEGOP_DUMP() will call dump_addpage() for each page in the segment.
 */
static void
dump_as(struct as *as)
{
	struct seg *seg;

	AS_LOCK_ENTER(as, RW_READER);
	for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
		/* stop the walk if the segment list looks corrupted */
		if (seg->s_as != as)
			break;
		/* skip segments with no ops vector */
		if (seg->s_ops == NULL)
			continue;
		SEGOP_DUMP(seg);
	}
	AS_LOCK_EXIT(as);

	/* a non-NULL seg here means the walk stopped on a bad segment */
	if (seg != NULL)
		cmn_err(CE_WARN, "invalid segment %p in address space %p",
		    (void *)seg, (void *)as);
}
1426 1440  
1427 1441  static int
1428 1442  dump_process(pid_t pid)
1429 1443  {
1430 1444          proc_t *p = sprlock(pid);
1431 1445  
1432 1446          if (p == NULL)
1433 1447                  return (-1);
1434 1448          if (p->p_as != &kas) {
1435 1449                  mutex_exit(&p->p_lock);
1436 1450                  dump_as(p->p_as);
1437 1451                  mutex_enter(&p->p_lock);
1438 1452          }
1439 1453  
1440 1454          sprunlock(p);
1441 1455  
1442 1456          return (0);
1443 1457  }
1444 1458  
1445 1459  /*
1446 1460   * The following functions (dump_summary(), dump_ereports(), and
1447 1461   * dump_messages()), write data to an uncompressed area within the
1448 1462   * crashdump. The layout of these is
1449 1463   *
1450 1464   * +------------------------------------------------------------+
1451 1465   * |     compressed pages       | summary | ereports | messages |
1452 1466   * +------------------------------------------------------------+
1453 1467   *
1454 1468   * With the advent of saving a compressed crash dump by default, we
1455 1469   * need to save a little more data to describe the failure mode in
1456 1470   * an uncompressed buffer available before savecore uncompresses
1457 1471   * the dump. Initially this is a copy of the stack trace. Additional
1458 1472   * summary information should be added here.
1459 1473   */
1460 1474  
/*
 * Write the dump summary (currently the panic stack trace) into the
 * uncompressed summary area located just below the ereport and message
 * areas at the end of the dump. A summary_dump_t with zero magic
 * terminates the area.
 */
void
dump_summary(void)
{
	u_offset_t dumpvp_start;
	summary_dump_t sd;

	if (dumpvp == NULL || dumphdr == NULL)
		return;

	dumpbuf.cur = dumpbuf.start;

	dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE +
	    DUMP_ERPTSIZE);
	dumpvp_start = dumpbuf.vp_limit - DUMP_SUMMARYSIZE;
	dumpbuf.vp_off = dumpvp_start;

	sd.sd_magic = SUMMARY_MAGIC;
	sd.sd_ssum = checksum32(dump_stack_scratch, STACK_BUF_SIZE);
	dumpvp_write(&sd, sizeof (sd));
	dumpvp_write(dump_stack_scratch, STACK_BUF_SIZE);

	sd.sd_magic = 0; /* indicate end of summary */
	dumpvp_write(&sd, sizeof (sd));
	(void) dumpvp_flush();
}
1486 1500  
/*
 * Write pending FMA ereports (and, at panic time, the pending error
 * queues) into the uncompressed ereport area of the dump. A zeroed
 * erpt_dump_t terminates the area. Outside of panic, the cached pages
 * of the region are written back and invalidated.
 */
void
dump_ereports(void)
{
	u_offset_t dumpvp_start;
	erpt_dump_t ed;

	if (dumpvp == NULL || dumphdr == NULL)
		return;

	dumpbuf.cur = dumpbuf.start;
	dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE);
	dumpvp_start = dumpbuf.vp_limit - DUMP_ERPTSIZE;
	dumpbuf.vp_off = dumpvp_start;

	fm_ereport_dump();
	if (panicstr)
		errorq_dump();

	bzero(&ed, sizeof (ed)); /* indicate end of ereports */
	dumpvp_write(&ed, sizeof (ed));
	(void) dumpvp_flush();

	if (!panicstr) {
		(void) VOP_PUTPAGE(dumpvp, dumpvp_start,
		    (size_t)(dumpbuf.vp_off - dumpvp_start),
		    B_INVAL | B_FORCE, kcred, NULL);
	}
}
1515 1529  
/*
 * Dump the console log message queues into the uncompressed message
 * area at the end of the dump, terminated by a zero-magic log_dump_t.
 * Each message is written as a log_dump_t header (carrying checksums
 * of the ctl and data mblks) followed by the raw ctl and data bytes.
 */
void
dump_messages(void)
{
	log_dump_t ld;
	mblk_t *mctl, *mdata;
	queue_t *q, *qlast;
	u_offset_t dumpvp_start;

	if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL)
		return;

	dumpbuf.cur = dumpbuf.start;
	dumpbuf.vp_limit = dumpvp_size - DUMP_OFFSET;
	dumpvp_start = dumpbuf.vp_limit - DUMP_LOGSIZE;
	dumpbuf.vp_off = dumpvp_start;

	/*
	 * Walk the log_consq queue list from the tail back toward the
	 * head: each pass scans forward to the queue just before the
	 * previously-dumped one (qlast).
	 */
	qlast = NULL;
	do {
		for (q = log_consq; q->q_next != qlast; q = q->q_next)
			continue;
		for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) {
			dump_timeleft = dump_timeout;
			mdata = mctl->b_cont;
			ld.ld_magic = LOG_MAGIC;
			ld.ld_msgsize = MBLKL(mctl->b_cont);
			ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl));
			ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata));
			dumpvp_write(&ld, sizeof (ld));
			dumpvp_write(mctl->b_rptr, MBLKL(mctl));
			dumpvp_write(mdata->b_rptr, MBLKL(mdata));
		}
	} while ((qlast = q) != log_consq);

	ld.ld_magic = 0;		/* indicate end of messages */
	dumpvp_write(&ld, sizeof (ld));
	(void) dumpvp_flush();
	if (!panicstr) {
		/* write back and invalidate cached pages of the region */
		(void) VOP_PUTPAGE(dumpvp, dumpvp_start,
		    (size_t)(dumpbuf.vp_off - dumpvp_start),
		    B_INVAL | B_FORCE, kcred, NULL);
	}
}
1558 1572  
1559 1573  /*
1560 1574   * The following functions are called on multiple CPUs during dump.
1561 1575   * They must not use most kernel services, because all cross-calls are
1562 1576   * disabled during panic. Therefore, blocking locks and cache flushes
1563 1577   * will not work.
1564 1578   */
1565 1579  
/*
 * Copy pages, trapping ECC errors. Also, for robustness, trap data
 * access in case something goes wrong in the hat layer and the
 * mapping is broken. Returns -1 on a clean copy, otherwise the byte
 * offset within the page of the first faulting word.
 */
static int
dump_pagecopy(void *src, void *dst)
{
	long *wsrc = (long *)src;
	long *wdst = (long *)dst;
	const ulong_t ncopies = PAGESIZE / sizeof (long);
	volatile int w = 0;		/* volatile: live across on_trap() */
	volatile int ueoff = -1;	/* offset of first error, if any */
	on_trap_data_t otd;

	/*
	 * A fault in the copy loop below returns here with otd.ot_trap
	 * set. Record the offset of the first failing word, store a
	 * recognizable sentinel in its place, and resume the loop at
	 * the next word.
	 */
	if (on_trap(&otd, OT_DATA_EC | OT_DATA_ACCESS)) {
		if (ueoff == -1)
			ueoff = w * sizeof (long);
		/* report "bad ECC" or "bad address" */
#ifdef _LP64
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc00badecc;
		else
			wdst[w++] = 0x00badadd00badadd;
#else
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc;
		else
			wdst[w++] = 0x00badadd;
#endif
	}
	while (w < ncopies) {
		wdst[w] = wsrc[w];
		w++;
	}
	no_trap();
	return (ueoff);
}
1604 1618  
1605 1619  static void
1606 1620  dumpsys_close_cq(cqueue_t *cq, int live)
1607 1621  {
1608 1622          if (live) {
1609 1623                  mutex_enter(&cq->mutex);
1610 1624                  atomic_dec_uint(&cq->open);
1611 1625                  cv_signal(&cq->cv);
1612 1626                  mutex_exit(&cq->mutex);
1613 1627          } else {
1614 1628                  atomic_dec_uint(&cq->open);
1615 1629          }
1616 1630  }
1617 1631  
/*
 * Acquire a spinlock with exponential backoff, for use at panic time
 * when blocking mutexes cannot be used. The backoff is reset to its
 * initial value after ncpus consecutive failed attempts.
 */
static inline void
dumpsys_spinlock(lock_t *lp)
{
	uint_t backoff = 0;
	int loop_count = 0;

	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
		if (++loop_count >= ncpus) {
			backoff = mutex_lock_backoff(0);
			loop_count = 0;
		} else {
			backoff = mutex_lock_backoff(backoff);
		}
		mutex_lock_delay(backoff);
	}
}
1634 1648  
/* Release a lock acquired with dumpsys_spinlock(). */
static inline void
dumpsys_spinunlock(lock_t *lp)
{
	lock_clear(lp);
}
1640 1654  
1641 1655  static inline void
1642 1656  dumpsys_lock(cqueue_t *cq, int live)
1643 1657  {
1644 1658          if (live)
1645 1659                  mutex_enter(&cq->mutex);
1646 1660          else
1647 1661                  dumpsys_spinlock(&cq->spinlock);
1648 1662  }
1649 1663  
1650 1664  static inline void
1651 1665  dumpsys_unlock(cqueue_t *cq, int live, int signal)
1652 1666  {
1653 1667          if (live) {
1654 1668                  if (signal)
1655 1669                          cv_signal(&cq->cv);
1656 1670                  mutex_exit(&cq->mutex);
1657 1671          } else {
1658 1672                  dumpsys_spinunlock(&cq->spinlock);
1659 1673          }
1660 1674  }
1661 1675  
/*
 * Wait for a queue entry or queue close. Live case: block on the
 * condition variable (caller holds cq->mutex). Panic case: drop the
 * spinlock and busy-wait until an entry appears or the open count
 * reaches zero, then retake the spinlock before returning.
 */
static void
dumpsys_wait_cq(cqueue_t *cq, int live)
{
	if (live) {
		cv_wait(&cq->cv, &cq->mutex);
	} else {
		dumpsys_spinunlock(&cq->spinlock);
		while (cq->open)
			if (cq->first)
				break;
		dumpsys_spinlock(&cq->spinlock);
	}
}
1675 1689  
/*
 * Append cp to cq in state 'newstate' and wake one waiter. cq->empty
 * accumulates the time the queue has spent empty; cq->ts holds the
 * timestamp of the transition to empty (0 while non-empty).
 */
static void
dumpsys_put_cq(cqueue_t *cq, cbuf_t *cp, int newstate, int live)
{
	if (cp == NULL)
		return;

	dumpsys_lock(cq, live);

	/* queue was empty; close out the empty-time interval */
	if (cq->ts != 0) {
		cq->empty += gethrtime() - cq->ts;
		cq->ts = 0;
	}

	cp->state = newstate;
	cp->next = NULL;
	if (cq->last == NULL)
		cq->first = cp;
	else
		cq->last->next = cp;
	cq->last = cp;

	dumpsys_unlock(cq, live, 1);
}
1699 1713  
/*
 * Remove and return the head of cq, waiting while the queue is empty
 * but still open. Returns NULL only when the queue is closed (open
 * count zero) and drained. Taking the last entry stamps cq->ts so
 * empty time can be accounted by dumpsys_put_cq().
 */
static cbuf_t *
dumpsys_get_cq(cqueue_t *cq, int live)
{
	cbuf_t *cp;
	hrtime_t now = gethrtime();

	dumpsys_lock(cq, live);

	/* CONSTCOND */
	while (1) {
		cp = (cbuf_t *)cq->first;
		if (cp == NULL) {
			if (cq->open == 0)
				break;
			dumpsys_wait_cq(cq, live);
			continue;
		}
		cq->first = cp->next;
		if (cq->first == NULL) {
			cq->last = NULL;
			cq->ts = now;
		}
		break;
	}

	/* wake another waiter if work remains or the queue closed */
	dumpsys_unlock(cq, live, cq->first != NULL || cq->open == 0);
	return (cp);
}
1728 1742  
/*
 * Send an error message to the console. If the main task is running
 * just write the message via uprintf. If a helper is running the
 * message has to be put on a queue for the main task. Setting fmt to
 * NULL means flush the error message buffer. If fmt is not NULL, just
 * add the text to the existing buffer.
 */
static void
dumpsys_errmsg(helper_t *hp, const char *fmt, ...)
{
	dumpsync_t *ds = hp->ds;
	cbuf_t *cp = hp->cperr;
	va_list adx;

	if (hp->helper == MAINHELPER) {
		/* Main task: print directly to the console. */
		if (fmt != NULL) {
			if (ds->neednl) {
				uprintf("\n");
				ds->neednl = 0;
			}
			va_start(adx, fmt);
			vuprintf(fmt, adx);
			va_end(adx);
		}
	} else if (fmt == NULL) {
		/* Helper flush: hand any pending buffer to the main task. */
		if (cp != NULL) {
			CQ_PUT(mainq, cp, CBUF_ERRMSG);
			hp->cperr = NULL;
		}
	} else {
		/* Helper: append to the per-helper error buffer. */
		if (hp->cperr == NULL) {
			cp = CQ_GET(freebufq);
			hp->cperr = cp;
			cp->used = 0;
		}
		va_start(adx, fmt);
		cp->used += vsnprintf(cp->buf + cp->used, cp->size - cp->used,
		    fmt, adx);
		va_end(adx);
		/*
		 * Hand the buffer off before it can fill up.
		 * NOTE(review): this appears to rely on each message
		 * fitting in LOG_MSGSIZE bytes, since vsnprintf()
		 * returns the untruncated length -- confirm.
		 */
		if ((cp->used + LOG_MSGSIZE) > cp->size) {
			CQ_PUT(mainq, cp, CBUF_ERRMSG);
			hp->cperr = NULL;
		}
	}
}
1774 1788  
1775 1789  /*
1776 1790   * Write an output buffer to the dump file. If the main task is
1777 1791   * running just write the data. If a helper is running the output is
1778 1792   * placed on a queue for the main task.
1779 1793   */
1780 1794  static void
1781 1795  dumpsys_swrite(helper_t *hp, cbuf_t *cp, size_t used)
1782 1796  {
1783 1797          dumpsync_t *ds = hp->ds;
1784 1798  
1785 1799          if (hp->helper == MAINHELPER) {
1786 1800                  HRSTART(ds->perpage, write);
1787 1801                  dumpvp_write(cp->buf, used);
1788 1802                  HRSTOP(ds->perpage, write);
1789 1803                  CQ_PUT(freebufq, cp, CBUF_FREEBUF);
1790 1804          } else {
1791 1805                  cp->used = used;
1792 1806                  CQ_PUT(mainq, cp, CBUF_WRITE);
1793 1807          }
1794 1808  }
1795 1809  
/*
 * Copy one page within the mapped range. The offset starts at 0 and
 * is relative to the first pfn. cp->buf + cp->off is the address of
 * the first pfn. If dump_pagecopy returns a UE offset, create an
 * error message.  Returns the offset to the next pfn in the range
 * selected by the bitmap.
 */
static int
dumpsys_copy_page(helper_t *hp, int offset)
{
	cbuf_t *cp = hp->cpin;
	int ueoff;

	ASSERT(cp->off + offset + PAGESIZE <= cp->size);
	ASSERT(BT_TEST(dumpcfg.bitmap, cp->bitnum));

	ueoff = dump_pagecopy(cp->buf + cp->off + offset, hp->page);

	/* ueoff is the offset in the page to a UE error */
	if (ueoff != -1) {
		/* physical address of the bad word, for the console */
		uint64_t pa = ptob(cp->pfn) + offset + ueoff;

		dumpsys_errmsg(hp, "cpu %d: memory error at PA 0x%08x.%08x\n",
		    CPU->cpu_id, (uint32_t)(pa >> 32), (uint32_t)pa);
	}

	/*
	 * Advance bitnum and offset to the next input page for the
	 * next call to this function. Pages whose bitmap bit is clear
	 * were not selected for the dump and are skipped.
	 */
	offset += PAGESIZE;
	cp->bitnum++;
	while (cp->off + offset < cp->size) {
		if (BT_TEST(dumpcfg.bitmap, cp->bitnum))
			break;
		offset += PAGESIZE;
		cp->bitnum++;
	}

	return (offset);
}
1837 1851  
/*
 * Read the helper queue, and copy one mapped page. Return 0 when
 * done. Return 1 when a page has been copied into hp->page (hp->cpin
 * then points at the input buffer being consumed).
 */
static int
dumpsys_sread(helper_t *hp)
{
	dumpsync_t *ds = hp->ds;

	/* CONSTCOND */
	while (1) {

		/* Find the next input buffer. */
		if (hp->cpin == NULL) {
			HRSTART(hp->perpage, inwait);

			/* CONSTCOND */
			while (1) {
				hp->cpin = CQ_GET(helperq);
				dump_timeleft = dump_timeout;

				/*
				 * NULL return means the helper queue
				 * is closed and empty.
				 */
				if (hp->cpin == NULL)
					break;

				/* Have input, check for dump I/O error. */
				if (!dump_ioerr)
					break;

				/*
				 * If an I/O error occurs, stay in the
				 * loop in order to empty the helper
				 * queue. Return the buffers to the
				 * main task to unmap and free it.
				 */
				hp->cpin->used = 0;
				CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
			}
			HRSTOP(hp->perpage, inwait);

			/* Stop here when the helper queue is closed. */
			if (hp->cpin == NULL)
				break;

			/* Set the offset=0 to get the first pfn. */
			hp->in = 0;

			/* Set the total processed to 0 */
			hp->used = 0;
		}

		/* Process the next page. */
		if (hp->used < hp->cpin->used) {

			/*
			 * Get the next page from the input buffer and
			 * return a copy.
			 */
			ASSERT(hp->in != -1);
			HRSTART(hp->perpage, copy);
			hp->in = dumpsys_copy_page(hp, hp->in);
			hp->used += PAGESIZE;
			HRSTOP(hp->perpage, copy);
			break;

		} else {

			/*
			 * Done with the input. Flush the VM and
			 * return the buffer to the main task.
			 */
			if (panicstr && hp->helper != MAINHELPER)
				hat_flush_range(kas.a_hat,
				    hp->cpin->buf, hp->cpin->size);
			dumpsys_errmsg(hp, NULL);
			CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
			hp->cpin = NULL;
		}
	}

	return (hp->cpin != NULL);
}
1923 1937  
/*
 * Compress size bytes starting at buf with bzip2
 * mode:
 *      BZ_RUN          add one more compressed page
 *      BZ_FINISH       no more input, flush the state
 * Completed output blocks are prefixed with a dumpcsize_t tag word and
 * handed to dumpsys_swrite().
 */
static void
dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode)
{
	dumpsync_t *ds = hp->ds;
	const int CSIZE = sizeof (dumpcsize_t);
	bz_stream *ps = &hp->bzstream;
	int rc = 0;
	uint32_t csize;
	dumpcsize_t cs;

	/* Set input pointers to new input page */
	if (size > 0) {
		ps->avail_in = size;
		ps->next_in = buf;
	}

	/* CONSTCOND */
	while (1) {

		/* Quit when all input has been consumed */
		if (ps->avail_in == 0 && mode == BZ_RUN)
			break;

		/* Get a new output buffer */
		if (hp->cpout == NULL) {
			HRSTART(hp->perpage, outwait);
			hp->cpout = CQ_GET(freebufq);
			HRSTOP(hp->perpage, outwait);
			/* reserve CSIZE bytes for the leading tag word */
			ps->avail_out = hp->cpout->size - CSIZE;
			ps->next_out = hp->cpout->buf + CSIZE;
		}

		/* Compress input, or finalize */
		HRSTART(hp->perpage, compress);
		rc = BZ2_bzCompress(ps, mode);
		HRSTOP(hp->perpage, compress);

		/* Check for error */
		if (mode == BZ_RUN && rc != BZ_RUN_OK) {
			dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n",
			    hp->helper, BZ2_bzErrorString(rc),
			    hp->cpin->pagenum);
			break;
		}

		/* Write the buffer if it is full, or we are flushing */
		if (ps->avail_out == 0 || mode == BZ_FINISH) {
			csize = hp->cpout->size - CSIZE - ps->avail_out;
			cs = DUMP_SET_TAG(csize, hp->tag);
			if (csize > 0) {
				(void) memcpy(hp->cpout->buf, &cs, CSIZE);
				dumpsys_swrite(hp, hp->cpout, csize + CSIZE);
				hp->cpout = NULL;
			}
		}

		/* Check for final complete */
		if (mode == BZ_FINISH) {
			/* loop until bzip2 reports the stream is done */
			if (rc == BZ_STREAM_END)
				break;
			if (rc != BZ_FINISH_OK) {
				dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n",
				    hp->helper, BZ2_bzErrorString(rc));
				break;
			}
		}
	}

	/* Cleanup state and buffers */
	if (mode == BZ_FINISH) {

		/* Reset state so that it is re-usable. */
		(void) BZ2_bzCompressReset(&hp->bzstream);

		/* Give any unused output buffer to the main task */
		if (hp->cpout != NULL) {
			hp->cpout->used = 0;
			CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG);
			hp->cpout = NULL;
		}
	}
}
2012 2026  
/*
 * Helper task: compress a stream of pages with bzip2.  Pulls input
 * buffers with dumpsys_sread() and feeds them through dumpsys_bzrun()
 * until the input side of the queue is closed, then flushes.
 */
static void
dumpsys_bz2compress(helper_t *hp)
{
	/*
	 * NOTE(review): ds looks unused here, but the CQ_*() macros below
	 * presumably expand to ds->... — verify before removing.
	 */
	dumpsync_t *ds = hp->ds;
	dumpstreamhdr_t sh;

	(void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC);
	sh.stream_pagenum = (pgcnt_t)-1;	/* sentinel: no input seen yet */
	sh.stream_npages = 0;
	hp->cpin = NULL;
	hp->cpout = NULL;
	hp->cperr = NULL;
	hp->in = 0;
	hp->out = 0;
	hp->bzstream.avail_in = 0;

	/* Bump reference to mainq while we are running */
	CQ_OPEN(mainq);

	/* Get one page at a time */
	while (dumpsys_sread(hp)) {
		/* Emit a fresh stream header whenever a new input map begins */
		if (sh.stream_pagenum != hp->cpin->pagenum) {
			sh.stream_pagenum = hp->cpin->pagenum;
			sh.stream_npages = btop(hp->cpin->used);
			dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN);
		}
		dumpsys_bzrun(hp, hp->page, PAGESIZE, 0);
	}

	/* Done with input, flush any partial buffer */
	if (sh.stream_pagenum != (pgcnt_t)-1) {
		dumpsys_bzrun(hp, NULL, 0, BZ_FINISH);
		dumpsys_errmsg(hp, NULL);
	}

	ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL);

	/* Decrement main queue count, we are done */
	CQ_CLOSE(mainq);
}
2053 2067  
2054 2068  /*
2055 2069   * Compress with lzjb
2056 2070   * write stream block if full or size==0
2057 2071   * if csize==0 write stream header, else write <csize, data>
2058 2072   * size==0 is a call to flush a buffer
2059 2073   * hp->cpout is the buffer we are flushing or filling
2060 2074   * hp->out is the next index to fill data
2061 2075   * osize is either csize+data, or the size of a stream header
2062 2076   */
2063 2077  static void
2064 2078  dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
2065 2079  {
2066 2080          dumpsync_t *ds = hp->ds;
2067 2081          const int CSIZE = sizeof (dumpcsize_t);
2068 2082          dumpcsize_t cs;
2069 2083          size_t osize = csize > 0 ? CSIZE + size : size;
2070 2084  
2071 2085          /* If flush, and there is no buffer, just return */
2072 2086          if (size == 0 && hp->cpout == NULL)
2073 2087                  return;
2074 2088  
2075 2089          /* If flush, or cpout is full, write it out */
2076 2090          if (size == 0 ||
2077 2091              hp->cpout != NULL && hp->out + osize > hp->cpout->size) {
2078 2092  
2079 2093                  /* Set tag+size word at the front of the stream block. */
2080 2094                  cs = DUMP_SET_TAG(hp->out - CSIZE, hp->tag);
2081 2095                  (void) memcpy(hp->cpout->buf, &cs, CSIZE);
2082 2096  
2083 2097                  /* Write block to dump file. */
2084 2098                  dumpsys_swrite(hp, hp->cpout, hp->out);
2085 2099  
2086 2100                  /* Clear pointer to indicate we need a new buffer */
2087 2101                  hp->cpout = NULL;
2088 2102  
2089 2103                  /* flushing, we are done */
2090 2104                  if (size == 0)
2091 2105                          return;
2092 2106          }
2093 2107  
2094 2108          /* Get an output buffer if we dont have one. */
2095 2109          if (hp->cpout == NULL) {
2096 2110                  HRSTART(hp->perpage, outwait);
2097 2111                  hp->cpout = CQ_GET(freebufq);
2098 2112                  HRSTOP(hp->perpage, outwait);
2099 2113                  hp->out = CSIZE;
2100 2114          }
2101 2115  
2102 2116          /* Store csize word. This is the size of compressed data. */
2103 2117          if (csize > 0) {
2104 2118                  cs = DUMP_SET_TAG(csize, 0);
2105 2119                  (void) memcpy(hp->cpout->buf + hp->out, &cs, CSIZE);
2106 2120                  hp->out += CSIZE;
2107 2121          }
2108 2122  
2109 2123          /* Store the data. */
2110 2124          (void) memcpy(hp->cpout->buf + hp->out, buf, size);
2111 2125          hp->out += size;
2112 2126  }
2113 2127  
2114 2128  static void
2115 2129  dumpsys_lzjbcompress(helper_t *hp)
2116 2130  {
2117 2131          dumpsync_t *ds = hp->ds;
2118 2132          size_t csize;
2119 2133          dumpstreamhdr_t sh;
2120 2134  
2121 2135          (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC);
2122 2136          sh.stream_pagenum = (pfn_t)-1;
2123 2137          sh.stream_npages = 0;
2124 2138          hp->cpin = NULL;
2125 2139          hp->cpout = NULL;
2126 2140          hp->cperr = NULL;
2127 2141          hp->in = 0;
2128 2142          hp->out = 0;
2129 2143  
2130 2144          /* Bump reference to mainq while we are running */
2131 2145          CQ_OPEN(mainq);
2132 2146  
2133 2147          /* Get one page at a time */
2134 2148          while (dumpsys_sread(hp)) {
2135 2149  
2136 2150                  /* Create a stream header for each new input map */
2137 2151                  if (sh.stream_pagenum != hp->cpin->pagenum) {
2138 2152                          sh.stream_pagenum = hp->cpin->pagenum;
2139 2153                          sh.stream_npages = btop(hp->cpin->used);
2140 2154                          dumpsys_lzjbrun(hp, 0, &sh, sizeof (sh));
2141 2155                  }
2142 2156  
2143 2157                  /* Compress one page */
2144 2158                  HRSTART(hp->perpage, compress);
2145 2159                  csize = compress(hp->page, hp->lzbuf, PAGESIZE);
2146 2160                  HRSTOP(hp->perpage, compress);
2147 2161  
2148 2162                  /* Add csize+data to output block */
2149 2163                  ASSERT(csize > 0 && csize <= PAGESIZE);
2150 2164                  dumpsys_lzjbrun(hp, csize, hp->lzbuf, csize);
2151 2165          }
2152 2166  
2153 2167          /* Done with input, flush any partial buffer */
2154 2168          if (sh.stream_pagenum != (pfn_t)-1) {
2155 2169                  dumpsys_lzjbrun(hp, 0, NULL, 0);
2156 2170                  dumpsys_errmsg(hp, NULL);
2157 2171          }
2158 2172  
2159 2173          ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL);
2160 2174  
2161 2175          /* Decrement main queue count, we are done */
2162 2176          CQ_CLOSE(mainq);
2163 2177  }
2164 2178  
2165 2179  /*
2166 2180   * Dump helper called from panic_idle() to compress pages.  CPUs in
2167 2181   * this path must not call most kernel services.
2168 2182   *
2169 2183   * During panic, all but one of the CPUs is idle. These CPUs are used
2170 2184   * as helpers working in parallel to copy and compress memory
2171 2185   * pages. During a panic, however, these processors cannot call any
2172 2186   * kernel services. This is because mutexes become no-ops during
2173 2187   * panic, and, cross-call interrupts are inhibited.  Therefore, during
2174 2188   * panic dump the helper CPUs communicate with the panic CPU using
2175 2189   * memory variables. All memory mapping and I/O is performed by the
2176 2190   * panic CPU.
2177 2191   *
2178 2192   * At dump configuration time, helper_lock is set and helpers_wanted
2179 2193   * is 0. dumpsys() decides whether to set helpers_wanted before
2180 2194   * clearing helper_lock.
2181 2195   *
2182 2196   * At panic time, idle CPUs spin-wait on helper_lock, then alternately
2183 2197   * take the lock and become a helper, or return.
2184 2198   */
2185 2199  void
2186 2200  dumpsys_helper()
2187 2201  {
2188 2202          dumpsys_spinlock(&dumpcfg.helper_lock);
2189 2203          if (dumpcfg.helpers_wanted) {
2190 2204                  helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
2191 2205  
2192 2206                  for (hp = dumpcfg.helper; hp != hpend; hp++) {
2193 2207                          if (hp->helper == FREEHELPER) {
2194 2208                                  hp->helper = CPU->cpu_id;
2195 2209                                  BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2196 2210  
2197 2211                                  dumpsys_spinunlock(&dumpcfg.helper_lock);
2198 2212  
2199 2213                                  if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2200 2214                                          dumpsys_lzjbcompress(hp);
2201 2215                                  else
2202 2216                                          dumpsys_bz2compress(hp);
2203 2217  
2204 2218                                  hp->helper = DONEHELPER;
2205 2219                                  return;
2206 2220                          }
2207 2221                  }
2208 2222  
2209 2223                  /* No more helpers are needed. */
2210 2224                  dumpcfg.helpers_wanted = 0;
2211 2225  
2212 2226          }
2213 2227          dumpsys_spinunlock(&dumpcfg.helper_lock);
2214 2228  }
2215 2229  
2216 2230  /*
2217 2231   * No-wait helper callable in spin loops.
2218 2232   *
2219 2233   * Do not wait for helper_lock. Just check helpers_wanted. The caller
2220 2234   * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s"
2221 2235   * case.
2222 2236   */
2223 2237  void
2224 2238  dumpsys_helper_nw()
2225 2239  {
2226 2240          if (dumpcfg.helpers_wanted)
2227 2241                  dumpsys_helper();
2228 2242  }
2229 2243  
2230 2244  /*
2231 2245   * Dump helper for live dumps.
2232 2246   * These run as a system task.
2233 2247   */
2234 2248  static void
2235 2249  dumpsys_live_helper(void *arg)
2236 2250  {
2237 2251          helper_t *hp = arg;
2238 2252  
2239 2253          BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2240 2254          if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2241 2255                  dumpsys_lzjbcompress(hp);
2242 2256          else
2243 2257                  dumpsys_bz2compress(hp);
2244 2258  }
2245 2259  
/*
 * Compress one page with lzjb (single threaded case)
 */
static void
dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
{
	/*
	 * NOTE(review): ds looks unused here, but the CQ_PUT() macro below
	 * presumably expands to ds->... — verify before removing.
	 */
	dumpsync_t *ds = hp->ds;
	uint32_t csize;

	hp->helper = MAINHELPER;
	hp->in = 0;
	hp->used = 0;
	hp->cpin = cp;
	/* Copy, compress, and write each PAGESIZE chunk of the input map. */
	while (hp->used < cp->used) {
		HRSTART(hp->perpage, copy);
		hp->in = dumpsys_copy_page(hp, hp->in);
		hp->used += PAGESIZE;
		HRSTOP(hp->perpage, copy);

		HRSTART(hp->perpage, compress);
		csize = compress(hp->page, hp->lzbuf, PAGESIZE);
		HRSTOP(hp->perpage, compress);

		/* Write <csize, data> directly; no stream blocks here. */
		HRSTART(hp->perpage, write);
		dumpvp_write(&csize, sizeof (csize));
		dumpvp_write(hp->lzbuf, csize);
		HRSTOP(hp->perpage, write);
	}
	/* Return the consumed input map to the main task. */
	CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
	hp->cpin = NULL;
}
2277 2291  
/*
 * Main task to dump pages. This is called on the dump CPU.
 *
 * Runs a loop that services the main queue: it hands out mapped page
 * ranges (CBUF_FREEMAP), accounts for consumed maps (CBUF_USEDMAP),
 * forwards compressed output to the writer queue (CBUF_WRITE), and
 * relays helper error messages (CBUF_ERRMSG).  Exits when CQ_GET(mainq)
 * returns NULL and the writer queue has drained.
 */
static void
dumpsys_main_task(void *arg)
{
	dumpsync_t *ds = arg;
	pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
	dumpmlw_t mlw;
	cbuf_t *cp;
	pgcnt_t baseoff, pfnoff;
	pfn_t base, pfn;
	int i, dumpserial;

	/*
	 * Fall back to serial mode if there are no helpers.
	 * dump_plat_mincpu can be set to 0 at any time.
	 * dumpcfg.helpermap must contain at least one member.
	 */
	dumpserial = 1;

	if (dump_plat_mincpu != 0 && dumpcfg.clevel != 0) {
		for (i = 0; i < BT_BITOUL(NCPU); ++i) {
			if (dumpcfg.helpermap[i] != 0) {
				dumpserial = 0;
				break;
			}
		}
	}

	if (dumpserial) {
		dumpcfg.clevel = 0;
		/* borrow helper 1's page buffer as helper 0's lzjb buffer */
		if (dumpcfg.helper[0].lzbuf == NULL)
			dumpcfg.helper[0].lzbuf = dumpcfg.helper[1].page;
	}

	dump_init_memlist_walker(&mlw);

	for (;;) {
		int sec = (gethrtime() - ds->start) / NANOSEC;

		/*
		 * Render a simple progress display on the system console to
		 * make clear to the operator that the system has not hung.
		 * Emit an update when dump progress has advanced by one
		 * percent, or when no update has been drawn in the last
		 * second.
		 */
		if (ds->percent > ds->percent_done || sec > ds->sec_done) {
			ds->sec_done = sec;
			ds->percent_done = ds->percent;
			uprintf("^\rdumping: %2d:%02d %3d%% done",
			    sec / 60, sec % 60, ds->percent);
			ds->neednl = 1;
		}

		while (CQ_IS_EMPTY(mainq) && !CQ_IS_EMPTY(writerq)) {

			/* the writerq never blocks */
			cp = CQ_GET(writerq);
			if (cp == NULL)
				break;

			dump_timeleft = dump_timeout;

			HRSTART(ds->perpage, write);
			dumpvp_write(cp->buf, cp->used);
			HRSTOP(ds->perpage, write);

			CQ_PUT(freebufq, cp, CBUF_FREEBUF);
		}

		/*
		 * Wait here for some buffers to process. Returns NULL
		 * when all helpers have terminated and all buffers
		 * have been processed.
		 */
		cp = CQ_GET(mainq);

		if (cp == NULL) {

			/* Drain the write queue. */
			if (!CQ_IS_EMPTY(writerq))
				continue;

			/* Main task exits here. */
			break;
		}

		dump_timeleft = dump_timeout;

		switch (cp->state) {

		case CBUF_FREEMAP:

			/*
			 * Note that we drop CBUF_FREEMAP buffers on
			 * the floor (they will not be on any cqueue)
			 * when we no longer need them.
			 */
			if (bitnum >= dumpcfg.bitmapsize)
				break;

			/* On I/O error, stop handing out work. */
			if (dump_ioerr) {
				bitnum = dumpcfg.bitmapsize;
				CQ_CLOSE(helperq);
				break;
			}

			/* Find the next page marked for dumping. */
			HRSTART(ds->perpage, bitmap);
			for (; bitnum < dumpcfg.bitmapsize; bitnum++)
				if (BT_TEST(dumpcfg.bitmap, bitnum))
					break;
			HRSTOP(ds->perpage, bitmap);
			dump_timeleft = dump_timeout;

			if (bitnum >= dumpcfg.bitmapsize) {
				CQ_CLOSE(helperq);
				break;
			}

			/*
			 * Try to map CBUF_MAPSIZE ranges. Can't
			 * assume that memory segment size is a
			 * multiple of CBUF_MAPSIZE. Can't assume that
			 * the segment starts on a CBUF_MAPSIZE
			 * boundary.
			 */
			pfn = dump_bitnum_to_pfn(bitnum, &mlw);
			ASSERT(pfn != PFN_INVALID);
			ASSERT(bitnum + mlw.mpleft <= dumpcfg.bitmapsize);

			/* Clamp the aligned base to the segment start. */
			base = P2ALIGN(pfn, CBUF_MAPNP);
			if (base < mlw.mpaddr) {
				base = mlw.mpaddr;
				baseoff = P2PHASE(base, CBUF_MAPNP);
			} else {
				baseoff = 0;
			}

			/* Size the mapping by segment end or map boundary. */
			pfnoff = pfn - base;
			if (pfnoff + mlw.mpleft < CBUF_MAPNP) {
				hibitnum = bitnum + mlw.mpleft;
				cp->size = ptob(pfnoff + mlw.mpleft);
			} else {
				hibitnum = bitnum - pfnoff + CBUF_MAPNP -
				    baseoff;
				cp->size = CBUF_MAPSIZE - ptob(baseoff);
			}

			cp->pfn = pfn;
			cp->bitnum = bitnum++;
			cp->pagenum = pagenum++;
			cp->off = ptob(pfnoff);

			/* Count the marked pages covered by this mapping. */
			for (; bitnum < hibitnum; bitnum++)
				if (BT_TEST(dumpcfg.bitmap, bitnum))
					pagenum++;

			dump_timeleft = dump_timeout;
			cp->used = ptob(pagenum - cp->pagenum);

			HRSTART(ds->perpage, map);
			hat_devload(kas.a_hat, cp->buf, cp->size, base,
			    PROT_READ, HAT_LOAD_NOCONSIST);
			HRSTOP(ds->perpage, map);

			ds->pages_mapped += btop(cp->size);
			ds->pages_used += pagenum - cp->pagenum;

			CQ_OPEN(mainq);

			/*
			 * If there are no helpers the main task does
			 * non-streams lzjb compress.
			 */
			if (dumpserial) {
				dumpsys_lzjb_page(dumpcfg.helper, cp);
				break;
			}

			/* pass mapped pages to a helper */
			CQ_PUT(helperq, cp, CBUF_INREADY);

			/* the last page was done */
			if (bitnum >= dumpcfg.bitmapsize)
				CQ_CLOSE(helperq);

			break;

		case CBUF_USEDMAP:

			ds->npages += btop(cp->used);

			HRSTART(ds->perpage, unmap);
			hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD);
			HRSTOP(ds->perpage, unmap);

			/* Recycle the map buffer while pages remain. */
			if (bitnum < dumpcfg.bitmapsize)
				CQ_PUT(mainq, cp, CBUF_FREEMAP);
			CQ_CLOSE(mainq);

			ASSERT(ds->npages <= dumphdr->dump_npages);
			ds->percent = ds->npages * 100LL / dumphdr->dump_npages;
			break;

		case CBUF_WRITE:

			CQ_PUT(writerq, cp, CBUF_WRITE);
			break;

		case CBUF_ERRMSG:

			/* Relay a helper's error message to the console. */
			if (cp->used > 0) {
				cp->buf[cp->size - 2] = '\n';
				cp->buf[cp->size - 1] = '\0';
				if (ds->neednl) {
					uprintf("\n%s", cp->buf);
					ds->neednl = 0;
				} else {
					uprintf("%s", cp->buf);
				}
				/* wait for console output */
				drv_usecwait(200000);
				dump_timeleft = dump_timeout;
			}
			CQ_PUT(freebufq, cp, CBUF_FREEBUF);
			break;

		default:
			uprintf("dump: unexpected buffer state %d, "
			    "buffer will be lost\n", cp->state);
			break;

		} /* end switch */
	}
}
2515 2529  
2516 2530  #ifdef  COLLECT_METRICS
/*
 * Format dump metrics as CSV-style text into buf (at most size bytes).
 * Returns the number of bytes written; zeroes any unused tail of buf.
 * Only built when COLLECT_METRICS is defined.
 */
size_t
dumpsys_metrics(dumpsync_t *ds, char *buf, size_t size)
{
	dumpcfg_t *cfg = &dumpcfg;
	int myid = CPU->cpu_seqid;
	int i, compress_ratio;
	int sec, iorate;
	helper_t *hp, *hpend = &cfg->helper[cfg->nhelper];
	char *e = buf + size;
	char *p = buf;

	/* Clamp divisors to avoid divide-by-zero below. */
	sec = ds->elapsed / (1000 * 1000 * 1000ULL);
	if (sec < 1)
		sec = 1;

	if (ds->iotime < 1)
		ds->iotime = 1;
	iorate = (ds->nwrite * 100000ULL) / ds->iotime;

	compress_ratio = 100LL * ds->npages / btopr(ds->nwrite + 1);

/* Append formatted output at p, bounded by e; no-op once full. */
#define P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0)

	P("Master cpu_seqid,%d\n", CPU->cpu_seqid);
	P("Master cpu_id,%d\n", CPU->cpu_id);
	P("dump_flags,0x%x\n", dumphdr->dump_flags);
	P("dump_ioerr,%d\n", dump_ioerr);

	/* One row of 16 CPUs: M = master, cpu_id = helper, * = idle. */
	P("Helpers:\n");
	for (i = 0; i < ncpus; i++) {
		if ((i & 15) == 0)
			P(",,%03d,", i);
		if (i == myid)
			P("   M");
		else if (BT_TEST(cfg->helpermap, i))
			P("%4d", cpu_seq[i]->cpu_id);
		else
			P("   *");
		if ((i & 15) == 15)
			P("\n");
	}

	P("ncbuf_used,%d\n", cfg->ncbuf_used);
	P("ncmap,%d\n", cfg->ncmap);

	P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
	P("Found small pages,%ld\n", cfg->foundsm);

	P("Compression level,%d\n", cfg->clevel);
	P("Compression type,%s %s\n", cfg->clevel == 0 ? "serial" : "parallel",
	    cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb");
	P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
	    100);
	P("nhelper_used,%d\n", cfg->nhelper_used);

	P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
	P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
	P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
	P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
	P("dumpbuf.size,%ld\n", dumpbuf.size);

	P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
	P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
	P("Dump time,%d\n", sec);

	if (ds->pages_mapped > 0)
		P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
		    / ds->pages_mapped));

	P("\nPer-page metrics:\n");
	if (ds->npages > 0) {
		/* Sum per-helper counters into ds->perpage. */
		for (hp = cfg->helper; hp != hpend; hp++) {
#define PERPAGE(x)	ds->perpage.x += hp->perpage.x;
			PERPAGES;
#undef PERPAGE
		}
		/* Then report each counter as nsec/page. */
#define PERPAGE(x) \
		P("%s nsec/page,%d\n", #x, (int)(ds->perpage.x / ds->npages));
		PERPAGES;
#undef PERPAGE
		P("freebufq.empty,%d\n", (int)(ds->freebufq.empty /
		    ds->npages));
		P("helperq.empty,%d\n", (int)(ds->helperq.empty /
		    ds->npages));
		P("writerq.empty,%d\n", (int)(ds->writerq.empty /
		    ds->npages));
		P("mainq.empty,%d\n", (int)(ds->mainq.empty / ds->npages));

		P("I/O wait nsec/page,%llu\n", (u_longlong_t)(ds->iowait /
		    ds->npages));
	}
#undef P
	if (p < e)
		bzero(p, e - p);
	return (p - buf);
}
2613 2627  #endif  /* COLLECT_METRICS */
2614 2628  
2615 2629  /*
2616 2630   * Dump the system.
2617 2631   */
2618 2632  void
2619 2633  dumpsys(void)
2620 2634  {
2621 2635          dumpsync_t *ds = &dumpsync;
2622 2636          taskq_t *livetaskq = NULL;
2623 2637          pfn_t pfn;
2624 2638          pgcnt_t bitnum;
2625 2639          proc_t *p;
2626 2640          helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
2627 2641          cbuf_t *cp;
2628 2642          pid_t npids, pidx;
2629 2643          char *content;
2630 2644          char *buf;
2631 2645          size_t size;
2632 2646          int save_dump_clevel;
2633 2647          dumpmlw_t mlw;
2634 2648          dumpcsize_t datatag;
2635 2649          dumpdatahdr_t datahdr;
2636 2650  
2637 2651          if (dumpvp == NULL || dumphdr == NULL) {
2638 2652                  uprintf("skipping system dump - no dump device configured\n");
2639 2653                  if (panicstr) {
2640 2654                          dumpcfg.helpers_wanted = 0;
2641 2655                          dumpsys_spinunlock(&dumpcfg.helper_lock);
2642 2656                  }
2643 2657                  return;
2644 2658          }
2645 2659          dumpbuf.cur = dumpbuf.start;
2646 2660  
2647 2661          /* clear the sync variables */
2648 2662          ASSERT(dumpcfg.nhelper > 0);
2649 2663          bzero(ds, sizeof (*ds));
2650 2664          ds->dumpcpu = CPU->cpu_id;
2651 2665  
2652 2666          /*
2653 2667           * Calculate the starting block for dump.  If we're dumping on a
2654 2668           * swap device, start 1/5 of the way in; otherwise, start at the
2655 2669           * beginning.  And never use the first page -- it may be a disk label.
2656 2670           */
2657 2671          if (dumpvp->v_flag & VISSWAP)
2658 2672                  dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
2659 2673          else
2660 2674                  dumphdr->dump_start = DUMP_OFFSET;
2661 2675  
2662 2676          dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED;
2663 2677          dumphdr->dump_crashtime = gethrestime_sec();
2664 2678          dumphdr->dump_npages = 0;
2665 2679          dumphdr->dump_nvtop = 0;
2666 2680          bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize));
2667 2681          dump_timeleft = dump_timeout;
2668 2682  
2669 2683          if (panicstr) {
2670 2684                  dumphdr->dump_flags &= ~DF_LIVE;
2671 2685                  (void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
2672 2686                  (void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
2673 2687                  (void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
2674 2688                      panicstr, panicargs);
2675 2689  
2676 2690          }
2677 2691  
2678 2692          if (dump_conflags & DUMP_ALL)
2679 2693                  content = "all";
2680 2694          else if (dump_conflags & DUMP_CURPROC)
2681 2695                  content = "kernel + curproc";
2682 2696          else
2683 2697                  content = "kernel";
2684 2698          uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
2685 2699              dumphdr->dump_start, content);
2686 2700  
2687 2701          /* Make sure nodename is current */
2688 2702          bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN);
2689 2703  
2690 2704          /*
2691 2705           * If this is a live dump, try to open a VCHR vnode for better
2692 2706           * performance. We must take care to flush the buffer cache
2693 2707           * first.
2694 2708           */
2695 2709          if (!panicstr) {
2696 2710                  vnode_t *cdev_vp, *cmn_cdev_vp;
2697 2711  
2698 2712                  ASSERT(dumpbuf.cdev_vp == NULL);
2699 2713                  cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR);
2700 2714                  if (cdev_vp != NULL) {
2701 2715                          cmn_cdev_vp = common_specvp(cdev_vp);
2702 2716                          if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL)
2703 2717                              == 0) {
2704 2718                                  if (vn_has_cached_data(dumpvp))
2705 2719                                          (void) pvn_vplist_dirty(dumpvp, 0, NULL,
2706 2720                                              B_INVAL | B_TRUNC, kcred);
2707 2721                                  dumpbuf.cdev_vp = cmn_cdev_vp;
2708 2722                          } else {
2709 2723                                  VN_RELE(cdev_vp);
2710 2724                          }
2711 2725                  }
2712 2726          }
2713 2727  
2714 2728          /*
2715 2729           * Store a hires timestamp so we can look it up during debugging.
2716 2730           */
2717 2731          lbolt_debug_entry();
2718 2732  
2719 2733          /*
2720 2734           * Leave room for the message and ereport save areas and terminal dump
2721 2735           * header.
2722 2736           */
2723 2737          dumpbuf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET -
2724 2738              DUMP_ERPTSIZE;
2725 2739  
2726 2740          /*
2727 2741           * Write out the symbol table.  It's no longer compressed,
2728 2742           * so its 'size' and 'csize' are equal.
2729 2743           */
2730 2744          dumpbuf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
2731 2745          dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
2732 2746              ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);
2733 2747  
2734 2748          /*
2735 2749           * Write out the translation map.
2736 2750           */
2737 2751          dumphdr->dump_map = dumpvp_flush();
2738 2752          dump_as(&kas);
2739 2753          dumphdr->dump_nvtop += dump_plat_addr();
2740 2754  
2741 2755          /*
2742 2756           * call into hat, which may have unmapped pages that also need to
2743 2757           * be in the dump
2744 2758           */
2745 2759          hat_dump();
2746 2760  
2747 2761          if (dump_conflags & DUMP_ALL) {
2748 2762                  mutex_enter(&pidlock);
2749 2763  
2750 2764                  for (npids = 0, p = practive; p != NULL; p = p->p_next)
2751 2765                          dumpcfg.pids[npids++] = p->p_pid;
2752 2766  
2753 2767                  mutex_exit(&pidlock);
2754 2768  
2755 2769                  for (pidx = 0; pidx < npids; pidx++)
2756 2770                          (void) dump_process(dumpcfg.pids[pidx]);
2757 2771  
2758 2772                  dump_init_memlist_walker(&mlw);
2759 2773                  for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
2760 2774                          dump_timeleft = dump_timeout;
2761 2775                          pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2762 2776                          /*
2763 2777                           * Some hypervisors do not have all pages available to
2764 2778                           * be accessed by the guest OS.  Check for page
2765 2779                           * accessibility.
2766 2780                           */
2767 2781                          if (plat_hold_page(pfn, PLAT_HOLD_NO_LOCK, NULL) !=
2768 2782                              PLAT_HOLD_OK)
2769 2783                                  continue;
2770 2784                          BT_SET(dumpcfg.bitmap, bitnum);
2771 2785                  }
2772 2786                  dumphdr->dump_npages = dumpcfg.bitmapsize;
2773 2787                  dumphdr->dump_flags |= DF_ALL;
2774 2788  
2775 2789          } else if (dump_conflags & DUMP_CURPROC) {
2776 2790                  /*
2777 2791                   * Determine which pid is to be dumped.  If we're panicking, we
2778 2792                   * dump the process associated with panic_thread (if any).  If
2779 2793                   * this is a live dump, we dump the process associated with
2780 2794                   * curthread.
2781 2795                   */
2782 2796                  npids = 0;
2783 2797                  if (panicstr) {
2784 2798                          if (panic_thread != NULL &&
2785 2799                              panic_thread->t_procp != NULL &&
2786 2800                              panic_thread->t_procp != &p0) {
2787 2801                                  dumpcfg.pids[npids++] =
2788 2802                                      panic_thread->t_procp->p_pid;
2789 2803                          }
2790 2804                  } else {
2791 2805                          dumpcfg.pids[npids++] = curthread->t_procp->p_pid;
2792 2806                  }
2793 2807  
2794 2808                  if (npids && dump_process(dumpcfg.pids[0]) == 0)
2795 2809                          dumphdr->dump_flags |= DF_CURPROC;
2796 2810                  else
2797 2811                          dumphdr->dump_flags |= DF_KERNEL;
2798 2812  
2799 2813          } else {
2800 2814                  dumphdr->dump_flags |= DF_KERNEL;
2801 2815          }
2802 2816  
2803 2817          dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1;
2804 2818  
2805 2819          /*
2806 2820           * Write out the pfn table.
2807 2821           */
2808 2822          dumphdr->dump_pfn = dumpvp_flush();
2809 2823          dump_init_memlist_walker(&mlw);
2810 2824          for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
2811 2825                  dump_timeleft = dump_timeout;
2812 2826                  if (!BT_TEST(dumpcfg.bitmap, bitnum))
2813 2827                          continue;
2814 2828                  pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2815 2829                  ASSERT(pfn != PFN_INVALID);
2816 2830                  dumpvp_write(&pfn, sizeof (pfn_t));
2817 2831          }
2818 2832          dump_plat_pfn();
2819 2833  
2820 2834          /*
2821 2835           * Write out all the pages.
2822 2836           * Map pages, copy them handling UEs, compress, and write them out.
2823 2837           * Cooperate with any helpers running on CPUs in panic_idle().
2824 2838           */
2825 2839          dumphdr->dump_data = dumpvp_flush();
2826 2840  
2827 2841          bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2828 2842          ds->live = dumpcfg.clevel > 0 &&
2829 2843              (dumphdr->dump_flags & DF_LIVE) != 0;
2830 2844  
2831 2845          save_dump_clevel = dumpcfg.clevel;
2832 2846          if (panicstr)
2833 2847                  dumpsys_get_maxmem();
2834 2848          else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2835 2849                  dumpcfg.clevel = DUMP_CLEVEL_LZJB;
2836 2850  
2837 2851          dumpcfg.nhelper_used = 0;
2838 2852          for (hp = dumpcfg.helper; hp != hpend; hp++) {
2839 2853                  if (hp->page == NULL) {
2840 2854                          hp->helper = DONEHELPER;
2841 2855                          continue;
2842 2856                  }
2843 2857                  ++dumpcfg.nhelper_used;
2844 2858                  hp->helper = FREEHELPER;
2845 2859                  hp->taskqid = NULL;
2846 2860                  hp->ds = ds;
2847 2861                  bzero(&hp->perpage, sizeof (hp->perpage));
2848 2862                  if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2849 2863                          (void) BZ2_bzCompressReset(&hp->bzstream);
2850 2864          }
2851 2865  
2852 2866          CQ_OPEN(freebufq);
2853 2867          CQ_OPEN(helperq);
2854 2868  
2855 2869          dumpcfg.ncbuf_used = 0;
2856 2870          for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2857 2871                  if (cp->buf != NULL) {
2858 2872                          CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2859 2873                          ++dumpcfg.ncbuf_used;
2860 2874                  }
2861 2875          }
2862 2876  
2863 2877          for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
2864 2878                  CQ_PUT(mainq, cp, CBUF_FREEMAP);
2865 2879  
2866 2880          ds->start = gethrtime();
2867 2881          ds->iowaitts = ds->start;
2868 2882  
2869 2883          /* start helpers */
2870 2884          if (ds->live) {
2871 2885                  int n = dumpcfg.nhelper_used;
2872 2886                  int pri = MINCLSYSPRI - 25;
2873 2887  
2874 2888                  livetaskq = taskq_create("LiveDump", n, pri, n, n,
2875 2889                      TASKQ_PREPOPULATE);
2876 2890                  for (hp = dumpcfg.helper; hp != hpend; hp++) {
2877 2891                          if (hp->page == NULL)
2878 2892                                  continue;
2879 2893                          hp->helper = hp - dumpcfg.helper;
2880 2894                          hp->taskqid = taskq_dispatch(livetaskq,
2881 2895                              dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
2882 2896                  }
2883 2897  
2884 2898          } else {
2885 2899                  if (panicstr)
2886 2900                          kmem_dump_begin();
2887 2901                  dumpcfg.helpers_wanted = dumpcfg.clevel > 0;
2888 2902                  dumpsys_spinunlock(&dumpcfg.helper_lock);
2889 2903          }
2890 2904  
2891 2905          /* run main task */
2892 2906          dumpsys_main_task(ds);
2893 2907  
2894 2908          ds->elapsed = gethrtime() - ds->start;
2895 2909          if (ds->elapsed < 1)
2896 2910                  ds->elapsed = 1;
2897 2911  
2898 2912          if (livetaskq != NULL)
2899 2913                  taskq_destroy(livetaskq);
2900 2914  
2901 2915          if (ds->neednl) {
2902 2916                  uprintf("\n");
2903 2917                  ds->neednl = 0;
2904 2918          }
2905 2919  
2906 2920          /* record actual pages dumped */
2907 2921          dumphdr->dump_npages = ds->npages;
2908 2922  
2909 2923          /* platform-specific data */
2910 2924          dumphdr->dump_npages += dump_plat_data(dumpcfg.cbuf[0].buf);
2911 2925  
2912 2926          /* note any errors by clearing DF_COMPLETE */
2913 2927          if (dump_ioerr || ds->npages < dumphdr->dump_npages)
2914 2928                  dumphdr->dump_flags &= ~DF_COMPLETE;
2915 2929  
2916 2930          /* end of stream blocks */
2917 2931          datatag = 0;
2918 2932          dumpvp_write(&datatag, sizeof (datatag));
2919 2933  
2920 2934          bzero(&datahdr, sizeof (datahdr));
2921 2935  
2922 2936          /* buffer for metrics */
2923 2937          buf = dumpcfg.cbuf[0].buf;
2924 2938          size = MIN(dumpcfg.cbuf[0].size, DUMP_OFFSET - sizeof (dumphdr_t) -
2925 2939              sizeof (dumpdatahdr_t));
2926 2940  
2927 2941          /* finish the kmem intercepts, collect kmem verbose info */
2928 2942          if (panicstr) {
2929 2943                  datahdr.dump_metrics = kmem_dump_finish(buf, size);
2930 2944                  buf += datahdr.dump_metrics;
2931 2945                  size -= datahdr.dump_metrics;
2932 2946          }
2933 2947  
2934 2948          /* record in the header whether this is a fault-management panic */
2935 2949          if (panicstr)
2936 2950                  dumphdr->dump_fm_panic = is_fm_panic();
2937 2951  
2938 2952          /* compression info in data header */
2939 2953          datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC;
2940 2954          datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION;
2941 2955          datahdr.dump_maxcsize = CBUF_SIZE;
2942 2956          datahdr.dump_maxrange = CBUF_MAPSIZE / PAGESIZE;
2943 2957          datahdr.dump_nstreams = dumpcfg.nhelper_used;
2944 2958          datahdr.dump_clevel = dumpcfg.clevel;
2945 2959  #ifdef COLLECT_METRICS
2946 2960          if (dump_metrics_on)
2947 2961                  datahdr.dump_metrics += dumpsys_metrics(ds, buf, size);
2948 2962  #endif
2949 2963          datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data;
2950 2964  
2951 2965          /*
2952 2966           * Write out the initial and terminal dump headers.
2953 2967           */
2954 2968          dumpbuf.vp_off = dumphdr->dump_start;
2955 2969          dumpvp_write(dumphdr, sizeof (dumphdr_t));
2956 2970          (void) dumpvp_flush();
2957 2971  
2958 2972          dumpbuf.vp_limit = dumpvp_size;
2959 2973          dumpbuf.vp_off = dumpbuf.vp_limit - DUMP_OFFSET;
2960 2974          dumpvp_write(dumphdr, sizeof (dumphdr_t));
2961 2975          dumpvp_write(&datahdr, sizeof (dumpdatahdr_t));
2962 2976          dumpvp_write(dumpcfg.cbuf[0].buf, datahdr.dump_metrics);
2963 2977  
2964 2978          (void) dumpvp_flush();
2965 2979  
2966 2980          uprintf("\r%3d%% done: %llu pages dumped, ",
2967 2981              ds->percent_done, (u_longlong_t)ds->npages);
2968 2982  
2969 2983          if (dump_ioerr == 0) {
2970 2984                  uprintf("dump succeeded\n");
2971 2985          } else {
2972 2986                  uprintf("dump failed: error %d\n", dump_ioerr);
2973 2987  #ifdef DEBUG
2974 2988                  if (panicstr)
2975 2989                          debug_enter("dump failed");
2976 2990  #endif
2977 2991          }
2978 2992  
2979 2993          /*
2980 2994           * Write out all undelivered messages.  This has to be the *last*
2981 2995           * thing we do because the dump process itself emits messages.
2982 2996           */
2983 2997          if (panicstr) {
2984 2998                  dump_summary();
2985 2999                  dump_ereports();
2986 3000                  dump_messages();
2987 3001          }
2988 3002  
2989 3003          delay(2 * hz);  /* let people see the 'done' message */
2990 3004          dump_timeleft = 0;
2991 3005          dump_ioerr = 0;
2992 3006  
2993 3007          /* restore settings after live dump completes */
2994 3008          if (!panicstr) {
2995 3009                  dumpcfg.clevel = save_dump_clevel;
2996 3010  
2997 3011                  /* release any VCHR open of the dump device */
2998 3012                  if (dumpbuf.cdev_vp != NULL) {
2999 3013                          (void) VOP_CLOSE(dumpbuf.cdev_vp, FREAD | FWRITE, 1, 0,
3000 3014                              kcred, NULL);
3001 3015                          VN_RELE(dumpbuf.cdev_vp);
3002 3016                          dumpbuf.cdev_vp = NULL;
3003 3017                  }
3004 3018          }
3005 3019  }
3006 3020  
/*
 * This function is called whenever the memory size, as represented
 * by the phys_install list, changes.
 *
 * All of the work is done under dump_lock so the dump configuration
 * is updated atomically with respect to other dump reconfiguration
 * paths (e.g. dumpvp_resize()).
 */
void
dump_resize()
{
	mutex_enter(&dump_lock);
	dumphdr_init();		/* rebuild the dump header for the new size */
	dumpbuf_resize();	/* resize the dump I/O buffer to match */
	dump_update_clevel();	/* NOTE(review): presumably re-derives the
				   compression level/helper config from the
				   new memory size — confirm in callee */
	mutex_exit(&dump_lock);
}
3020 3034  
3021 3035  /*
3022 3036   * This function allows for dynamic resizing of a dump area. It assumes that
3023 3037   * the underlying device has update its appropriate size(9P).
3024 3038   */
3025 3039  int
3026 3040  dumpvp_resize()
3027 3041  {
3028 3042          int error;
3029 3043          vattr_t vattr;
3030 3044  
3031 3045          mutex_enter(&dump_lock);
3032 3046          vattr.va_mask = AT_SIZE;
3033 3047          if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) {
3034 3048                  mutex_exit(&dump_lock);
3035 3049                  return (error);
3036 3050          }
3037 3051  
3038 3052          if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) {
3039 3053                  mutex_exit(&dump_lock);
3040 3054                  return (ENOSPC);
3041 3055          }
3042 3056  
3043 3057          dumpvp_size = vattr.va_size & -DUMP_OFFSET;
3044 3058          mutex_exit(&dump_lock);
3045 3059          return (0);
3046 3060  }
3047 3061  
3048 3062  int
3049 3063  dump_set_uuid(const char *uuidstr)
3050 3064  {
3051 3065          const char *ptr;
3052 3066          int i;
3053 3067  
3054 3068          if (uuidstr == NULL || strnlen(uuidstr, 36 + 1) != 36)
3055 3069                  return (EINVAL);
3056 3070  
3057 3071          /* uuid_parse is not common code so check manually */
3058 3072          for (i = 0, ptr = uuidstr; i < 36; i++, ptr++) {
3059 3073                  switch (i) {
3060 3074                  case 8:
3061 3075                  case 13:
3062 3076                  case 18:
3063 3077                  case 23:
3064 3078                          if (*ptr != '-')
3065 3079                                  return (EINVAL);
3066 3080                          break;
3067 3081  
3068 3082                  default:
3069 3083                          if (!isxdigit(*ptr))
3070 3084                                  return (EINVAL);
3071 3085                          break;
3072 3086                  }
3073 3087          }
3074 3088  
3075 3089          if (dump_osimage_uuid[0] != '\0')
3076 3090                  return (EALREADY);
3077 3091  
3078 3092          (void) strncpy(dump_osimage_uuid, uuidstr, 36 + 1);
3079 3093  
3080 3094          cmn_err(CE_CONT, "?This Solaris instance has UUID %s\n",
3081 3095              dump_osimage_uuid);
3082 3096  
3083 3097          return (0);
3084 3098  }
3085 3099  
3086 3100  const char *
3087 3101  dump_get_uuid(void)
3088 3102  {
3089 3103          return (dump_osimage_uuid[0] != '\0' ? dump_osimage_uuid : "");
3090 3104  }
  
    | 
      ↓ open down ↓ | 
    2394 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX