5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Joyent, Inc.
25 */
26
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/vm.h>
31 #include <sys/proc.h>
32 #include <sys/file.h>
33 #include <sys/conf.h>
34 #include <sys/kmem.h>
35 #include <sys/mem.h>
36 #include <sys/mman.h>
37 #include <sys/vnode.h>
38 #include <sys/errno.h>
39 #include <sys/memlist.h>
40 #include <sys/dumphdr.h>
41 #include <sys/dumpadm.h>
42 #include <sys/ksyms.h>
43 #include <sys/compress.h>
44 #include <sys/stream.h>
54 #include <sys/debug.h>
55 #include <sys/sunddi.h>
56 #include <fs/fs_subr.h>
57 #include <sys/fs/snode.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/dkio.h>
61 #include <sys/vtoc.h>
62 #include <sys/errorq.h>
63 #include <sys/fm/util.h>
64 #include <sys/fs/zfs.h>
65
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/page.h>
69 #include <vm/pvn.h>
70 #include <vm/seg.h>
71 #include <vm/seg_kmem.h>
72 #include <sys/clock_impl.h>
73 #include <sys/hold_page.h>
74
75 #include <bzip2/bzlib.h>
76
77 /*
78 * Crash dump time is dominated by disk write time. To reduce this,
79 * the stronger compression method bzip2 is applied to reduce the dump
80 * size and hence reduce I/O time. However, bzip2 is much more
81 * computationally expensive than the existing lzjb algorithm, so to
82 * avoid increasing compression time, CPUs that are otherwise idle
83 * during panic are employed to parallelize the compression task.
84 * Many helper CPUs are needed to prevent bzip2 from being a
85 * bottleneck, and on systems with too few CPUs, the lzjb algorithm is
86 * parallelized instead. Lastly, I/O and compression are performed by
87 * different CPUs, and are hence overlapped in time, unlike the older
88 * serial code.
89 *
90 * Another important consideration is the speed of the dump
91 * device. Faster disks need less CPUs in order to benefit from
92 * parallel lzjb versus parallel bzip2. Therefore, the CPU count
 * threshold for switching from parallel lzjb to parallel bzip2 is
94 * elevated for faster disks. The dump device speed is adduced from
95 * the setting for dumpbuf.iosize, see dump_update_clevel.
96 */
97
98 /*
99 * exported vars
100 */
101 kmutex_t dump_lock; /* lock for dump configuration */
102 dumphdr_t *dumphdr; /* dump header */
103 int dump_conflags = DUMP_KERNEL; /* dump configuration flags */
104 vnode_t *dumpvp; /* dump device vnode pointer */
105 u_offset_t dumpvp_size; /* size of dump device, in bytes */
106 char *dumppath; /* pathname of dump device */
107 int dump_timeout = 120; /* timeout for dumping pages */
108 int dump_timeleft; /* portion of dump_timeout remaining */
109 int dump_ioerr; /* dump i/o error */
110 int dump_check_used; /* enable check for used pages */
111 char *dump_stack_scratch; /* scratch area for saving stack summary */
112
113 /*
114 * Tunables for dump compression and parallelism. These can be set via
115 * /etc/system.
116 *
117 * dump_ncpu_low number of helpers for parallel lzjb
118 * This is also the minimum configuration.
119 *
120 * dump_bzip2_level bzip2 compression level: 1-9
121 * Higher numbers give greater compression, but take more memory
122 * and time. Memory used per helper is ~(dump_bzip2_level * 1MB).
123 *
124 * dump_plat_mincpu the cross-over limit for using bzip2 (per platform):
125 * if dump_plat_mincpu == 0, then always do single threaded dump
126 * if ncpu >= dump_plat_mincpu then try to use bzip2
127 *
128 * dump_metrics_on if set, metrics are collected in the kernel, passed
129 * to savecore via the dump file, and recorded by savecore in
130 * METRICS.txt.
131 */
132 uint_t dump_ncpu_low = 4; /* minimum config for parallel lzjb */
133 uint_t dump_bzip2_level = 1; /* bzip2 level (1-9) */
134
135 /* Use dump_plat_mincpu_default unless this variable is set by /etc/system */
136 #define MINCPU_NOT_SET ((uint_t)-1)
137 uint_t dump_plat_mincpu = MINCPU_NOT_SET;
138
139 /* tunables for pre-reserved heap */
140 uint_t dump_kmem_permap = 1024;
141 uint_t dump_kmem_pages = 8;
142
143 /* Define multiple buffers per helper to avoid stalling */
144 #define NCBUF_PER_HELPER 2
145 #define NCMAP_PER_HELPER 4
146
147 /* minimum number of helpers configured */
148 #define MINHELPERS (dump_ncpu_low)
149 #define MINCBUFS (MINHELPERS * NCBUF_PER_HELPER)
150
151 /*
152 * Define constant parameters.
153 *
154 * CBUF_SIZE size of an output buffer
155 *
156 * CBUF_MAPSIZE size of virtual range for mapping pages
157 *
158 * CBUF_MAPNP size of virtual range in pages
159 *
160 */
161 #define DUMP_1KB ((size_t)1 << 10)
162 #define DUMP_1MB ((size_t)1 << 20)
163 #define CBUF_SIZE ((size_t)1 << 17)
164 #define CBUF_MAPSHIFT (22)
165 #define CBUF_MAPSIZE ((size_t)1 << CBUF_MAPSHIFT)
166 #define CBUF_MAPNP ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT))
167
168 /*
243 CBUF_USEDMAP,
244 CBUF_FREEBUF,
245 CBUF_WRITE,
246 CBUF_ERRMSG
247 } cbufstate_t;
248
typedef struct cbuf cbuf_t;

/*
 * A cbuf tracks one buffer as it moves through the dump pipeline:
 * either a kmem-backed output (compression) buffer or a virtual
 * address range used to map input pages.  Buffers are handed between
 * the master and helper tasks via the cqueue_t channels described
 * above; 'state' records the current processing stage.
 */
struct cbuf {
	cbuf_t *next;			/* next in list */
	cbufstate_t state;		/* processing state */
	size_t used;			/* amount used */
	size_t size;			/* mem size */
	char *buf;			/* kmem or vmem */
	pgcnt_t pagenum;		/* index to pfn map */
	pgcnt_t bitnum;			/* first set bitnum */
	pfn_t pfn;			/* first pfn in mapped range */
	int off;			/* byte offset to first pfn */
};
262
263 static char dump_osimage_uuid[36 + 1];
264
265 #define isdigit(ch) ((ch) >= '0' && (ch) <= '9')
266 #define isxdigit(ch) (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
267 ((ch) >= 'A' && (ch) <= 'F'))
268
269 /*
270 * cqueue_t queues: a uni-directional channel for communication
271 * from the master to helper tasks or vice-versa using put and
272 * get primitives. Both mappings and data buffers are passed via
273 * queues. Producers close a queue when done. The number of
274 * active producers is reference counted so the consumer can
275 * detect end of data. Concurrent access is mediated by atomic
276 * operations for panic dump, or mutex/cv for live dump.
277 *
 * There are four queues, used as follows:
279 *
280 * Queue Dataflow NewState
281 * --------------------------------------------------
282 * mainq master -> master FREEMAP
283 * master has initialized or unmapped an input buffer
363 /*
364 * helper_t helpers: contains the context for a stream. CPUs run in
365 * parallel at dump time; each CPU creates a single stream of
366 * compression data. Stream data is divided into CBUF_SIZE blocks.
367 * The blocks are written in order within a stream. But, blocks from
368 * multiple streams can be interleaved. Each stream is identified by a
369 * unique tag.
370 */
typedef struct helper {
	int helper;			/* bound helper id (CPU id), or one */
					/* of MAINHELPER/FREEHELPER/DONEHELPER */
	int tag;			/* compression stream tag */
	perpage_t perpage;		/* per page metrics */
	perpage_t perpagets;		/* per page metrics (timestamps) */
	taskqid_t taskqid;		/* live dump task ptr */
	int in, out;			/* buffer offsets */
	cbuf_t *cpin, *cpout, *cperr;	/* cbuf objects in process */
	dumpsync_t *ds;			/* pointer to sync vars */
	size_t used;			/* counts input consumed */
	char *page;			/* buffer for page copy; may be NULL */
					/* until assigned at dump time */
	char *lzbuf;			/* lzjb output; may be NULL likewise */
	bz_stream bzstream;		/* bzip2 state */
} helper_t;
385
386 #define MAINHELPER (-1) /* helper is also the main task */
387 #define FREEHELPER (-2) /* unbound helper */
388 #define DONEHELPER (-3) /* helper finished */
389
390 /*
391 * configuration vars for dumpsys
392 */
393 typedef struct dumpcfg {
394 int threshold; /* ncpu threshold for bzip2 */
395 int nhelper; /* number of helpers */
396 int nhelper_used; /* actual number of helpers used */
397 int ncmap; /* number VA pages for compression */
398 int ncbuf; /* number of bufs for compression */
399 int ncbuf_used; /* number of bufs in use */
400 uint_t clevel; /* dump compression level */
401 helper_t *helper; /* array of helpers */
402 cbuf_t *cmap; /* array of input (map) buffers */
403 cbuf_t *cbuf; /* array of output buffers */
404 ulong_t *helpermap; /* set of dumpsys helper CPU ids */
405 ulong_t *bitmap; /* bitmap for marking pages to dump */
406 ulong_t *rbitmap; /* bitmap for used CBUF_MAPSIZE ranges */
407 pgcnt_t bitmapsize; /* size of bitmap */
408 pgcnt_t rbitmapsize; /* size of bitmap for ranges */
409 pgcnt_t found4m; /* number ranges allocated by dump */
410 pgcnt_t foundsm; /* number small pages allocated by dump */
411 pid_t *pids; /* list of process IDs at dump time */
412 size_t maxsize; /* memory size needed at dump time */
413 size_t maxvmsize; /* size of reserved VM */
414 char *maxvm; /* reserved VM for spare pages */
421 /*
422 * The dump I/O buffer.
423 *
 * There is one I/O buffer used by dumpvp_write and dumpvp_flush. It is
425 * sized according to the optimum device transfer speed.
426 */
typedef struct dumpbuf {
	vnode_t	*cdev_vp;	/* VCHR open of the dump device */
	len_t	vp_limit;	/* maximum write offset */
	offset_t vp_off;	/* current dump device offset */
	char	*cur;		/* dump write pointer */
	char	*start;		/* dump buffer address */
	char	*end;		/* dump buffer end */
	size_t	size;		/* size of dumpbuf in bytes */
	size_t	iosize;		/* best transfer size for device */
} dumpbuf_t;
	/* NOTE(review): a single global instance ('dumpbuf') exists; */
	/* resizing appears to require dump_lock — confirm with callers. */
437
438 dumpbuf_t dumpbuf; /* I/O buffer */
439
440 /*
441 * The dump I/O buffer must be at least one page, at most xfer_size
442 * bytes, and should scale with physmem in between. The transfer size
443 * passed in will either represent a global default (maxphys) or the
444 * best size for the device. The size of the dumpbuf I/O buffer is
445 * limited by dumpbuf_limit (8MB by default) because the dump
446 * performance saturates beyond a certain size. The default is to
447 * select 1/4096 of the memory.
448 */
449 static int dumpbuf_fraction = 12; /* memory size scale factor */
450 static size_t dumpbuf_limit = 8 * DUMP_1MB; /* max I/O buf size */
451
452 static size_t
453 dumpbuf_iosize(size_t xfer_size)
454 {
455 size_t iosize = ptob(physmem >> dumpbuf_fraction);
456
457 if (iosize < PAGESIZE)
458 iosize = PAGESIZE;
459 else if (iosize > xfer_size)
460 iosize = xfer_size;
472 char *old_buf = dumpbuf.start;
473 size_t old_size = dumpbuf.size;
474 char *new_buf;
475 size_t new_size;
476
477 ASSERT(MUTEX_HELD(&dump_lock));
478
479 new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys));
480 if (new_size <= old_size)
481 return; /* no need to reallocate buffer */
482
483 new_buf = kmem_alloc(new_size, KM_SLEEP);
484 dumpbuf.size = new_size;
485 dumpbuf.start = new_buf;
486 dumpbuf.end = new_buf + new_size;
487 kmem_free(old_buf, old_size);
488 }
489
490 /*
491 * dump_update_clevel is called when dumpadm configures the dump device.
492 * Calculate number of helpers and buffers.
493 * Allocate the minimum configuration for now.
494 *
495 * When the dump file is configured we reserve a minimum amount of
496 * memory for use at crash time. But we reserve VA for all the memory
497 * we really want in order to do the fastest dump possible. The VA is
498 * backed by pages not being dumped, according to the bitmap. If
499 * there is insufficient spare memory, however, we fall back to the
500 * minimum.
501 *
502 * Live dump (savecore -L) always uses the minimum config.
503 *
504 * clevel 0 is single threaded lzjb
505 * clevel 1 is parallel lzjb
506 * clevel 2 is parallel bzip2
507 *
508 * The ncpu threshold is selected with dump_plat_mincpu.
509 * On OPL, set_platform_defaults() overrides the sun4u setting.
510 * The actual values are defined via DUMP_PLAT_*_MINCPU macros.
511 *
512 * Architecture Threshold Algorithm
513 * sun4u < 51 parallel lzjb
514 * sun4u >= 51 parallel bzip2(*)
515 * sun4u OPL < 8 parallel lzjb
516 * sun4u OPL >= 8 parallel bzip2(*)
517 * sun4v < 128 parallel lzjb
518 * sun4v >= 128 parallel bzip2(*)
519 * x86 < 11 parallel lzjb
520 * x86 >= 11 parallel bzip2(*)
521 * 32-bit N/A single-threaded lzjb
522 *
523 * (*) bzip2 is only chosen if there is sufficient available
524 * memory for buffers at dump time. See dumpsys_get_maxmem().
525 *
526 * Faster dump devices have larger I/O buffers. The threshold value is
527 * increased according to the size of the dump I/O buffer, because
528 * parallel lzjb performs better with faster disks. For buffers >= 1MB
529 * the threshold is 3X; for buffers >= 256K threshold is 2X.
530 *
531 * For parallel dumps, the number of helpers is ncpu-1. The CPU
532 * running panic runs the main task. For single-threaded dumps, the
533 * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
534 *
535 * Need multiple buffers per helper so that they do not block waiting
536 * for the main task.
537 * parallel single-threaded
538 * Number of output buffers: nhelper*2 1
539 * Number of mapping buffers: nhelper*4 1
540 *
541 */
542 static void
543 dump_update_clevel()
544 {
545 int tag;
546 size_t bz2size;
547 helper_t *hp, *hpend;
548 cbuf_t *cp, *cpend;
549 dumpcfg_t *old = &dumpcfg;
550 dumpcfg_t newcfg = *old;
551 dumpcfg_t *new = &newcfg;
552
553 ASSERT(MUTEX_HELD(&dump_lock));
554
555 /*
556 * Free the previously allocated bufs and VM.
557 */
558 if (old->helper != NULL) {
559
560 /* helpers */
561 hpend = &old->helper[old->nhelper];
562 for (hp = old->helper; hp != hpend; hp++) {
563 if (hp->lzbuf != NULL)
564 kmem_free(hp->lzbuf, PAGESIZE);
565 if (hp->page != NULL)
566 kmem_free(hp->page, PAGESIZE);
579 if (cp->buf != NULL)
580 kmem_free(cp->buf, cp->size);
581 kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t));
582
583 /* reserved VM for dumpsys_get_maxmem */
584 if (old->maxvmsize > 0)
585 vmem_xfree(heap_arena, old->maxvm, old->maxvmsize);
586 }
587
588 /*
589 * Allocate memory and VM.
590 * One CPU runs dumpsys, the rest are helpers.
591 */
592 new->nhelper = ncpus - 1;
593 if (new->nhelper < 1)
594 new->nhelper = 1;
595
596 if (new->nhelper > DUMP_MAX_NHELPER)
597 new->nhelper = DUMP_MAX_NHELPER;
598
599 /* use platform default, unless /etc/system overrides */
600 if (dump_plat_mincpu == MINCPU_NOT_SET)
601 dump_plat_mincpu = dump_plat_mincpu_default;
602
603 /* increase threshold for faster disks */
604 new->threshold = dump_plat_mincpu;
605 if (dumpbuf.iosize >= DUMP_1MB)
606 new->threshold *= 3;
607 else if (dumpbuf.iosize >= (256 * DUMP_1KB))
608 new->threshold *= 2;
609
610 /* figure compression level based upon the computed threshold. */
611 if (dump_plat_mincpu == 0 || new->nhelper < 2) {
612 new->clevel = 0;
613 new->nhelper = 1;
614 } else if ((new->nhelper + 1) >= new->threshold) {
615 new->clevel = DUMP_CLEVEL_BZIP2;
616 } else {
617 new->clevel = DUMP_CLEVEL_LZJB;
618 }
619
620 if (new->clevel == 0) {
621 new->ncbuf = 1;
622 new->ncmap = 1;
623 } else {
624 new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
625 new->ncmap = NCMAP_PER_HELPER * new->nhelper;
626 }
627
628 /*
629 * Allocate new data structures and buffers for MINHELPERS,
630 * and also figure the max desired size.
631 */
632 bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
633 new->maxsize = 0;
634 new->maxvmsize = 0;
635 new->maxvm = NULL;
636 tag = 1;
637 new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
638 hpend = &new->helper[new->nhelper];
639 for (hp = new->helper; hp != hpend; hp++) {
640 hp->tag = tag++;
641 if (hp < &new->helper[MINHELPERS]) {
642 hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
643 hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);
644 } else if (new->clevel < DUMP_CLEVEL_BZIP2) {
645 new->maxsize += 2 * PAGESIZE;
646 } else {
647 new->maxsize += PAGESIZE;
648 }
649 if (new->clevel >= DUMP_CLEVEL_BZIP2)
650 new->maxsize += bz2size;
651 }
652
653 new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
654 cpend = &new->cbuf[new->ncbuf];
655 for (cp = new->cbuf; cp != cpend; cp++) {
656 cp->state = CBUF_FREEBUF;
657 cp->size = CBUF_SIZE;
658 if (cp < &new->cbuf[MINCBUFS])
659 cp->buf = kmem_alloc(cp->size, KM_SLEEP);
660 else
661 new->maxsize += cp->size;
662 }
663
664 new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP);
665 cpend = &new->cmap[new->ncmap];
666 for (cp = new->cmap; cp != cpend; cp++) {
667 cp->state = CBUF_FREEMAP;
668 cp->size = CBUF_MAPSIZE;
669 cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE,
670 0, 0, NULL, NULL, VM_SLEEP);
783 ASSERT(rbitnum < dumpcfg.rbitmapsize);
784
785 BT_SET(dumpcfg.rbitmap, rbitnum);
786 }
787
788 int
789 dump_test_used(pfn_t pfn)
790 {
791 pgcnt_t bitnum, rbitnum;
792
793 bitnum = dump_pfn_to_bitnum(pfn);
794 ASSERT(bitnum != (pgcnt_t)-1);
795
796 rbitnum = CBUF_MAPP2R(bitnum);
797 ASSERT(rbitnum < dumpcfg.rbitmapsize);
798
799 return (BT_TEST(dumpcfg.rbitmap, rbitnum));
800 }
801
802 /*
803 * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library.
804 * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit().
805 */
806 static void *
807 dumpbzalloc(void *opaque, int items, int size)
808 {
809 size_t *sz;
810 char *ret;
811
812 ASSERT(opaque != NULL);
813 sz = opaque;
814 ret = dumpcfg.maxvm + *sz;
815 *sz += items * size;
816 *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN);
817 ASSERT(*sz <= dumpcfg.maxvmsize);
818 return (ret);
819 }
820
/*
 * bzip2 free callback.  Allocations come out of the reserved
 * dumpcfg.maxvm arena handed out by dumpbzalloc(), so there is
 * nothing to free per-object; the arena is released as a whole
 * (vmem_xfree) when the dump configuration changes.
 */
/*ARGSUSED*/
static void
dumpbzfree(void *opaque, void *addr)
{
}
826
827 /*
828 * Perform additional checks on the page to see if we can really use
829 * it. The kernel (kas) pages are always set in the bitmap. However,
830 * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
831 * bitmap. So we check for them.
832 */
833 static inline int
834 dump_pfn_check(pfn_t pfn)
835 {
836 page_t *pp = page_numtopp_nolock(pfn);
837 if (pp == NULL || pp->p_pagenum != pfn ||
838 #if defined(__sparc)
839 pp->p_vnode == &promvp ||
840 #else
841 PP_ISBOOTPAGES(pp) ||
842 #endif
843 pp->p_toxic != 0)
844 return (0);
845 return (1);
846 }
847
848 /*
849 * Check a range to see if all contained pages are available and
850 * return non-zero if the range can be used.
851 */
852 static inline int
853 dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn)
854 {
855 for (; start < end; start++, pfn++) {
856 if (BT_TEST(dumpcfg.bitmap, start))
857 return (0);
858 if (!dump_pfn_check(pfn))
859 return (0);
860 }
861 return (1);
862 }
863
864 /*
865 * dumpsys_get_maxmem() is called during panic. Find unused ranges
866 * and use them for buffers. If we find enough memory switch to
867 * parallel bzip2, otherwise use parallel lzjb.
868 *
869 * It searches the dump bitmap in 2 passes. The first time it looks
870 * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
871 */
872 static void
873 dumpsys_get_maxmem()
874 {
875 dumpcfg_t *cfg = &dumpcfg;
876 cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
877 helper_t *endhp = &cfg->helper[cfg->nhelper];
878 pgcnt_t bitnum, end;
879 size_t sz, endsz, bz2size;
880 pfn_t pfn, off;
881 cbuf_t *cp;
882 helper_t *hp, *ohp;
883 dumpmlw_t mlw;
884 int k;
885
886 /*
887 * Setting dump_plat_mincpu to 0 at any time forces a serial
888 * dump.
889 */
890 if (dump_plat_mincpu == 0) {
891 cfg->clevel = 0;
892 return;
893 }
894
895 /*
896 * There may be no point in looking for spare memory. If
897 * dumping all memory, then none is spare. If doing a serial
898 * dump, then already have buffers.
899 */
900 if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB ||
901 (dump_conflags & DUMP_ALL) != 0) {
902 if (cfg->clevel > DUMP_CLEVEL_LZJB)
903 cfg->clevel = DUMP_CLEVEL_LZJB;
904 return;
905 }
906
907 sz = 0;
908 cfg->found4m = 0;
909 cfg->foundsm = 0;
910
911 /* bitmap of ranges used to estimate which pfns are being used */
912 bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));
913
914 /* find ranges that are not being dumped to use for buffers */
915 dump_init_memlist_walker(&mlw);
916 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
917 dump_timeleft = dump_timeout;
918 end = bitnum + CBUF_MAPNP;
919 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
920 ASSERT(pfn != PFN_INVALID);
921
922 /* skip partial range at end of mem segment */
923 if (mlw.mpleft < CBUF_MAPNP) {
967 continue;
968 }
969
970 for (; bitnum < end; bitnum++, pfn++) {
971 dump_timeleft = dump_timeout;
972 if (BT_TEST(dumpcfg.bitmap, bitnum))
973 continue;
974 if (!dump_pfn_check(pfn))
975 continue;
976 ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
977 hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
978 PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
979 sz += PAGESIZE;
980 cfg->foundsm++;
981 dump_set_used(pfn);
982 if (sz >= cfg->maxsize)
983 goto foundmax;
984 }
985 }
986
987 /* Fall back to lzjb if we did not get enough memory for bzip2. */
988 endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper;
989 if (sz < endsz) {
990 cfg->clevel = DUMP_CLEVEL_LZJB;
991 }
992
993 /* Allocate memory for as many helpers as we can. */
994 foundmax:
995
996 /* Byte offsets into memory found and mapped above */
997 endsz = sz;
998 sz = 0;
999
1000 /* Set the size for bzip2 state. Only bzip2 needs it. */
1001 bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
1002
1003 /* Skip the preallocate output buffers. */
1004 cp = &cfg->cbuf[MINCBUFS];
1005
1006 /* Use this to move memory up from the preallocated helpers. */
1007 ohp = cfg->helper;
1008
1009 /* Loop over all helpers and allocate memory. */
1010 for (hp = cfg->helper; hp < endhp; hp++) {
1011
1012 /* Skip preallocated helpers by checking hp->page. */
1013 if (hp->page == NULL) {
1014 if (cfg->clevel <= DUMP_CLEVEL_LZJB) {
1015 /* lzjb needs 2 1-page buffers */
1016 if ((sz + (2 * PAGESIZE)) > endsz)
1017 break;
1018 hp->page = cfg->maxvm + sz;
1019 sz += PAGESIZE;
1020 hp->lzbuf = cfg->maxvm + sz;
1021 sz += PAGESIZE;
1022
1023 } else if (ohp->lzbuf != NULL) {
				/* re-use the preallocated lzjb page for bzip2 */
1025 hp->page = ohp->lzbuf;
1026 ohp->lzbuf = NULL;
1027 ++ohp;
1028
1029 } else {
1030 /* bzip2 needs a 1-page buffer */
1031 if ((sz + PAGESIZE) > endsz)
1032 break;
1033 hp->page = cfg->maxvm + sz;
1034 sz += PAGESIZE;
1035 }
1036 }
1037
1038 /*
1039 * Add output buffers per helper. The number of
1040 * buffers per helper is determined by the ratio of
1041 * ncbuf to nhelper.
1042 */
1043 for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
1044 k < NCBUF_PER_HELPER; k++) {
1045 cp->state = CBUF_FREEBUF;
1046 cp->size = CBUF_SIZE;
1047 cp->buf = cfg->maxvm + sz;
1048 sz += CBUF_SIZE;
1049 ++cp;
1050 }
1051
1052 /*
1053 * bzip2 needs compression state. Use the dumpbzalloc
1054 * and dumpbzfree callbacks to allocate the memory.
1055 * bzip2 does allocation only at init time.
1056 */
1057 if (cfg->clevel >= DUMP_CLEVEL_BZIP2) {
1058 if ((sz + bz2size) > endsz) {
1059 hp->page = NULL;
1060 break;
1061 } else {
1062 hp->bzstream.opaque = &sz;
1063 hp->bzstream.bzalloc = dumpbzalloc;
1064 hp->bzstream.bzfree = dumpbzfree;
1065 (void) BZ2_bzCompressInit(&hp->bzstream,
1066 dump_bzip2_level, 0, 0);
1067 hp->bzstream.opaque = NULL;
1068 }
1069 }
1070 }
1071
1072 /* Finish allocating output buffers */
1073 for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
1074 cp->state = CBUF_FREEBUF;
1075 cp->size = CBUF_SIZE;
1076 cp->buf = cfg->maxvm + sz;
1077 sz += CBUF_SIZE;
1078 }
1079
1080 /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
1081 if (cfg->found4m || cfg->foundsm)
1082 dump_check_used = 1;
1083
1084 ASSERT(sz <= endsz);
1085 }
1086
1087 static void
1088 dumphdr_init(void)
1089 {
1090 pgcnt_t npages = 0;
1893 if (hp->used < hp->cpin->used) {
1894
1895 /*
1896 * Get the next page from the input buffer and
1897 * return a copy.
1898 */
1899 ASSERT(hp->in != -1);
1900 HRSTART(hp->perpage, copy);
1901 hp->in = dumpsys_copy_page(hp, hp->in);
1902 hp->used += PAGESIZE;
1903 HRSTOP(hp->perpage, copy);
1904 break;
1905
1906 } else {
1907
1908 /*
1909 * Done with the input. Flush the VM and
1910 * return the buffer to the main task.
1911 */
1912 if (panicstr && hp->helper != MAINHELPER)
1913 hat_flush_range(kas.a_hat,
1914 hp->cpin->buf, hp->cpin->size);
1915 dumpsys_errmsg(hp, NULL);
1916 CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1917 hp->cpin = NULL;
1918 }
1919 }
1920
1921 return (hp->cpin != NULL);
1922 }
1923
1924 /*
1925 * Compress size bytes starting at buf with bzip2
1926 * mode:
1927 * BZ_RUN add one more compressed page
1928 * BZ_FINISH no more input, flush the state
1929 */
static void
dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode)
{
	/*
	 * NOTE(review): 'ds' looks unused here, but the CQ_* queue
	 * macros presumably expand to references to a local 'ds' —
	 * confirm against the macro definitions before removing.
	 */
	dumpsync_t *ds = hp->ds;
	const int CSIZE = sizeof (dumpcsize_t);
	bz_stream *ps = &hp->bzstream;
	int rc = 0;
	uint32_t csize;
	dumpcsize_t cs;

	/* Set input pointers to new input page */
	if (size > 0) {
		ps->avail_in = size;
		ps->next_in = buf;
	}

	/* CONSTCOND */
	while (1) {

		/* Quit when all input has been consumed */
		if (ps->avail_in == 0 && mode == BZ_RUN)
			break;

		/*
		 * Get a new output buffer, reserving CSIZE bytes at
		 * the front for the <size, tag> stream header.
		 */
		if (hp->cpout == NULL) {
			HRSTART(hp->perpage, outwait);
			hp->cpout = CQ_GET(freebufq);
			HRSTOP(hp->perpage, outwait);
			ps->avail_out = hp->cpout->size - CSIZE;
			ps->next_out = hp->cpout->buf + CSIZE;
		}

		/* Compress input, or finalize */
		HRSTART(hp->perpage, compress);
		rc = BZ2_bzCompress(ps, mode);
		HRSTOP(hp->perpage, compress);

		/* Check for error */
		if (mode == BZ_RUN && rc != BZ_RUN_OK) {
			dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n",
			    hp->helper, BZ2_bzErrorString(rc),
			    hp->cpin->pagenum);
			break;
		}

		/* Write the buffer if it is full, or we are flushing */
		if (ps->avail_out == 0 || mode == BZ_FINISH) {
			csize = hp->cpout->size - CSIZE - ps->avail_out;
			cs = DUMP_SET_TAG(csize, hp->tag);
			if (csize > 0) {
				/* prepend the stream header, then write */
				(void) memcpy(hp->cpout->buf, &cs, CSIZE);
				dumpsys_swrite(hp, hp->cpout, csize + CSIZE);
				hp->cpout = NULL;
			}
		}

		/* Check for final complete */
		if (mode == BZ_FINISH) {
			if (rc == BZ_STREAM_END)
				break;
			if (rc != BZ_FINISH_OK) {
				dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n",
				    hp->helper, BZ2_bzErrorString(rc));
				break;
			}
		}
	}

	/* Cleanup state and buffers */
	if (mode == BZ_FINISH) {

		/* Reset state so that it is re-usable. */
		(void) BZ2_bzCompressReset(&hp->bzstream);

		/* Give any unused output buffer to the main task */
		if (hp->cpout != NULL) {
			hp->cpout->used = 0;
			CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG);
			hp->cpout = NULL;
		}
	}
}
2012
2013 static void
2014 dumpsys_bz2compress(helper_t *hp)
2015 {
2016 dumpsync_t *ds = hp->ds;
2017 dumpstreamhdr_t sh;
2018
2019 (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC);
2020 sh.stream_pagenum = (pgcnt_t)-1;
2021 sh.stream_npages = 0;
2022 hp->cpin = NULL;
2023 hp->cpout = NULL;
2024 hp->cperr = NULL;
2025 hp->in = 0;
2026 hp->out = 0;
2027 hp->bzstream.avail_in = 0;
2028
2029 /* Bump reference to mainq while we are running */
2030 CQ_OPEN(mainq);
2031
2032 /* Get one page at a time */
2033 while (dumpsys_sread(hp)) {
2034 if (sh.stream_pagenum != hp->cpin->pagenum) {
2035 sh.stream_pagenum = hp->cpin->pagenum;
2036 sh.stream_npages = btop(hp->cpin->used);
2037 dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN);
2038 }
2039 dumpsys_bzrun(hp, hp->page, PAGESIZE, 0);
2040 }
2041
2042 /* Done with input, flush any partial buffer */
2043 if (sh.stream_pagenum != (pgcnt_t)-1) {
2044 dumpsys_bzrun(hp, NULL, 0, BZ_FINISH);
2045 dumpsys_errmsg(hp, NULL);
2046 }
2047
2048 ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL);
2049
2050 /* Decrement main queue count, we are done */
2051 CQ_CLOSE(mainq);
2052 }
2053
2054 /*
2055 * Compress with lzjb
2056 * write stream block if full or size==0
2057 * if csize==0 write stream header, else write <csize, data>
2058 * size==0 is a call to flush a buffer
2059 * hp->cpout is the buffer we are flushing or filling
2060 * hp->out is the next index to fill data
2061 * osize is either csize+data, or the size of a stream header
2062 */
2063 static void
2064 dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
2065 {
2066 dumpsync_t *ds = hp->ds;
2067 const int CSIZE = sizeof (dumpcsize_t);
2068 dumpcsize_t cs;
2069 size_t osize = csize > 0 ? CSIZE + size : size;
2070
2071 /* If flush, and there is no buffer, just return */
2072 if (size == 0 && hp->cpout == NULL)
2073 return;
2074
2176 * panic CPU.
2177 *
2178 * At dump configuration time, helper_lock is set and helpers_wanted
2179 * is 0. dumpsys() decides whether to set helpers_wanted before
2180 * clearing helper_lock.
2181 *
2182 * At panic time, idle CPUs spin-wait on helper_lock, then alternately
2183 * take the lock and become a helper, or return.
2184 */
/*
 * Claim a free helper slot (if any) and run its compression stream.
 * Called by otherwise-idle CPUs at panic time; see the block comment
 * above for the helper_lock/helpers_wanted protocol.
 */
void
dumpsys_helper()
{
	dumpsys_spinlock(&dumpcfg.helper_lock);
	if (dumpcfg.helpers_wanted) {
		helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];

		for (hp = dumpcfg.helper; hp != hpend; hp++) {
			if (hp->helper == FREEHELPER) {
				/* claim this slot for the current CPU */
				hp->helper = CPU->cpu_id;
				BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);

				/* drop the lock before the long-running work */
				dumpsys_spinunlock(&dumpcfg.helper_lock);

				if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
					dumpsys_lzjbcompress(hp);
				else
					dumpsys_bz2compress(hp);

				hp->helper = DONEHELPER;
				return;
			}
		}

		/* No more helpers are needed. */
		dumpcfg.helpers_wanted = 0;

	}
	dumpsys_spinunlock(&dumpcfg.helper_lock);
}
2215
2216 /*
2217 * No-wait helper callable in spin loops.
2218 *
2219 * Do not wait for helper_lock. Just check helpers_wanted. The caller
2220 * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s"
2221 * case.
2222 */
2223 void
2224 dumpsys_helper_nw()
2225 {
2226 if (dumpcfg.helpers_wanted)
2227 dumpsys_helper();
2228 }
2229
2230 /*
2231 * Dump helper for live dumps.
2232 * These run as a system task.
2233 */
2234 static void
2235 dumpsys_live_helper(void *arg)
2236 {
2237 helper_t *hp = arg;
2238
2239 BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2240 if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2241 dumpsys_lzjbcompress(hp);
2242 else
2243 dumpsys_bz2compress(hp);
2244 }
2245
2246 /*
2247 * Compress one page with lzjb (single threaded case)
2248 */
2249 static void
2250 dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
2251 {
2252 dumpsync_t *ds = hp->ds;
2253 uint32_t csize;
2254
2255 hp->helper = MAINHELPER;
2256 hp->in = 0;
2257 hp->used = 0;
2258 hp->cpin = cp;
2259 while (hp->used < cp->used) {
2260 HRSTART(hp->perpage, copy);
2261 hp->in = dumpsys_copy_page(hp, hp->in);
2262 hp->used += PAGESIZE;
2263 HRSTOP(hp->perpage, copy);
2270 dumpvp_write(&csize, sizeof (csize));
2271 dumpvp_write(hp->lzbuf, csize);
2272 HRSTOP(hp->perpage, write);
2273 }
2274 CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
2275 hp->cpin = NULL;
2276 }
2277
2278 /*
2279 * Main task to dump pages. This is called on the dump CPU.
2280 */
2281 static void
2282 dumpsys_main_task(void *arg)
2283 {
2284 dumpsync_t *ds = arg;
2285 pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
2286 dumpmlw_t mlw;
2287 cbuf_t *cp;
2288 pgcnt_t baseoff, pfnoff;
2289 pfn_t base, pfn;
2290 int i, dumpserial;
2291
2292 /*
2293 * Fall back to serial mode if there are no helpers.
2294 * dump_plat_mincpu can be set to 0 at any time.
2295 * dumpcfg.helpermap must contain at least one member.
2296 */
2297 dumpserial = 1;
2298
2299 if (dump_plat_mincpu != 0 && dumpcfg.clevel != 0) {
2300 for (i = 0; i < BT_BITOUL(NCPU); ++i) {
2301 if (dumpcfg.helpermap[i] != 0) {
2302 dumpserial = 0;
2303 break;
2304 }
2305 }
2306 }
2307
2308 if (dumpserial) {
2309 dumpcfg.clevel = 0;
2310 if (dumpcfg.helper[0].lzbuf == NULL)
2311 dumpcfg.helper[0].lzbuf = dumpcfg.helper[1].page;
2312 }
2313
2314 dump_init_memlist_walker(&mlw);
2315
2316 for (;;) {
2317 int sec = (gethrtime() - ds->start) / NANOSEC;
2318
2319 /*
2320 * Render a simple progress display on the system console to
2321 * make clear to the operator that the system has not hung.
2322 * Emit an update when dump progress has advanced by one
2323 * percent, or when no update has been drawn in the last
2324 * second.
2325 */
2326 if (ds->percent > ds->percent_done || sec > ds->sec_done) {
2327 ds->sec_done = sec;
2328 ds->percent_done = ds->percent;
2329 uprintf("^\rdumping: %2d:%02d %3d%% done",
2330 sec / 60, sec % 60, ds->percent);
2331 ds->neednl = 1;
2332 }
2434 if (BT_TEST(dumpcfg.bitmap, bitnum))
2435 pagenum++;
2436
2437 dump_timeleft = dump_timeout;
2438 cp->used = ptob(pagenum - cp->pagenum);
2439
2440 HRSTART(ds->perpage, map);
2441 hat_devload(kas.a_hat, cp->buf, cp->size, base,
2442 PROT_READ, HAT_LOAD_NOCONSIST);
2443 HRSTOP(ds->perpage, map);
2444
2445 ds->pages_mapped += btop(cp->size);
2446 ds->pages_used += pagenum - cp->pagenum;
2447
2448 CQ_OPEN(mainq);
2449
2450 /*
2451 * If there are no helpers the main task does
2452 * non-streams lzjb compress.
2453 */
2454 if (dumpserial) {
2455 dumpsys_lzjb_page(dumpcfg.helper, cp);
2456 break;
2457 }
2458
2459 /* pass mapped pages to a helper */
2460 CQ_PUT(helperq, cp, CBUF_INREADY);
2461
2462 /* the last page was done */
2463 if (bitnum >= dumpcfg.bitmapsize)
2464 CQ_CLOSE(helperq);
2465
2466 break;
2467
2468 case CBUF_USEDMAP:
2469
2470 ds->npages += btop(cp->used);
2471
2472 HRSTART(ds->perpage, unmap);
2473 hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD);
2474 HRSTOP(ds->perpage, unmap);
2475
2476 if (bitnum < dumpcfg.bitmapsize)
2477 CQ_PUT(mainq, cp, CBUF_FREEMAP);
2478 CQ_CLOSE(mainq);
2479
2480 ASSERT(ds->npages <= dumphdr->dump_npages);
2546 for (i = 0; i < ncpus; i++) {
2547 if ((i & 15) == 0)
2548 P(",,%03d,", i);
2549 if (i == myid)
2550 P(" M");
2551 else if (BT_TEST(cfg->helpermap, i))
2552 P("%4d", cpu_seq[i]->cpu_id);
2553 else
2554 P(" *");
2555 if ((i & 15) == 15)
2556 P("\n");
2557 }
2558
2559 P("ncbuf_used,%d\n", cfg->ncbuf_used);
2560 P("ncmap,%d\n", cfg->ncmap);
2561
2562 P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
2563 P("Found small pages,%ld\n", cfg->foundsm);
2564
2565 P("Compression level,%d\n", cfg->clevel);
2566 P("Compression type,%s %s\n", cfg->clevel == 0 ? "serial" : "parallel",
2567 cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb");
2568 P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
2569 100);
2570 P("nhelper_used,%d\n", cfg->nhelper_used);
2571
2572 P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
2573 P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
2574 P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
2575 P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
2576 P("dumpbuf.size,%ld\n", dumpbuf.size);
2577
2578 P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
2579 P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
2580 P("Dump time,%d\n", sec);
2581
2582 if (ds->pages_mapped > 0)
2583 P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
2584 / ds->pages_mapped));
2585
2586 P("\nPer-page metrics:\n");
2587 if (ds->npages > 0) {
2655 * beginning. And never use the first page -- it may be a disk label.
2656 */
2657 if (dumpvp->v_flag & VISSWAP)
2658 dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
2659 else
2660 dumphdr->dump_start = DUMP_OFFSET;
2661
2662 dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED;
2663 dumphdr->dump_crashtime = gethrestime_sec();
2664 dumphdr->dump_npages = 0;
2665 dumphdr->dump_nvtop = 0;
2666 bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize));
2667 dump_timeleft = dump_timeout;
2668
2669 if (panicstr) {
2670 dumphdr->dump_flags &= ~DF_LIVE;
2671 (void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
2672 (void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
2673 (void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
2674 panicstr, panicargs);
2675
2676 }
2677
2678 if (dump_conflags & DUMP_ALL)
2679 content = "all";
2680 else if (dump_conflags & DUMP_CURPROC)
2681 content = "kernel + curproc";
2682 else
2683 content = "kernel";
2684 uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
2685 dumphdr->dump_start, content);
2686
2687 /* Make sure nodename is current */
2688 bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN);
2689
2690 /*
2691 * If this is a live dump, try to open a VCHR vnode for better
2692 * performance. We must take care to flush the buffer cache
2693 * first.
2694 */
2695 if (!panicstr) {
2700 if (cdev_vp != NULL) {
2701 cmn_cdev_vp = common_specvp(cdev_vp);
2702 if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL)
2703 == 0) {
2704 if (vn_has_cached_data(dumpvp))
2705 (void) pvn_vplist_dirty(dumpvp, 0, NULL,
2706 B_INVAL | B_TRUNC, kcred);
2707 dumpbuf.cdev_vp = cmn_cdev_vp;
2708 } else {
2709 VN_RELE(cdev_vp);
2710 }
2711 }
2712 }
2713
2714 /*
2715 * Store a hires timestamp so we can look it up during debugging.
2716 */
2717 lbolt_debug_entry();
2718
2719 /*
2720 * Leave room for the message and ereport save areas and terminal dump
2721 * header.
2722 */
2723 dumpbuf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET -
2724 DUMP_ERPTSIZE;
2725
2726 /*
2727 * Write out the symbol table. It's no longer compressed,
2728 * so its 'size' and 'csize' are equal.
2729 */
2730 dumpbuf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
2731 dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
2732 ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);
2733
2734 /*
2735 * Write out the translation map.
2736 */
2737 dumphdr->dump_map = dumpvp_flush();
2738 dump_as(&kas);
2739 dumphdr->dump_nvtop += dump_plat_addr();
2740
2741 /*
2742 * call into hat, which may have unmapped pages that also need to
2743 * be in the dump
2744 */
2808 dumphdr->dump_pfn = dumpvp_flush();
2809 dump_init_memlist_walker(&mlw);
2810 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
2811 dump_timeleft = dump_timeout;
2812 if (!BT_TEST(dumpcfg.bitmap, bitnum))
2813 continue;
2814 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2815 ASSERT(pfn != PFN_INVALID);
2816 dumpvp_write(&pfn, sizeof (pfn_t));
2817 }
2818 dump_plat_pfn();
2819
2820 /*
2821 * Write out all the pages.
2822 * Map pages, copy them handling UEs, compress, and write them out.
2823 * Cooperate with any helpers running on CPUs in panic_idle().
2824 */
2825 dumphdr->dump_data = dumpvp_flush();
2826
2827 bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2828 ds->live = dumpcfg.clevel > 0 &&
2829 (dumphdr->dump_flags & DF_LIVE) != 0;
2830
2831 save_dump_clevel = dumpcfg.clevel;
2832 if (panicstr)
2833 dumpsys_get_maxmem();
2834 else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2835 dumpcfg.clevel = DUMP_CLEVEL_LZJB;
2836
2837 dumpcfg.nhelper_used = 0;
2838 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2839 if (hp->page == NULL) {
2840 hp->helper = DONEHELPER;
2841 continue;
2842 }
2843 ++dumpcfg.nhelper_used;
2844 hp->helper = FREEHELPER;
2845 hp->taskqid = NULL;
2846 hp->ds = ds;
2847 bzero(&hp->perpage, sizeof (hp->perpage));
2848 if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2849 (void) BZ2_bzCompressReset(&hp->bzstream);
2850 }
2851
2852 CQ_OPEN(freebufq);
2853 CQ_OPEN(helperq);
2854
2855 dumpcfg.ncbuf_used = 0;
2856 for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2857 if (cp->buf != NULL) {
2858 CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2859 ++dumpcfg.ncbuf_used;
2860 }
2861 }
2862
2863 for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
2864 CQ_PUT(mainq, cp, CBUF_FREEMAP);
2865
2866 ds->start = gethrtime();
2867 ds->iowaitts = ds->start;
2868
2869 /* start helpers */
2870 if (ds->live) {
2871 int n = dumpcfg.nhelper_used;
2872 int pri = MINCLSYSPRI - 25;
2873
2874 livetaskq = taskq_create("LiveDump", n, pri, n, n,
2875 TASKQ_PREPOPULATE);
2876 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2877 if (hp->page == NULL)
2878 continue;
2879 hp->helper = hp - dumpcfg.helper;
2880 hp->taskqid = taskq_dispatch(livetaskq,
2881 dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
2882 }
2883
2884 } else {
2885 if (panicstr)
2886 kmem_dump_begin();
2887 dumpcfg.helpers_wanted = dumpcfg.clevel > 0;
2888 dumpsys_spinunlock(&dumpcfg.helper_lock);
2889 }
2890
2891 /* run main task */
2892 dumpsys_main_task(ds);
2893
2894 ds->elapsed = gethrtime() - ds->start;
2895 if (ds->elapsed < 1)
2896 ds->elapsed = 1;
2897
2898 if (livetaskq != NULL)
2899 taskq_destroy(livetaskq);
2900
2901 if (ds->neednl) {
2902 uprintf("\n");
2903 ds->neednl = 0;
2904 }
2905
2906 /* record actual pages dumped */
2907 dumphdr->dump_npages = ds->npages;
3028 int error;
3029 vattr_t vattr;
3030
3031 mutex_enter(&dump_lock);
3032 vattr.va_mask = AT_SIZE;
3033 if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) {
3034 mutex_exit(&dump_lock);
3035 return (error);
3036 }
3037
3038 if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) {
3039 mutex_exit(&dump_lock);
3040 return (ENOSPC);
3041 }
3042
3043 dumpvp_size = vattr.va_size & -DUMP_OFFSET;
3044 mutex_exit(&dump_lock);
3045 return (0);
3046 }
3047
3048 int
3049 dump_set_uuid(const char *uuidstr)
3050 {
3051 const char *ptr;
3052 int i;
3053
3054 if (uuidstr == NULL || strnlen(uuidstr, 36 + 1) != 36)
3055 return (EINVAL);
3056
3057 /* uuid_parse is not common code so check manually */
3058 for (i = 0, ptr = uuidstr; i < 36; i++, ptr++) {
3059 switch (i) {
3060 case 8:
3061 case 13:
3062 case 18:
3063 case 23:
3064 if (*ptr != '-')
3065 return (EINVAL);
3066 break;
3067
3068 default:
3069 if (!isxdigit(*ptr))
3070 return (EINVAL);
3071 break;
3072 }
3073 }
3074
3075 if (dump_osimage_uuid[0] != '\0')
3076 return (EALREADY);
3077
3078 (void) strncpy(dump_osimage_uuid, uuidstr, 36 + 1);
3079
3080 cmn_err(CE_CONT, "?This Solaris instance has UUID %s\n",
3081 dump_osimage_uuid);
3082
3083 return (0);
3084 }
3085
3086 const char *
3087 dump_get_uuid(void)
3088 {
3089 return (dump_osimage_uuid[0] != '\0' ? dump_osimage_uuid : "");
3090 }
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Joyent, Inc.
25 * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/vm.h>
32 #include <sys/proc.h>
33 #include <sys/file.h>
34 #include <sys/conf.h>
35 #include <sys/kmem.h>
36 #include <sys/mem.h>
37 #include <sys/mman.h>
38 #include <sys/vnode.h>
39 #include <sys/errno.h>
40 #include <sys/memlist.h>
41 #include <sys/dumphdr.h>
42 #include <sys/dumpadm.h>
43 #include <sys/ksyms.h>
44 #include <sys/compress.h>
45 #include <sys/stream.h>
55 #include <sys/debug.h>
56 #include <sys/sunddi.h>
57 #include <fs/fs_subr.h>
58 #include <sys/fs/snode.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/dkio.h>
62 #include <sys/vtoc.h>
63 #include <sys/errorq.h>
64 #include <sys/fm/util.h>
65 #include <sys/fs/zfs.h>
66
67 #include <vm/hat.h>
68 #include <vm/as.h>
69 #include <vm/page.h>
70 #include <vm/pvn.h>
71 #include <vm/seg.h>
72 #include <vm/seg_kmem.h>
73 #include <sys/clock_impl.h>
74 #include <sys/hold_page.h>
75 #include <sys/cpu.h>
76
77 #include <sys/uuid.h>
78
79 /*
80 * Parallel Dump:
81 * CPUs that are otherwise idle during panic are employed to parallelize
82 * the compression task. I/O and compression are performed by different
83 * CPUs, and are hence overlapped in time, unlike the older serial code.
84 */
85
86 /*
87 * exported vars
88 */
89 kmutex_t dump_lock; /* lock for dump configuration */
90 dumphdr_t *dumphdr; /* dump header */
91 int dump_conflags = DUMP_KERNEL; /* dump configuration flags */
92 vnode_t *dumpvp; /* dump device vnode pointer */
93 u_offset_t dumpvp_size; /* size of dump device, in bytes */
94 char *dumppath; /* pathname of dump device */
95 int dump_timeout = 120; /* timeout for dumping pages */
96 int dump_timeleft; /* portion of dump_timeout remaining */
97 int dump_ioerr; /* dump i/o error */
98 int dump_check_used; /* enable check for used pages */
99 char *dump_stack_scratch; /* scratch area for saving stack summary */
100
101 /*
102 * Tunables for dump compression and parallelism.
103 * These can be set via /etc/system.
104 *
105 * dump_ncpu_low:
106 * This is the minimum configuration for parallel lzjb.
107 * A special value of 0 means that parallel dump will not be used.
108 *
109 * dump_metrics_on:
110 * If set, metrics are collected in the kernel, passed to savecore
111 * via the dump file, and recorded by savecore in METRICS.txt.
112 */
113 uint_t dump_ncpu_low = 4; /* minimum config for parallel lzjb */
114
115 /* tunables for pre-reserved heap */
116 uint_t dump_kmem_permap = 1024;
117 uint_t dump_kmem_pages = 8;
118
119 /* Define multiple buffers per helper to avoid stalling */
120 #define NCBUF_PER_HELPER 2
121 #define NCMAP_PER_HELPER 4
122
123 /* minimum number of helpers configured */
124 #define MINHELPERS (MAX(dump_ncpu_low, 1))
125 #define MINCBUFS (MINHELPERS * NCBUF_PER_HELPER)
126
127 /*
128 * Define constant parameters.
129 *
130 * CBUF_SIZE size of an output buffer
131 *
132 * CBUF_MAPSIZE size of virtual range for mapping pages
133 *
134 * CBUF_MAPNP size of virtual range in pages
135 *
136 */
137 #define DUMP_1KB ((size_t)1 << 10)
138 #define DUMP_1MB ((size_t)1 << 20)
139 #define CBUF_SIZE ((size_t)1 << 17)
140 #define CBUF_MAPSHIFT (22)
141 #define CBUF_MAPSIZE ((size_t)1 << CBUF_MAPSHIFT)
142 #define CBUF_MAPNP ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT))
143
144 /*
219 CBUF_USEDMAP,
220 CBUF_FREEBUF,
221 CBUF_WRITE,
222 CBUF_ERRMSG
223 } cbufstate_t;
224
/*
 * A cbuf_t describes one buffer exchanged between the main task and the
 * helpers: either an output (compression) buffer or an input mapping of
 * pages to be dumped.  The state field records its current position in
 * the processing pipeline (see cbufstate_t).
 */
typedef struct cbuf cbuf_t;

struct cbuf {
	cbuf_t *next;			/* next in list */
	cbufstate_t state;		/* processing state */
	size_t used;			/* amount used */
	size_t size;			/* mem size */
	char *buf;			/* kmem or vmem */
	pgcnt_t pagenum;		/* index to pfn map */
	pgcnt_t bitnum;			/* first set bitnum */
	pfn_t pfn;			/* first pfn in mapped range */
	int off;			/* byte offset to first pfn */
};
238
/* printable UUID of the OS image; set at most once via dump_set_uuid() */
static char dump_osimage_uuid[UUID_PRINTABLE_STRING_LENGTH];

/* minimal local ctype helpers, used for UUID string validation */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')
#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
	((ch) >= 'A' && (ch) <= 'F'))
244
245 /*
246 * cqueue_t queues: a uni-directional channel for communication
247 * from the master to helper tasks or vice-versa using put and
248 * get primitives. Both mappings and data buffers are passed via
249 * queues. Producers close a queue when done. The number of
250 * active producers is reference counted so the consumer can
251 * detect end of data. Concurrent access is mediated by atomic
252 * operations for panic dump, or mutex/cv for live dump.
253 *
254 * There a four queues, used as follows:
255 *
256 * Queue Dataflow NewState
257 * --------------------------------------------------
258 * mainq master -> master FREEMAP
259 * master has initialized or unmapped an input buffer
339 /*
340 * helper_t helpers: contains the context for a stream. CPUs run in
341 * parallel at dump time; each CPU creates a single stream of
342 * compression data. Stream data is divided into CBUF_SIZE blocks.
343 * The blocks are written in order within a stream. But, blocks from
344 * multiple streams can be interleaved. Each stream is identified by a
345 * unique tag.
346 */
typedef struct helper {
	int helper;			/* bound CPU id, or MAIN/FREE/DONEHELPER */
	int tag;			/* compression stream tag */
	perpage_t perpage;		/* per page metrics */
	perpage_t perpagets;		/* per page metrics (timestamps) */
	taskqid_t taskqid;		/* live dump task ptr */
	int in, out;			/* buffer offsets */
	cbuf_t *cpin, *cpout, *cperr;	/* cbuf objects in process */
	dumpsync_t *ds;			/* pointer to sync vars */
	size_t used;			/* counts input consumed */
	char *page;			/* buffer for page copy */
	char *lzbuf;			/* lzjb output */
} helper_t;
360
361 #define MAINHELPER (-1) /* helper is also the main task */
362 #define FREEHELPER (-2) /* unbound helper */
363 #define DONEHELPER (-3) /* helper finished */
364
365 /*
366 * configuration vars for dumpsys
367 */
368 typedef struct dumpcfg {
369 int nhelper; /* number of helpers */
370 int nhelper_used; /* actual number of helpers used */
371 int ncmap; /* number VA pages for compression */
372 int ncbuf; /* number of bufs for compression */
373 int ncbuf_used; /* number of bufs in use */
374 uint_t clevel; /* dump compression level */
375 helper_t *helper; /* array of helpers */
376 cbuf_t *cmap; /* array of input (map) buffers */
377 cbuf_t *cbuf; /* array of output buffers */
378 ulong_t *helpermap; /* set of dumpsys helper CPU ids */
379 ulong_t *bitmap; /* bitmap for marking pages to dump */
380 ulong_t *rbitmap; /* bitmap for used CBUF_MAPSIZE ranges */
381 pgcnt_t bitmapsize; /* size of bitmap */
382 pgcnt_t rbitmapsize; /* size of bitmap for ranges */
383 pgcnt_t found4m; /* number ranges allocated by dump */
384 pgcnt_t foundsm; /* number small pages allocated by dump */
385 pid_t *pids; /* list of process IDs at dump time */
386 size_t maxsize; /* memory size needed at dump time */
387 size_t maxvmsize; /* size of reserved VM */
388 char *maxvm; /* reserved VM for spare pages */
395 /*
396 * The dump I/O buffer.
397 *
398 * There is one I/O buffer used by dumpvp_write and dumvp_flush. It is
399 * sized according to the optimum device transfer speed.
400 */
401 typedef struct dumpbuf {
402 vnode_t *cdev_vp; /* VCHR open of the dump device */
403 len_t vp_limit; /* maximum write offset */
404 offset_t vp_off; /* current dump device offset */
405 char *cur; /* dump write pointer */
406 char *start; /* dump buffer address */
407 char *end; /* dump buffer end */
408 size_t size; /* size of dumpbuf in bytes */
409 size_t iosize; /* best transfer size for device */
410 } dumpbuf_t;
411
412 dumpbuf_t dumpbuf; /* I/O buffer */
413
414 /*
415 * DUMP_HELPER_MAX_WAIT
416 * For parallel dump, defines maximum time main task thread will wait
417 * for at least one helper to register in dumpcfg.helpermap, before
418 * assuming there are no helpers and falling back to serial mode.
419 */
420 #define DUMP_HELPER_MAX_WAIT 1000 /* millisec */
421
422 /*
423 * The dump I/O buffer must be at least one page, at most xfer_size
424 * bytes, and should scale with physmem in between. The transfer size
425 * passed in will either represent a global default (maxphys) or the
426 * best size for the device. The size of the dumpbuf I/O buffer is
427 * limited by dumpbuf_limit (8MB by default) because the dump
428 * performance saturates beyond a certain size. The default is to
429 * select 1/4096 of the memory.
430 */
431 static int dumpbuf_fraction = 12; /* memory size scale factor */
432 static size_t dumpbuf_limit = 8 * DUMP_1MB; /* max I/O buf size */
433
434 static size_t
435 dumpbuf_iosize(size_t xfer_size)
436 {
437 size_t iosize = ptob(physmem >> dumpbuf_fraction);
438
439 if (iosize < PAGESIZE)
440 iosize = PAGESIZE;
441 else if (iosize > xfer_size)
442 iosize = xfer_size;
454 char *old_buf = dumpbuf.start;
455 size_t old_size = dumpbuf.size;
456 char *new_buf;
457 size_t new_size;
458
459 ASSERT(MUTEX_HELD(&dump_lock));
460
461 new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys));
462 if (new_size <= old_size)
463 return; /* no need to reallocate buffer */
464
465 new_buf = kmem_alloc(new_size, KM_SLEEP);
466 dumpbuf.size = new_size;
467 dumpbuf.start = new_buf;
468 dumpbuf.end = new_buf + new_size;
469 kmem_free(old_buf, old_size);
470 }
471
472 /*
473 * dump_update_clevel is called when dumpadm configures the dump device.
474 * Determine the compression level / type
475 * - DUMP_CLEVEL_SERIAL is single threaded lzjb
476 * - DUMP_CLEVEL_LZJB is parallel lzjb
477 * Calculate number of helpers and buffers.
478 * Allocate the minimum configuration for now.
479 *
480 * When the dump file is configured we reserve a minimum amount of
481 * memory for use at crash time. But we reserve VA for all the memory
482 * we really want in order to do the fastest dump possible. The VA is
483 * backed by pages not being dumped, according to the bitmap. If
484 * there is insufficient spare memory, however, we fall back to the
485 * minimum.
486 *
487 * Live dump (savecore -L) always uses the minimum config.
488 *
489 * For parallel dumps, the number of helpers is ncpu-1. The CPU
490 * running panic runs the main task. For single-threaded dumps, the
491 * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
492 *
493 * Need multiple buffers per helper so that they do not block waiting
494 * for the main task.
495 * parallel single-threaded
496 * Number of output buffers: nhelper*2 1
497 * Number of mapping buffers: nhelper*4 1
498 *
499 */
500 static void
501 dump_update_clevel()
502 {
503 int tag;
504 helper_t *hp, *hpend;
505 cbuf_t *cp, *cpend;
506 dumpcfg_t *old = &dumpcfg;
507 dumpcfg_t newcfg = *old;
508 dumpcfg_t *new = &newcfg;
509
510 ASSERT(MUTEX_HELD(&dump_lock));
511
512 /*
513 * Free the previously allocated bufs and VM.
514 */
515 if (old->helper != NULL) {
516
517 /* helpers */
518 hpend = &old->helper[old->nhelper];
519 for (hp = old->helper; hp != hpend; hp++) {
520 if (hp->lzbuf != NULL)
521 kmem_free(hp->lzbuf, PAGESIZE);
522 if (hp->page != NULL)
523 kmem_free(hp->page, PAGESIZE);
536 if (cp->buf != NULL)
537 kmem_free(cp->buf, cp->size);
538 kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t));
539
540 /* reserved VM for dumpsys_get_maxmem */
541 if (old->maxvmsize > 0)
542 vmem_xfree(heap_arena, old->maxvm, old->maxvmsize);
543 }
544
545 /*
546 * Allocate memory and VM.
547 * One CPU runs dumpsys, the rest are helpers.
548 */
549 new->nhelper = ncpus - 1;
550 if (new->nhelper < 1)
551 new->nhelper = 1;
552
553 if (new->nhelper > DUMP_MAX_NHELPER)
554 new->nhelper = DUMP_MAX_NHELPER;
555
556 /* If dump_ncpu_low is 0 or greater than ncpus, do serial dump */
557 if (dump_ncpu_low == 0 || dump_ncpu_low > ncpus || new->nhelper < 2) {
558 new->clevel = DUMP_CLEVEL_SERIAL;
559 new->nhelper = 1;
560 new->ncbuf = 1;
561 new->ncmap = 1;
562 } else {
563 new->clevel = DUMP_CLEVEL_LZJB;
564 new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
565 new->ncmap = NCMAP_PER_HELPER * new->nhelper;
566 }
567
568 /*
569 * Allocate new data structures and buffers for MINHELPERS,
570 * and also figure the max desired size.
571 */
572 new->maxsize = 0;
573 new->maxvmsize = 0;
574 new->maxvm = NULL;
575 tag = 1;
576 new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
577 hpend = &new->helper[new->nhelper];
578 for (hp = new->helper; hp != hpend; hp++) {
579 hp->tag = tag++;
580 if (hp < &new->helper[MINHELPERS]) {
581 hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
582 hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);
583 } else {
584 new->maxsize += 2 * PAGESIZE;
585 }
586 }
587
588 new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
589 cpend = &new->cbuf[new->ncbuf];
590 for (cp = new->cbuf; cp != cpend; cp++) {
591 cp->state = CBUF_FREEBUF;
592 cp->size = CBUF_SIZE;
593 if (cp < &new->cbuf[MINCBUFS])
594 cp->buf = kmem_alloc(cp->size, KM_SLEEP);
595 else
596 new->maxsize += cp->size;
597 }
598
599 new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP);
600 cpend = &new->cmap[new->ncmap];
601 for (cp = new->cmap; cp != cpend; cp++) {
602 cp->state = CBUF_FREEMAP;
603 cp->size = CBUF_MAPSIZE;
604 cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE,
605 0, 0, NULL, NULL, VM_SLEEP);
718 ASSERT(rbitnum < dumpcfg.rbitmapsize);
719
720 BT_SET(dumpcfg.rbitmap, rbitnum);
721 }
722
723 int
724 dump_test_used(pfn_t pfn)
725 {
726 pgcnt_t bitnum, rbitnum;
727
728 bitnum = dump_pfn_to_bitnum(pfn);
729 ASSERT(bitnum != (pgcnt_t)-1);
730
731 rbitnum = CBUF_MAPP2R(bitnum);
732 ASSERT(rbitnum < dumpcfg.rbitmapsize);
733
734 return (BT_TEST(dumpcfg.rbitmap, rbitnum));
735 }
736
737 /*
738 * Perform additional checks on the page to see if we can really use
739 * it. The kernel (kas) pages are always set in the bitmap. However,
740 * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
741 * bitmap. So we check for them.
742 */
743 static inline int
744 dump_pfn_check(pfn_t pfn)
745 {
746 page_t *pp = page_numtopp_nolock(pfn);
747 if (pp == NULL || pp->p_pagenum != pfn ||
748 #if defined(__sparc)
749 pp->p_vnode == &promvp ||
750 #else
751 PP_ISBOOTPAGES(pp) ||
752 #endif
753 pp->p_toxic != 0)
754 return (0);
755 return (1);
756 }
757
758 /*
759 * Check a range to see if all contained pages are available and
760 * return non-zero if the range can be used.
761 */
762 static inline int
763 dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn)
764 {
765 for (; start < end; start++, pfn++) {
766 if (BT_TEST(dumpcfg.bitmap, start))
767 return (0);
768 if (!dump_pfn_check(pfn))
769 return (0);
770 }
771 return (1);
772 }
773
774 /*
775 * dumpsys_get_maxmem() is called during panic. Find unused ranges
776 * and use them for buffers.
777 * It searches the dump bitmap in 2 passes. The first time it looks
778 * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
779 */
780 static void
781 dumpsys_get_maxmem()
782 {
783 dumpcfg_t *cfg = &dumpcfg;
784 cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
785 helper_t *endhp = &cfg->helper[cfg->nhelper];
786 pgcnt_t bitnum, end;
787 size_t sz, endsz;
788 pfn_t pfn, off;
789 cbuf_t *cp;
790 helper_t *hp;
791 dumpmlw_t mlw;
792 int k;
793
794 /*
795 * Setting dump_ncpu_low to 0 forces a single threaded dump.
796 */
797 if (dump_ncpu_low == 0) {
798 cfg->clevel = DUMP_CLEVEL_SERIAL;
799 return;
800 }
801
802 /*
803 * There may be no point in looking for spare memory. If
804 * dumping all memory, then none is spare. If doing a serial
805 * dump, then already have buffers.
806 */
807 if (cfg->maxsize == 0 || cfg->clevel == DUMP_CLEVEL_SERIAL ||
808 (dump_conflags & DUMP_ALL) != 0) {
809 return;
810 }
811
812 sz = 0;
813 cfg->found4m = 0;
814 cfg->foundsm = 0;
815
816 /* bitmap of ranges used to estimate which pfns are being used */
817 bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));
818
819 /* find ranges that are not being dumped to use for buffers */
820 dump_init_memlist_walker(&mlw);
821 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
822 dump_timeleft = dump_timeout;
823 end = bitnum + CBUF_MAPNP;
824 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
825 ASSERT(pfn != PFN_INVALID);
826
827 /* skip partial range at end of mem segment */
828 if (mlw.mpleft < CBUF_MAPNP) {
872 continue;
873 }
874
875 for (; bitnum < end; bitnum++, pfn++) {
876 dump_timeleft = dump_timeout;
877 if (BT_TEST(dumpcfg.bitmap, bitnum))
878 continue;
879 if (!dump_pfn_check(pfn))
880 continue;
881 ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
882 hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
883 PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
884 sz += PAGESIZE;
885 cfg->foundsm++;
886 dump_set_used(pfn);
887 if (sz >= cfg->maxsize)
888 goto foundmax;
889 }
890 }
891
892 /* Allocate memory for as many helpers as we can. */
893 foundmax:
894
895 /* Byte offsets into memory found and mapped above */
896 endsz = sz;
897 sz = 0;
898
899 /* Skip the preallocate output buffers. */
900 cp = &cfg->cbuf[MINCBUFS];
901
902 /* Loop over all helpers and allocate memory. */
903 for (hp = cfg->helper; hp < endhp; hp++) {
904
905 /* Skip preallocated helpers by checking hp->page. */
906 if (hp->page == NULL) {
907 /* lzjb needs 2 1-page buffers */
908 if ((sz + (2 * PAGESIZE)) > endsz)
909 break;
910 hp->page = cfg->maxvm + sz;
911 sz += PAGESIZE;
912 hp->lzbuf = cfg->maxvm + sz;
913 sz += PAGESIZE;
914 }
915
916 /*
917 * Add output buffers per helper. The number of
918 * buffers per helper is determined by the ratio of
919 * ncbuf to nhelper.
920 */
921 for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
922 k < NCBUF_PER_HELPER; k++) {
923 cp->state = CBUF_FREEBUF;
924 cp->size = CBUF_SIZE;
925 cp->buf = cfg->maxvm + sz;
926 sz += CBUF_SIZE;
927 ++cp;
928 }
929 }
930
931 /* Finish allocating output buffers */
932 for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
933 cp->state = CBUF_FREEBUF;
934 cp->size = CBUF_SIZE;
935 cp->buf = cfg->maxvm + sz;
936 sz += CBUF_SIZE;
937 }
938
939 /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
940 if (cfg->found4m || cfg->foundsm)
941 dump_check_used = 1;
942
943 ASSERT(sz <= endsz);
944 }
945
946 static void
947 dumphdr_init(void)
948 {
949 pgcnt_t npages = 0;
1752 if (hp->used < hp->cpin->used) {
1753
1754 /*
1755 * Get the next page from the input buffer and
1756 * return a copy.
1757 */
1758 ASSERT(hp->in != -1);
1759 HRSTART(hp->perpage, copy);
1760 hp->in = dumpsys_copy_page(hp, hp->in);
1761 hp->used += PAGESIZE;
1762 HRSTOP(hp->perpage, copy);
1763 break;
1764
1765 } else {
1766
1767 /*
1768 * Done with the input. Flush the VM and
1769 * return the buffer to the main task.
1770 */
1771 if (panicstr && hp->helper != MAINHELPER)
1772 hat_flush();
1773 dumpsys_errmsg(hp, NULL);
1774 CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1775 hp->cpin = NULL;
1776 }
1777 }
1778
1779 return (hp->cpin != NULL);
1780 }
1781
1782 /*
1783 * Compress with lzjb
1784 * write stream block if full or size==0
1785 * if csize==0 write stream header, else write <csize, data>
1786 * size==0 is a call to flush a buffer
1787 * hp->cpout is the buffer we are flushing or filling
1788 * hp->out is the next index to fill data
1789 * osize is either csize+data, or the size of a stream header
1790 */
1791 static void
1792 dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
1793 {
1794 dumpsync_t *ds = hp->ds;
1795 const int CSIZE = sizeof (dumpcsize_t);
1796 dumpcsize_t cs;
1797 size_t osize = csize > 0 ? CSIZE + size : size;
1798
1799 /* If flush, and there is no buffer, just return */
1800 if (size == 0 && hp->cpout == NULL)
1801 return;
1802
1904 * panic CPU.
1905 *
1906 * At dump configuration time, helper_lock is set and helpers_wanted
1907 * is 0. dumpsys() decides whether to set helpers_wanted before
1908 * clearing helper_lock.
1909 *
1910 * At panic time, idle CPUs spin-wait on helper_lock, then alternately
1911 * take the lock and become a helper, or return.
1912 */
1913 void
1914 dumpsys_helper()
1915 {
1916 dumpsys_spinlock(&dumpcfg.helper_lock);
1917 if (dumpcfg.helpers_wanted) {
1918 helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
1919
1920 for (hp = dumpcfg.helper; hp != hpend; hp++) {
1921 if (hp->helper == FREEHELPER) {
1922 hp->helper = CPU->cpu_id;
1923 BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);
1924 dumpsys_spinunlock(&dumpcfg.helper_lock);
1925 dumpsys_lzjbcompress(hp);
1926 hp->helper = DONEHELPER;
1927 return;
1928 }
1929 }
1930
1931 /* No more helpers are needed. */
1932 dumpcfg.helpers_wanted = 0;
1933
1934 }
1935 dumpsys_spinunlock(&dumpcfg.helper_lock);
1936 }
1937
1938 /*
1939 * No-wait helper callable in spin loops.
1940 *
1941 * Do not wait for helper_lock. Just check helpers_wanted. The caller
1942 * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s"
1943 * case.
1944 */
1945 void
1946 dumpsys_helper_nw()
1947 {
1948 if (dumpcfg.helpers_wanted)
1949 dumpsys_helper();
1950 }
1951
1952 /*
1953 * Dump helper for live dumps.
1954 * These run as a system task.
1955 */
1956 static void
1957 dumpsys_live_helper(void *arg)
1958 {
1959 helper_t *hp = arg;
1960
1961 BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);
1962 dumpsys_lzjbcompress(hp);
1963 }
1964
1965 /*
1966 * Compress one page with lzjb (single threaded case)
1967 */
1968 static void
1969 dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
1970 {
1971 dumpsync_t *ds = hp->ds;
1972 uint32_t csize;
1973
1974 hp->helper = MAINHELPER;
1975 hp->in = 0;
1976 hp->used = 0;
1977 hp->cpin = cp;
1978 while (hp->used < cp->used) {
1979 HRSTART(hp->perpage, copy);
1980 hp->in = dumpsys_copy_page(hp, hp->in);
1981 hp->used += PAGESIZE;
1982 HRSTOP(hp->perpage, copy);
1989 dumpvp_write(&csize, sizeof (csize));
1990 dumpvp_write(hp->lzbuf, csize);
1991 HRSTOP(hp->perpage, write);
1992 }
1993 CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1994 hp->cpin = NULL;
1995 }
1996
1997 /*
1998 * Main task to dump pages. This is called on the dump CPU.
1999 */
2000 static void
2001 dumpsys_main_task(void *arg)
2002 {
2003 dumpsync_t *ds = arg;
2004 pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
2005 dumpmlw_t mlw;
2006 cbuf_t *cp;
2007 pgcnt_t baseoff, pfnoff;
2008 pfn_t base, pfn;
2009 int i;
2010
2011 /*
2012 * Fall back to serial mode if there are no helpers.
2013 * dump_ncpu_low can be set to 0 at any time.
2014 * dumpcfg.helpermap must contain at least one member.
2015 *
2016 * It is possible that the helpers haven't registered
2017 * in helpermap yet; wait up to DUMP_HELPER_MAX_WAIT.
2018 */
2019 if (dump_ncpu_low != 0 && dumpcfg.clevel != DUMP_CLEVEL_SERIAL) {
2020 boolean_t dumpserial = B_TRUE;
2021 hrtime_t hrtmax = MSEC2NSEC(DUMP_HELPER_MAX_WAIT);
2022 hrtime_t hrtstart = gethrtime();
2023
2024 for (;;) {
2025 for (i = 0; i < BT_BITOUL(NCPU); ++i) {
2026 if (dumpcfg.helpermap[i] != 0) {
2027 dumpserial = B_FALSE;
2028 break;
2029 }
2030 }
2031
2032 if ((!dumpserial) ||
2033 ((gethrtime() - hrtstart) >= hrtmax)) {
2034 break;
2035 }
2036
2037 ht_pause();
2038 }
2039
2040 if (dumpserial) {
2041 dumpcfg.clevel = DUMP_CLEVEL_SERIAL;
2042 if (dumpcfg.helper[0].lzbuf == NULL) {
2043 dumpcfg.helper[0].lzbuf =
2044 dumpcfg.helper[1].page;
2045 }
2046 }
2047 }
2048
2049 dump_init_memlist_walker(&mlw);
2050
2051 for (;;) {
2052 int sec = (gethrtime() - ds->start) / NANOSEC;
2053
2054 /*
2055 * Render a simple progress display on the system console to
2056 * make clear to the operator that the system has not hung.
2057 * Emit an update when dump progress has advanced by one
2058 * percent, or when no update has been drawn in the last
2059 * second.
2060 */
2061 if (ds->percent > ds->percent_done || sec > ds->sec_done) {
2062 ds->sec_done = sec;
2063 ds->percent_done = ds->percent;
2064 uprintf("^\rdumping: %2d:%02d %3d%% done",
2065 sec / 60, sec % 60, ds->percent);
2066 ds->neednl = 1;
2067 }
2169 if (BT_TEST(dumpcfg.bitmap, bitnum))
2170 pagenum++;
2171
2172 dump_timeleft = dump_timeout;
2173 cp->used = ptob(pagenum - cp->pagenum);
2174
2175 HRSTART(ds->perpage, map);
2176 hat_devload(kas.a_hat, cp->buf, cp->size, base,
2177 PROT_READ, HAT_LOAD_NOCONSIST);
2178 HRSTOP(ds->perpage, map);
2179
2180 ds->pages_mapped += btop(cp->size);
2181 ds->pages_used += pagenum - cp->pagenum;
2182
2183 CQ_OPEN(mainq);
2184
2185 /*
2186 * If there are no helpers the main task does
2187 * non-streams lzjb compress.
2188 */
2189 if (dumpcfg.clevel == DUMP_CLEVEL_SERIAL) {
2190 dumpsys_lzjb_page(dumpcfg.helper, cp);
2191 } else {
2192 /* pass mapped pages to a helper */
2193 CQ_PUT(helperq, cp, CBUF_INREADY);
2194 }
2195
2196 /* the last page was done */
2197 if (bitnum >= dumpcfg.bitmapsize)
2198 CQ_CLOSE(helperq);
2199
2200 break;
2201
2202 case CBUF_USEDMAP:
2203
2204 ds->npages += btop(cp->used);
2205
2206 HRSTART(ds->perpage, unmap);
2207 hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD);
2208 HRSTOP(ds->perpage, unmap);
2209
2210 if (bitnum < dumpcfg.bitmapsize)
2211 CQ_PUT(mainq, cp, CBUF_FREEMAP);
2212 CQ_CLOSE(mainq);
2213
2214 ASSERT(ds->npages <= dumphdr->dump_npages);
2280 for (i = 0; i < ncpus; i++) {
2281 if ((i & 15) == 0)
2282 P(",,%03d,", i);
2283 if (i == myid)
2284 P(" M");
2285 else if (BT_TEST(cfg->helpermap, i))
2286 P("%4d", cpu_seq[i]->cpu_id);
2287 else
2288 P(" *");
2289 if ((i & 15) == 15)
2290 P("\n");
2291 }
2292
2293 P("ncbuf_used,%d\n", cfg->ncbuf_used);
2294 P("ncmap,%d\n", cfg->ncmap);
2295
2296 P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
2297 P("Found small pages,%ld\n", cfg->foundsm);
2298
2299 P("Compression level,%d\n", cfg->clevel);
2300 P("Compression type,%s lzjb\n",
2301 cfg->clevel == DUMP_CLEVEL_SERIAL ? "serial" : "parallel");
2302 P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
2303 100);
2304 P("nhelper_used,%d\n", cfg->nhelper_used);
2305
2306 P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
2307 P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
2308 P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
2309 P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
2310 P("dumpbuf.size,%ld\n", dumpbuf.size);
2311
2312 P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
2313 P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
2314 P("Dump time,%d\n", sec);
2315
2316 if (ds->pages_mapped > 0)
2317 P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
2318 / ds->pages_mapped));
2319
2320 P("\nPer-page metrics:\n");
2321 if (ds->npages > 0) {
2389 * beginning. And never use the first page -- it may be a disk label.
2390 */
2391 if (dumpvp->v_flag & VISSWAP)
2392 dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
2393 else
2394 dumphdr->dump_start = DUMP_OFFSET;
2395
2396 dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED;
2397 dumphdr->dump_crashtime = gethrestime_sec();
2398 dumphdr->dump_npages = 0;
2399 dumphdr->dump_nvtop = 0;
2400 bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize));
2401 dump_timeleft = dump_timeout;
2402
2403 if (panicstr) {
2404 dumphdr->dump_flags &= ~DF_LIVE;
2405 (void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
2406 (void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
2407 (void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
2408 panicstr, panicargs);
2409 (void) strncpy(dumphdr->dump_uuid, dump_get_uuid(),
2410 sizeof (dumphdr->dump_uuid));
2411 }
2412
2413 if (dump_conflags & DUMP_ALL)
2414 content = "all";
2415 else if (dump_conflags & DUMP_CURPROC)
2416 content = "kernel + curproc";
2417 else
2418 content = "kernel";
2419 uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
2420 dumphdr->dump_start, content);
2421
2422 /* Make sure nodename is current */
2423 bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN);
2424
2425 /*
2426 * If this is a live dump, try to open a VCHR vnode for better
2427 * performance. We must take care to flush the buffer cache
2428 * first.
2429 */
2430 if (!panicstr) {
2435 if (cdev_vp != NULL) {
2436 cmn_cdev_vp = common_specvp(cdev_vp);
2437 if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL)
2438 == 0) {
2439 if (vn_has_cached_data(dumpvp))
2440 (void) pvn_vplist_dirty(dumpvp, 0, NULL,
2441 B_INVAL | B_TRUNC, kcred);
2442 dumpbuf.cdev_vp = cmn_cdev_vp;
2443 } else {
2444 VN_RELE(cdev_vp);
2445 }
2446 }
2447 }
2448
2449 /*
2450 * Store a hires timestamp so we can look it up during debugging.
2451 */
2452 lbolt_debug_entry();
2453
2454 /*
2455 * Leave room for the summary, message and ereport save areas
2456 * and terminal dump header.
2457 */
2458 dumpbuf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET -
2459 DUMP_ERPTSIZE - DUMP_SUMMARYSIZE;
2460
2461 /*
2462 * Write out the symbol table. It's no longer compressed,
2463 * so its 'size' and 'csize' are equal.
2464 */
2465 dumpbuf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
2466 dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
2467 ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);
2468
2469 /*
2470 * Write out the translation map.
2471 */
2472 dumphdr->dump_map = dumpvp_flush();
2473 dump_as(&kas);
2474 dumphdr->dump_nvtop += dump_plat_addr();
2475
2476 /*
2477 * call into hat, which may have unmapped pages that also need to
2478 * be in the dump
2479 */
2543 dumphdr->dump_pfn = dumpvp_flush();
2544 dump_init_memlist_walker(&mlw);
2545 for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
2546 dump_timeleft = dump_timeout;
2547 if (!BT_TEST(dumpcfg.bitmap, bitnum))
2548 continue;
2549 pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2550 ASSERT(pfn != PFN_INVALID);
2551 dumpvp_write(&pfn, sizeof (pfn_t));
2552 }
2553 dump_plat_pfn();
2554
2555 /*
2556 * Write out all the pages.
2557 * Map pages, copy them handling UEs, compress, and write them out.
2558 * Cooperate with any helpers running on CPUs in panic_idle().
2559 */
2560 dumphdr->dump_data = dumpvp_flush();
2561
2562 bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2563 ds->live = dumpcfg.clevel > DUMP_CLEVEL_SERIAL &&
2564 (dumphdr->dump_flags & DF_LIVE) != 0;
2565
2566 save_dump_clevel = dumpcfg.clevel;
2567 if (panicstr)
2568 dumpsys_get_maxmem();
2569
2570 dumpcfg.nhelper_used = 0;
2571 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2572 if (hp->page == NULL) {
2573 hp->helper = DONEHELPER;
2574 continue;
2575 }
2576 ++dumpcfg.nhelper_used;
2577 hp->helper = FREEHELPER;
2578 hp->taskqid = NULL;
2579 hp->ds = ds;
2580 bzero(&hp->perpage, sizeof (hp->perpage));
2581 }
2582
2583 CQ_OPEN(freebufq);
2584 CQ_OPEN(helperq);
2585
2586 dumpcfg.ncbuf_used = 0;
2587 for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2588 if (cp->buf != NULL) {
2589 CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2590 ++dumpcfg.ncbuf_used;
2591 }
2592 }
2593
2594 for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
2595 CQ_PUT(mainq, cp, CBUF_FREEMAP);
2596
2597 ds->start = gethrtime();
2598 ds->iowaitts = ds->start;
2599
2600 /* start helpers */
2601 if (ds->live) {
2602 int n = dumpcfg.nhelper_used;
2603 int pri = MINCLSYSPRI - 25;
2604
2605 livetaskq = taskq_create("LiveDump", n, pri, n, n,
2606 TASKQ_PREPOPULATE);
2607 for (hp = dumpcfg.helper; hp != hpend; hp++) {
2608 if (hp->page == NULL)
2609 continue;
2610 hp->helper = hp - dumpcfg.helper;
2611 hp->taskqid = taskq_dispatch(livetaskq,
2612 dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
2613 }
2614
2615 } else {
2616 if (panicstr)
2617 kmem_dump_begin();
2618 dumpcfg.helpers_wanted = dumpcfg.clevel > DUMP_CLEVEL_SERIAL;
2619 dumpsys_spinunlock(&dumpcfg.helper_lock);
2620 }
2621
2622 /* run main task */
2623 dumpsys_main_task(ds);
2624
2625 ds->elapsed = gethrtime() - ds->start;
2626 if (ds->elapsed < 1)
2627 ds->elapsed = 1;
2628
2629 if (livetaskq != NULL)
2630 taskq_destroy(livetaskq);
2631
2632 if (ds->neednl) {
2633 uprintf("\n");
2634 ds->neednl = 0;
2635 }
2636
2637 /* record actual pages dumped */
2638 dumphdr->dump_npages = ds->npages;
2759 int error;
2760 vattr_t vattr;
2761
2762 mutex_enter(&dump_lock);
2763 vattr.va_mask = AT_SIZE;
2764 if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) {
2765 mutex_exit(&dump_lock);
2766 return (error);
2767 }
2768
2769 if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) {
2770 mutex_exit(&dump_lock);
2771 return (ENOSPC);
2772 }
2773
2774 dumpvp_size = vattr.va_size & -DUMP_OFFSET;
2775 mutex_exit(&dump_lock);
2776 return (0);
2777 }
2778
/*
 * Fallback so this file remains compilable if the uuid header is not
 * pulled in: a printable UUID is 36 characters (8-4-4-4-12) plus the
 * terminating NUL.
 */
#ifndef	UUID_PRINTABLE_STRING_LENGTH
#define	UUID_PRINTABLE_STRING_LENGTH	37
#endif

/*
 * Validate the printable form of a UUID: exactly 36 characters, with
 * '-' separators at offsets 8, 13, 18 and 23 and hexadecimal digits
 * everywhere else.  Returns 0 if valid, EINVAL otherwise.
 */
static int
dump_validate_uuid(const char *uuidstr)
{
	const char *ptr;
	int i;

	if (uuidstr == NULL || strlen(uuidstr) !=
	    UUID_PRINTABLE_STRING_LENGTH - 1)
		return (EINVAL);

	/* uuid_parse is not common code so check manually */
	for (i = 0, ptr = uuidstr; i < UUID_PRINTABLE_STRING_LENGTH - 1;
	    i++, ptr++) {
		switch (i) {
		case 8:
		case 13:
		case 18:
		case 23:
			if (*ptr != '-')
				return (EINVAL);
			break;

		default:
			/*
			 * Cast to unsigned char: passing a negative char
			 * value to isxdigit() is undefined behavior
			 * (CERT STR37-C).
			 */
			if (!isxdigit((unsigned char)*ptr))
				return (EINVAL);
			break;
		}
	}

	return (0);
}
2810
2811 int
2812 dump_update_uuid(const char *uuidstr)
2813 {
2814
2815 if (dump_validate_uuid(uuidstr) != 0 || dumphdr == NULL)
2816 return (EINVAL);
2817
2818 bzero(dumphdr->dump_uuid, sizeof (dumphdr->dump_uuid));
2819 (void) strncpy(dumphdr->dump_uuid, uuidstr,
2820 sizeof (dumphdr->dump_uuid));
2821
2822 return (0);
2823 }
2824
2825 int
2826 dump_set_uuid(const char *uuidstr)
2827 {
2828 if (dump_validate_uuid(uuidstr) != 0)
2829 return (EINVAL);
2830
2831 if (dump_osimage_uuid[0] != '\0')
2832 return (EALREADY);
2833
2834 (void) strncpy(dump_osimage_uuid, uuidstr,
2835 UUID_PRINTABLE_STRING_LENGTH);
2836
2837 cmn_err(CE_CONT, "?This Solaris instance has UUID %s\n",
2838 dump_osimage_uuid);
2839
2840 return (0);
2841 }
2842
2843 const char *
2844 dump_get_uuid(void)
2845 {
2846 return (dump_osimage_uuid[0] != '\0' ? dump_osimage_uuid : "");
2847 }
|