Print this page
NEX-17845 Remove support for BZIP2 from dump
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-14185 savecore -f vmdump.1 tries to unpack this to unix.0 and vmcore.0 and other nits
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-9338 improve the layout of the crash directory (follow-up)
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-9338 improve the layout of the crash directory
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 /*
  26  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  27  */
  28 
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <stdarg.h>
  32 #include <unistd.h>
  33 #include <fcntl.h>
  34 #include <errno.h>
  35 #include <string.h>
  36 #include <deflt.h>
  37 #include <time.h>
  38 #include <syslog.h>
  39 #include <stropts.h>
  40 #include <pthread.h>
  41 #include <limits.h>
  42 #include <atomic.h>
  43 #include <libnvpair.h>
  44 #include <libintl.h>
  45 #include <sys/mem.h>
  46 #include <sys/statvfs.h>
  47 #include <sys/dumphdr.h>
  48 #include <sys/dumpadm.h>
  49 #include <sys/compress.h>
  50 #include <sys/panic.h>
  51 #include <sys/sysmacros.h>
  52 #include <sys/stat.h>
  53 #include <sys/resource.h>
  54 #include <bzip2/bzlib.h>
  55 #include <sys/fm/util.h>
  56 #include <fm/libfmevent.h>
  57 #include <sys/int_fmtio.h>


  58 
  59 
  60 /* fread/fwrite buffer size */
  61 #define FBUFSIZE                (1ULL << 20)
  62 
  63 /* minimum size for output buffering */
  64 #define MINCOREBLKSIZE          (1ULL << 17)
  65 
  66 /* create this file if metrics collection is enabled in the kernel */
  67 #define METRICSFILE "METRICS.csv"
  68 
  69 static char     progname[9] = "savecore";
  70 static char     *savedir;               /* savecore directory */

  71 static char     *dumpfile;              /* source of raw crash dump */
  72 static long     bounds = -1;            /* numeric suffix */
  73 static long     pagesize;               /* dump pagesize */
  74 static int      dumpfd = -1;            /* dumpfile descriptor */

  75 static boolean_t have_dumpfile = B_TRUE;        /* dumpfile existence */
  76 static dumphdr_t corehdr, dumphdr;      /* initial and terminal dumphdrs */
  77 static boolean_t dump_incomplete;       /* dumphdr indicates incomplete */
  78 static boolean_t fm_panic;              /* dump is the result of fm_panic */
  79 static offset_t endoff;                 /* offset of end-of-dump header */
  80 static int      verbose;                /* chatty mode */
  81 static int      disregard_valid_flag;   /* disregard valid flag */
  82 static int      livedump;               /* dump the current running system */
  83 static int      interactive;            /* user invoked; no syslog */
  84 static int      csave;                  /* save dump compressed */
  85 static int      filemode;               /* processing file, not dump device */
  86 static int      percent_done;           /* progress indicator */
  87 static int      sec_done;               /* progress last report time */
  88 static hrtime_t startts;                /* timestamp at start */
  89 static volatile uint64_t saved;         /* count of pages written */
  90 static volatile uint64_t zpages;        /* count of zero pages not written */
  91 static dumpdatahdr_t datahdr;           /* compression info */
  92 static long     coreblksize;            /* preferred write size (st_blksize) */
  93 static int      cflag;                  /* run as savecore -c */
  94 static int      mflag;                  /* run as savecore -m */

  95 
  96 /*
  97  * Payload information for the events we raise.  These are used
  98  * in raise_event to determine what payload to include.
  99  */
 100 #define SC_PAYLOAD_SAVEDIR      0x0001  /* Include savedir in event */
 101 #define SC_PAYLOAD_INSTANCE     0x0002  /* Include bounds instance number */
 102 #define SC_PAYLOAD_IMAGEUUID    0x0004  /* Include dump OS instance uuid */
 103 #define SC_PAYLOAD_CRASHTIME    0x0008  /* Include epoch crashtime */
 104 #define SC_PAYLOAD_PANICSTR     0x0010  /* Include panic string */
 105 #define SC_PAYLOAD_PANICSTACK   0x0020  /* Include panic stack */
 106 #define SC_PAYLOAD_FAILREASON   0x0040  /* Include failure reason */
 107 #define SC_PAYLOAD_DUMPCOMPLETE 0x0080  /* Include completeness indicator */
 108 #define SC_PAYLOAD_ISCOMPRESSED 0x0100  /* Dump is in vmdump.N form */
 109 #define SC_PAYLOAD_DUMPADM_EN   0x0200  /* Is dumpadm enabled or not? */
 110 #define SC_PAYLOAD_FM_PANIC     0x0400  /* Panic initiated by FMA */
 111 #define SC_PAYLOAD_JUSTCHECKING 0x0800  /* Run with -c flag? */
 112 
 113 enum sc_event_type {
 114         SC_EVENT_DUMP_PENDING,


 223                 code = 0;
 224                 break;
 225 
 226         case SC_EXIT_PEND:
 227                 /*
 228                  * Raise an ireport saying why we are exiting.  Do not
 229                  * raise if run as savecore -m.  If something in the
 230                  * raise_event codepath calls logprint avoid recursion.
 231                  */
 232                 if (!mflag && logprint_raised++ == 0)
 233                         raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
 234                 code = 2;
 235                 break;
 236 
 237         case SC_EXIT_FM:
 238                 code = 3;
 239                 break;
 240 
 241         case SC_EXIT_ERR:
 242         default:
 243                 if (!mflag && logprint_raised++ == 0 && have_dumpfile)

 244                         raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
 245                 code = 1;
 246                 break;
 247         }
 248 
 249         exit(code);
 250 }
 251 
 252 /*
 253  * System call / libc wrappers that exit on error.
 254  */
 255 static int
 256 Open(const char *name, int oflags, mode_t mode)
 257 {
 258         int fd;
 259 
 260         if ((fd = open64(name, oflags, mode)) == -1)
 261                 logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s",
 262                     name, strerror(errno));
 263         return (fd);


 284 {
 285         if (fseeko64(f, off, SEEK_SET) != 0)
 286                 logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s",
 287                     strerror(errno));
 288 }
 289 
 290 typedef struct stat64 Stat_t;
 291 
 292 static void
 293 Fstat(int fd, Stat_t *sb, const char *fname)
 294 {
 295         if (fstat64(fd, sb) != 0)
 296                 logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname,
 297                     strerror(errno));
 298 }
 299 
 300 static void
 301 Stat(const char *fname, Stat_t *sb)
 302 {
 303         if (stat64(fname, sb) != 0) {





 304                 have_dumpfile = B_FALSE;
 305                 logprint(SC_SL_ERR | SC_EXIT_ERR, "failed to get status "
 306                     "of file %s", fname);
 307         }
 308 }
 309 
 310 static void
 311 Pread(int fd, void *buf, size_t size, offset_t off)
 312 {
 313         ssize_t sz = pread64(fd, buf, size, off);
 314 
 315         if (sz < 0)
 316                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 317                     "pread: %s", strerror(errno));
 318         else if (sz != size)
 319                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 320                     "pread: size %ld != %ld", sz, size);
 321 }
 322 
 323 static void


 361                 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
 362         endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
 363         Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
 364         Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
 365 
 366         pagesize = dumphdr.dump_pagesize;
 367 
 368         if (dumphdr.dump_magic != DUMP_MAGIC)
 369                 logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
 370                     dumphdr.dump_magic);
 371 
 372         if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
 373                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
 374                     "dump already processed");
 375 
 376         if (dumphdr.dump_version != DUMP_VERSION)
 377                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
 378                     "dump version (%d) != %s version (%d)",
 379                     dumphdr.dump_version, progname, DUMP_VERSION);
 380 




 381         if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
 382                 logprint(SC_SL_NONE | SC_EXIT_PEND,
 383                     "dump is from %u-bit kernel - cannot save on %u-bit kernel",
 384                     dumphdr.dump_wordsize, DUMP_WORDSIZE);
 385 
 386         if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
 387                 if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
 388                         logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
 389                             "dump data version (%d) != %s data version (%d)",
 390                             datahdr.dump_datahdr_version, progname,
 391                             DUMP_DATAHDR_VERSION);
 392         } else {
 393                 (void) memset(&datahdr, 0, sizeof (datahdr));
 394                 datahdr.dump_maxcsize = pagesize;
 395         }
 396 
 397         /*
 398          * Read the initial header, clear the valid bits, and compare headers.
 399          * The main header may have been overwritten by swapping if we're
 400          * using a swap partition as the dump device, in which case we bail.


 403 
 404         corehdr.dump_flags &= ~DF_VALID;
 405         dumphdr.dump_flags &= ~DF_VALID;
 406 
 407         if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) {
 408                 /*
 409                  * Clear valid bit so we don't complain on every invocation.
 410                  */
 411                 if (!filemode)
 412                         Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
 413                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 414                     "initial dump header corrupt");
 415         }
 416 }
 417 
 418 static void
 419 check_space(int csave)
 420 {
 421         struct statvfs fsb;
 422         int64_t spacefree, dumpsize, minfree, datasize;

 423 
 424         if (statvfs(".", &fsb) < 0)
 425                 logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s",
 426                     strerror(errno));
 427 


 428         dumpsize = dumphdr.dump_data - dumphdr.dump_start;
 429         datasize = dumphdr.dump_npages * pagesize;
 430         if (!csave)
 431                 dumpsize += datasize;
 432         else
 433                 dumpsize += datahdr.dump_data_csize;
 434 
 435         spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
 436         minfree = 1024LL * read_number_from_file("minfree", 1024);
 437         if (spacefree < minfree + dumpsize) {
 438                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 439                     "not enough space in %s (%lld MB avail, %lld MB needed)",
 440                     savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
 441         }
 442 }
 443 
 444 static void
 445 build_dump_map(int corefd, const pfn_t *pfn_table)
 446 {
 447         long i;
 448         static long misses = 0;
 449         size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t);
 450         mem_vtop_t vtop;
 451         dump_map_t *dmp = Zalloc(dump_mapsize);
 452         char *inbuf = Zalloc(FBUFSIZE);
 453         FILE *in = fdopen(dup(dumpfd), "rb");
 454 
 455         (void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE);
 456         Fseek(dumphdr.dump_map, in);


 558         (void) fclose(in);
 559         (void) fclose(out);
 560         free(outbuf);
 561         free(buf);
 562 }
 563 
 564 /*
 565  * Concatenate dump contents into a new file.
 566  * Update corehdr with new offsets.
 567  */
 568 static void
 569 copy_crashfile(const char *corefile)
 570 {
 571         int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
 572         size_t bufsz = FBUFSIZE;
 573         char *inbuf = Zalloc(bufsz);
 574         offset_t coreoff;
 575         size_t nb;
 576 
 577         logprint(SC_SL_ERR | SC_IF_VERBOSE,
 578             "Copying %s to %s/%s\n", dumpfile, savedir, corefile);
 579 
 580         /*
 581          * This dump file is still compressed
 582          */
 583         corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;
 584 
 585         /*
 586          * Leave room for corehdr, it is updated and written last
 587          */
 588         corehdr.dump_start = 0;
 589         coreoff = sizeof (corehdr);
 590 
 591         /*
 592          * Read in the compressed symbol table, copy it to corefile.
 593          */
 594         coreoff = roundup(coreoff, pagesize);
 595         corehdr.dump_ksyms = coreoff;
 596         Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd,
 597             inbuf, bufsz);
 598 


 683         int size;
 684 };
 685 
 686 typedef enum streamstate {      /* decode phase of a stream_t */
 687         STREAMSTART,            /* next object to decode is a stream header */
 688         STREAMPAGES             /* next object to decode is a data page */
 689 } streamstate_t;
 690 
 691 typedef struct stream {         /* per-stream decompression state */
 692         streamstate_t state;    /* current decode phase */
 693         int init;               /* set once the decompressor is set up */
 694         int tag;                /* NOTE(review): use not visible here; presumably the block tag -- confirm */
 695         int bound;              /* non-zero while a runstreams thread owns this stream */
 696         int nout;               /* pages buffered in blkbuf, not yet written */
 697         char *blkbuf;           /* output staging buffer (coreblksize bytes) */
 698         blockhdr_t blocks;      /* queue of input blocks awaiting decode */
 699         pgcnt_t pagenum;        /* first page number of the current group */
 700         pgcnt_t curpage;        /* page number where the next putpage() starts */
 701         pgcnt_t npages;         /* number of pages in the current group */
 702         pgcnt_t done;           /* pages of the current group decoded so far */
 703         bz_stream strm;         /* bzip2 decompressor state */
 704         dumpcsize_t sc;         /* NOTE(review): use not visible in this excerpt -- confirm */
 705         dumpstreamhdr_t sh;     /* most recently decoded stream header */
 706 } stream_t;
 707 
 708 static stream_t *streams;
 709 static stream_t *endstreams;
 710 
 711 const int cs = sizeof (dumpcsize_t);
 712 
 713 typedef struct tinfo {          /* per-thread bookkeeping */
 714         pthread_t tid;          /* thread id; presumably a runstreams worker -- confirm */
 715         int corefd;             /* core file descriptor used by runstreams, closed on exit */
 716 } tinfo_t;
 717 
 718 static int threads_stop;
 719 static int threads_active;
 720 static tinfo_t *tinfo;
 721 static tinfo_t *endtinfo;
 722 
 723 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;


 946                                 doflush = 1;
 947                                 atomic_inc_64(&zpages);
 948                         } else if (++s->nout >= BTOP(coreblksize) ||
 949                             isblkbnd(s->curpage + s->nout)) {
 950                                 doflush = 1;
 951                         }
 952                         if (++s->done >= s->npages) {
 953                                 s->state = STREAMSTART;
 954                                 doflush = 1;
 955                         }
 956                         if (doflush) {
 957                                 putpage(corefd, s->blkbuf, s->curpage, s->nout);
 958                                 s->nout = 0;
 959                                 s->curpage = s->pagenum + s->done;
 960                         }
 961                         break;
 962                 }
 963         }
 964 }
 965 
 966 /* bzlib library reports errors with this callback */
 967 void
 968 bz_internal_error(int errcode)
 969 {
 970         logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n",
 971             BZ2_bzErrorString(errcode));
 972 }
 973 
 974 /*
 975  * Return one object in the stream.
 976  *
 977  * An object (stream header or page) will likely span an input block
 978  * of compression data. Return non-zero when an entire object has been
 979  * retrieved from the stream.
 980  */
 981 static int
 982 bz2decompress(stream_t *s, void *buf, size_t size)
 983 {
 984         int rc;
 985 
 986         if (s->strm.avail_out == 0) {
 987                 s->strm.next_out = buf;
 988                 s->strm.avail_out = size;
 989         }
 990         while (s->strm.avail_in > 0) {
 991                 rc = BZ2_bzDecompress(&s->strm);
 992                 if (rc == BZ_STREAM_END) {
 993                         rc = BZ2_bzDecompressReset(&s->strm);
 994                         if (rc != BZ_OK)
 995                                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 996                                     "BZ2_bzDecompressReset: %s",
 997                                     BZ2_bzErrorString(rc));
 998                         continue;
 999                 }
1000 
1001                 if (s->strm.avail_out == 0)
1002                         break;
1003         }
1004         return (s->strm.avail_out == 0);
1005 }
1006 
1007 /*
1008  * Process one bzip2 block.
1009  * The interface is documented here:
1010  * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html
1011  */
1012 static void
1013 bz2block(int corefd, stream_t *s, char *block, size_t blocksz)
1014 {
1015         int rc = 0;
1016         int doflush;
1017         char *out;
1018 
1019         if (!s->init) {
1020                 s->init = 1;
1021                 rc = BZ2_bzDecompressInit(&s->strm, 0, 0);
1022                 if (rc != BZ_OK)
1023                         logprint(SC_SL_ERR | SC_EXIT_ERR,
1024                             "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc));
1025                 if (s->blkbuf == NULL)
1026                         s->blkbuf = Zalloc(coreblksize);
1027                 s->strm.avail_out = 0;
1028                 s->state = STREAMSTART;
1029         }
1030         s->strm.next_in = block;
1031         s->strm.avail_in = blocksz;
1032 
1033         while (s->strm.avail_in > 0) {
1034                 switch (s->state) {
1035                 case STREAMSTART:
1036                         if (!bz2decompress(s, &s->sh, sizeof (s->sh)))
1037                                 return;
1038                         if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0)
1039                                 logprint(SC_SL_ERR | SC_EXIT_ERR,
1040                                     "BZ2 STREAMSTART: bad stream header");
1041                         if (s->sh.stream_npages > datahdr.dump_maxrange)
1042                                 logprint(SC_SL_ERR | SC_EXIT_ERR,
1043                                     "BZ2 STREAMSTART: bad range: %d > %d",
1044                                     s->sh.stream_npages, datahdr.dump_maxrange);
1045                         s->pagenum = s->sh.stream_pagenum;
1046                         s->npages = s->sh.stream_npages;
1047                         s->curpage = s->pagenum;
1048                         s->nout = 0;
1049                         s->done = 0;
1050                         s->state = STREAMPAGES;
1051                         break;
1052                 case STREAMPAGES:
1053                         out = s->blkbuf + PTOB(s->nout);
1054                         if (!bz2decompress(s, out, pagesize))
1055                                 return;
1056 
1057                         atomic_inc_64(&saved);
1058 
1059                         doflush = 0;
1060                         if (s->nout == 0 && iszpage(out)) {
1061                                 doflush = 1;
1062                                 atomic_inc_64(&zpages);
1063                         } else if (++s->nout >= BTOP(coreblksize) ||
1064                             isblkbnd(s->curpage + s->nout)) {
1065                                 doflush = 1;
1066                         }
1067                         if (++s->done >= s->npages) {
1068                                 s->state = STREAMSTART;
1069                                 doflush = 1;
1070                         }
1071                         if (doflush) {
1072                                 putpage(corefd, s->blkbuf, s->curpage, s->nout);
1073                                 s->nout = 0;
1074                                 s->curpage = s->pagenum + s->done;
1075                         }
1076                         break;
1077                 }
1078         }
1079 }
1080 
1081 /* report progress */
1082 static void
1083 report_progress()
1084 {
1085         int sec, percent;
1086 
1087         if (!interactive)
1088                 return;
1089 
1090         percent = saved * 100LL / corehdr.dump_npages;
1091         sec = (gethrtime() - startts) / NANOSEC;
1092         if (percent > percent_done || sec > sec_done) {
1093                 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
1094                     percent);
1095                 (void) fflush(stdout);
1096                 sec_done = sec;
1097                 percent_done = percent;
1098         }
1099 }
1100 


1103 runstreams(void *arg)
1104 {
1105         tinfo_t *t = arg;
1106         stream_t *s;
1107         block_t *b;
1108         int bound;
1109 
1110         (void) pthread_mutex_lock(&lock);
1111         while (!threads_stop) {
1112                 bound = 0;
1113                 for (s = streams; s != endstreams; s++) {
1114                         if (s->bound || s->blocks.head == NULL)
1115                                 continue;
1116                         s->bound = 1;
1117                         bound = 1;
1118                         (void) pthread_cond_signal(&cvwork);
1119                         while (s->blocks.head != NULL) {
1120                                 b = deqh(&s->blocks);
1121                                 (void) pthread_mutex_unlock(&lock);
1122 
1123                                 if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2)
1124                                         lzjbblock(t->corefd, s, b->block,
1125                                             b->size);
1126                                 else
1127                                         bz2block(t->corefd, s, b->block,
1128                                             b->size);
1129 
1130                                 (void) pthread_mutex_lock(&lock);
1131                                 enqt(&freeblocks, b);
1132                                 (void) pthread_cond_signal(&cvfree);
1133 
1134                                 report_progress();
1135                         }
1136                         s->bound = 0;
1137                         (void) pthread_cond_signal(&cvbarrier);
1138                 }
1139                 if (!bound && !threads_stop)
1140                         (void) pthread_cond_wait(&cvwork, &lock);
1141         }
1142         (void) close(t->corefd);
1143         (void) pthread_cond_signal(&cvwork);
1144         (void) pthread_mutex_unlock(&lock);
1145         return (arg);
1146 }
1147 
1148 /*
1149  * Process compressed pages.
1150  *
1151  * The old format, now called single-threaded lzjb, is a 32-bit size
1152  * word followed by 'size' bytes of lzjb compression data for one
1153  * page. The new format extends this by storing a 12-bit "tag" in the
1154  * upper bits of the size word. When the size word is pagesize or
1155  * less, it is assumed to be one lzjb page. When the size word is
1156  * greater than pagesize, it is assumed to be a "stream block",
1157  * belonging to up to 4095 streams. In practice, the number of streams
1158  * is set to one less than the number of CPUs running at crash
1159  * time. One CPU processes the crash dump, the remaining CPUs
1160  * separately process groups of data pages.
1161  *
1162  * savecore creates a thread per stream, but never more threads than
1163  * the number of CPUs running savecore. This is because savecore can
1164  * be processing a crash file from a remote machine, which may have
1165  * more CPUs.
1166  *
1167  * When the kernel uses parallel lzjb or parallel bzip2, we expect a
1168  * series of 128KB blocks of compression data. In this case, each
1169  * block has a "tag", in the range 1-4095. Each block is handed off to
1170  * the threads running "runstreams". The dump format is either lzjb
1171  * or bzip2, never a mixture. These threads, in turn, process the
1172  * compression data for groups of pages. Groups of pages are delimited
1173  * by a "stream header", which indicates a starting pfn and number of
1174  * pages. When a stream block has been read, the condition variable
1175  * "cvwork" is signalled, which causes one of the available threads to
1176  * wake up and process the stream.
1177  *
1178  * In the parallel case there will be stream blocks encoding all data
1179  * pages. The stream of blocks is terminated by a zero size
1180  * word. There can be a few lzjb pages tacked on the end, depending on
1181  * the architecture. The sbarrier function ensures that all stream
1182  * blocks have been processed so that the page number for the few
1183  * single pages at the end can be known.
1184  */
1185 static void
1186 decompress_pages(int corefd)
1187 {
1188         char *cpage = NULL;
1189         char *dpage = NULL;
1190         char *out;
1191         pgcnt_t curpage = 0;
1192         block_t *b;
1193         FILE *dumpf;
1194         FILE *tracef = NULL;
1195         stream_t *s;
1196         size_t dsize;


1328         if (dpage)
1329                 free(dpage);
1330         if (streams)
1331                 free(streams);
1332 }
1333 
1334 static void
1335 build_corefile(const char *namelist, const char *corefile)
1336 {
1337         size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t);
1338         size_t ksyms_size = dumphdr.dump_ksyms_size;
1339         size_t ksyms_csize = dumphdr.dump_ksyms_csize;
1340         pfn_t *pfn_table;
1341         char *ksyms_base = Zalloc(ksyms_size);
1342         char *ksyms_cbase = Zalloc(ksyms_csize);
1343         size_t ksyms_dsize;
1344         Stat_t st;
1345         int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1346         int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1347 
1348         (void) printf("Constructing namelist %s/%s\n", savedir, namelist);
1349 
1350         /*
1351          * Determine the optimum write size for the core file
1352          */
1353         Fstat(corefd, &st, corefile);
1354 
1355         if (verbose > 1)
1356                 (void) printf("%s: %ld block size\n", corefile,
1357                     (long)st.st_blksize);
1358         coreblksize = st.st_blksize;
1359         if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize))
1360                 coreblksize = MINCOREBLKSIZE;
1361 
1362         hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1);
1363 
1364         /*
1365          * This dump file is now uncompressed
1366          */
1367         corehdr.dump_flags &= ~DF_COMPRESSED;
1368 
1369         /*
1370          * Read in the compressed symbol table, copy it to corefile,
1371          * decompress it, and write the result to namelist.
1372          */
1373         corehdr.dump_ksyms = pagesize;
1374         Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms);
1375         Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms);
1376 
1377         ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize,
1378             ksyms_size);
1379         if (ksyms_dsize != ksyms_size)
1380                 logprint(SC_SL_WARN,
1381                     "bad data in symbol table, %lu of %lu bytes saved",
1382                     ksyms_dsize, ksyms_size);
1383 
1384         Pwrite(namefd, ksyms_base, ksyms_size, 0);
1385         (void) close(namefd);
1386         free(ksyms_cbase);
1387         free(ksyms_base);
1388 
1389         (void) printf("Constructing corefile %s/%s\n", savedir, corefile);
1390 
1391         /*
1392          * Read in and write out the pfn table.
1393          */
1394         pfn_table = Zalloc(pfn_table_size);
1395         corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize);
1396         Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn);
1397         Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn);
1398 
1399         /*
1400          * Convert the raw translation data into a hashed dump map.
1401          */
1402         corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize);
1403         build_dump_map(corefd, pfn_table);
1404         free(pfn_table);
1405 
1406         /*
1407          * Decompress the pages
1408          */
1409         decompress_pages(corefd);


1542                 return;
1543         }
1544 
1545         if (sd.sd_magic != SUMMARY_MAGIC) {
1546                 *stack = '\0';
1547                 logprint(SC_SL_NONE | SC_IF_VERBOSE,
1548                     "bad summary magic %x", sd.sd_magic);
1549                 return;
1550         }
1551         Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff);
1552         if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE))
1553                 logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum");
1554 }
1555 
1556 static void
1557 raise_event(enum sc_event_type evidx, char *warn_string)
1558 {
1559         uint32_t pl = sc_event[evidx].sce_payload;
1560         char panic_stack[STACK_BUF_SIZE];
1561         nvlist_t *attr = NULL;
1562         char uuidbuf[36 + 1];
1563         int err = 0;
1564 
1565         if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0)
1566                 goto publish;   /* try to send payload-free event */
1567 
1568         if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL)
1569                 err |= nvlist_add_string(attr, "dumpdir", savedir);
1570 
1571         if (pl & SC_PAYLOAD_INSTANCE && bounds != -1)
1572                 err |= nvlist_add_int64(attr, "instance", bounds);
1573 
1574         if (pl & SC_PAYLOAD_ISCOMPRESSED) {
1575                 err |= nvlist_add_boolean_value(attr, "compressed",
1576                     csave ? B_TRUE : B_FALSE);
1577         }
1578 
1579         if (pl & SC_PAYLOAD_DUMPADM_EN) {
1580                 char *disabled = defread("DUMPADM_ENABLE=no");
1581 
1582                 err |= nvlist_add_boolean_value(attr, "savecore-enabled",


1634                 logprint(SC_SL_WARN, "Errors while constructing '%s' "
1635                     "event payload; will try to publish anyway.");
1636 publish:
1637         if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS,
1638             "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI,
1639             attr) != FMEV_SUCCESS) {
1640                 logprint(SC_SL_ERR, "failed to publish '%s' event: %s",
1641                     sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno));
1642                 nvlist_free(attr);
1643         }
1644 
1645 }
1646 
1647 
1648 int
1649 main(int argc, char *argv[])
1650 {
1651         int i, c, bfd;
1652         Stat_t st;
1653         struct rlimit rl;

1654         long filebounds = -1;
1655         char namelist[30], corefile[30], boundstr[30];
1656         dumpfile = NULL;






1657 
1658         startts = gethrtime();
1659 
1660         (void) getrlimit(RLIMIT_NOFILE, &rl);
1661         rl.rlim_cur = rl.rlim_max;
1662         (void) setrlimit(RLIMIT_NOFILE, &rl);
1663 
1664         openlog(progname, LOG_ODELAY, LOG_AUTH);
1665 
1666         (void) defopen("/etc/dumpadm.conf");
1667         savedir = defread("DUMPADM_SAVDIR=");
1668         if (savedir != NULL)
1669                 savedir = strdup(savedir);
1670 
1671         while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) {
1672                 switch (c) {
1673                 case 'L':
1674                         livedump++;
1675                         break;
1676                 case 'v':
1677                         verbose++;
1678                         break;
1679                 case 'c':
1680                         cflag++;
1681                         break;
1682                 case 'd':
1683                         disregard_valid_flag++;
1684                         break;
1685                 case 'm':
1686                         mflag++;
1687                         break;
1688                 case 'f':
1689                         dumpfile = optarg;
1690                         filebounds = getbounds(dumpfile);

1691                         break;
1692                 case '?':
1693                         usage();
1694                 }
1695         }
1696 
1697         /*
1698          * If doing something other than extracting an existing dump (i.e.
1699          * dumpfile has been provided as an option), the user must be root.
1700          */
1701         if (geteuid() != 0 && dumpfile == NULL) {
1702                 (void) fprintf(stderr, "%s: %s %s\n", progname,
1703                     gettext("you must be root to use"), progname);
1704                 exit(1);
1705         }
1706 
1707         interactive = isatty(STDOUT_FILENO);
1708 
1709         if (cflag && livedump)
1710                 usage();
1711 
1712         if (dumpfile == NULL || livedump)
1713                 dumpfd = Open("/dev/dump", O_RDONLY, 0444);
1714 
1715         if (dumpfile == NULL) {
1716                 dumpfile = Zalloc(MAXPATHLEN);
1717                 if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) {

1718                         have_dumpfile = B_FALSE;
1719                         logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR,
1720                             "no dump device configured");
1721                 }
1722         }
1723 
1724         if (mflag)
1725                 return (message_save());
1726 
1727         if (optind == argc - 1)













1728                 savedir = argv[optind];

1729 
1730         if (savedir == NULL || optind < argc - 1)
1731                 usage();
1732 
1733         if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1)







1734                 logprint(SC_SL_NONE | SC_EXIT_ERR,
1735                     "dedicated dump device required");

1736 
1737         (void) close(dumpfd);
1738         dumpfd = -1;
1739 
1740         Stat(dumpfile, &st);
1741 
1742         filemode = S_ISREG(st.st_mode);
1743 
1744         if (!filemode && defread("DUMPADM_CSAVE=off") == NULL)
1745                 csave = 1;
1746 
1747         read_dumphdr();
1748 
1749         /*
1750          * We want this message to go to the log file, but not the console.
1751          * There's no good way to do that with the existing syslog facility.
1752          * We could extend it to handle this, but there doesn't seem to be
1753          * a general need for it, so we isolate the complexity here instead.
1754          */
1755         if (dumphdr.dump_panicstring[0] != '\0') {


1774                 lc.level = 0;
1775 
1776                 ctl.buf = (void *)&lc;
1777                 ctl.len = sizeof (log_ctl_t);
1778 
1779                 dat.buf = (void *)msg;
1780                 dat.len = strlen(msg) + 1;
1781 
1782                 (void) putmsg(logfd, &ctl, &dat, 0);
1783                 (void) close(logfd);
1784         }
1785 
1786         if ((dumphdr.dump_flags & DF_COMPLETE) == 0) {
1787                 logprint(SC_SL_WARN, "incomplete dump on dump device");
1788                 dump_incomplete = B_TRUE;
1789         }
1790 
1791         if (dumphdr.dump_fm_panic)
1792                 fm_panic = B_TRUE;
1793 













1794         /*
1795          * We have a valid dump on a dump device and know as much about
1796          * it as we're going to at this stage.  Raise an event for
1797          * logging and so that FMA can open a case for this panic.
1798          * Avoid this step for FMA-initiated panics - FMA will replay
1799          * ereports off the dump device independently of savecore and
1800          * will make a diagnosis, so we don't want to open two cases
1801          * for the same event.  Also avoid raising an event for a
1802          * livedump, or when we are inflating a compressed dump.
1803          */
1804         if (!fm_panic && !livedump && !filemode)
1805                 raise_event(SC_EVENT_DUMP_PENDING, NULL);
1806 
1807         logprint(SC_SL_WARN, "System dump time: %s",
1808             ctime(&dumphdr.dump_crashtime));
1809 
1810         /*
1811          * Option -c is designed for use from svc-dumpadm where we know
1812          * that dumpadm -n is in effect but run savecore -c just to
1813          * get the above dump_pending_on_device event raised.  If it is run
1814          * interactively then just print further panic details.
1815          */
1816         if (cflag) {
1817                 char *disabled = defread("DUMPADM_ENABLE=no");
1818                 int lvl = interactive ? SC_SL_WARN : SC_SL_ERR;
1819                 int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND;
1820 
1821                 logprint(lvl | ec,
1822                     "Panic crashdump pending on dump device%s "
1823                     "run savecore(1M) manually to extract. "
1824                     "Image UUID %s%s.",
1825                     disabled ? " but dumpadm -n in effect;" : ";",
1826                     corehdr.dump_uuid,
1827                     fm_panic ?  "(fault-management initiated)" : "");
1828                 /*NOTREACHED*/
1829         }
1830 
1831         if (chdir(savedir) == -1)







1832                 logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s",
1833                     savedir, strerror(errno));
1834 
1835         check_space(csave);
1836 


1837         if (filebounds < 0)
1838                 bounds = read_number_from_file("bounds", 0);
1839         else
1840                 bounds = filebounds;
1841 
































1842         if (csave) {
1843                 size_t metrics_size = datahdr.dump_metrics;
1844 
1845                 (void) sprintf(corefile, "vmdump.%ld", bounds);
1846 









1847                 datahdr.dump_metrics = 0;
1848 
1849                 logprint(SC_SL_ERR,
1850                     "Saving compressed system crash dump in %s/%s",
1851                     savedir, corefile);
1852 
1853                 copy_crashfile(corefile);
1854 
1855                 /*
1856                  * Raise a fault management event that indicates the system
1857                  * has panicked. We know a reasonable amount about the
1858                  * condition at this time, but the dump is still compressed.
1859                  */
1860                 if (!livedump && !fm_panic)
1861                         raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1862 
1863                 if (metrics_size > 0) {
1864                         int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1865                         FILE *mfile = fopen(METRICSFILE, "a");
1866                         char *metrics = Zalloc(metrics_size + 1);
1867 
1868                         Pread(dumpfd, metrics, metrics_size, endoff +
1869                             sizeof (dumphdr) + sizeof (datahdr));
1870 
1871                         if (sec < 1)


1890                                     dumphdr.dump_flags & DF_LIVE ? "Live" :
1891                                     "Crash", ctime(&dumphdr.dump_crashtime));
1892                                 (void) fprintf(mfile, ",,,%s/%s\n", savedir,
1893                                     corefile);
1894                                 (void) fprintf(mfile, "Metrics:\n%s\n",
1895                                     metrics);
1896                                 (void) fprintf(mfile, "Copy pages,%ld\n",
1897                                     dumphdr.  dump_npages);
1898                                 (void) fprintf(mfile, "Copy time,%d\n", sec);
1899                                 (void) fprintf(mfile, "Copy pages/sec,%ld\n",
1900                                     dumphdr.dump_npages / sec);
1901                                 (void) fprintf(mfile, "]]]]\n");
1902                                 (void) fclose(mfile);
1903                         }
1904                         free(metrics);
1905                 }
1906 
1907                 logprint(SC_SL_ERR,
1908                     "Decompress the crash dump with "
1909                     "\n'savecore -vf %s/%s'",
1910                     savedir, corefile);
1911 
1912         } else {
1913                 (void) sprintf(namelist, "unix.%ld", bounds);
1914                 (void) sprintf(corefile, "vmcore.%ld", bounds);
1915 
1916                 if (interactive && filebounds >= 0 && access(corefile, F_OK)
1917                     == 0)

1918                         logprint(SC_SL_NONE | SC_EXIT_ERR,
1919                             "%s already exists: remove with "
1920                             "'rm -f %s/{unix,vmcore}.%ld'",
1921                             corefile, savedir, bounds);

1922 
1923                 logprint(SC_SL_ERR,
1924                     "saving system crash dump in %s/{unix,vmcore}.%ld",
1925                     savedir, bounds);
1926 
1927                 build_corefile(namelist, corefile);
1928 
1929                 if (!livedump && !filemode && !fm_panic)
1930                         raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1931 
1932                 if (access(METRICSFILE, F_OK) == 0) {
1933                         int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1934                         FILE *mfile = fopen(METRICSFILE, "a");
1935 
1936                         if (sec < 1)
1937                                 sec = 1;
1938 
1939                         if (mfile == NULL) {
1940                                 logprint(SC_SL_WARN,
1941                                     "Can't create %s: %s",
1942                                     METRICSFILE, strerror(errno));
1943                         } else {
1944                                 (void) fprintf(mfile, "[[[[,,,");
1945                                 for (i = 0; i < argc; i++)


1950                                 (void) fprintf(mfile, ",,,%s %s %s %s %s\n",
1951                                     dumphdr.dump_utsname.sysname,
1952                                     dumphdr.dump_utsname.nodename,
1953                                     dumphdr.dump_utsname.release,
1954                                     dumphdr.dump_utsname.version,
1955                                     dumphdr.dump_utsname.machine);
1956                                 (void) fprintf(mfile,
1957                                     "Uncompress pages,%"PRIu64"\n", saved);
1958                                 (void) fprintf(mfile, "Uncompress time,%d\n",
1959                                     sec);
1960                                 (void) fprintf(mfile, "Uncompress pages/sec,%"
1961                                     PRIu64"\n", saved / sec);
1962                                 (void) fprintf(mfile, "]]]]\n");
1963                                 (void) fclose(mfile);
1964                         }
1965                 }
1966         }
1967 
1968         if (filebounds < 0) {
1969                 (void) sprintf(boundstr, "%ld\n", bounds + 1);
1970                 bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644);
1971                 Pwrite(bfd, boundstr, strlen(boundstr), 0);
1972                 (void) close(bfd);
1973         }
1974 
1975         if (verbose) {
1976                 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1977 
1978                 (void) printf("%d:%02d dump %s is done\n",
1979                     sec / 60, sec % 60,
1980                     csave ? "copy" : "decompress");
1981         }
1982 
1983         if (verbose > 1 && hist != NULL) {
1984                 int i, nw;
1985 
1986                 for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i)
1987                         nw += hist[i] * i;
1988                 (void) printf("pages count     %%\n");
1989                 for (i = 0; i <= BTOP(coreblksize); ++i) {
1990                         if (hist[i] == 0)


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 /*
  26  * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
  27  */
  28 
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <stdarg.h>
  32 #include <unistd.h>
  33 #include <fcntl.h>
  34 #include <errno.h>
  35 #include <string.h>
  36 #include <deflt.h>
  37 #include <time.h>
  38 #include <syslog.h>
  39 #include <stropts.h>
  40 #include <pthread.h>
  41 #include <limits.h>
  42 #include <atomic.h>
  43 #include <libnvpair.h>
  44 #include <libintl.h>
  45 #include <sys/mem.h>
  46 #include <sys/statvfs.h>
  47 #include <sys/dumphdr.h>
  48 #include <sys/dumpadm.h>
  49 #include <sys/compress.h>
  50 #include <sys/panic.h>
  51 #include <sys/sysmacros.h>
  52 #include <sys/stat.h>
  53 #include <sys/resource.h>

  54 #include <sys/fm/util.h>
  55 #include <fm/libfmevent.h>
  56 #include <sys/int_fmtio.h>
  57 #include <uuid/uuid.h>
  58 #include <libgen.h>
  59 
  60 
  61 /* fread/fwrite buffer size */
  62 #define FBUFSIZE                (1ULL << 20)
  63 
  64 /* minimum size for output buffering */
  65 #define MINCOREBLKSIZE          (1ULL << 17)
  66 
  67 /* create this file if metrics collection is enabled in the kernel */
  68 #define METRICSFILE "METRICS.csv"
  69 
  70 static char     progname[9] = "savecore";
  71 static char     *savedir;               /* savecore directory */
  72 static char     uuiddir[MAXPATHLEN];    /* UUID directory */
  73 static char     *dumpfile;              /* source of raw crash dump */
  74 static long     bounds = -1;            /* numeric suffix */
  75 static long     pagesize;               /* dump pagesize */
  76 static int      dumpfd = -1;            /* dumpfile descriptor */
  77 static boolean_t skip_event = B_FALSE;  /* do not raise an event */
  78 static boolean_t have_dumpfile = B_TRUE;        /* dumpfile existence */
  79 static dumphdr_t corehdr, dumphdr;      /* initial and terminal dumphdrs */
  80 static boolean_t dump_incomplete;       /* dumphdr indicates incomplete */
  81 static boolean_t fm_panic;              /* dump is the result of fm_panic */
  82 static offset_t endoff;                 /* offset of end-of-dump header */
  83 static int      verbose;                /* chatty mode */
  84 static int      disregard_valid_flag;   /* disregard valid flag */
  85 static int      livedump;               /* dump the current running system */
  86 static int      interactive;            /* user invoked; no syslog */
  87 static int      csave;                  /* save dump compressed */
  88 static int      filemode;               /* processing file, not dump device */
  89 static int      percent_done;           /* progress indicator */
  90 static int      sec_done;               /* progress last report time */
  91 static hrtime_t startts;                /* timestamp at start */
  92 static volatile uint64_t saved;         /* count of pages written */
  93 static volatile uint64_t zpages;        /* count of zero pages not written */
  94 static dumpdatahdr_t datahdr;           /* compression info */
  95 static long     coreblksize;            /* preferred write size (st_blksize) */
  96 static int      cflag;                  /* run as savecore -c */
  97 static int      mflag;                  /* run as savecore -m */
  98 static int      fflag;                  /* -f option used */
  99 
/*
 * Payload information for the events we raise.  These are used
 * in raise_event to determine what payload to include.  Each value
 * is a distinct bit so that several can be OR-ed into one mask.
 */
#define SC_PAYLOAD_SAVEDIR      0x0001  /* Include savedir in event */
#define SC_PAYLOAD_INSTANCE     0x0002  /* Include bounds instance number */
#define SC_PAYLOAD_IMAGEUUID    0x0004  /* Include dump OS instance uuid */
#define SC_PAYLOAD_CRASHTIME    0x0008  /* Include epoch crashtime */
#define SC_PAYLOAD_PANICSTR     0x0010  /* Include panic string */
#define SC_PAYLOAD_PANICSTACK   0x0020  /* Include panic stack */
#define SC_PAYLOAD_FAILREASON   0x0040  /* Include failure reason */
#define SC_PAYLOAD_DUMPCOMPLETE 0x0080  /* Include completeness indicator */
#define SC_PAYLOAD_ISCOMPRESSED 0x0100  /* Dump is in vmdump.N form */
#define SC_PAYLOAD_DUMPADM_EN   0x0200  /* Is dumpadm enabled or not? */
#define SC_PAYLOAD_FM_PANIC     0x0400  /* Panic initiated by FMA */
#define SC_PAYLOAD_JUSTCHECKING 0x0800  /* Run with -c flag? */
 116 
 117 enum sc_event_type {
 118         SC_EVENT_DUMP_PENDING,


 227                 code = 0;
 228                 break;
 229 
 230         case SC_EXIT_PEND:
 231                 /*
 232                  * Raise an ireport saying why we are exiting.  Do not
 233                  * raise if run as savecore -m.  If something in the
 234                  * raise_event codepath calls logprint avoid recursion.
 235                  */
 236                 if (!mflag && logprint_raised++ == 0)
 237                         raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
 238                 code = 2;
 239                 break;
 240 
 241         case SC_EXIT_FM:
 242                 code = 3;
 243                 break;
 244 
 245         case SC_EXIT_ERR:
 246         default:
 247                 if (!mflag && logprint_raised++ == 0 && !skip_event &&
 248                     have_dumpfile)
 249                         raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
 250                 code = 1;
 251                 break;
 252         }
 253 
 254         exit(code);
 255 }
 256 
 257 /*
 258  * System call / libc wrappers that exit on error.
 259  */
 260 static int
 261 Open(const char *name, int oflags, mode_t mode)
 262 {
 263         int fd;
 264 
 265         if ((fd = open64(name, oflags, mode)) == -1)
 266                 logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s",
 267                     name, strerror(errno));
 268         return (fd);


 289 {
 290         if (fseeko64(f, off, SEEK_SET) != 0)
 291                 logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s",
 292                     strerror(errno));
 293 }
 294 
 295 typedef struct stat64 Stat_t;
 296 
 297 static void
 298 Fstat(int fd, Stat_t *sb, const char *fname)
 299 {
 300         if (fstat64(fd, sb) != 0)
 301                 logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname,
 302                     strerror(errno));
 303 }
 304 
 305 static void
 306 Stat(const char *fname, Stat_t *sb)
 307 {
 308         if (stat64(fname, sb) != 0) {
 309                 /*
 310                  * If dump/core file doesn't exist, then best
 311                  * to not go further (raise an event).
 312                  */
 313                 skip_event = B_TRUE;
 314                 have_dumpfile = B_FALSE;
 315                 logprint(SC_SL_ERR | SC_EXIT_ERR, "failed to get status "
 316                     "of file %s", fname);
 317         }
 318 }
 319 
 320 static void
 321 Pread(int fd, void *buf, size_t size, offset_t off)
 322 {
 323         ssize_t sz = pread64(fd, buf, size, off);
 324 
 325         if (sz < 0)
 326                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 327                     "pread: %s", strerror(errno));
 328         else if (sz != size)
 329                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 330                     "pread: size %ld != %ld", sz, size);
 331 }
 332 
 333 static void


 371                 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
 372         endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
 373         Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
 374         Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
 375 
 376         pagesize = dumphdr.dump_pagesize;
 377 
 378         if (dumphdr.dump_magic != DUMP_MAGIC)
 379                 logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
 380                     dumphdr.dump_magic);
 381 
 382         if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
 383                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
 384                     "dump already processed");
 385 
 386         if (dumphdr.dump_version != DUMP_VERSION)
 387                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
 388                     "dump version (%d) != %s version (%d)",
 389                     dumphdr.dump_version, progname, DUMP_VERSION);
 390 
 391         if (datahdr.dump_clevel > DUMP_CLEVEL_LZJB)
 392                 logprint(SC_SL_NONE | SC_EXIT_PEND,
 393                     "unsupported compression format (%d)", datahdr.dump_clevel);
 394 
 395         if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
 396                 logprint(SC_SL_NONE | SC_EXIT_PEND,
 397                     "dump is from %u-bit kernel - cannot save on %u-bit kernel",
 398                     dumphdr.dump_wordsize, DUMP_WORDSIZE);
 399 
 400         if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
 401                 if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
 402                         logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
 403                             "dump data version (%d) != %s data version (%d)",
 404                             datahdr.dump_datahdr_version, progname,
 405                             DUMP_DATAHDR_VERSION);
 406         } else {
 407                 (void) memset(&datahdr, 0, sizeof (datahdr));
 408                 datahdr.dump_maxcsize = pagesize;
 409         }
 410 
 411         /*
 412          * Read the initial header, clear the valid bits, and compare headers.
 413          * The main header may have been overwritten by swapping if we're
 414          * using a swap partition as the dump device, in which case we bail.


 417 
 418         corehdr.dump_flags &= ~DF_VALID;
 419         dumphdr.dump_flags &= ~DF_VALID;
 420 
 421         if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) {
 422                 /*
 423                  * Clear valid bit so we don't complain on every invocation.
 424                  */
 425                 if (!filemode)
 426                         Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
 427                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 428                     "initial dump header corrupt");
 429         }
 430 }
 431 
 432 static void
 433 check_space(int csave)
 434 {
 435         struct statvfs fsb;
 436         int64_t spacefree, dumpsize, minfree, datasize;
 437         char minfreefile[MAXPATHLEN];
 438 
 439         if (statvfs(".", &fsb) < 0)
 440                 logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s",
 441                     strerror(errno));
 442 
 443         (void) snprintf(minfreefile, MAXPATHLEN, "%s/minfree", savedir);
 444 
 445         dumpsize = dumphdr.dump_data - dumphdr.dump_start;
 446         datasize = dumphdr.dump_npages * pagesize;
 447         if (!csave)
 448                 dumpsize += datasize;
 449         else
 450                 dumpsize += datahdr.dump_data_csize;
 451 
 452         spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
 453         minfree = 1024LL * read_number_from_file(minfreefile, 1024);
 454         if (spacefree < minfree + dumpsize) {
 455                 logprint(SC_SL_ERR | SC_EXIT_ERR,
 456                     "not enough space in %s (%lld MB avail, %lld MB needed)",
 457                     savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
 458         }
 459 }
 460 
 461 static void
 462 build_dump_map(int corefd, const pfn_t *pfn_table)
 463 {
 464         long i;
 465         static long misses = 0;
 466         size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t);
 467         mem_vtop_t vtop;
 468         dump_map_t *dmp = Zalloc(dump_mapsize);
 469         char *inbuf = Zalloc(FBUFSIZE);
 470         FILE *in = fdopen(dup(dumpfd), "rb");
 471 
 472         (void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE);
 473         Fseek(dumphdr.dump_map, in);


 575         (void) fclose(in);
 576         (void) fclose(out);
 577         free(outbuf);
 578         free(buf);
 579 }
 580 
 581 /*
 582  * Concatenate dump contents into a new file.
 583  * Update corehdr with new offsets.
 584  */
 585 static void
 586 copy_crashfile(const char *corefile)
 587 {
 588         int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
 589         size_t bufsz = FBUFSIZE;
 590         char *inbuf = Zalloc(bufsz);
 591         offset_t coreoff;
 592         size_t nb;
 593 
 594         logprint(SC_SL_ERR | SC_IF_VERBOSE,
 595             "Copying %s to %s/%s\n", dumpfile, uuiddir, corefile);
 596 
 597         /*
 598          * This dump file is still compressed
 599          */
 600         corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;
 601 
 602         /*
 603          * Leave room for corehdr, it is updated and written last
 604          */
 605         corehdr.dump_start = 0;
 606         coreoff = sizeof (corehdr);
 607 
 608         /*
 609          * Read in the compressed symbol table, copy it to corefile.
 610          */
 611         coreoff = roundup(coreoff, pagesize);
 612         corehdr.dump_ksyms = coreoff;
 613         Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd,
 614             inbuf, bufsz);
 615 


 700         int size;
 701 };
 702 
/*
 * Per-stream decode state.  A stream is put back into STREAMSTART once
 * its count of finished pages reaches the stream's npages.
 * NOTE(review): STREAMPAGES presumably means "currently consuming the
 * pages of a group" -- confirm against the block decoder.
 */
typedef enum streamstate {
        STREAMSTART,
        STREAMPAGES
} streamstate_t;
 707 
/*
 * State kept for one compression stream while its blocks are decoded.
 */
typedef struct stream {
        streamstate_t state;    /* current decode state */
        int init;
        int tag;
        int bound;              /* set while a runstreams thread owns it */
        int nout;               /* pages pending; passed to putpage() */
        char *blkbuf;           /* buffer handed to putpage() */
        blockhdr_t blocks;      /* queue of blocks waiting to be decoded */
        pgcnt_t pagenum;        /* base page; curpage = pagenum + done */
        pgcnt_t curpage;        /* page number of the next putpage() */
        pgcnt_t npages;         /* done >= npages resets to STREAMSTART */
        pgcnt_t done;           /* pages finished so far */

        dumpcsize_t sc;
        dumpstreamhdr_t sh;     /* presumably the stream header -- confirm */
} stream_t;
 723 
 724 static stream_t *streams;
 725 static stream_t *endstreams;
 726 
 727 const int cs = sizeof (dumpcsize_t);
 728 
/*
 * Per-thread information for the runstreams worker threads.
 */
typedef struct tinfo {
        pthread_t tid;
        int corefd;             /* output fd; closed by runstreams on exit */
} tinfo_t;
 733 
 734 static int threads_stop;
 735 static int threads_active;
 736 static tinfo_t *tinfo;
 737 static tinfo_t *endtinfo;
 738 
 739 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;


 962                                 doflush = 1;
 963                                 atomic_inc_64(&zpages);
 964                         } else if (++s->nout >= BTOP(coreblksize) ||
 965                             isblkbnd(s->curpage + s->nout)) {
 966                                 doflush = 1;
 967                         }
 968                         if (++s->done >= s->npages) {
 969                                 s->state = STREAMSTART;
 970                                 doflush = 1;
 971                         }
 972                         if (doflush) {
 973                                 putpage(corefd, s->blkbuf, s->curpage, s->nout);
 974                                 s->nout = 0;
 975                                 s->curpage = s->pagenum + s->done;
 976                         }
 977                         break;
 978                 }
 979         }
 980 }
 981 



















































































































 982 /* report progress */
 983 static void
 984 report_progress()
 985 {
 986         int sec, percent;
 987 
 988         if (!interactive)
 989                 return;
 990 
 991         percent = saved * 100LL / corehdr.dump_npages;
 992         sec = (gethrtime() - startts) / NANOSEC;
 993         if (percent > percent_done || sec > sec_done) {
 994                 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
 995                     percent);
 996                 (void) fflush(stdout);
 997                 sec_done = sec;
 998                 percent_done = percent;
 999         }
1000 }
1001 


1004 runstreams(void *arg)
1005 {
1006         tinfo_t *t = arg;
1007         stream_t *s;
1008         block_t *b;
1009         int bound;
1010 
1011         (void) pthread_mutex_lock(&lock);
1012         while (!threads_stop) {
1013                 bound = 0;
1014                 for (s = streams; s != endstreams; s++) {
1015                         if (s->bound || s->blocks.head == NULL)
1016                                 continue;
1017                         s->bound = 1;
1018                         bound = 1;
1019                         (void) pthread_cond_signal(&cvwork);
1020                         while (s->blocks.head != NULL) {
1021                                 b = deqh(&s->blocks);
1022                                 (void) pthread_mutex_unlock(&lock);
1023 

1024                                 lzjbblock(t->corefd, s, b->block,
1025                                     b->size);



1026 
1027                                 (void) pthread_mutex_lock(&lock);
1028                                 enqt(&freeblocks, b);
1029                                 (void) pthread_cond_signal(&cvfree);
1030 
1031                                 report_progress();
1032                         }
1033                         s->bound = 0;
1034                         (void) pthread_cond_signal(&cvbarrier);
1035                 }
1036                 if (!bound && !threads_stop)
1037                         (void) pthread_cond_wait(&cvwork, &lock);
1038         }
1039         (void) close(t->corefd);
1040         (void) pthread_cond_signal(&cvwork);
1041         (void) pthread_mutex_unlock(&lock);
1042         return (arg);
1043 }
1044 
1045 /*
1046  * Process compressed pages.
1047  *
1048  * The old format, now called single-threaded lzjb, is a 32-bit size
1049  * word followed by 'size' bytes of lzjb compression data for one
1050  * page. The new format extends this by storing a 12-bit "tag" in the
1051  * upper bits of the size word. When the size word is pagesize or
1052  * less, it is assumed to be one lzjb page. When the size word is
1053  * greater than pagesize, it is assumed to be a "stream block",
1054  * belonging to up to 4095 streams. In practice, the number of streams
1055  * is set to one less than the number of CPUs running at crash
1056  * time. One CPU processes the crash dump, the remaining CPUs
1057  * separately process groups of data pages.
1058  *
1059  * savecore creates a thread per stream, but never more threads than
1060  * the number of CPUs running savecore. This is because savecore can
1061  * be processing a crash file from a remote machine, which may have
1062  * more CPUs.
1063  *
1064  * When the kernel uses parallel compression we expect a series of 128KB
1065  * blocks of compression data. In this case, each block has a "tag" in
1066  * the range 1-4095. Each block is handed off to the threads running
1067  * "runstreams". These threads, in turn, process the compression data
1068  * for groups of pages. Groups of pages are delimited by a "stream header",
1069  * which indicates a starting pfn and number of pages. When a stream block
1070  * has been read, the condition variable "cvwork" is signalled, which causes
1071  * one of the available threads to wake up and process the stream.


1072  *
1073  * In the parallel case there will be stream blocks encoding all data
1074  * pages. The stream of blocks is terminated by a zero size
1075  * word. There can be a few lzjb pages tacked on the end, depending on
1076  * the architecture. The sbarrier function ensures that all stream
1077  * blocks have been processed so that the page number for the few
1078  * single pages at the end can be known.
1079  */
1080 static void
1081 decompress_pages(int corefd)
1082 {
1083         char *cpage = NULL;
1084         char *dpage = NULL;
1085         char *out;
1086         pgcnt_t curpage = 0;
1087         block_t *b;
1088         FILE *dumpf;
1089         FILE *tracef = NULL;
1090         stream_t *s;
1091         size_t dsize;


1223         if (dpage)
1224                 free(dpage);
1225         if (streams)
1226                 free(streams);
1227 }
1228 
1229 static void
1230 build_corefile(const char *namelist, const char *corefile)
1231 {
1232         size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t);
1233         size_t ksyms_size = dumphdr.dump_ksyms_size;
1234         size_t ksyms_csize = dumphdr.dump_ksyms_csize;
1235         pfn_t *pfn_table;
1236         char *ksyms_base = Zalloc(ksyms_size);
1237         char *ksyms_cbase = Zalloc(ksyms_csize);
1238         size_t ksyms_dsize;
1239         Stat_t st;
1240         int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1241         int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1242 
1243         (void) printf("Constructing namelist %s/%s\n", uuiddir, namelist);
1244 
1245         /*
1246          * Determine the optimum write size for the core file
1247          */
1248         Fstat(corefd, &st, corefile);
1249 
1250         if (verbose > 1)
1251                 (void) printf("%s: %ld block size\n", corefile,
1252                     (long)st.st_blksize);
1253         coreblksize = st.st_blksize;
1254         if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize))
1255                 coreblksize = MINCOREBLKSIZE;
1256 
1257         hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1);
1258 
1259         /*
1260          * This dump file is now uncompressed
1261          */
1262         corehdr.dump_flags &= ~DF_COMPRESSED;
1263 
1264         /*
1265          * Read in the compressed symbol table, copy it to corefile,
1266          * decompress it, and write the result to namelist.
1267          */
1268         corehdr.dump_ksyms = pagesize;
1269         Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms);
1270         Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms);
1271 
1272         ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize,
1273             ksyms_size);
1274         if (ksyms_dsize != ksyms_size)
1275                 logprint(SC_SL_WARN,
1276                     "bad data in symbol table, %lu of %lu bytes saved",
1277                     ksyms_dsize, ksyms_size);
1278 
1279         Pwrite(namefd, ksyms_base, ksyms_size, 0);
1280         (void) close(namefd);
1281         free(ksyms_cbase);
1282         free(ksyms_base);
1283 
1284         (void) printf("Constructing corefile %s/%s\n", uuiddir, corefile);
1285 
1286         /*
1287          * Read in and write out the pfn table.
1288          */
1289         pfn_table = Zalloc(pfn_table_size);
1290         corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize);
1291         Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn);
1292         Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn);
1293 
1294         /*
1295          * Convert the raw translation data into a hashed dump map.
1296          */
1297         corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize);
1298         build_dump_map(corefd, pfn_table);
1299         free(pfn_table);
1300 
1301         /*
1302          * Decompress the pages
1303          */
1304         decompress_pages(corefd);


1437                 return;
1438         }
1439 
1440         if (sd.sd_magic != SUMMARY_MAGIC) {
1441                 *stack = '\0';
1442                 logprint(SC_SL_NONE | SC_IF_VERBOSE,
1443                     "bad summary magic %x", sd.sd_magic);
1444                 return;
1445         }
1446         Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff);
1447         if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE))
1448                 logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum");
1449 }
1450 
1451 static void
1452 raise_event(enum sc_event_type evidx, char *warn_string)
1453 {
1454         uint32_t pl = sc_event[evidx].sce_payload;
1455         char panic_stack[STACK_BUF_SIZE];
1456         nvlist_t *attr = NULL;
1457         char uuidbuf[UUID_PRINTABLE_STRING_LENGTH];
1458         int err = 0;
1459 
1460         if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0)
1461                 goto publish;   /* try to send payload-free event */
1462 
1463         if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL)
1464                 err |= nvlist_add_string(attr, "dumpdir", savedir);
1465 
1466         if (pl & SC_PAYLOAD_INSTANCE && bounds != -1)
1467                 err |= nvlist_add_int64(attr, "instance", bounds);
1468 
1469         if (pl & SC_PAYLOAD_ISCOMPRESSED) {
1470                 err |= nvlist_add_boolean_value(attr, "compressed",
1471                     csave ? B_TRUE : B_FALSE);
1472         }
1473 
1474         if (pl & SC_PAYLOAD_DUMPADM_EN) {
1475                 char *disabled = defread("DUMPADM_ENABLE=no");
1476 
1477                 err |= nvlist_add_boolean_value(attr, "savecore-enabled",


1529                 logprint(SC_SL_WARN, "Errors while constructing '%s' "
1530                     "event payload; will try to publish anyway.");
1531 publish:
1532         if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS,
1533             "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI,
1534             attr) != FMEV_SUCCESS) {
1535                 logprint(SC_SL_ERR, "failed to publish '%s' event: %s",
1536                     sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno));
1537                 nvlist_free(attr);
1538         }
1539 
1540 }
1541 
1542 
1543 int
1544 main(int argc, char *argv[])
1545 {
1546         int i, c, bfd;
1547         Stat_t st;
1548         struct rlimit rl;
1549         struct stat sc, sd;
1550         long filebounds = -1;
1551         char namelist[30], corefile[30], boundstr[30];
1552         dumpfile = NULL;
1553         char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
1554         uuid_t uu;
1555         static char boundsfile[MAXPATHLEN];
1556         static char boundslink[MAXPATHLEN];
1557         static boolean_t fma_layout = B_TRUE;
1558         char *slash;
1559 
1560         startts = gethrtime();
1561 
1562         (void) getrlimit(RLIMIT_NOFILE, &rl);
1563         rl.rlim_cur = rl.rlim_max;
1564         (void) setrlimit(RLIMIT_NOFILE, &rl);
1565 
1566         openlog(progname, LOG_ODELAY, LOG_AUTH);
1567 
1568         (void) defopen("/etc/dumpadm.conf");
1569         savedir = defread("DUMPADM_SAVDIR=");
1570         if (savedir != NULL)
1571                 savedir = strdup(savedir);
1572 
1573         while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) {
1574                 switch (c) {
1575                 case 'L':
1576                         livedump++;
1577                         break;
1578                 case 'v':
1579                         verbose++;
1580                         break;
1581                 case 'c':
1582                         cflag++;
1583                         break;
1584                 case 'd':
1585                         disregard_valid_flag++;
1586                         break;
1587                 case 'm':
1588                         mflag++;
1589                         break;
1590                 case 'f':
1591                         dumpfile = optarg;
1592                         filebounds = getbounds(dumpfile);
1593                         fflag++;
1594                         break;
1595                 case '?':
1596                         usage();
1597                 }
1598         }
1599 
1600         /*
1601          * Unless we are only extracting an existing dump (i.e. dumpfile
1602          * has been provided as an option), the user must be root.
1603          */
1604         if (geteuid() != 0 && dumpfile == NULL) {
1605                 (void) fprintf(stderr, "%s: %s %s\n", progname,
1606                     gettext("you must be root to use"), progname);
1607                 exit(1);
1608         }
1609 
1610         interactive = isatty(STDOUT_FILENO);
1611 
1612         if (cflag && livedump)
1613                 usage();
1614 
1615         if (dumpfile == NULL || livedump)
1616                 dumpfd = Open("/dev/dump", O_RDONLY, 0444);
1617 
1618         if (dumpfile == NULL) {
1619                 dumpfile = Zalloc(MAXPATHLEN);
1620                 if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) {
1621                         skip_event = B_TRUE;
1622                         have_dumpfile = B_FALSE;
1623                         logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR,
1624                             "no dump device configured");
1625                 }
1626         }
1627 
1628         if (mflag)
1629                 return (message_save());
1630 
1631         if (optind == argc - 1) {
1632                 /*
1633                  * Use the default layout if a directory was specified.
1634                  * If the directory matches the one configured by dumpadm,
1635                  * then revert to the fma layout.
1636                  */
1637                 fma_layout = B_FALSE;
1638                 if (savedir != NULL && (stat(savedir, &sc) >= 0 &&
1639                     stat(argv[optind], &sd) >= 0)) {
1640                         if (sc.st_ino == sd.st_ino &&
1641                             sc.st_dev == sd.st_dev) {
1642                                 fma_layout = B_TRUE;
1643                         }
1644                 }
1645                 savedir = argv[optind];
1646         }
1647 
1648         if (savedir == NULL || optind < argc - 1)
1649                 usage();
1650 
1651         if (livedump) {
1652                 /*
1653                  * For livedump we must update the dump header with
1654                  * newly generated uuid.
1655                  */
1656                 uuid_generate(uu);
1657                 uuid_unparse(uu, uuidstr);
1658                 if (ioctl(dumpfd, DIOCDUMP, uuidstr) == -1)
1659                         logprint(SC_SL_NONE | SC_EXIT_ERR,
1660                             "dedicated dump device required");
1661         }
1662 
1663         (void) close(dumpfd);
1664         dumpfd = -1;
1665 
1666         Stat(dumpfile, &st);
1667 
1668         filemode = S_ISREG(st.st_mode);
1669 
1670         if (!filemode && defread("DUMPADM_CSAVE=off") == NULL)
1671                 csave = 1;
1672 
1673         read_dumphdr();
1674 
1675         /*
1676          * We want this message to go to the log file, but not the console.
1677          * There's no good way to do that with the existing syslog facility.
1678          * We could extend it to handle this, but there doesn't seem to be
1679          * a general need for it, so we isolate the complexity here instead.
1680          */
1681         if (dumphdr.dump_panicstring[0] != '\0') {


1700                 lc.level = 0;
1701 
1702                 ctl.buf = (void *)&lc;
1703                 ctl.len = sizeof (log_ctl_t);
1704 
1705                 dat.buf = (void *)msg;
1706                 dat.len = strlen(msg) + 1;
1707 
1708                 (void) putmsg(logfd, &ctl, &dat, 0);
1709                 (void) close(logfd);
1710         }
1711 
1712         if ((dumphdr.dump_flags & DF_COMPLETE) == 0) {
1713                 logprint(SC_SL_WARN, "incomplete dump on dump device");
1714                 dump_incomplete = B_TRUE;
1715         }
1716 
1717         if (dumphdr.dump_fm_panic)
1718                 fm_panic = B_TRUE;
1719 
1720         /* remove trailing slashes */
1721         slash = strrchr(savedir, '\0');
1722         while (--slash > savedir && *slash == '/') {
1723                 *slash = '\0';
1724         }
1725 
1726         if (fma_layout) {
1727                 (void) snprintf(uuiddir, sizeof (uuiddir), "%s/data/%s",
1728                     savedir, dumphdr.dump_uuid);
1729         } else {
1730                 (void) strncpy(uuiddir, savedir, sizeof (uuiddir));
1731         }
1732 
1733         /*
1734          * We have a valid dump on a dump device and know as much about
1735          * it as we're going to at this stage.  Raise an event for
1736          * logging and so that FMA can open a case for this panic.
1737          * Avoid this step for FMA-initiated panics - FMA will replay
1738          * ereports off the dump device independently of savecore and
1739          * will make a diagnosis, so we don't want to open two cases
1740          * for the same event.  Also avoid raising an event for a
1741          * livedump, or when we are inflating a compressed dump.
1742          */
1743         if (!fm_panic && !livedump && !filemode)
1744                 raise_event(SC_EVENT_DUMP_PENDING, NULL);
1745 
1746         logprint(SC_SL_WARN, "System dump time: %s",
1747             ctime(&dumphdr.dump_crashtime));
1748 
1749         /*
1750          * Option -c is designed for use from svc-dumpadm where we know
1751          * that dumpadm -n is in effect but run savecore -c just to
1752          * get the above dump_pending_on_device event raised.  If it is run
1753          * interactively then just print further panic details.
1754          */
1755         if (cflag) {
1756                 char *disabled = defread("DUMPADM_ENABLE=no");
1757                 int lvl = interactive ? SC_SL_WARN : SC_SL_ERR;
1758                 int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND;
1759 
1760                 logprint(lvl | ec,
1761                     "Panic crashdump pending on dump device%s "
1762                     "run savecore(1M) manually to extract. "
1763                     "Image UUID %s%s.",
1764                     disabled ? " but dumpadm -n in effect;" : ";",
1765                     corehdr.dump_uuid,
1766                     fm_panic ?  "(fault-management initiated)" : "");
1767                 /*NOTREACHED*/
1768         }
1769 
1770         if (fma_layout && mkdirp(uuiddir, 0755) != 0) {
1771                 if (errno != EEXIST)
1772                         logprint(SC_SL_ERR | SC_EXIT_ERR,
1773                             "mkdirp(\"%s\"): %s",
1774                             uuiddir, strerror(errno));
1775         }
1776 
1777         if (chdir(uuiddir) == -1)
1778                 logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s",
1779                     uuiddir, strerror(errno));
1780 
1781         check_space(csave);
1782 
1783         (void) snprintf(boundsfile, MAXPATHLEN, "%s/bounds", savedir);
1784 
1785         if (filebounds < 0)
1786                 bounds = read_number_from_file(boundsfile, 0);
1787         else
1788                 bounds = filebounds;
1789 
1790         if (!fflag && disregard_valid_flag && bounds > 0)
1791                 bounds--;
1792 
1793         (void) snprintf(boundslink, MAXPATHLEN, "%s/%d", savedir, bounds);
1794 
1795         /*
1796          * Create a symbolic link to easily maintain the sequential ordering.
1797          */
1798         if (!fflag && fma_layout && symlink(uuiddir, boundslink) != 0) {
1799                 if (errno == EEXIST) {
1800                         char symbuf[MAXPATHLEN] = {'\0'};
1801 
1802                         if (readlink(boundslink, symbuf, sizeof (symbuf)) < 0)
1803                                 logprint(SC_SL_ERR | SC_EXIT_ERR,
1804                                 "readlink: %s", strerror(errno));
1805                         if (strcmp(symbuf, uuiddir) != 0) {
1806                                 logprint(SC_SL_ERR,
1807                                     "Symbolic link %s already exists but "
1808                                     "specifies a wrong UUID directory, "
1809                                     "new symbolic link will be created "
1810                                     "instead", boundslink);
1811                                 (void) unlink(boundslink);
1812                                 if (symlink(uuiddir, boundslink) != 0)
1813                                         logprint(SC_SL_ERR | SC_EXIT_ERR,
1814                                             "symlink: %s", strerror(errno));
1815                         }
1816                 } else {
1817                         logprint(SC_SL_ERR | SC_EXIT_ERR, "symlink: %s",
1818                             strerror(errno));
1819                 }
1820         }
1821 
1822         if (csave) {
1823                 size_t metrics_size = datahdr.dump_metrics;
1824 
1825                 (void) sprintf(corefile, "vmdump.%ld", bounds);
1826 
1827                 if (interactive && bounds >= 0 && access(corefile, F_OK)
1828                     == 0) {
1829                         skip_event = B_TRUE;
1830                         logprint(SC_SL_NONE | SC_EXIT_ERR,
1831                             "%s already exists: remove with "
1832                             "'rm -f %s/{unix,vmcore}.%ld'",
1833                             corefile, uuiddir, bounds);
1834                 }
1835 
1836                 datahdr.dump_metrics = 0;
1837 
1838                 logprint(SC_SL_ERR,
1839                     "Saving compressed system crash dump in %s/%s",
1840                     uuiddir, corefile);
1841 
1842                 copy_crashfile(corefile);
1843 
1844                 /*
1845                  * Raise a fault management event that indicates the system
1846                  * has panicked. We know a reasonable amount about the
1847                  * condition at this time, but the dump is still compressed.
1848                  */
1849                 if (!livedump && !fm_panic)
1850                         raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1851 
1852                 if (metrics_size > 0) {
1853                         int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1854                         FILE *mfile = fopen(METRICSFILE, "a");
1855                         char *metrics = Zalloc(metrics_size + 1);
1856 
1857                         Pread(dumpfd, metrics, metrics_size, endoff +
1858                             sizeof (dumphdr) + sizeof (datahdr));
1859 
1860                         if (sec < 1)


1879                                     dumphdr.dump_flags & DF_LIVE ? "Live" :
1880                                     "Crash", ctime(&dumphdr.dump_crashtime));
1881                                 (void) fprintf(mfile, ",,,%s/%s\n", savedir,
1882                                     corefile);
1883                                 (void) fprintf(mfile, "Metrics:\n%s\n",
1884                                     metrics);
1885                                 (void) fprintf(mfile, "Copy pages,%ld\n",
1886                                     dumphdr.  dump_npages);
1887                                 (void) fprintf(mfile, "Copy time,%d\n", sec);
1888                                 (void) fprintf(mfile, "Copy pages/sec,%ld\n",
1889                                     dumphdr.dump_npages / sec);
1890                                 (void) fprintf(mfile, "]]]]\n");
1891                                 (void) fclose(mfile);
1892                         }
1893                         free(metrics);
1894                 }
1895 
1896                 logprint(SC_SL_ERR,
1897                     "Decompress the crash dump with "
1898                     "\n'savecore -vf %s/%s'",
1899                     uuiddir, corefile);
1900 
1901         } else {
1902                 (void) sprintf(namelist, "unix.%ld", bounds);
1903                 (void) sprintf(corefile, "vmcore.%ld", bounds);
1904 
1905                 if (interactive && bounds >= 0 && access(corefile, F_OK)
1906                     == 0) {
1907                         skip_event = B_TRUE;
1908                         logprint(SC_SL_NONE | SC_EXIT_ERR,
1909                             "%s already exists: remove with "
1910                             "'rm -f %s/{unix,vmcore}.%ld'",
1911                             corefile, uuiddir, bounds);
1912                 }
1913 
1914                 logprint(SC_SL_ERR,
1915                     "saving system crash dump in %s/{unix,vmcore}.%ld",
1916                     uuiddir, bounds);
1917 
1918                 build_corefile(namelist, corefile);
1919 
1920                 if (!livedump && !filemode && !fm_panic)
1921                         raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1922 
1923                 if (access(METRICSFILE, F_OK) == 0) {
1924                         int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1925                         FILE *mfile = fopen(METRICSFILE, "a");
1926 
1927                         if (sec < 1)
1928                                 sec = 1;
1929 
1930                         if (mfile == NULL) {
1931                                 logprint(SC_SL_WARN,
1932                                     "Can't create %s: %s",
1933                                     METRICSFILE, strerror(errno));
1934                         } else {
1935                                 (void) fprintf(mfile, "[[[[,,,");
1936                                 for (i = 0; i < argc; i++)


1941                                 (void) fprintf(mfile, ",,,%s %s %s %s %s\n",
1942                                     dumphdr.dump_utsname.sysname,
1943                                     dumphdr.dump_utsname.nodename,
1944                                     dumphdr.dump_utsname.release,
1945                                     dumphdr.dump_utsname.version,
1946                                     dumphdr.dump_utsname.machine);
1947                                 (void) fprintf(mfile,
1948                                     "Uncompress pages,%"PRIu64"\n", saved);
1949                                 (void) fprintf(mfile, "Uncompress time,%d\n",
1950                                     sec);
1951                                 (void) fprintf(mfile, "Uncompress pages/sec,%"
1952                                     PRIu64"\n", saved / sec);
1953                                 (void) fprintf(mfile, "]]]]\n");
1954                                 (void) fclose(mfile);
1955                         }
1956                 }
1957         }
1958 
1959         if (filebounds < 0) {
1960                 (void) sprintf(boundstr, "%ld\n", bounds + 1);
1961                 bfd = Open(boundsfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1962                 Pwrite(bfd, boundstr, strlen(boundstr), 0);
1963                 (void) close(bfd);
1964         }
1965 
1966         if (verbose) {
1967                 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1968 
1969                 (void) printf("%d:%02d dump %s is done\n",
1970                     sec / 60, sec % 60,
1971                     csave ? "copy" : "decompress");
1972         }
1973 
1974         if (verbose > 1 && hist != NULL) {
1975                 int i, nw;
1976 
1977                 for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i)
1978                         nw += hist[i] * i;
1979                 (void) printf("pages count     %%\n");
1980                 for (i = 0; i <= BTOP(coreblksize); ++i) {
1981                         if (hist[i] == 0)