Print this page
NEX-17845 Remove support for BZIP2 from dump
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-14185 savecore -f vmdump.1 tries to unpack this to unix.0 and vmcore.0 and other nits
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-9338 improve the layout of the crash directory (follow-up)
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-9338 improve the layout of the crash directory
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>

@@ -21,11 +21,11 @@
 /*
  * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2016 Joyent, Inc.
  */
 /*
- * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>

@@ -49,14 +49,15 @@
 #include <sys/compress.h>
 #include <sys/panic.h>
 #include <sys/sysmacros.h>
 #include <sys/stat.h>
 #include <sys/resource.h>
-#include <bzip2/bzlib.h>
 #include <sys/fm/util.h>
 #include <fm/libfmevent.h>
 #include <sys/int_fmtio.h>
+#include <uuid/uuid.h>
+#include <libgen.h>
 
 
 /* fread/fwrite buffer size */
 #define FBUFSIZE                (1ULL << 20)
 

@@ -66,14 +67,16 @@
 /* create this file if metrics collection is enabled in the kernel */
 #define METRICSFILE "METRICS.csv"
 
 static char     progname[9] = "savecore";
 static char     *savedir;               /* savecore directory */
+static char     uuiddir[MAXPATHLEN];    /* UUID directory */
 static char     *dumpfile;              /* source of raw crash dump */
 static long     bounds = -1;            /* numeric suffix */
 static long     pagesize;               /* dump pagesize */
 static int      dumpfd = -1;            /* dumpfile descriptor */
+static boolean_t skip_event = B_FALSE;  /* do not raise an event */
 static boolean_t have_dumpfile = B_TRUE;        /* dumpfile existence */
 static dumphdr_t corehdr, dumphdr;      /* initial and terminal dumphdrs */
 static boolean_t dump_incomplete;       /* dumphdr indicates incomplete */
 static boolean_t fm_panic;              /* dump is the result of fm_panic */
 static offset_t endoff;                 /* offset of end-of-dump header */

@@ -90,10 +93,11 @@
 static volatile uint64_t zpages;        /* count of zero pages not written */
 static dumpdatahdr_t datahdr;           /* compression info */
 static long     coreblksize;            /* preferred write size (st_blksize) */
 static int      cflag;                  /* run as savecore -c */
 static int      mflag;                  /* run as savecore -m */
+static int      fflag;                  /* -f option used */
 
 /*
  * Payload information for the events we raise.  These are used
  * in raise_event to determine what payload to include.
  */

@@ -238,11 +242,12 @@
                 code = 3;
                 break;
 
         case SC_EXIT_ERR:
         default:
-                if (!mflag && logprint_raised++ == 0 && have_dumpfile)
+                if (!mflag && logprint_raised++ == 0 && !skip_event &&
+                    have_dumpfile)
                         raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
                 code = 1;
                 break;
         }
 

@@ -299,10 +304,15 @@
 
 static void
 Stat(const char *fname, Stat_t *sb)
 {
         if (stat64(fname, sb) != 0) {
+                /*
+                 * If the dump/core file doesn't exist there is nothing
+                 * to report on, so do not raise a failure event.
+                 */
+                skip_event = B_TRUE;
                 have_dumpfile = B_FALSE;
                 logprint(SC_SL_ERR | SC_EXIT_ERR, "failed to get status "
                     "of file %s", fname);
         }
 }

@@ -376,10 +386,14 @@
         if (dumphdr.dump_version != DUMP_VERSION)
                 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
                     "dump version (%d) != %s version (%d)",
                     dumphdr.dump_version, progname, DUMP_VERSION);
 
+        if (datahdr.dump_clevel > DUMP_CLEVEL_LZJB)
+                logprint(SC_SL_NONE | SC_EXIT_PEND,
+                    "unsupported compression format (%d)", datahdr.dump_clevel);
+
         if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
                 logprint(SC_SL_NONE | SC_EXIT_PEND,
                     "dump is from %u-bit kernel - cannot save on %u-bit kernel",
                     dumphdr.dump_wordsize, DUMP_WORDSIZE);
 

@@ -418,24 +432,27 @@
 static void
 check_space(int csave)
 {
         struct statvfs fsb;
         int64_t spacefree, dumpsize, minfree, datasize;
+        char minfreefile[MAXPATHLEN];
 
         if (statvfs(".", &fsb) < 0)
                 logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s",
                     strerror(errno));
 
+        (void) snprintf(minfreefile, MAXPATHLEN, "%s/minfree", savedir);
+
         dumpsize = dumphdr.dump_data - dumphdr.dump_start;
         datasize = dumphdr.dump_npages * pagesize;
         if (!csave)
                 dumpsize += datasize;
         else
                 dumpsize += datahdr.dump_data_csize;
 
         spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
-        minfree = 1024LL * read_number_from_file("minfree", 1024);
+        minfree = 1024LL * read_number_from_file(minfreefile, 1024);
         if (spacefree < minfree + dumpsize) {
                 logprint(SC_SL_ERR | SC_EXIT_ERR,
                     "not enough space in %s (%lld MB avail, %lld MB needed)",
                     savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
         }

@@ -573,11 +590,11 @@
         char *inbuf = Zalloc(bufsz);
         offset_t coreoff;
         size_t nb;
 
         logprint(SC_SL_ERR | SC_IF_VERBOSE,
-            "Copying %s to %s/%s\n", dumpfile, savedir, corefile);
+            "Copying %s to %s/%s\n", dumpfile, uuiddir, corefile);
 
         /*
          * This dump file is still compressed
          */
         corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;

@@ -698,11 +715,10 @@
         blockhdr_t blocks;
         pgcnt_t pagenum;
         pgcnt_t curpage;
         pgcnt_t npages;
         pgcnt_t done;
-        bz_stream strm;
         dumpcsize_t sc;
         dumpstreamhdr_t sh;
 } stream_t;
 
 static stream_t *streams;

@@ -961,125 +977,10 @@
                         break;
                 }
         }
 }
 
-/* bzlib library reports errors with this callback */
-void
-bz_internal_error(int errcode)
-{
-        logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n",
-            BZ2_bzErrorString(errcode));
-}
-
-/*
- * Return one object in the stream.
- *
- * An object (stream header or page) will likely span an input block
- * of compression data. Return non-zero when an entire object has been
- * retrieved from the stream.
- */
-static int
-bz2decompress(stream_t *s, void *buf, size_t size)
-{
-        int rc;
-
-        if (s->strm.avail_out == 0) {
-                s->strm.next_out = buf;
-                s->strm.avail_out = size;
-        }
-        while (s->strm.avail_in > 0) {
-                rc = BZ2_bzDecompress(&s->strm);
-                if (rc == BZ_STREAM_END) {
-                        rc = BZ2_bzDecompressReset(&s->strm);
-                        if (rc != BZ_OK)
-                                logprint(SC_SL_ERR | SC_EXIT_ERR,
-                                    "BZ2_bzDecompressReset: %s",
-                                    BZ2_bzErrorString(rc));
-                        continue;
-                }
-
-                if (s->strm.avail_out == 0)
-                        break;
-        }
-        return (s->strm.avail_out == 0);
-}
-
-/*
- * Process one bzip2 block.
- * The interface is documented here:
- * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html
- */
-static void
-bz2block(int corefd, stream_t *s, char *block, size_t blocksz)
-{
-        int rc = 0;
-        int doflush;
-        char *out;
-
-        if (!s->init) {
-                s->init = 1;
-                rc = BZ2_bzDecompressInit(&s->strm, 0, 0);
-                if (rc != BZ_OK)
-                        logprint(SC_SL_ERR | SC_EXIT_ERR,
-                            "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc));
-                if (s->blkbuf == NULL)
-                        s->blkbuf = Zalloc(coreblksize);
-                s->strm.avail_out = 0;
-                s->state = STREAMSTART;
-        }
-        s->strm.next_in = block;
-        s->strm.avail_in = blocksz;
-
-        while (s->strm.avail_in > 0) {
-                switch (s->state) {
-                case STREAMSTART:
-                        if (!bz2decompress(s, &s->sh, sizeof (s->sh)))
-                                return;
-                        if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0)
-                                logprint(SC_SL_ERR | SC_EXIT_ERR,
-                                    "BZ2 STREAMSTART: bad stream header");
-                        if (s->sh.stream_npages > datahdr.dump_maxrange)
-                                logprint(SC_SL_ERR | SC_EXIT_ERR,
-                                    "BZ2 STREAMSTART: bad range: %d > %d",
-                                    s->sh.stream_npages, datahdr.dump_maxrange);
-                        s->pagenum = s->sh.stream_pagenum;
-                        s->npages = s->sh.stream_npages;
-                        s->curpage = s->pagenum;
-                        s->nout = 0;
-                        s->done = 0;
-                        s->state = STREAMPAGES;
-                        break;
-                case STREAMPAGES:
-                        out = s->blkbuf + PTOB(s->nout);
-                        if (!bz2decompress(s, out, pagesize))
-                                return;
-
-                        atomic_inc_64(&saved);
-
-                        doflush = 0;
-                        if (s->nout == 0 && iszpage(out)) {
-                                doflush = 1;
-                                atomic_inc_64(&zpages);
-                        } else if (++s->nout >= BTOP(coreblksize) ||
-                            isblkbnd(s->curpage + s->nout)) {
-                                doflush = 1;
-                        }
-                        if (++s->done >= s->npages) {
-                                s->state = STREAMSTART;
-                                doflush = 1;
-                        }
-                        if (doflush) {
-                                putpage(corefd, s->blkbuf, s->curpage, s->nout);
-                                s->nout = 0;
-                                s->curpage = s->pagenum + s->done;
-                        }
-                        break;
-                }
-        }
-}
-
 /* report progress */
 static void
 report_progress()
 {
         int sec, percent;

@@ -1118,16 +1019,12 @@
                         (void) pthread_cond_signal(&cvwork);
                         while (s->blocks.head != NULL) {
                                 b = deqh(&s->blocks);
                                 (void) pthread_mutex_unlock(&lock);
 
-                                if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2)
                                         lzjbblock(t->corefd, s, b->block,
                                             b->size);
-                                else
-                                        bz2block(t->corefd, s, b->block,
-                                            b->size);
 
                                 (void) pthread_mutex_lock(&lock);
                                 enqt(&freeblocks, b);
                                 (void) pthread_cond_signal(&cvfree);
 

@@ -1162,20 +1059,18 @@
  * savecore creates a thread per stream, but never more threads than
  * the number of CPUs running savecore. This is because savecore can
  * be processing a crash file from a remote machine, which may have
  * more CPUs.
  *
- * When the kernel uses parallel lzjb or parallel bzip2, we expect a
- * series of 128KB blocks of compression data. In this case, each
- * block has a "tag", in the range 1-4095. Each block is handed off to
- * to the threads running "runstreams". The dump format is either lzjb
- * or bzip2, never a mixture. These threads, in turn, process the
- * compression data for groups of pages. Groups of pages are delimited
- * by a "stream header", which indicates a starting pfn and number of
- * pages. When a stream block has been read, the condition variable
- * "cvwork" is signalled, which causes one of the avaiable threads to
- * wake up and process the stream.
+ * When the kernel uses parallel compression we expect a series of 128KB
+ * blocks of compression data. In this case, each block has a "tag" in
+ * the range 1-4095. Each block is handed off to the threads running
+ * "runstreams". These threads, in turn, process the compression data
+ * for groups of pages. Groups of pages are delimited by a "stream header",
+ * which indicates a starting pfn and number of pages. When a stream block
+ * has been read, the condition variable "cvwork" is signalled, which causes
+ * one of the available threads to wake up and process the stream.
  *
  * In the parallel case there will be streams blocks encoding all data
  * pages. The stream of blocks is terminated by a zero size
  * word. There can be a few lzjb pages tacked on the end, depending on
  * the architecture. The sbarrier function ensures that all stream

@@ -1343,11 +1238,11 @@
         size_t ksyms_dsize;
         Stat_t st;
         int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
         int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
 
-        (void) printf("Constructing namelist %s/%s\n", savedir, namelist);
+        (void) printf("Constructing namelist %s/%s\n", uuiddir, namelist);
 
         /*
          * Determine the optimum write size for the core file
          */
         Fstat(corefd, &st, corefile);

@@ -1384,11 +1279,11 @@
         Pwrite(namefd, ksyms_base, ksyms_size, 0);
         (void) close(namefd);
         free(ksyms_cbase);
         free(ksyms_base);
 
-        (void) printf("Constructing corefile %s/%s\n", savedir, corefile);
+        (void) printf("Constructing corefile %s/%s\n", uuiddir, corefile);
 
         /*
          * Read in and write out the pfn table.
          */
         pfn_table = Zalloc(pfn_table_size);

@@ -1557,11 +1452,11 @@
 raise_event(enum sc_event_type evidx, char *warn_string)
 {
         uint32_t pl = sc_event[evidx].sce_payload;
         char panic_stack[STACK_BUF_SIZE];
         nvlist_t *attr = NULL;
-        char uuidbuf[36 + 1];
+        char uuidbuf[UUID_PRINTABLE_STRING_LENGTH];
         int err = 0;
 
         if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0)
                 goto publish;   /* try to send payload-free event */
 

@@ -1649,13 +1544,20 @@
 main(int argc, char *argv[])
 {
         int i, c, bfd;
         Stat_t st;
         struct rlimit rl;
+        struct stat sc, sd;
         long filebounds = -1;
         char namelist[30], corefile[30], boundstr[30];
         dumpfile = NULL;
+        char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
+        uuid_t uu;
+        static char boundsfile[MAXPATHLEN];
+        static char boundslink[MAXPATHLEN];
+        static boolean_t fma_layout = B_TRUE;
+        char *slash;
 
         startts = gethrtime();
 
         (void) getrlimit(RLIMIT_NOFILE, &rl);
         rl.rlim_cur = rl.rlim_max;

@@ -1686,10 +1588,11 @@
                         mflag++;
                         break;
                 case 'f':
                         dumpfile = optarg;
                         filebounds = getbounds(dumpfile);
+                        fflag++;
                         break;
                 case '?':
                         usage();
                 }
         }

@@ -1713,28 +1616,51 @@
                 dumpfd = Open("/dev/dump", O_RDONLY, 0444);
 
         if (dumpfile == NULL) {
                 dumpfile = Zalloc(MAXPATHLEN);
                 if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) {
+                        skip_event = B_TRUE;
                         have_dumpfile = B_FALSE;
                         logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR,
                             "no dump device configured");
                 }
         }
 
         if (mflag)
                 return (message_save());
 
-        if (optind == argc - 1)
+        if (optind == argc - 1) {
+                /*
+                 * Use the default layout if a directory was specified.
+                 * If it is the same directory (same device and inode) as the
+                 * value configured (by dumpadm), revert to the fma layout.
+                 */
+                fma_layout = B_FALSE;
+                if (savedir != NULL && (stat(savedir, &sc) >= 0 &&
+                    stat(argv[optind], &sd) >= 0)) {
+                        if (sc.st_ino == sd.st_ino &&
+                            sc.st_dev == sd.st_dev) {
+                                fma_layout = B_TRUE;
+                        }
+                }
                 savedir = argv[optind];
+        }
 
         if (savedir == NULL || optind < argc - 1)
                 usage();
 
-        if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1)
+        if (livedump) {
+                /*
+                 * For livedump we must update the dump header with
+                 * a newly generated uuid.
+                 */
+                uuid_generate(uu);
+                uuid_unparse(uu, uuidstr);
+                if (ioctl(dumpfd, DIOCDUMP, uuidstr) == -1)
                 logprint(SC_SL_NONE | SC_EXIT_ERR,
                     "dedicated dump device required");
+        }
 
         (void) close(dumpfd);
         dumpfd = -1;
 
         Stat(dumpfile, &st);

@@ -1789,10 +1715,23 @@
         }
 
         if (dumphdr.dump_fm_panic)
                 fm_panic = B_TRUE;
 
+        /* strip all trailing slashes, but never the first character */
+        slash = strrchr(savedir, '\0');
+        while (--slash > savedir && *slash == '/') {
+                *slash = '\0';
+        }
+
+        if (fma_layout) {
+                (void) snprintf(uuiddir, sizeof (uuiddir), "%s/data/%s",
+                    savedir, dumphdr.dump_uuid);
+        } else {
+                (void) strncpy(uuiddir, savedir, sizeof (uuiddir));
+        }
+
         /*
          * We have a valid dump on a dump device and know as much about
          * it as we're going to at this stage.  Raise an event for
          * logging and so that FMA can open a case for this panic.
          * Avoid this step for FMA-initiated panics - FMA will replay

@@ -1826,31 +1765,81 @@
                     corehdr.dump_uuid,
                     fm_panic ?  "(fault-management initiated)" : "");
                 /*NOTREACHED*/
         }
 
-        if (chdir(savedir) == -1)
+        if (fma_layout && mkdirp(uuiddir, 0755) != 0) {
+                if (errno != EEXIST)
+                        logprint(SC_SL_ERR | SC_EXIT_ERR,
+                            "mkdirp(\"%s\"): %s",
+                            uuiddir, strerror(errno));
+        }
+
+        if (chdir(uuiddir) == -1)
                 logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s",
-                    savedir, strerror(errno));
+                    uuiddir, strerror(errno));
 
         check_space(csave);
 
+        (void) snprintf(boundsfile, MAXPATHLEN, "%s/bounds", savedir);
+
         if (filebounds < 0)
-                bounds = read_number_from_file("bounds", 0);
+                bounds = read_number_from_file(boundsfile, 0);
         else
                 bounds = filebounds;
 
+        if (!fflag && disregard_valid_flag && bounds > 0)
+                bounds--;
+
+        (void) snprintf(boundslink, MAXPATHLEN, "%s/%d", savedir, bounds);
+
+        /*
+         * Create a symbolic link to easily maintain the sequential ordering.
+         */
+        if (!fflag && fma_layout && symlink(uuiddir, boundslink) != 0) {
+                if (errno == EEXIST) {
+                        char symbuf[MAXPATHLEN] = {'\0'};
+
+                        if (readlink(boundslink, symbuf, sizeof (symbuf)) < 0)
+                                logprint(SC_SL_ERR | SC_EXIT_ERR,
+                                "readlink: %s", strerror(errno));
+                        if (strcmp(symbuf, uuiddir) != 0) {
+                                logprint(SC_SL_ERR,
+                                    "Symbolic link %s already exists but "
+                                    "specifies a wrong UUID directory, "
+                                    "new symbolic link will be created "
+                                    "instead", boundslink);
+                                (void) unlink(boundslink);
+                                if (symlink(uuiddir, boundslink) != 0)
+                                        logprint(SC_SL_ERR | SC_EXIT_ERR,
+                                            "symlink: %s", strerror(errno));
+                        }
+                } else {
+                        logprint(SC_SL_ERR | SC_EXIT_ERR, "symlink: %s",
+                            strerror(errno));
+                }
+        }
+
         if (csave) {
                 size_t metrics_size = datahdr.dump_metrics;
 
                 (void) sprintf(corefile, "vmdump.%ld", bounds);
 
+                if (interactive && bounds >= 0 && access(corefile, F_OK)
+                    == 0) {
+                        skip_event = B_TRUE;
+                        logprint(SC_SL_NONE | SC_EXIT_ERR,
+                            "%s already exists: remove with "
+                            "'rm -f %s/{unix,vmcore}.%ld'",
+                            corefile, uuiddir, bounds);
+                }
+
                 datahdr.dump_metrics = 0;
 
                 logprint(SC_SL_ERR,
                     "Saving compressed system crash dump in %s/%s",
-                    savedir, corefile);
+                    uuiddir, corefile);
 
                 copy_crashfile(corefile);
 
                 /*
                  * Raise a fault management event that indicates the system

@@ -1905,26 +1894,28 @@
                 }
 
                 logprint(SC_SL_ERR,
                     "Decompress the crash dump with "
                     "\n'savecore -vf %s/%s'",
-                    savedir, corefile);
+                    uuiddir, corefile);
 
         } else {
                 (void) sprintf(namelist, "unix.%ld", bounds);
                 (void) sprintf(corefile, "vmcore.%ld", bounds);
 
-                if (interactive && filebounds >= 0 && access(corefile, F_OK)
-                    == 0)
+                if (interactive && bounds >= 0 && access(corefile, F_OK)
+                    == 0) {
+                        skip_event = B_TRUE;
                         logprint(SC_SL_NONE | SC_EXIT_ERR,
                             "%s already exists: remove with "
                             "'rm -f %s/{unix,vmcore}.%ld'",
-                            corefile, savedir, bounds);
+                            corefile, uuiddir, bounds);
+                }
 
                 logprint(SC_SL_ERR,
                     "saving system crash dump in %s/{unix,vmcore}.%ld",
-                    savedir, bounds);
+                    uuiddir, bounds);
 
                 build_corefile(namelist, corefile);
 
                 if (!livedump && !filemode && !fm_panic)
                         raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);

@@ -1965,11 +1956,11 @@
                 }
         }
 
         if (filebounds < 0) {
                 (void) sprintf(boundstr, "%ld\n", bounds + 1);
-                bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+                bfd = Open(boundsfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
                 Pwrite(bfd, boundstr, strlen(boundstr), 0);
                 (void) close(bfd);
         }
 
         if (verbose) {