1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /*
28 * Panic software-diagnosis subsidiary
29 *
 * We model a system panic as a defect diagnosis in FMA. When a system
 * panics, savecore publishes events which we subscribe to here.
32 *
33 * Our driving events are all raised by savecore, run either from
34 * startup of the dumpadm service or interactively at the command line.
35 * The following describes the logic for the handling of these events.
36 *
37 * On reboot after panic we will run savecore as part of the dumpadm
38 * service startup; we run savecore even if savecore is otherwise
39 * disabled (ie dumpadm -n in effect) - we run savecore -c to check for
40 * a valid dump and raise the initial event.
41 *
42 * If savecore (or savecore -c) observes a valid dump pending on the
43 * device, it raises a "dump_pending_on_device" event provided this
44 * was not an FMA-initiated panic (for those we will replay ereports
45 * from the dump device as usual and make a diagnosis from those; we do
46 * not need to open a case for the panic). We subscribe to the
47 * "dump_pending_on_device" event and use that to open a case; we
48 * open a case requesting the same case uuid as the panic dump image
49 * has for the OS instance uuid - if that fails because of a duplicate
50 * uuid then we have already opened a case for this panic so no need
51 * to open another.
52 *
53 * Included in the "dump_pending_on_device" event is an indication of
54 * whether or not dumpadm is enabled. If not (dumpadm -n in effect)
55 * then we do not expect any further events regarding this panic
56 * until such time as the admin runs savecore manually (if ever).
57 * So in this case we solve the case immediately after open. If/when
58 * subsequent events arrive when savecore is run manually, we will toss
59 * them.
60 *
61 * If dumpadm is enabled then savecore, run from dumpadm service startup,
62 * will attempt to process the dump - either to copy it off the dump
63 * device (if saving compressed) or to uncompress it off the dump device.
64 * If this succeeds savecore raises a "dump_available" event which
65 * includes information on the directory it was saved in, the instance
66 * number, image uuid, compressed form or not, and whether the dump
67 * was complete (as per the dumphdr). If the savecore fails for
68 * some reason then it exits and raises a "savecore_failure" event.
69 * These two events are raised even for FMA-initiated panics.
70 *
 * We subscribe to both the "dump_available" and "savecore_failure" events,
 * and in the handling thereof we will close the case opened earlier (if
 * this is not an FMA-initiated panic).  On receipt of the initial
 * "dump_pending_on_device" event we also arm a timer for +10 minutes if
 * dumpadm is enabled - if no "dump_available" or "savecore_failure" arrives
 * in that time we will solve the case on timeout.
77 *
78 * When the timer fires we check whether the initial event for each panic
79 * case was received more than 30 minutes ago; if it was we solve the case
80 * with what we have. If we're still within the waiting period we rearm
81 * for a further 10 minutes. The timer is shared by all cases that we
82 * create, which is why the fire interval is shorter than the maximum time
83 * we are prepared to wait.
84 */
85
86 #include <strings.h>
87 #include <sys/panic.h>
88 #include <zone.h>
89 #include <uuid/uuid.h>
90
91 #include "../../common/sw.h"
92 #include "panic.h"
93
#define	MAX_STRING_LEN	160	/* NOTE(review): appears unused in this file - confirm before removing */

static id_t myid;		/* subsidiary id handed to us by swde_panic_init() */

static id_t mytimerid;		/* shared timeout id; 0 while no timer is armed */
99
/*
 * Our serialization structure type.  This header is immediately followed
 * in the serialized buffer by a packed attribute nvlist of scd_nvlbufsz
 * bytes (the attributes of the initial "dump_pending_on_device" event).
 */
#define	SWDE_PANIC_CASEDATA_VERS	1

typedef struct swde_panic_casedata {
	uint32_t scd_vers;		/* must be first member */
	uint64_t scd_receive_time;	/* when we first knew of this panic */
	size_t scd_nvlbufsz;		/* size of following buffer */
	/* packed attr nvlist follows */
} swde_panic_casedata_t;
111
/*
 * Module statistics; published via fmd_stat_create() in swde_panic_init()
 * and observable with fmstat(8).
 */
static struct {
	fmd_stat_t swde_panic_diagnosed;
	fmd_stat_t swde_panic_badclass;
	fmd_stat_t swde_panic_noattr;
	fmd_stat_t swde_panic_unexpected_fm_panic;
	fmd_stat_t swde_panic_badattr;
	fmd_stat_t swde_panic_badfmri;
	fmd_stat_t swde_panic_noinstance;
	fmd_stat_t swde_panic_nouuid;
	fmd_stat_t swde_panic_dupuuid;
	fmd_stat_t swde_panic_nocase;
	fmd_stat_t swde_panic_notime;
	fmd_stat_t swde_panic_nopanicstr;
	fmd_stat_t swde_panic_nodumpdir;
	fmd_stat_t swde_panic_nostack;
	fmd_stat_t swde_panic_incomplete;
	fmd_stat_t swde_panic_failed;
	/*
	 * NOTE(review): the initializer below publishes this stat under the
	 * name "swde_panic_badcasedata", which does not match this member
	 * name - confirm whether the member or the string is the intended
	 * spelling before changing either.
	 */
	fmd_stat_t swde_panic_basecasedata;
	fmd_stat_t swde_panic_failsrlz;
} swde_panic_stats = {
	{ "swde_panic_diagnosed", FMD_TYPE_UINT64,
	    "panic defects published" },
	{ "swde_panic_badclass", FMD_TYPE_UINT64,
	    "incorrect event class received" },
	{ "swde_panic_noattr", FMD_TYPE_UINT64,
	    "malformed event - missing attr nvlist" },
	{ "swde_panic_unexpected_fm_panic", FMD_TYPE_UINT64,
	    "dump available for an fm_panic()" },
	{ "swde_panic_badattr", FMD_TYPE_UINT64,
	    "malformed event - invalid attr list" },
	{ "swde_panic_badfmri", FMD_TYPE_UINT64,
	    "malformed event - fmri2str fails" },
	{ "swde_panic_noinstance", FMD_TYPE_UINT64,
	    "malformed event - no instance number" },
	{ "swde_panic_nouuid", FMD_TYPE_UINT64,
	    "malformed event - missing uuid" },
	{ "swde_panic_dupuuid", FMD_TYPE_UINT64,
	    "duplicate events received" },
	{ "swde_panic_nocase", FMD_TYPE_UINT64,
	    "case missing for uuid" },
	{ "swde_panic_notime", FMD_TYPE_UINT64,
	    "missing crash dump time" },
	{ "swde_panic_nopanicstr", FMD_TYPE_UINT64,
	    "missing panic string" },
	{ "swde_panic_nodumpdir", FMD_TYPE_UINT64,
	    "missing crashdump save directory" },
	{ "swde_panic_nostack", FMD_TYPE_UINT64,
	    "missing panic stack" },
	{ "swde_panic_incomplete", FMD_TYPE_UINT64,
	    "missing panic incomplete" },
	{ "swde_panic_failed", FMD_TYPE_UINT64,
	    "missing panic failed" },
	{ "swde_panic_badcasedata", FMD_TYPE_UINT64,
	    "bad case data during timeout" },
	{ "swde_panic_failsrlz", FMD_TYPE_UINT64,
	    "failures to serialize case data" },
};

/* Increment the named statistic above. */
#define	BUMPSTAT(stat)		swde_panic_stats.stat.fmds_value.ui64++
171
172 static nvlist_t *
173 panic_sw_fmri(fmd_hdl_t *hdl, char *object)
174 {
175 nvlist_t *fmri;
176 nvlist_t *sw_obj;
177 int err = 0;
178
179 fmri = fmd_nvl_alloc(hdl, FMD_SLEEP);
180 err |= nvlist_add_uint8(fmri, FM_VERSION, FM_SW_SCHEME_VERSION);
181 err |= nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_SW);
182
183 sw_obj = fmd_nvl_alloc(hdl, FMD_SLEEP);
184 err |= nvlist_add_string(sw_obj, FM_FMRI_SW_OBJ_PATH, object);
185 err |= nvlist_add_nvlist(fmri, FM_FMRI_SW_OBJ, sw_obj);
186 nvlist_free(sw_obj);
187 if (!err)
188 return (fmri);
189 else
190 return (0);
191 }
192
/*
 * Names of the files savecore writes; the %lld conversion receives the
 * int64 dump "instance" number.  Uncompressed dumps produce the
 * {unix.n, vmcore.n} pair; compressed dumps produce just vmdump.n.
 */
static const char *dumpfiles[2] = { "unix.%lld", "vmcore.%lld" };
static const char *dumpfiles_comp[2] = { "vmdump.%lld", NULL};
195
196 static void
197 swde_panic_solve(fmd_hdl_t *hdl, fmd_case_t *cp,
198 nvlist_t *attr, fmd_event_t *ep, boolean_t savecore_success)
199 {
200 char path[MAXPATHLEN];
201 char *dumpdir, *uuid;
202 nvlist_t *defect, *rsrc;
203 nvpair_t *nvp;
204 int i;
205
206 /*
207 * Attribute members to include in event-specific defect
208 * payload. Some attributes will not be present for some
209 * cases - e.g., if we timed out and solved the case without
210 * a "dump_available" report.
211 */
212 const char *toadd[] = {
213 "os-instance-uuid", /* same as case uuid */
214 "panicstr", /* for initial classification work */
215 "panicstack", /* for initial classification work */
216 "crashtime", /* in epoch time */
217 "panic-time", /* Formatted crash time */
218 };
219
220 if (ep != NULL)
221 fmd_case_add_ereport(hdl, cp, ep);
222 /*
223 * As a temporary solution we create and fmri in the sw scheme
224 * in panic_sw_fmri. This should become a generic fmri constructor
225 *
226 * We need to user a resource FMRI which will have a sufficiently
227 * unique string representation such that fmd will not see
228 * repeated panic diagnoses (all using the same defect class)
229 * as duplicates and discard later cases. We can't actually diagnose
230 * the panic to anything specific (e.g., a path to a module and
231 * function/line etc therein). We could pick on a generic
232 * representative such as /kernel/genunix but that could lead
233 * to misunderstanding. So we choose a path based on <dumpdir>
234 * and the OS instance UUID - "<dumpdir>/data/<uuid>".
235 */
236 (void) nvlist_lookup_string(attr, "dumpdir", &dumpdir);
237 (void) nvlist_lookup_string(attr, "os-instance-uuid", &uuid);
238 (void) snprintf(path, sizeof (path), "%s/data/%s", dumpdir, uuid);
239 rsrc = panic_sw_fmri(hdl, path);
240
241 defect = fmd_nvl_create_defect(hdl, SW_SUNOS_PANIC_DEFECT,
242 100, rsrc, NULL, rsrc);
243 nvlist_free(rsrc);
244
245 (void) nvlist_add_boolean_value(defect, "savecore-succcess",
246 savecore_success);
247
248 if (savecore_success) {
249 boolean_t compressed;
250 int64_t instance;
251 const char **pathfmts;
252 char buf[2][32];
253 int files = 0;
254 char *arr[2];
255 int i;
256
257 (void) nvlist_lookup_int64(attr, "instance", &instance);
258 (void) nvlist_lookup_boolean_value(attr, "compressed",
259 &compressed);
260
261 pathfmts = compressed ? &dumpfiles_comp[0] : &dumpfiles[0];
262
263 for (i = 0; i < 2; i++) {
264 if (pathfmts[i] == NULL) {
265 arr[i] = NULL;
266 continue;
267 }
268
269 (void) snprintf(buf[i], 32, pathfmts[i], instance);
270 arr[i] = buf[i];
271 files++;
272 }
273
274 (void) nvlist_add_string(defect, "dump-dir", dumpdir);
275 (void) nvlist_add_string_array(defect, "dump-files", arr,
276 files);
277 } else {
278 char *rsn;
279
280 if (nvlist_lookup_string(attr, "failure-reason", &rsn) == 0)
281 (void) nvlist_add_string(defect, "failure-reason", rsn);
282 }
283
284 /*
285 * Not all attributes will necessarily be available - eg if
286 * dumpadm was not enabled there'll be no instance and dumpdir.
287 */
288 for (i = 0; i < sizeof (toadd) / sizeof (toadd[0]); i++) {
289 if (nvlist_lookup_nvpair(attr, toadd[i], &nvp) == 0)
290 (void) nvlist_add_nvpair(defect, nvp);
291 }
292
293 fmd_case_add_suspect(hdl, cp, defect);
294 fmd_case_solve(hdl, cp);
295
296 /*
297 * Close the case. Do no free casedata - framework does that for us
298 * on closure callback.
299 */
300 fmd_case_close(hdl, cp);
301 BUMPSTAT(swde_panic_diagnosed);
302 }
303
304 /*ARGSUSED*/
305 static void
306 swde_panic_timeout(fmd_hdl_t *hdl, id_t timerid, void *data)
307 {
308 fmd_case_t *cp = swde_case_first(hdl, myid);
309 swde_panic_casedata_t *cdp;
310 time_t now = time(NULL);
311 nvlist_t *attr;
312 int remain = 0;
313 uint32_t vers;
314
315 while (cp != NULL) {
316 cdp = swde_case_data(hdl, cp, &vers);
317 if (vers != SWDE_PANIC_CASEDATA_VERS)
318 fmd_hdl_abort(hdl, "case data version confused\n");
319
320 if (now > cdp->scd_receive_time + 30 * 60) {
321 if (nvlist_unpack((char *)cdp + sizeof (*cdp),
322 cdp->scd_nvlbufsz, &attr, 0) == 0) {
323 swde_panic_solve(hdl, cp, attr, NULL, B_FALSE);
324 nvlist_free(attr);
325 } else {
326 BUMPSTAT(swde_panic_basecasedata);
327 fmd_case_close(hdl, cp);
328 }
329 } else {
330 remain++;
331 }
332
333
334 cp = swde_case_next(hdl, cp);
335 }
336
337 if (remain) {
338 mytimerid = sw_timer_install(hdl, myid, NULL, NULL,
339 10ULL * NANOSEC * 60);
340 }
341 }
342
343 /*
344 * Our verify entry point is called for each of our open cases during
345 * module load. We must return 0 for the case to be closed by our caller,
346 * or 1 to keep it (or if we have already closed it during this call).
347 */
348 static int
349 swde_panic_vrfy(fmd_hdl_t *hdl, fmd_case_t *cp)
350 {
351 swde_panic_casedata_t *cdp;
352 time_t now = time(NULL);
353 nvlist_t *attr;
354 uint32_t vers;
355
356 cdp = swde_case_data(hdl, cp, &vers);
357
358 if (vers != SWDE_PANIC_CASEDATA_VERS)
359 return (0); /* case will be closed */
360
361 if (now > cdp->scd_receive_time + 30 * 60) {
362 if (nvlist_unpack((char *)cdp + sizeof (*cdp),
363 cdp->scd_nvlbufsz, &attr, 0) == 0) {
364 swde_panic_solve(hdl, cp, attr, NULL, B_FALSE);
365 nvlist_free(attr);
366 return (1); /* case already closed */
367 } else {
368 return (0); /* close case */
369 }
370 }
371
372 if (mytimerid != 0)
373 mytimerid = sw_timer_install(hdl, myid,
374 NULL, NULL, 10ULL * NANOSEC * 60);
375
376 return (1); /* retain case */
377 }
378
379 /*
380 * Handler for ireport.os.sunos.panic.dump_pending_on_device.
381 *
382 * A future RFE should try adding a means of avoiding diagnosing repeated
383 * defects on panic loops, which would just add to the mayhem and potentially
384 * log lots of calls through ASR. Panics with similar enough panic
385 * strings and/or stacks should not diagnose to new defects with some
386 * period of time, for example.
387 */
388
389 /*ARGSUSED*/
390 void
391 swde_panic_detected(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
392 const char *class, void *arg)
393 {
394 boolean_t fm_panic, expect_savecore;
395 swde_panic_casedata_t *cdp;
396 nvlist_t *attr;
397 fmd_case_t *cp;
398 char *fmribuf;
399 char *uuid;
400 size_t sz;
401
402 fmd_hdl_debug(hdl, "swde_panic_detected\n");
403
404 if (nvlist_lookup_nvlist(nvl, FM_IREPORT_ATTRIBUTES, &attr) != 0) {
405 BUMPSTAT(swde_panic_noattr);
406 return;
407 }
408
409 if (nvlist_lookup_string(attr, "os-instance-uuid", &uuid) != 0) {
410 BUMPSTAT(swde_panic_nouuid);
411 return;
412 }
413
414 fmd_hdl_debug(hdl, "swde_panic_detected: OS instance %s\n", uuid);
415
416 if (nvlist_lookup_boolean_value(attr, "fm-panic", &fm_panic) != 0 ||
417 fm_panic == B_TRUE) {
418 BUMPSTAT(swde_panic_unexpected_fm_panic);
419 return;
420 }
421
422 /*
423 * Prepare serialization data to be associated with a new
424 * case. Our serialization data consists of a swde_panic_casedata_t
425 * structure followed by a packed nvlist of the attributes of
426 * the initial event.
427 */
428 if (nvlist_size(attr, &sz, NV_ENCODE_NATIVE) != 0) {
429 BUMPSTAT(swde_panic_failsrlz);
430 return;
431 }
432
433 cdp = fmd_hdl_zalloc(hdl, sizeof (*cdp) + sz, FMD_SLEEP);
434 fmribuf = (char *)cdp + sizeof (*cdp);
435 cdp->scd_vers = SWDE_PANIC_CASEDATA_VERS;
436 cdp->scd_receive_time = time(NULL);
437 cdp->scd_nvlbufsz = sz;
438
439 /*
440 * Open a case with UUID matching the the panicking kernel, add this
441 * event to the case.
442 */
443 if ((cp = swde_case_open(hdl, myid, uuid, SWDE_PANIC_CASEDATA_VERS,
444 cdp, sizeof (*cdp) + sz)) == NULL) {
445 BUMPSTAT(swde_panic_dupuuid);
446 fmd_hdl_debug(hdl, "swde_case_open returned NULL - dup?\n");
447 fmd_hdl_free(hdl, cdp, sizeof (*cdp) + sz);
448 return;
449 }
450
451 fmd_case_setprincipal(hdl, cp, ep);
452
453 if (nvlist_lookup_boolean_value(attr, "will-attempt-savecore",
454 &expect_savecore) != 0 || expect_savecore == B_FALSE) {
455 fmd_hdl_debug(hdl, "savecore not being attempted - "
456 "solve now\n");
457 swde_panic_solve(hdl, cp, attr, ep, B_FALSE);
458 return;
459 }
460
461 /*
462 * We expect to see either a "dump_available" or a "savecore_failed"
463 * event before too long. In case that never shows up, for whatever
464 * reason, we want to be able to solve the case anyway.
465 */
466 fmd_case_add_ereport(hdl, cp, ep);
467 (void) nvlist_pack(attr, &fmribuf, &sz, NV_ENCODE_NATIVE, 0);
468 swde_case_data_write(hdl, cp);
469
470 if (mytimerid == 0) {
471 mytimerid = sw_timer_install(hdl, myid, NULL, ep,
472 10ULL * NANOSEC * 60);
473 fmd_hdl_debug(hdl, "armed timer\n");
474 } else {
475 fmd_hdl_debug(hdl, "timer already armed\n");
476 }
477 }
478
479 /*
480 * savecore has now run and saved a crash dump to the filesystem. It is
481 * either a compressed dump (vmdump.n) or uncompressed {unix.n, vmcore.n}
482 * Savecore has raised an ireport to say the dump is there.
483 */
484
485 /*ARGSUSED*/
486 void
487 swde_panic_savecore_done(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
488 const char *class, void *arg)
489 {
490 boolean_t savecore_success = (arg != NULL);
491 boolean_t fm_panic;
492 nvlist_t *attr;
493 fmd_case_t *cp;
494 char *uuid;
495
496 fmd_hdl_debug(hdl, "savecore_done (%s)\n", savecore_success ?
497 "success" : "fail");
498
499 if (nvlist_lookup_nvlist(nvl, FM_IREPORT_ATTRIBUTES, &attr) != 0) {
500 BUMPSTAT(swde_panic_noattr);
501 return;
502 }
503
504 if (nvlist_lookup_boolean_value(attr, "fm-panic", &fm_panic) != 0 ||
505 fm_panic == B_TRUE) {
506 return; /* not expected, but just in case */
507 }
508
509 if (nvlist_lookup_string(attr, "os-instance-uuid", &uuid) != 0) {
510 BUMPSTAT(swde_panic_nouuid);
511 return;
512 }
513
514 /*
515 * Find the case related to the panicking kernel; our cases have
516 * the same uuid as the crashed OS image.
517 */
518 cp = fmd_case_uulookup(hdl, uuid);
519 if (!cp) {
520 /* Unable to find the case. */
521 fmd_hdl_debug(hdl, "savecore_done: can't find case for "
522 "image %s\n", uuid);
523 BUMPSTAT(swde_panic_nocase);
524 return;
525 }
526
527 fmd_hdl_debug(hdl, "savecore_done: solving case %s\n", uuid);
528 swde_panic_solve(hdl, cp, attr, ep, savecore_success);
529 }
530
/*
 * Event dispatch table: maps each subscribed class to its handler.
 * The third member is the opaque arg passed to the handler; for the
 * shared savecore_done handler a non-NULL arg marks the success class.
 */
const struct sw_disp swde_panic_disp[] = {
	{ SW_SUNOS_PANIC_DETECTED, swde_panic_detected, NULL },
	{ SW_SUNOS_PANIC_AVAIL, swde_panic_savecore_done, (void *)1 },
	{ SW_SUNOS_PANIC_FAILURE, swde_panic_savecore_done, NULL },
	/*
	 * Something has to subscribe to every fault
	 * or defect diagnosed in fmd.  We do that here, but throw it away.
	 */
	{ SW_SUNOS_PANIC_DEFECT, NULL, NULL },
	{ NULL, NULL, NULL }
};
542
543 /*ARGSUSED*/
544 int
545 swde_panic_init(fmd_hdl_t *hdl, id_t id, const struct sw_disp **dpp,
546 int *nelemp)
547 {
548 myid = id;
549
550 if (getzoneid() != GLOBAL_ZONEID)
551 return (SW_SUB_INIT_FAIL_VOLUNTARY);
552
553 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
554 sizeof (swde_panic_stats) / sizeof (fmd_stat_t),
555 (fmd_stat_t *)&swde_panic_stats);
556
557 fmd_hdl_subscribe(hdl, SW_SUNOS_PANIC_DETECTED);
558 fmd_hdl_subscribe(hdl, SW_SUNOS_PANIC_FAILURE);
559 fmd_hdl_subscribe(hdl, SW_SUNOS_PANIC_AVAIL);
560
561 *dpp = &swde_panic_disp[0];
562 *nelemp = sizeof (swde_panic_disp) / sizeof (swde_panic_disp[0]);
563 return (SW_SUB_INIT_SUCCESS);
564 }
565
566 void
567 swde_panic_fini(fmd_hdl_t *hdl)
568 {
569 if (mytimerid)
570 sw_timer_remove(hdl, myid, mytimerid);
571 }
572
/*
 * Registration info for this subsidiary, consumed by the common
 * software-diagnosis module framework.
 */
const struct sw_subinfo panic_diag_info = {
	"panic diagnosis",		/* swsub_name */
	SW_CASE_PANIC,			/* swsub_casetype */
	swde_panic_init,		/* swsub_init */
	swde_panic_fini,		/* swsub_fini */
	swde_panic_timeout,		/* swsub_timeout */
	NULL,				/* swsub_case_close */
	swde_panic_vrfy,		/* swsub_case_vrfy */
};