Print this page
re #13388 rb4382 fmd_api.h uses bool which is a C99/C++ keyword
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/fm/fmd/common/fmd_ckpt.c
+++ new/usr/src/cmd/fm/fmd/common/fmd_ckpt.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/mkdev.h>
29 29 #include <sys/stat.h>
30 30
31 31 #include <strings.h>
32 32 #include <unistd.h>
33 33 #include <limits.h>
34 34 #include <fcntl.h>
35 35
36 36 #include <fmd_module.h>
37 37 #include <fmd_error.h>
38 38 #include <fmd_alloc.h>
39 39 #include <fmd_case.h>
40 40 #include <fmd_serd.h>
41 41 #include <fmd_subr.h>
42 42 #include <fmd_conf.h>
43 43 #include <fmd_event.h>
44 44 #include <fmd_log.h>
45 45 #include <fmd_api.h>
46 46 #include <fmd_ckpt.h>
47 47
48 48 #include <fmd.h>
49 49
/*
 * Power-of-two arithmetic helpers.  Both macros require the alignment
 * argument to be a power of two.
 *
 * P2ROUNDUP(x, align)	 - round x up to the next multiple of align
 * IS_P2ALIGNED(v, a)	 - non-zero if v is a multiple of a
 */
#define	P2ROUNDUP(x, align)	(-((-(x)) & (-(align))))
#define	IS_P2ALIGNED(v, a)	(((uintptr_t)(v) & ((uintptr_t)(a) - 1)) == 0)
52 52
/*
 * The fmd_ckpt_t structure is used to manage all of the state needed by the
 * various subroutines that save and restore checkpoints.  The structure is
 * initialized using fmd_ckpt_create() or fmd_ckpt_open() and is destroyed
 * by fmd_ckpt_destroy().  Refer to the subroutines below for more details.
 *
 * Several members are used differently by the two directions.  When saving,
 * ckp_size and ckp_strn are accumulated by the reservation pass before the
 * buffer is allocated, and ckp_secs is counted once while reserving and then
 * reset and counted again while the sections are written.  When restoring,
 * these members are loaded from the checkpoint file that was read in.
 */
typedef struct fmd_ckpt {
	char ckp_src[PATH_MAX];	/* ckpt input or output filename */
	char ckp_dst[PATH_MAX];	/* ckpt rename filename (save only) */
	uchar_t *ckp_buf;	/* data buffer base address */
	fcf_hdr_t *ckp_hdr;	/* file header pointer (start of ckp_buf) */
	uchar_t *ckp_ptr;	/* data buffer pointer (next byte to write) */
	size_t ckp_size;	/* data buffer size */
	fcf_sec_t *ckp_secp;	/* section header table pointer */
	fcf_sec_t *ckp_modp;	/* section header for module */
	uint_t ckp_secs;	/* number of sections */
	char *ckp_strs;		/* string table base pointer */
	char *ckp_strp;		/* string table pointer (next free byte) */
	size_t ckp_strn;	/* string table size */
	int ckp_fd;		/* output descriptor; -1 once closed */
	fmd_module_t *ckp_mp;	/* checkpoint module */
	void *ckp_arg;		/* private arg for callbacks */
} fmd_ckpt_t;
76 76
/*
 * Per-section-type constraints.  One descriptor exists for each FCF section
 * type in the _fmd_ckpt_sections[] table below.
 */
typedef struct fmd_ckpt_desc {
	uint64_t secd_size;	/* minimum section size */
	uint32_t secd_entsize;	/* minimum section entry size */
	uint32_t secd_align;	/* section alignment */
} fmd_ckpt_desc_t;
82 82
/*
 * Table of FCF section descriptions.  Here we record the minimum size for each
 * section (for use during restore) and the expected entry size and alignment
 * for each section (for use during both checkpoint and restore).
 *
 * The table is indexed directly by section type, so the order of the entries
 * must match the numbering of the section types named in the comments.  Each
 * initializer is { secd_size, secd_entsize, secd_align }.
 */
static const fmd_ckpt_desc_t _fmd_ckpt_sections[] = {
{ 0, 0, sizeof (uint8_t) },					/* NONE */
{ 1, 0, sizeof (char) },					/* STRTAB */
{ sizeof (fcf_module_t), 0, sizeof (uint32_t) },		/* MODULE */
{ sizeof (fcf_case_t), 0, sizeof (uint32_t) },			/* CASE */
{ sizeof (fcf_buf_t), sizeof (fcf_buf_t), sizeof (uint32_t) },	/* BUFS */
{ 0, 0, _MAX_ALIGNMENT },					/* BUFFER */
{ sizeof (fcf_serd_t), sizeof (fcf_serd_t), sizeof (uint64_t) }, /* SERD */
{ sizeof (fcf_event_t), sizeof (fcf_event_t), sizeof (uint64_t) }, /* EVENTS */
{ sizeof (fcf_nvl_t), sizeof (fcf_nvl_t), sizeof (uint64_t) },	/* NVLISTS */
};
99 99
100 100 static int
101 101 fmd_ckpt_create(fmd_ckpt_t *ckp, fmd_module_t *mp)
102 102 {
103 103 const char *dir = mp->mod_ckpt;
104 104 const char *name = mp->mod_name;
105 105 mode_t mode;
106 106
107 107 bzero(ckp, sizeof (fmd_ckpt_t));
108 108 ckp->ckp_mp = mp;
109 109
110 110 ckp->ckp_size = sizeof (fcf_hdr_t);
111 111 ckp->ckp_strn = 1; /* for \0 */
112 112
113 113 (void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s+", dir, name);
114 114 (void) snprintf(ckp->ckp_dst, PATH_MAX, "%s/%s", dir, name);
115 115
116 116 (void) unlink(ckp->ckp_src);
117 117 (void) fmd_conf_getprop(fmd.d_conf, "ckpt.mode", &mode);
118 118 ckp->ckp_fd = open64(ckp->ckp_src, O_WRONLY | O_CREAT | O_EXCL, mode);
119 119
120 120 return (ckp->ckp_fd);
121 121 }
122 122
123 123 /*PRINTFLIKE2*/
124 124 static int
125 125 fmd_ckpt_inval(fmd_ckpt_t *ckp, const char *format, ...)
126 126 {
127 127 va_list ap;
128 128
129 129 va_start(ap, format);
130 130 fmd_verror(EFMD_CKPT_INVAL, format, ap);
131 131 va_end(ap);
132 132
133 133 fmd_free(ckp->ckp_buf, ckp->ckp_size);
134 134 return (fmd_set_errno(EFMD_CKPT_INVAL));
135 135 }
136 136
137 137 static int
138 138 fmd_ckpt_open(fmd_ckpt_t *ckp, fmd_module_t *mp)
139 139 {
140 140 struct stat64 st;
141 141 uint64_t seclen;
142 142 uint_t i;
143 143 int err;
144 144
145 145 bzero(ckp, sizeof (fmd_ckpt_t));
146 146 ckp->ckp_mp = mp;
147 147
148 148 (void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s",
149 149 mp->mod_ckpt, mp->mod_name);
150 150
151 151 if ((ckp->ckp_fd = open(ckp->ckp_src, O_RDONLY)) == -1)
152 152 return (-1); /* failed to open checkpoint file */
153 153
154 154 if (fstat64(ckp->ckp_fd, &st) == -1) {
155 155 err = errno;
156 156 (void) close(ckp->ckp_fd);
157 157 return (fmd_set_errno(err));
158 158 }
159 159
160 160 ckp->ckp_buf = fmd_alloc(st.st_size, FMD_SLEEP);
161 161 ckp->ckp_hdr = (void *)ckp->ckp_buf;
162 162 ckp->ckp_size = read(ckp->ckp_fd, ckp->ckp_buf, st.st_size);
163 163
164 164 if (ckp->ckp_size != st.st_size || ckp->ckp_size < sizeof (fcf_hdr_t) ||
165 165 ckp->ckp_size != ckp->ckp_hdr->fcfh_filesz) {
166 166 err = ckp->ckp_size == (size_t)-1L ? errno : EFMD_CKPT_SHORT;
167 167 fmd_free(ckp->ckp_buf, st.st_size);
168 168 (void) close(ckp->ckp_fd);
169 169 return (fmd_set_errno(err));
170 170 }
171 171
172 172 (void) close(ckp->ckp_fd);
173 173 ckp->ckp_fd = -1;
174 174
175 175 /*
176 176 * Once we've read in a consistent copy of the FCF file and we're sure
177 177 * the header can be accessed, go through it and make sure everything
178 178 * is valid. We also check that unused bits are zero so we can expand
179 179 * to use them safely in the future and support old files if needed.
180 180 */
181 181 if (bcmp(&ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0],
182 182 FCF_MAG_STRING, FCF_MAG_STRLEN) != 0)
183 183 return (fmd_ckpt_inval(ckp, "bad checkpoint magic string\n"));
184 184
185 185 if (ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] != FCF_MODEL_NATIVE)
186 186 return (fmd_ckpt_inval(ckp, "bad checkpoint data model\n"));
187 187
188 188 if (ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] != FCF_ENCODE_NATIVE)
189 189 return (fmd_ckpt_inval(ckp, "bad checkpoint data encoding\n"));
190 190
191 191 if (ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] != FCF_VERSION_1) {
192 192 return (fmd_ckpt_inval(ckp, "bad checkpoint version %u\n",
193 193 ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION]));
194 194 }
195 195
196 196 for (i = FCF_ID_PAD; i < FCF_ID_SIZE; i++) {
197 197 if (ckp->ckp_hdr->fcfh_ident[i] != 0) {
198 198 return (fmd_ckpt_inval(ckp,
199 199 "bad checkpoint padding at id[%d]", i));
200 200 }
201 201 }
202 202
203 203 if (ckp->ckp_hdr->fcfh_flags & ~FCF_FL_VALID)
204 204 return (fmd_ckpt_inval(ckp, "bad checkpoint flags\n"));
205 205
206 206 if (ckp->ckp_hdr->fcfh_pad != 0)
207 207 return (fmd_ckpt_inval(ckp, "reserved field in use\n"));
208 208
209 209 if (ckp->ckp_hdr->fcfh_hdrsize < sizeof (fcf_hdr_t) ||
210 210 ckp->ckp_hdr->fcfh_secsize < sizeof (fcf_sec_t)) {
211 211 return (fmd_ckpt_inval(ckp,
212 212 "bad header and/or section size\n"));
213 213 }
214 214
215 215 seclen = (uint64_t)ckp->ckp_hdr->fcfh_secnum *
216 216 (uint64_t)ckp->ckp_hdr->fcfh_secsize;
217 217
218 218 if (ckp->ckp_hdr->fcfh_secoff > ckp->ckp_size ||
219 219 seclen > ckp->ckp_size ||
220 220 ckp->ckp_hdr->fcfh_secoff + seclen > ckp->ckp_size ||
221 221 ckp->ckp_hdr->fcfh_secoff + seclen < ckp->ckp_hdr->fcfh_secoff)
222 222 return (fmd_ckpt_inval(ckp, "truncated section headers\n"));
223 223
224 224 if (!IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secoff, sizeof (uint64_t)) ||
225 225 !IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secsize, sizeof (uint64_t)))
226 226 return (fmd_ckpt_inval(ckp, "misaligned section headers\n"));
227 227
228 228 /*
229 229 * Once the header is validated, iterate over the section headers
230 230 * ensuring that each one is valid w.r.t. offset, alignment, and size.
231 231 * We also pick up the string table pointer during this pass.
232 232 */
233 233 ckp->ckp_secp = (void *)(ckp->ckp_buf + ckp->ckp_hdr->fcfh_secoff);
234 234 ckp->ckp_secs = ckp->ckp_hdr->fcfh_secnum;
235 235
236 236 for (i = 0; i < ckp->ckp_secs; i++) {
237 237 fcf_sec_t *sp = (void *)(ckp->ckp_buf +
238 238 ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
239 239
240 240 const fmd_ckpt_desc_t *dp = &_fmd_ckpt_sections[sp->fcfs_type];
241 241
242 242 if (sp->fcfs_flags != 0) {
243 243 return (fmd_ckpt_inval(ckp, "section %u has invalid "
244 244 "section flags (0x%x)\n", i, sp->fcfs_flags));
245 245 }
246 246
247 247 if (sp->fcfs_align & (sp->fcfs_align - 1)) {
248 248 return (fmd_ckpt_inval(ckp, "section %u has invalid "
249 249 "alignment (%u)\n", i, sp->fcfs_align));
250 250 }
251 251
252 252 if (sp->fcfs_offset & (sp->fcfs_align - 1)) {
253 253 return (fmd_ckpt_inval(ckp, "section %u is not properly"
254 254 " aligned (offset %llu)\n", i, sp->fcfs_offset));
255 255 }
256 256
257 257 if (sp->fcfs_entsize != 0 &&
258 258 (sp->fcfs_entsize & (sp->fcfs_align - 1)) != 0) {
259 259 return (fmd_ckpt_inval(ckp, "section %u has misaligned "
260 260 "entsize %u\n", i, sp->fcfs_entsize));
261 261 }
262 262
263 263 if (sp->fcfs_offset > ckp->ckp_size ||
264 264 sp->fcfs_size > ckp->ckp_size ||
265 265 sp->fcfs_offset + sp->fcfs_size > ckp->ckp_size ||
266 266 sp->fcfs_offset + sp->fcfs_size < sp->fcfs_offset) {
267 267 return (fmd_ckpt_inval(ckp, "section %u has corrupt "
268 268 "size or offset\n", i));
269 269 }
270 270
271 271 if (sp->fcfs_type >= sizeof (_fmd_ckpt_sections) /
272 272 sizeof (_fmd_ckpt_sections[0])) {
273 273 return (fmd_ckpt_inval(ckp, "section %u has unknown "
274 274 "section type %u\n", i, sp->fcfs_type));
275 275 }
276 276
277 277 if (sp->fcfs_align != dp->secd_align) {
278 278 return (fmd_ckpt_inval(ckp, "section %u has align %u "
279 279 "(not %u)\n", i, sp->fcfs_align, dp->secd_align));
280 280 }
281 281
282 282 if (sp->fcfs_size < dp->secd_size ||
283 283 sp->fcfs_entsize < dp->secd_entsize) {
284 284 return (fmd_ckpt_inval(ckp, "section %u has short "
285 285 "size or entsize\n", i));
286 286 }
287 287
288 288 switch (sp->fcfs_type) {
289 289 case FCF_SECT_STRTAB:
290 290 if (ckp->ckp_strs != NULL) {
291 291 return (fmd_ckpt_inval(ckp, "multiple string "
292 292 "tables are present in checkpoint file\n"));
293 293 }
294 294
295 295 ckp->ckp_strs = (char *)ckp->ckp_buf + sp->fcfs_offset;
296 296 ckp->ckp_strn = sp->fcfs_size;
297 297
298 298 if (ckp->ckp_strs[ckp->ckp_strn - 1] != '\0') {
299 299 return (fmd_ckpt_inval(ckp, "string table %u "
300 300 "is missing terminating nul byte\n", i));
301 301 }
302 302 break;
303 303
304 304 case FCF_SECT_MODULE:
305 305 if (ckp->ckp_modp != NULL) {
306 306 return (fmd_ckpt_inval(ckp, "multiple module "
307 307 "sects are present in checkpoint file\n"));
308 308 }
309 309 ckp->ckp_modp = sp;
310 310 break;
311 311 }
312 312 }
313 313
314 314 /*
315 315 * Ensure that the first section is an empty one of type FCF_SECT_NONE.
316 316 * This is done to ensure that links can use index 0 as a null section.
317 317 */
318 318 if (ckp->ckp_secs == 0 || ckp->ckp_secp->fcfs_type != FCF_SECT_NONE ||
319 319 ckp->ckp_secp->fcfs_entsize != 0 || ckp->ckp_secp->fcfs_size != 0) {
320 320 return (fmd_ckpt_inval(ckp, "section 0 is not of the "
321 321 "appropriate size and/or attributes (SECT_NONE)\n"));
322 322 }
323 323
324 324 if (ckp->ckp_modp == NULL) {
325 325 return (fmd_ckpt_inval(ckp,
326 326 "no module section found in file\n"));
327 327 }
328 328
329 329 return (0);
330 330 }
331 331
332 332 static void
333 333 fmd_ckpt_destroy(fmd_ckpt_t *ckp)
334 334 {
335 335 if (ckp->ckp_buf != NULL)
336 336 fmd_free(ckp->ckp_buf, ckp->ckp_size);
337 337 if (ckp->ckp_fd >= 0)
338 338 (void) close(ckp->ckp_fd);
339 339 }
340 340
341 341 /*
342 342 * fmd_ckpt_error() is used as a wrapper around fmd_error() for ckpt routines.
343 343 * It calls fmd_module_unlock() on behalf of its caller, logs the error, and
344 344 * then aborts the API call and the surrounding module entry point by doing an
345 345 * fmd_module_abort(), which longjmps to the place where we entered the module.
346 346 * Depending on the type of error and conf settings, we will reset or fail.
347 347 */
348 348 /*PRINTFLIKE3*/
349 349 static void
350 350 fmd_ckpt_error(fmd_ckpt_t *ckp, int err, const char *format, ...)
351 351 {
352 352 fmd_module_t *mp = ckp->ckp_mp;
353 353 va_list ap;
354 354
355 355 va_start(ap, format);
356 356 fmd_verror(err, format, ap);
357 357 va_end(ap);
358 358
359 359 if (fmd_module_locked(mp))
360 360 fmd_module_unlock(mp);
361 361
362 362 fmd_ckpt_destroy(ckp);
363 363 fmd_module_abort(mp, err);
364 364 }
365 365
366 366 static fcf_secidx_t
367 367 fmd_ckpt_section(fmd_ckpt_t *ckp, const void *data, uint_t type, uint64_t size)
368 368 {
369 369 const fmd_ckpt_desc_t *dp;
370 370
371 371 ASSERT(type < sizeof (_fmd_ckpt_sections) / sizeof (fmd_ckpt_desc_t));
372 372 dp = &_fmd_ckpt_sections[type];
373 373
374 374 ckp->ckp_ptr = (uchar_t *)
375 375 P2ROUNDUP((uintptr_t)ckp->ckp_ptr, dp->secd_align);
376 376
377 377 ckp->ckp_secp->fcfs_type = type;
378 378 ckp->ckp_secp->fcfs_align = dp->secd_align;
379 379 ckp->ckp_secp->fcfs_flags = 0;
380 380 ckp->ckp_secp->fcfs_entsize = dp->secd_entsize;
381 381 ckp->ckp_secp->fcfs_offset = (size_t)(ckp->ckp_ptr - ckp->ckp_buf);
382 382 ckp->ckp_secp->fcfs_size = size;
383 383
384 384 /*
385 385 * If the data pointer is non-NULL, copy the data to our buffer; else
386 386 * the caller is responsible for doing so and updating ckp->ckp_ptr.
387 387 */
388 388 if (data != NULL) {
389 389 bcopy(data, ckp->ckp_ptr, size);
390 390 ckp->ckp_ptr += size;
391 391 }
392 392
393 393 ckp->ckp_secp++;
394 394 return (ckp->ckp_secs++);
395 395 }
396 396
397 397 static fcf_stridx_t
398 398 fmd_ckpt_string(fmd_ckpt_t *ckp, const char *s)
399 399 {
400 400 fcf_stridx_t idx = (fcf_stridx_t)(ckp->ckp_strp - ckp->ckp_strs);
401 401
402 402 (void) strcpy(ckp->ckp_strp, s);
403 403 ckp->ckp_strp += strlen(s) + 1;
404 404
405 405 return (idx);
406 406 }
407 407
408 408 static int
409 409 fmd_ckpt_alloc(fmd_ckpt_t *ckp, uint64_t gen)
410 410 {
411 411 /*
412 412 * We've added up all the sections by now: add two more for SECT_NONE
413 413 * and SECT_STRTAB, and add the size of the section header table and
414 414 * string table to the total size. We know that the fcf_hdr_t is
415 415 * aligned so that that fcf_sec_t's can follow it, and that fcf_sec_t
416 416 * is aligned so that any section can follow it, so no extra padding
417 417 * bytes need to be allocated between any of these items.
418 418 */
419 419 ckp->ckp_secs += 2; /* for FCF_SECT_NONE and FCF_SECT_STRTAB */
420 420 ckp->ckp_size += sizeof (fcf_sec_t) * ckp->ckp_secs;
421 421 ckp->ckp_size += ckp->ckp_strn;
422 422
423 423 TRACE((FMD_DBG_CKPT, "alloc fcf buf size %u", ckp->ckp_size));
424 424 ckp->ckp_buf = fmd_zalloc(ckp->ckp_size, FMD_NOSLEEP);
425 425
426 426 if (ckp->ckp_buf == NULL)
427 427 return (-1); /* errno is set for us */
428 428
429 429 ckp->ckp_hdr = (void *)ckp->ckp_buf;
430 430
431 431 ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0] = FCF_MAG_MAG0;
432 432 ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG1] = FCF_MAG_MAG1;
433 433 ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG2] = FCF_MAG_MAG2;
434 434 ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG3] = FCF_MAG_MAG3;
435 435 ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] = FCF_MODEL_NATIVE;
436 436 ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] = FCF_ENCODE_NATIVE;
437 437 ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] = FCF_VERSION;
438 438
439 439 ckp->ckp_hdr->fcfh_hdrsize = sizeof (fcf_hdr_t);
440 440 ckp->ckp_hdr->fcfh_secsize = sizeof (fcf_sec_t);
441 441 ckp->ckp_hdr->fcfh_secnum = ckp->ckp_secs;
442 442 ckp->ckp_hdr->fcfh_secoff = sizeof (fcf_hdr_t);
443 443 ckp->ckp_hdr->fcfh_filesz = ckp->ckp_size;
444 444 ckp->ckp_hdr->fcfh_cgen = gen;
445 445
446 446 ckp->ckp_secs = 0; /* reset section counter for second pass */
447 447 ckp->ckp_secp = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
448 448 ckp->ckp_strs = (char *)ckp->ckp_buf + ckp->ckp_size - ckp->ckp_strn;
449 449 ckp->ckp_strp = ckp->ckp_strs + 1; /* use first byte as \0 */
450 450 ckp->ckp_ptr = (uchar_t *)(ckp->ckp_secp + ckp->ckp_hdr->fcfh_secnum);
451 451
452 452 (void) fmd_ckpt_section(ckp, NULL, FCF_SECT_NONE, 0);
453 453 return (0);
454 454 }
455 455
456 456 static int
457 457 fmd_ckpt_commit(fmd_ckpt_t *ckp)
458 458 {
459 459 fcf_sec_t *secbase = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
460 460 size_t stroff = ckp->ckp_size - ckp->ckp_strn;
461 461
462 462 /*
463 463 * Before committing the checkpoint, we assert that fmd_ckpt_t's sizes
464 464 * and current pointer locations all add up appropriately. Any ASSERTs
465 465 * which trip here likely indicate an inconsistency in the code for the
466 466 * reservation pass and the buffer update pass of the FCF subroutines.
467 467 */
468 468 ASSERT((size_t)(ckp->ckp_ptr - ckp->ckp_buf) == stroff);
469 469 (void) fmd_ckpt_section(ckp, NULL, FCF_SECT_STRTAB, ckp->ckp_strn);
470 470 ckp->ckp_ptr += ckp->ckp_strn; /* string table is already filled in */
471 471
472 472 ASSERT(ckp->ckp_secs == ckp->ckp_hdr->fcfh_secnum);
473 473 ASSERT(ckp->ckp_secp == secbase + ckp->ckp_hdr->fcfh_secnum);
474 474 ASSERT(ckp->ckp_ptr == ckp->ckp_buf + ckp->ckp_hdr->fcfh_filesz);
475 475
476 476 if (write(ckp->ckp_fd, ckp->ckp_buf, ckp->ckp_size) != ckp->ckp_size ||
477 477 fsync(ckp->ckp_fd) != 0 || close(ckp->ckp_fd) != 0)
478 478 return (-1); /* errno is set for us */
479 479
480 480 ckp->ckp_fd = -1; /* fd is now closed */
481 481 return (rename(ckp->ckp_src, ckp->ckp_dst) != 0);
482 482 }
483 483
484 484 static void
485 485 fmd_ckpt_resv(fmd_ckpt_t *ckp, size_t size, size_t align)
486 486 {
487 487 if (size != 0) {
488 488 ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, align) + size;
489 489 ckp->ckp_secs++;
490 490 }
491 491 }
492 492
493 493 static void
494 494 fmd_ckpt_resv_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
495 495 {
496 496 ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, _MAX_ALIGNMENT) + bp->buf_size;
497 497 ckp->ckp_strn += strlen(bp->buf_name) + 1;
498 498 ckp->ckp_secs++;
499 499 }
500 500
501 501 static void
502 502 fmd_ckpt_save_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
503 503 {
504 504 fcf_buf_t *fcfb = ckp->ckp_arg;
505 505
506 506 fcfb->fcfb_name = fmd_ckpt_string(ckp, bp->buf_name);
507 507 fcfb->fcfb_data = fmd_ckpt_section(ckp,
508 508 bp->buf_data, FCF_SECT_BUFFER, bp->buf_size);
509 509
510 510 ckp->ckp_arg = fcfb + 1;
511 511 }
512 512
513 513 static void
514 514 fmd_ckpt_save_event(fmd_ckpt_t *ckp, fmd_event_t *e)
515 515 {
516 516 fcf_event_t *fcfe = (void *)ckp->ckp_ptr;
517 517 fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
518 518 fmd_log_t *lp = ep->ev_log;
519 519
520 520 fcfe->fcfe_todsec = ep->ev_time.ftv_sec;
521 521 fcfe->fcfe_todnsec = ep->ev_time.ftv_nsec;
522 522 fcfe->fcfe_major = lp ? major(lp->log_stat.st_dev) : -1U;
523 523 fcfe->fcfe_minor = lp ? minor(lp->log_stat.st_dev) : -1U;
524 524 fcfe->fcfe_inode = lp ? lp->log_stat.st_ino : -1ULL;
525 525 fcfe->fcfe_offset = ep->ev_off;
526 526
527 527 ckp->ckp_ptr += sizeof (fcf_event_t);
528 528 }
529 529
530 530 static void
531 531 fmd_ckpt_save_nvlist(fmd_ckpt_t *ckp, nvlist_t *nvl)
532 532 {
533 533 fcf_nvl_t *fcfn = (void *)ckp->ckp_ptr;
534 534 char *nvbuf = (char *)ckp->ckp_ptr + sizeof (fcf_nvl_t);
535 535 size_t nvsize = 0;
536 536
537 537 (void) nvlist_size(nvl, &nvsize, NV_ENCODE_NATIVE);
538 538 fcfn->fcfn_size = (uint64_t)nvsize;
539 539
540 540 (void) nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_NATIVE, 0);
541 541 ckp->ckp_ptr += sizeof (fcf_nvl_t) + nvsize;
542 542
543 543 ckp->ckp_ptr = (uchar_t *)
544 544 P2ROUNDUP((uintptr_t)ckp->ckp_ptr, sizeof (uint64_t));
545 545 }
546 546
547 547 static void
548 548 fmd_ckpt_resv_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
549 549 {
550 550 fmd_ckpt_resv(ckp,
551 551 sizeof (fcf_event_t) * sgp->sg_count, sizeof (uint64_t));
552 552
553 553 ckp->ckp_strn += strlen(sgp->sg_name) + 1;
554 554 }
555 555
556 556 static void
557 557 fmd_ckpt_save_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
558 558 {
559 559 fcf_serd_t *fcfd = ckp->ckp_arg;
560 560 fcf_secidx_t evsec = FCF_SECT_NONE;
561 561 fmd_serd_elem_t *sep;
562 562
563 563 if (sgp->sg_count != 0) {
564 564 evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
565 565 sizeof (fcf_event_t) * sgp->sg_count);
566 566
567 567 for (sep = fmd_list_next(&sgp->sg_list);
568 568 sep != NULL; sep = fmd_list_next(sep))
569 569 fmd_ckpt_save_event(ckp, sep->se_event);
570 570 }
571 571
572 572 fcfd->fcfd_name = fmd_ckpt_string(ckp, sgp->sg_name);
573 573 fcfd->fcfd_events = evsec;
574 574 fcfd->fcfd_pad = 0;
575 575 fcfd->fcfd_n = sgp->sg_n;
576 576 fcfd->fcfd_t = sgp->sg_t;
577 577
578 578 ckp->ckp_arg = fcfd + 1;
579 579 }
580 580
581 581 static void
582 582 fmd_ckpt_resv_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
583 583 {
584 584 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
585 585 fmd_case_susp_t *cis;
586 586 uint_t n;
587 587
588 588 if (cip->ci_xprt != NULL)
589 589 return; /* do not checkpoint cases from remote transports */
590 590
591 591 n = fmd_buf_hash_count(&cip->ci_bufs);
592 592 fmd_buf_hash_apply(&cip->ci_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
593 593 fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
594 594
595 595 if (cip->ci_principal != NULL)
596 596 fmd_ckpt_resv(ckp, sizeof (fcf_event_t), sizeof (uint64_t));
597 597
598 598 fmd_ckpt_resv(ckp,
599 599 sizeof (fcf_event_t) * cip->ci_nitems, sizeof (uint64_t));
600 600
601 601 if (cip->ci_nsuspects != 0)
602 602 ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, sizeof (uint64_t));
603 603
604 604 cip->ci_nvsz = 0; /* compute size of packed suspect nvlist array */
605 605
606 606 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
607 607 size_t nvsize = 0;
608 608
609 609 (void) nvlist_size(cis->cis_nvl, &nvsize, NV_ENCODE_NATIVE);
610 610 cip->ci_nvsz += sizeof (fcf_nvl_t) + nvsize;
611 611 cip->ci_nvsz = P2ROUNDUP(cip->ci_nvsz, sizeof (uint64_t));
612 612 }
613 613
614 614 fmd_ckpt_resv(ckp, cip->ci_nvsz, sizeof (uint64_t));
615 615 fmd_ckpt_resv(ckp, sizeof (fcf_case_t), sizeof (uint32_t));
616 616 ckp->ckp_strn += strlen(cip->ci_uuid) + 1;
617 617 }
618 618
619 619 static void
620 620 fmd_ckpt_save_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
621 621 {
622 622 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
623 623
624 624 fmd_case_item_t *cit;
625 625 fmd_case_susp_t *cis;
626 626 fcf_case_t fcfc;
627 627 uint_t n;
628 628
629 629 fcf_secidx_t bufsec = FCF_SECIDX_NONE;
630 630 fcf_secidx_t evsec = FCF_SECIDX_NONE;
631 631 fcf_secidx_t nvsec = FCF_SECIDX_NONE;
632 632 fcf_secidx_t prsec = FCF_SECIDX_NONE;
633 633
634 634 if (cip->ci_xprt != NULL)
635 635 return; /* do not checkpoint cases from remote transports */
636 636
637 637 if ((n = fmd_buf_hash_count(&cip->ci_bufs)) != 0) {
638 638 size_t size = sizeof (fcf_buf_t) * n;
639 639 fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
640 640
641 641 fmd_buf_hash_apply(&cip->ci_bufs,
642 642 (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
643 643
644 644 bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
645 645 fmd_free(bufs, size);
646 646 }
647 647
648 648 if (cip->ci_principal != NULL) {
649 649 prsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
650 650 sizeof (fcf_event_t));
651 651
652 652 fmd_ckpt_save_event(ckp, cip->ci_principal);
653 653 }
654 654
655 655 if (cip->ci_nitems != 0) {
656 656 evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
657 657 sizeof (fcf_event_t) * cip->ci_nitems);
658 658
659 659 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
660 660 fmd_ckpt_save_event(ckp, cit->cit_event);
661 661 }
662 662
663 663 if (cip->ci_nsuspects != 0) {
664 664 nvsec = fmd_ckpt_section(ckp, NULL,
665 665 FCF_SECT_NVLISTS, cip->ci_nvsz);
666 666
667 667 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
668 668 fmd_ckpt_save_nvlist(ckp, cis->cis_nvl);
669 669 }
670 670
671 671 fcfc.fcfc_uuid = fmd_ckpt_string(ckp, cip->ci_uuid);
672 672 fcfc.fcfc_bufs = bufsec;
673 673 fcfc.fcfc_principal = prsec;
674 674 fcfc.fcfc_events = evsec;
675 675 fcfc.fcfc_suspects = nvsec;
676 676
677 677 switch (cip->ci_state) {
678 678 case FMD_CASE_UNSOLVED:
679 679 fcfc.fcfc_state = FCF_CASE_UNSOLVED;
680 680 break;
681 681 case FMD_CASE_SOLVED:
682 682 fcfc.fcfc_state = FCF_CASE_SOLVED;
683 683 break;
684 684 case FMD_CASE_CLOSE_WAIT:
685 685 fcfc.fcfc_state = FCF_CASE_CLOSE_WAIT;
686 686 break;
687 687 default:
688 688 fmd_panic("case %p (%s) has invalid state %u",
689 689 (void *)cp, cip->ci_uuid, cip->ci_state);
690 690 }
691 691
692 692 (void) fmd_ckpt_section(ckp, &fcfc, FCF_SECT_CASE, sizeof (fcf_case_t));
693 693 }
694 694
695 695 static void
696 696 fmd_ckpt_resv_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
697 697 {
698 698 fmd_case_t *cp;
699 699 uint_t n;
700 700
701 701 for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
702 702 fmd_ckpt_resv_case(ckp, cp);
703 703
704 704 n = fmd_serd_hash_count(&mp->mod_serds);
705 705 fmd_serd_hash_apply(&mp->mod_serds,
706 706 (fmd_serd_eng_f *)fmd_ckpt_resv_serd, ckp);
707 707 fmd_ckpt_resv(ckp, sizeof (fcf_serd_t) * n, sizeof (uint64_t));
708 708
709 709 n = fmd_buf_hash_count(&mp->mod_bufs);
710 710 fmd_buf_hash_apply(&mp->mod_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
711 711 fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
712 712
713 713 fmd_ckpt_resv(ckp, sizeof (fcf_module_t), sizeof (uint32_t));
714 714 ckp->ckp_strn += strlen(mp->mod_name) + 1;
715 715 ckp->ckp_strn += strlen(mp->mod_path) + 1;
716 716 ckp->ckp_strn += strlen(mp->mod_info->fmdi_desc) + 1;
717 717 ckp->ckp_strn += strlen(mp->mod_info->fmdi_vers) + 1;
718 718 }
719 719
720 720 static void
721 721 fmd_ckpt_save_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
722 722 {
723 723 fcf_secidx_t bufsec = FCF_SECIDX_NONE;
724 724 fcf_module_t fcfm;
725 725 fmd_case_t *cp;
726 726 uint_t n;
727 727
728 728 for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
729 729 fmd_ckpt_save_case(ckp, cp);
730 730
731 731 if ((n = fmd_serd_hash_count(&mp->mod_serds)) != 0) {
732 732 size_t size = sizeof (fcf_serd_t) * n;
733 733 fcf_serd_t *serds = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
734 734
735 735 fmd_serd_hash_apply(&mp->mod_serds,
736 736 (fmd_serd_eng_f *)fmd_ckpt_save_serd, ckp);
737 737
738 738 (void) fmd_ckpt_section(ckp, serds, FCF_SECT_SERD, size);
739 739 fmd_free(serds, size);
740 740 }
741 741
742 742 if ((n = fmd_buf_hash_count(&mp->mod_bufs)) != 0) {
743 743 size_t size = sizeof (fcf_buf_t) * n;
744 744 fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
745 745
746 746 fmd_buf_hash_apply(&mp->mod_bufs,
747 747 (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
748 748
749 749 bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
750 750 fmd_free(bufs, size);
751 751 }
752 752
753 753 fcfm.fcfm_name = fmd_ckpt_string(ckp, mp->mod_name);
754 754 fcfm.fcfm_path = fmd_ckpt_string(ckp, mp->mod_path);
755 755 fcfm.fcfm_desc = fmd_ckpt_string(ckp, mp->mod_info->fmdi_desc);
756 756 fcfm.fcfm_vers = fmd_ckpt_string(ckp, mp->mod_info->fmdi_vers);
757 757 fcfm.fcfm_bufs = bufsec;
758 758
759 759 (void) fmd_ckpt_section(ckp, &fcfm,
760 760 FCF_SECT_MODULE, sizeof (fcf_module_t));
761 761 }
762 762
763 763 void
764 764 fmd_ckpt_save(fmd_module_t *mp)
765 765 {
766 766 struct stat64 st;
767 767 char path[PATH_MAX];
768 768 mode_t dirmode;
769 769
|
↓ open down ↓ |
769 lines elided |
↑ open up ↑ |
770 770 hrtime_t now = gethrtime();
771 771 fmd_ckpt_t ckp;
772 772 int err;
773 773
774 774 ASSERT(fmd_module_locked(mp));
775 775
776 776 /*
777 777 * If checkpointing is disabled for the module, just return. We must
778 778 * commit the module state anyway to transition pending log events.
779 779 */
780 - if (mp->mod_stats->ms_ckpt_save.fmds_value.bool == FMD_B_FALSE) {
780 + if (mp->mod_stats->ms_ckpt_save.fmds_value.b == FMD_B_FALSE) {
781 781 fmd_module_commit(mp);
782 782 return;
783 783 }
784 784
785 785 if (!(mp->mod_flags & (FMD_MOD_MDIRTY | FMD_MOD_CDIRTY)))
786 786 return; /* no checkpoint is necessary for this module */
787 787
788 788 TRACE((FMD_DBG_CKPT, "ckpt save begin %s %llu",
789 789 mp->mod_name, mp->mod_gen + 1));
790 790
791 791 /*
792 792 * If the per-module checkpoint directory isn't found or isn't of type
793 793 * directory, move aside whatever is there (if anything) and attempt
794 794 * to mkdir(2) a new module checkpoint directory. If this fails, we
795 795 * have no choice but to abort the checkpoint and try again later.
796 796 */
797 797 if (stat64(mp->mod_ckpt, &st) != 0 || !S_ISDIR(st.st_mode)) {
798 798 (void) snprintf(path, sizeof (path), "%s-", mp->mod_ckpt);
799 799 (void) rename(mp->mod_ckpt, path);
800 800 (void) fmd_conf_getprop(fmd.d_conf, "ckpt.dirmode", &dirmode);
801 801
802 802 if (mkdir(mp->mod_ckpt, dirmode) != 0) {
803 803 fmd_error(EFMD_CKPT_MKDIR,
804 804 "failed to mkdir %s", mp->mod_ckpt);
805 805 return; /* return without clearing dirty bits */
806 806 }
807 807 }
808 808
809 809 /*
810 810 * Create a temporary file to write out the checkpoint into, and create
811 811 * a fmd_ckpt_t structure to manage construction of the checkpoint. We
812 812 * then figure out how much space will be required, and allocate it.
813 813 */
814 814 if (fmd_ckpt_create(&ckp, mp) == -1) {
815 815 fmd_error(EFMD_CKPT_CREATE, "failed to create %s", ckp.ckp_src);
816 816 return;
817 817 }
818 818
819 819 fmd_ckpt_resv_module(&ckp, mp);
820 820
821 821 if (fmd_ckpt_alloc(&ckp, mp->mod_gen + 1) != 0) {
822 822 fmd_error(EFMD_CKPT_NOMEM, "failed to build %s", ckp.ckp_src);
823 823 fmd_ckpt_destroy(&ckp);
824 824 return;
825 825 }
826 826
827 827 /*
828 828 * Fill in the checkpoint content, write it to disk, sync it, and then
829 829 * atomically rename it to the destination path. If this fails, we
830 830 * have no choice but to leave all our dirty bits set and return.
831 831 */
832 832 fmd_ckpt_save_module(&ckp, mp);
833 833 err = fmd_ckpt_commit(&ckp);
834 834 fmd_ckpt_destroy(&ckp);
835 835
836 836 if (err != 0) {
837 837 fmd_error(EFMD_CKPT_COMMIT, "failed to commit %s", ckp.ckp_dst);
838 838 return; /* return without clearing dirty bits */
839 839 }
840 840
841 841 fmd_module_commit(mp);
842 842 TRACE((FMD_DBG_CKPT, "ckpt save end %s", mp->mod_name));
843 843
844 844 mp->mod_stats->ms_ckpt_cnt.fmds_value.ui64++;
845 845 mp->mod_stats->ms_ckpt_time.fmds_value.ui64 += gethrtime() - now;
846 846
847 847 fmd_dprintf(FMD_DBG_CKPT, "saved checkpoint of %s (%llu)\n",
848 848 mp->mod_name, mp->mod_gen);
849 849 }
850 850
851 851 /*
852 852 * Utility function to retrieve a pointer to a section's header and verify that
853 853 * it is of the expected type or it is a FCF_SECT_NONE reference.
854 854 */
855 855 static const fcf_sec_t *
856 856 fmd_ckpt_secptr(fmd_ckpt_t *ckp, fcf_secidx_t sid, uint_t type)
857 857 {
858 858 const fcf_sec_t *sp = (void *)(ckp->ckp_buf +
859 859 ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * sid);
860 860
861 861 return (sid < ckp->ckp_secs && (sp->fcfs_type == type ||
862 862 sp->fcfs_type == FCF_SECT_NONE) ? sp : NULL);
863 863 }
864 864
865 865 /*
866 866 * Utility function to retrieve the data pointer for a particular section. The
867 867 * validity of the header values has already been checked by fmd_ckpt_open().
868 868 */
869 869 static const void *
870 870 fmd_ckpt_dataptr(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
871 871 {
872 872 return (ckp->ckp_buf + sp->fcfs_offset);
873 873 }
874 874
875 875 /*
876 876 * Utility function to retrieve the end of the data region for a particular
877 877 * section. The validity of this value has been confirmed by fmd_ckpt_open().
878 878 */
879 879 static const void *
880 880 fmd_ckpt_datalim(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
881 881 {
882 882 return (ckp->ckp_buf + sp->fcfs_offset + sp->fcfs_size);
883 883 }
884 884
885 885 /*
886 886 * Utility function to retrieve a string pointer (fcf_stridx_t). If the string
887 887 * index is valid, the string data is returned; otherwise 'defstr' is returned.
888 888 */
889 889 static const char *
890 890 fmd_ckpt_strptr(fmd_ckpt_t *ckp, fcf_stridx_t sid, const char *defstr)
891 891 {
892 892 return (sid < ckp->ckp_strn ? ckp->ckp_strs + sid : defstr);
893 893 }
894 894
895 895 static void
896 896 fmd_ckpt_restore_events(fmd_ckpt_t *ckp, fcf_secidx_t sid,
897 897 void (*func)(void *, fmd_event_t *), void *arg)
898 898 {
899 899 const fcf_event_t *fcfe;
900 900 const fcf_sec_t *sp;
901 901 fmd_timeval_t ftv;
902 902 fmd_log_t *lp, *errlp;
903 903 uint_t i, n;
904 904 uint32_t e_maj, e_min;
905 905 uint64_t e_ino;
906 906
907 907 if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_EVENTS)) == NULL) {
908 908 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
909 909 "invalid link to section %u: expected events\n", sid);
910 910 }
911 911
912 912 if (sp->fcfs_size == 0)
913 913 return; /* empty events section or type none */
914 914
915 915 fcfe = fmd_ckpt_dataptr(ckp, sp);
916 916 n = sp->fcfs_size / sp->fcfs_entsize;
917 917
918 918 /*
919 919 * Hold the reader lock on log pointers to block log rotation during
920 920 * the section restore so that we can safely insert refs to d_errlog.
921 921 */
922 922 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
923 923 errlp = fmd.d_errlog;
924 924
925 925 e_maj = major(errlp->log_stat.st_dev);
926 926 e_min = minor(errlp->log_stat.st_dev);
927 927 e_ino = errlp->log_stat.st_ino;
928 928
929 929 for (i = 0; i < n; i++) {
930 930 fmd_event_t *ep;
931 931
932 932 ftv.ftv_sec = fcfe->fcfe_todsec;
933 933 ftv.ftv_nsec = fcfe->fcfe_todnsec;
934 934
935 935 if (e_ino == fcfe->fcfe_inode &&
936 936 e_maj == fcfe->fcfe_major &&
937 937 e_min == fcfe->fcfe_minor)
938 938 lp = errlp;
939 939 else
940 940 lp = NULL;
941 941
942 942 ep = fmd_event_recreate(FMD_EVT_PROTOCOL,
943 943 &ftv, NULL, NULL, lp, fcfe->fcfe_offset, 0);
944 944 fmd_event_hold(ep);
945 945 func(arg, ep);
946 946 fmd_event_rele(ep);
947 947
948 948 fcfe = (fcf_event_t *)((uintptr_t)fcfe + sp->fcfs_entsize);
949 949 }
950 950
951 951 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
952 952 }
953 953
954 954 static int
955 955 fmd_ckpt_restore_suspects(fmd_ckpt_t *ckp, fmd_case_t *cp, fcf_secidx_t sid)
956 956 {
957 957 const fcf_nvl_t *fcfn, *endn;
958 958 const fcf_sec_t *sp;
959 959 nvlist_t *nvl;
960 960 int err, i;
961 961
962 962 if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_NVLISTS)) == NULL) {
963 963 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
964 964 "invalid link to section %u: expected nvlists\n", sid);
965 965 }
966 966
967 967 fcfn = fmd_ckpt_dataptr(ckp, sp);
968 968 endn = fmd_ckpt_datalim(ckp, sp);
969 969
970 970 for (i = 0; fcfn < endn; i++) {
971 971 char *data = (char *)fcfn + sp->fcfs_entsize;
972 972 size_t size = (size_t)fcfn->fcfn_size;
973 973
974 974 if (fcfn->fcfn_size > (size_t)((char *)endn - data)) {
975 975 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "nvlist %u [%d] "
976 976 "size %u exceeds buffer\n", sid, i, size);
977 977 }
978 978
979 979 if ((err = nvlist_xunpack(data, size, &nvl, &fmd.d_nva)) != 0) {
980 980 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "failed to "
981 981 "unpack nvlist %u [%d]: %s\n", sid, i,
982 982 fmd_strerror(err));
983 983 }
984 984
985 985 fmd_case_insert_suspect(cp, nvl);
986 986
987 987 size = sp->fcfs_entsize + fcfn->fcfn_size;
988 988 size = P2ROUNDUP(size, sizeof (uint64_t));
989 989 fcfn = (fcf_nvl_t *)((uintptr_t)fcfn + size);
990 990 }
991 991
992 992 return (i);
993 993 }
994 994
995 995 static void
996 996 fmd_ckpt_restore_bufs(fmd_ckpt_t *ckp, fmd_module_t *mp,
997 997 fmd_case_t *cp, fcf_secidx_t sid)
998 998 {
999 999 const fcf_sec_t *sp, *dsp;
1000 1000 const fcf_buf_t *fcfb;
1001 1001 uint_t i, n;
1002 1002
1003 1003 if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_BUFS)) == NULL) {
1004 1004 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1005 1005 "invalid link to section %u: expected bufs\n", sid);
1006 1006 }
1007 1007
1008 1008 if (sp->fcfs_size == 0)
1009 1009 return; /* empty events section or type none */
1010 1010
1011 1011 fcfb = fmd_ckpt_dataptr(ckp, sp);
1012 1012 n = sp->fcfs_size / sp->fcfs_entsize;
1013 1013
1014 1014 for (i = 0; i < n; i++) {
1015 1015 dsp = fmd_ckpt_secptr(ckp, fcfb->fcfb_data, FCF_SECT_BUFFER);
1016 1016
1017 1017 if (dsp == NULL) {
1018 1018 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "invalid %u "
1019 1019 "buffer link %u\n", sid, fcfb->fcfb_data);
1020 1020 }
1021 1021
1022 1022 fmd_buf_write((fmd_hdl_t *)mp, cp,
1023 1023 fmd_ckpt_strptr(ckp, fcfb->fcfb_name, "<CORRUPT>"),
1024 1024 ckp->ckp_buf + dsp->fcfs_offset, dsp->fcfs_size);
1025 1025
1026 1026 fcfb = (fcf_buf_t *)((uintptr_t)fcfb + sp->fcfs_entsize);
1027 1027 }
1028 1028 }
1029 1029
1030 1030 static void
1031 1031 fmd_ckpt_restore_case(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1032 1032 {
1033 1033 const fcf_case_t *fcfc = fmd_ckpt_dataptr(ckp, sp);
1034 1034 const char *uuid = fmd_ckpt_strptr(ckp, fcfc->fcfc_uuid, NULL);
1035 1035 fmd_case_t *cp;
1036 1036 int n;
1037 1037
1038 1038 if (uuid == NULL || fcfc->fcfc_state > FCF_CASE_CLOSE_WAIT) {
1039 1039 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "corrupt %u case uuid "
1040 1040 "and/or state\n", (uint_t)(sp - ckp->ckp_secp));
1041 1041 }
1042 1042
1043 1043 fmd_module_lock(mp);
1044 1044
1045 1045 if ((cp = fmd_case_recreate(mp, NULL,
1046 1046 fcfc->fcfc_state != FCF_CASE_UNSOLVED ? FCF_CASE_SOLVED :
1047 1047 FMD_CASE_UNSOLVED, uuid, NULL)) == NULL) {
1048 1048 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1049 1049 "duplicate case uuid: %s\n", uuid);
1050 1050 }
1051 1051
1052 1052 fmd_ckpt_restore_events(ckp, fcfc->fcfc_principal,
1053 1053 (void (*)(void *, fmd_event_t *))fmd_case_insert_principal, cp);
1054 1054
1055 1055 fmd_ckpt_restore_events(ckp, fcfc->fcfc_events,
1056 1056 (void (*)(void *, fmd_event_t *))fmd_case_insert_event, cp);
1057 1057
1058 1058 /*
1059 1059 * Once solved, treat suspects from resource cache as master copy.
1060 1060 *
1061 1061 * If !fmd.d_running, this module must be a builtin, and so we don't
1062 1062 * want to restore suspects or call fmd_case_transition_update() at this
1063 1063 * stage. The suspects will be added later from the resource cache.
1064 1064 * Calling fmd_case_transition("SOLVED") is OK here as the state is
1065 1065 * already solved, so all it does is update the case flags.
1066 1066 */
1067 1067 if (fmd.d_running && (n = ((fmd_case_impl_t *)cp)->ci_nsuspects) == 0)
1068 1068 n = fmd_ckpt_restore_suspects(ckp, cp, fcfc->fcfc_suspects);
1069 1069
1070 1070 if (!fmd.d_running)
1071 1071 fmd_case_transition(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1072 1072 else if (fcfc->fcfc_state == FCF_CASE_SOLVED)
1073 1073 fmd_case_transition_update(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1074 1074 else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n != 0)
1075 1075 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_SOLVED);
1076 1076 else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n == 0)
1077 1077 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
1078 1078
1079 1079 fmd_module_unlock(mp);
1080 1080 fmd_ckpt_restore_bufs(ckp, mp, cp, fcfc->fcfc_bufs);
1081 1081 }
1082 1082
1083 1083 static void
1084 1084 fmd_ckpt_restore_serd(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1085 1085 {
1086 1086 const fcf_serd_t *fcfd = fmd_ckpt_dataptr(ckp, sp);
1087 1087 uint_t i, n = sp->fcfs_size / sp->fcfs_entsize;
1088 1088 const fcf_sec_t *esp;
1089 1089 const char *s;
1090 1090
1091 1091 for (i = 0; i < n; i++) {
1092 1092 esp = fmd_ckpt_secptr(ckp, fcfd->fcfd_events, FCF_SECT_EVENTS);
1093 1093
1094 1094 if (esp == NULL) {
1095 1095 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1096 1096 "invalid events link %u\n", fcfd->fcfd_events);
1097 1097 }
1098 1098
1099 1099 if ((s = fmd_ckpt_strptr(ckp, fcfd->fcfd_name, NULL)) == NULL) {
1100 1100 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1101 1101 "serd name %u is corrupt\n", fcfd->fcfd_name);
1102 1102 }
1103 1103
1104 1104 fmd_serd_create((fmd_hdl_t *)mp, s, fcfd->fcfd_n, fcfd->fcfd_t);
1105 1105 fmd_module_lock(mp);
1106 1106
1107 1107 fmd_ckpt_restore_events(ckp, fcfd->fcfd_events,
1108 1108 (void (*)(void *, fmd_event_t *))fmd_serd_eng_record,
1109 1109 fmd_serd_eng_lookup(&mp->mod_serds, s));
1110 1110
1111 1111 fmd_module_unlock(mp);
1112 1112 fcfd = (fcf_serd_t *)((uintptr_t)fcfd + sp->fcfs_entsize);
1113 1113 }
1114 1114 }
1115 1115
1116 1116 static void
1117 1117 fmd_ckpt_restore_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
1118 1118 {
1119 1119 const fcf_module_t *fcfm = fmd_ckpt_dataptr(ckp, ckp->ckp_modp);
1120 1120 const fcf_sec_t *sp;
1121 1121 uint_t i;
1122 1122
1123 1123 if (strcmp(mp->mod_name, fmd_ckpt_strptr(ckp, fcfm->fcfm_name, "")) ||
1124 1124 strcmp(mp->mod_path, fmd_ckpt_strptr(ckp, fcfm->fcfm_path, ""))) {
1125 1125 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1126 1126 "checkpoint is not for module %s\n", mp->mod_name);
1127 1127 }
1128 1128
1129 1129 for (i = 0; i < ckp->ckp_secs; i++) {
1130 1130 sp = (void *)(ckp->ckp_buf +
1131 1131 ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
1132 1132
1133 1133 switch (sp->fcfs_type) {
1134 1134 case FCF_SECT_CASE:
1135 1135 fmd_ckpt_restore_case(ckp, mp, sp);
1136 1136 break;
1137 1137 case FCF_SECT_SERD:
1138 1138 fmd_ckpt_restore_serd(ckp, mp, sp);
1139 1139 break;
1140 1140 }
1141 1141 }
1142 1142
1143 1143 fmd_ckpt_restore_bufs(ckp, mp, NULL, fcfm->fcfm_bufs);
1144 1144 mp->mod_gen = ckp->ckp_hdr->fcfh_cgen;
1145 1145 }
1146 1146
|
↓ open down ↓ |
356 lines elided |
↑ open up ↑ |
1147 1147 /*
1148 1148 * Restore a checkpoint for the specified module. Any errors which occur
1149 1149 * during restore will call fmd_ckpt_error() or trigger an fmd_api_error(),
1150 1150 * either of which will automatically unlock the module and trigger an abort.
1151 1151 */
1152 1152 void
1153 1153 fmd_ckpt_restore(fmd_module_t *mp)
1154 1154 {
1155 1155 fmd_ckpt_t ckp;
1156 1156
1157 - if (mp->mod_stats->ms_ckpt_restore.fmds_value.bool == FMD_B_FALSE)
1157 + if (mp->mod_stats->ms_ckpt_restore.fmds_value.b == FMD_B_FALSE)
1158 1158 return; /* never restore checkpoints for this module */
1159 1159
1160 1160 TRACE((FMD_DBG_CKPT, "ckpt restore begin %s", mp->mod_name));
1161 1161
1162 1162 if (fmd_ckpt_open(&ckp, mp) == -1) {
1163 1163 if (errno != ENOENT)
1164 1164 fmd_error(EFMD_CKPT_OPEN, "can't open %s", ckp.ckp_src);
1165 1165 TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1166 1166 return;
1167 1167 }
1168 1168
1169 1169 ASSERT(!fmd_module_locked(mp));
1170 1170 fmd_ckpt_restore_module(&ckp, mp);
1171 1171 fmd_ckpt_destroy(&ckp);
1172 1172 fmd_module_clrdirty(mp);
1173 1173
1174 1174 TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1175 1175 fmd_dprintf(FMD_DBG_CKPT, "restored checkpoint of %s\n", mp->mod_name);
1176 1176 }
1177 1177
1178 1178 /*
1179 1179 * Delete the module's checkpoint file. This is used by the ckpt.zero property
1180 1180 * code or by the fmadm reset RPC service path to force a checkpoint delete.
1181 1181 */
1182 1182 void
1183 1183 fmd_ckpt_delete(fmd_module_t *mp)
1184 1184 {
1185 1185 char path[PATH_MAX];
1186 1186
1187 1187 (void) snprintf(path, sizeof (path),
1188 1188 "%s/%s", mp->mod_ckpt, mp->mod_name);
1189 1189
1190 1190 TRACE((FMD_DBG_CKPT, "delete %s ckpt", mp->mod_name));
1191 1191
1192 1192 if (unlink(path) != 0 && errno != ENOENT)
1193 1193 fmd_error(EFMD_CKPT_DELETE, "failed to delete %s", path);
1194 1194 }
1195 1195
1196 1196 /*
1197 1197 * Move aside the module's checkpoint file if checkpoint restore has failed.
1198 1198 * We rename the file rather than deleting it in the hopes that someone might
1199 1199 * send it to us for post-mortem analysis of whether we have a checkpoint bug.
1200 1200 */
1201 1201 void
1202 1202 fmd_ckpt_rename(fmd_module_t *mp)
1203 1203 {
1204 1204 char src[PATH_MAX], dst[PATH_MAX];
1205 1205
1206 1206 (void) snprintf(src, sizeof (src), "%s/%s", mp->mod_ckpt, mp->mod_name);
1207 1207 (void) snprintf(dst, sizeof (dst), "%s-", src);
1208 1208
1209 1209 TRACE((FMD_DBG_CKPT, "rename %s ckpt", mp->mod_name));
1210 1210
1211 1211 if (rename(src, dst) != 0 && errno != ENOENT)
1212 1212 fmd_error(EFMD_CKPT_DELETE, "failed to rename %s", src);
1213 1213 }
|
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX