1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/mkdev.h>
  29 #include <sys/stat.h>
  30 
  31 #include <strings.h>
  32 #include <unistd.h>
  33 #include <limits.h>
  34 #include <fcntl.h>
  35 
  36 #include <fmd_module.h>
  37 #include <fmd_error.h>
  38 #include <fmd_alloc.h>
  39 #include <fmd_case.h>
  40 #include <fmd_serd.h>
  41 #include <fmd_subr.h>
  42 #include <fmd_conf.h>
  43 #include <fmd_event.h>
  44 #include <fmd_log.h>
  45 #include <fmd_api.h>
  46 #include <fmd_ckpt.h>
  47 
  48 #include <fmd.h>
  49 
  50 #define P2ROUNDUP(x, align)     (-(-(x) & -(align)))
  51 #define IS_P2ALIGNED(v, a)      ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
  52 
  53 /*
  54  * The fmd_ckpt_t structure is used to manage all of the state needed by the
  55  * various subroutines that save and restore checkpoints.  The structure is
  56  * initialized using fmd_ckpt_create() or fmd_ckpt_open() and is destroyed
  57  * by fmd_ckpt_destroy().  Refer to the subroutines below for more details.
  58  */
typedef struct fmd_ckpt {
        char ckp_src[PATH_MAX]; /* file read on restore, or temp file written */
        char ckp_dst[PATH_MAX]; /* name ckp_src is renamed to on commit */
        uchar_t *ckp_buf;       /* base address of in-memory checkpoint image */
        fcf_hdr_t *ckp_hdr;     /* file header; aliases the start of ckp_buf */
        uchar_t *ckp_ptr;       /* current write position within ckp_buf */
        size_t ckp_size;        /* size of ckp_buf in bytes */
        fcf_sec_t *ckp_secp;    /* section header table (next entry on save) */
        fcf_sec_t *ckp_modp;    /* module section header found on open */
        uint_t ckp_secs;        /* number of sections */
        char *ckp_strs;         /* string table base pointer */
        char *ckp_strp;         /* next free byte in the string table */
        size_t ckp_strn;        /* string table size in bytes */
        int ckp_fd;             /* descriptor for ckp_src (-1 once closed) */
        fmd_module_t *ckp_mp;   /* module being checkpointed or restored */
        void *ckp_arg;          /* cursor passed to hash-apply callbacks */
} fmd_ckpt_t;
  76 
/* One row of the per-section-type table used to emit and validate sections. */
typedef struct fmd_ckpt_desc {
        uint64_t secd_size;     /* minimum section size accepted on restore */
        uint32_t secd_entsize;  /* entry size written; minimum on restore */
        uint32_t secd_align;    /* required section alignment (power of two) */
} fmd_ckpt_desc_t;
  82 
  83 /*
  84  * Table of FCF section descriptions.  Here we record the minimum size for each
  85  * section (for use during restore) and the expected entry size and alignment
  86  * for each section (for use during both checkpoint and restore).
  87  */
/*
 * Indexed by section type (fcfs_type).  Each row is, in order,
 * { secd_size, secd_entsize, secd_align }; the trailing comment on each row
 * names the section type it describes.
 */
static const fmd_ckpt_desc_t _fmd_ckpt_sections[] = {
{ 0, 0, sizeof (uint8_t) },                                        /* NONE */
{ 1, 0, sizeof (char) },                                           /* STRTAB */
{ sizeof (fcf_module_t), 0, sizeof (uint32_t) },                   /* MODULE */
{ sizeof (fcf_case_t), 0, sizeof (uint32_t) },                     /* CASE */
{ sizeof (fcf_buf_t), sizeof (fcf_buf_t), sizeof (uint32_t) },     /* BUFS */
{ 0, 0, _MAX_ALIGNMENT },                                          /* BUFFER */
{ sizeof (fcf_serd_t), sizeof (fcf_serd_t), sizeof (uint64_t) },   /* SERD */
{ sizeof (fcf_event_t), sizeof (fcf_event_t), sizeof (uint64_t) }, /* EVENTS */
{ sizeof (fcf_nvl_t), sizeof (fcf_nvl_t), sizeof (uint64_t) },     /* NVLISTS */
};
  99 
 100 static int
 101 fmd_ckpt_create(fmd_ckpt_t *ckp, fmd_module_t *mp)
 102 {
 103         const char *dir = mp->mod_ckpt;
 104         const char *name = mp->mod_name;
 105         mode_t mode;
 106 
 107         bzero(ckp, sizeof (fmd_ckpt_t));
 108         ckp->ckp_mp = mp;
 109 
 110         ckp->ckp_size = sizeof (fcf_hdr_t);
 111         ckp->ckp_strn = 1; /* for \0 */
 112 
 113         (void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s+", dir, name);
 114         (void) snprintf(ckp->ckp_dst, PATH_MAX, "%s/%s", dir, name);
 115 
 116         (void) unlink(ckp->ckp_src);
 117         (void) fmd_conf_getprop(fmd.d_conf, "ckpt.mode", &mode);
 118         ckp->ckp_fd = open64(ckp->ckp_src, O_WRONLY | O_CREAT | O_EXCL, mode);
 119 
 120         return (ckp->ckp_fd);
 121 }
 122 
 123 /*PRINTFLIKE2*/
 124 static int
 125 fmd_ckpt_inval(fmd_ckpt_t *ckp, const char *format, ...)
 126 {
 127         va_list ap;
 128 
 129         va_start(ap, format);
 130         fmd_verror(EFMD_CKPT_INVAL, format, ap);
 131         va_end(ap);
 132 
 133         fmd_free(ckp->ckp_buf, ckp->ckp_size);
 134         return (fmd_set_errno(EFMD_CKPT_INVAL));
 135 }
 136 
 137 static int
 138 fmd_ckpt_open(fmd_ckpt_t *ckp, fmd_module_t *mp)
 139 {
 140         struct stat64 st;
 141         uint64_t seclen;
 142         uint_t i;
 143         int err;
 144 
 145         bzero(ckp, sizeof (fmd_ckpt_t));
 146         ckp->ckp_mp = mp;
 147 
 148         (void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s",
 149             mp->mod_ckpt, mp->mod_name);
 150 
 151         if ((ckp->ckp_fd = open(ckp->ckp_src, O_RDONLY)) == -1)
 152                 return (-1); /* failed to open checkpoint file */
 153 
 154         if (fstat64(ckp->ckp_fd, &st) == -1) {
 155                 err = errno;
 156                 (void) close(ckp->ckp_fd);
 157                 return (fmd_set_errno(err));
 158         }
 159 
 160         ckp->ckp_buf = fmd_alloc(st.st_size, FMD_SLEEP);
 161         ckp->ckp_hdr = (void *)ckp->ckp_buf;
 162         ckp->ckp_size = read(ckp->ckp_fd, ckp->ckp_buf, st.st_size);
 163 
 164         if (ckp->ckp_size != st.st_size || ckp->ckp_size < sizeof (fcf_hdr_t) ||
 165             ckp->ckp_size != ckp->ckp_hdr->fcfh_filesz) {
 166                 err = ckp->ckp_size == (size_t)-1L ? errno : EFMD_CKPT_SHORT;
 167                 fmd_free(ckp->ckp_buf, st.st_size);
 168                 (void) close(ckp->ckp_fd);
 169                 return (fmd_set_errno(err));
 170         }
 171 
 172         (void) close(ckp->ckp_fd);
 173         ckp->ckp_fd = -1;
 174 
 175         /*
 176          * Once we've read in a consistent copy of the FCF file and we're sure
 177          * the header can be accessed, go through it and make sure everything
 178          * is valid.  We also check that unused bits are zero so we can expand
 179          * to use them safely in the future and support old files if needed.
 180          */
 181         if (bcmp(&ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0],
 182             FCF_MAG_STRING, FCF_MAG_STRLEN) != 0)
 183                 return (fmd_ckpt_inval(ckp, "bad checkpoint magic string\n"));
 184 
 185         if (ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] != FCF_MODEL_NATIVE)
 186                 return (fmd_ckpt_inval(ckp, "bad checkpoint data model\n"));
 187 
 188         if (ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] != FCF_ENCODE_NATIVE)
 189                 return (fmd_ckpt_inval(ckp, "bad checkpoint data encoding\n"));
 190 
 191         if (ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] != FCF_VERSION_1) {
 192                 return (fmd_ckpt_inval(ckp, "bad checkpoint version %u\n",
 193                     ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION]));
 194         }
 195 
 196         for (i = FCF_ID_PAD; i < FCF_ID_SIZE; i++) {
 197                 if (ckp->ckp_hdr->fcfh_ident[i] != 0) {
 198                         return (fmd_ckpt_inval(ckp,
 199                             "bad checkpoint padding at id[%d]", i));
 200                 }
 201         }
 202 
 203         if (ckp->ckp_hdr->fcfh_flags & ~FCF_FL_VALID)
 204                 return (fmd_ckpt_inval(ckp, "bad checkpoint flags\n"));
 205 
 206         if (ckp->ckp_hdr->fcfh_pad != 0)
 207                 return (fmd_ckpt_inval(ckp, "reserved field in use\n"));
 208 
 209         if (ckp->ckp_hdr->fcfh_hdrsize < sizeof (fcf_hdr_t) ||
 210             ckp->ckp_hdr->fcfh_secsize < sizeof (fcf_sec_t)) {
 211                 return (fmd_ckpt_inval(ckp,
 212                     "bad header and/or section size\n"));
 213         }
 214 
 215         seclen = (uint64_t)ckp->ckp_hdr->fcfh_secnum *
 216             (uint64_t)ckp->ckp_hdr->fcfh_secsize;
 217 
 218         if (ckp->ckp_hdr->fcfh_secoff > ckp->ckp_size ||
 219             seclen > ckp->ckp_size ||
 220             ckp->ckp_hdr->fcfh_secoff + seclen > ckp->ckp_size ||
 221             ckp->ckp_hdr->fcfh_secoff + seclen < ckp->ckp_hdr->fcfh_secoff)
 222                 return (fmd_ckpt_inval(ckp, "truncated section headers\n"));
 223 
 224         if (!IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secoff, sizeof (uint64_t)) ||
 225             !IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secsize, sizeof (uint64_t)))
 226                 return (fmd_ckpt_inval(ckp, "misaligned section headers\n"));
 227 
 228         /*
 229          * Once the header is validated, iterate over the section headers
 230          * ensuring that each one is valid w.r.t. offset, alignment, and size.
 231          * We also pick up the string table pointer during this pass.
 232          */
 233         ckp->ckp_secp = (void *)(ckp->ckp_buf + ckp->ckp_hdr->fcfh_secoff);
 234         ckp->ckp_secs = ckp->ckp_hdr->fcfh_secnum;
 235 
 236         for (i = 0; i < ckp->ckp_secs; i++) {
 237                 fcf_sec_t *sp = (void *)(ckp->ckp_buf +
 238                     ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
 239 
 240                 const fmd_ckpt_desc_t *dp = &_fmd_ckpt_sections[sp->fcfs_type];
 241 
 242                 if (sp->fcfs_flags != 0) {
 243                         return (fmd_ckpt_inval(ckp, "section %u has invalid "
 244                             "section flags (0x%x)\n", i, sp->fcfs_flags));
 245                 }
 246 
 247                 if (sp->fcfs_align & (sp->fcfs_align - 1)) {
 248                         return (fmd_ckpt_inval(ckp, "section %u has invalid "
 249                             "alignment (%u)\n", i, sp->fcfs_align));
 250                 }
 251 
 252                 if (sp->fcfs_offset & (sp->fcfs_align - 1)) {
 253                         return (fmd_ckpt_inval(ckp, "section %u is not properly"
 254                             " aligned (offset %llu)\n", i, sp->fcfs_offset));
 255                 }
 256 
 257                 if (sp->fcfs_entsize != 0 &&
 258                     (sp->fcfs_entsize & (sp->fcfs_align - 1)) != 0) {
 259                         return (fmd_ckpt_inval(ckp, "section %u has misaligned "
 260                             "entsize %u\n", i, sp->fcfs_entsize));
 261                 }
 262 
 263                 if (sp->fcfs_offset > ckp->ckp_size ||
 264                     sp->fcfs_size > ckp->ckp_size ||
 265                     sp->fcfs_offset + sp->fcfs_size > ckp->ckp_size ||
 266                     sp->fcfs_offset + sp->fcfs_size < sp->fcfs_offset) {
 267                         return (fmd_ckpt_inval(ckp, "section %u has corrupt "
 268                             "size or offset\n", i));
 269                 }
 270 
 271                 if (sp->fcfs_type >= sizeof (_fmd_ckpt_sections) /
 272                     sizeof (_fmd_ckpt_sections[0])) {
 273                         return (fmd_ckpt_inval(ckp, "section %u has unknown "
 274                             "section type %u\n", i, sp->fcfs_type));
 275                 }
 276 
 277                 if (sp->fcfs_align != dp->secd_align) {
 278                         return (fmd_ckpt_inval(ckp, "section %u has align %u "
 279                             "(not %u)\n", i, sp->fcfs_align, dp->secd_align));
 280                 }
 281 
 282                 if (sp->fcfs_size < dp->secd_size ||
 283                     sp->fcfs_entsize < dp->secd_entsize) {
 284                         return (fmd_ckpt_inval(ckp, "section %u has short "
 285                             "size or entsize\n", i));
 286                 }
 287 
 288                 switch (sp->fcfs_type) {
 289                 case FCF_SECT_STRTAB:
 290                         if (ckp->ckp_strs != NULL) {
 291                                 return (fmd_ckpt_inval(ckp, "multiple string "
 292                                     "tables are present in checkpoint file\n"));
 293                         }
 294 
 295                         ckp->ckp_strs = (char *)ckp->ckp_buf + sp->fcfs_offset;
 296                         ckp->ckp_strn = sp->fcfs_size;
 297 
 298                         if (ckp->ckp_strs[ckp->ckp_strn - 1] != '\0') {
 299                                 return (fmd_ckpt_inval(ckp, "string table %u "
 300                                     "is missing terminating nul byte\n", i));
 301                         }
 302                         break;
 303 
 304                 case FCF_SECT_MODULE:
 305                         if (ckp->ckp_modp != NULL) {
 306                                 return (fmd_ckpt_inval(ckp, "multiple module "
 307                                     "sects are present in checkpoint file\n"));
 308                         }
 309                         ckp->ckp_modp = sp;
 310                         break;
 311                 }
 312         }
 313 
 314         /*
 315          * Ensure that the first section is an empty one of type FCF_SECT_NONE.
 316          * This is done to ensure that links can use index 0 as a null section.
 317          */
 318         if (ckp->ckp_secs == 0 || ckp->ckp_secp->fcfs_type != FCF_SECT_NONE ||
 319             ckp->ckp_secp->fcfs_entsize != 0 || ckp->ckp_secp->fcfs_size != 0) {
 320                 return (fmd_ckpt_inval(ckp, "section 0 is not of the "
 321                     "appropriate size and/or attributes (SECT_NONE)\n"));
 322         }
 323 
 324         if (ckp->ckp_modp == NULL) {
 325                 return (fmd_ckpt_inval(ckp,
 326                     "no module section found in file\n"));
 327         }
 328 
 329         return (0);
 330 }
 331 
 332 static void
 333 fmd_ckpt_destroy(fmd_ckpt_t *ckp)
 334 {
 335         if (ckp->ckp_buf != NULL)
 336                 fmd_free(ckp->ckp_buf, ckp->ckp_size);
 337         if (ckp->ckp_fd >= 0)
 338                 (void) close(ckp->ckp_fd);
 339 }
 340 
 341 /*
 342  * fmd_ckpt_error() is used as a wrapper around fmd_error() for ckpt routines.
 343  * It calls fmd_module_unlock() on behalf of its caller, logs the error, and
 344  * then aborts the API call and the surrounding module entry point by doing an
 345  * fmd_module_abort(), which longjmps to the place where we entered the module.
 346  * Depending on the type of error and conf settings, we will reset or fail.
 347  */
 348 /*PRINTFLIKE3*/
 349 static void
 350 fmd_ckpt_error(fmd_ckpt_t *ckp, int err, const char *format, ...)
 351 {
 352         fmd_module_t *mp = ckp->ckp_mp;
 353         va_list ap;
 354 
 355         va_start(ap, format);
 356         fmd_verror(err, format, ap);
 357         va_end(ap);
 358 
 359         if (fmd_module_locked(mp))
 360                 fmd_module_unlock(mp);
 361 
 362         fmd_ckpt_destroy(ckp);
 363         fmd_module_abort(mp, err);
 364 }
 365 
 366 static fcf_secidx_t
 367 fmd_ckpt_section(fmd_ckpt_t *ckp, const void *data, uint_t type, uint64_t size)
 368 {
 369         const fmd_ckpt_desc_t *dp;
 370 
 371         ASSERT(type < sizeof (_fmd_ckpt_sections) / sizeof (fmd_ckpt_desc_t));
 372         dp = &_fmd_ckpt_sections[type];
 373 
 374         ckp->ckp_ptr = (uchar_t *)
 375             P2ROUNDUP((uintptr_t)ckp->ckp_ptr, dp->secd_align);
 376 
 377         ckp->ckp_secp->fcfs_type = type;
 378         ckp->ckp_secp->fcfs_align = dp->secd_align;
 379         ckp->ckp_secp->fcfs_flags = 0;
 380         ckp->ckp_secp->fcfs_entsize = dp->secd_entsize;
 381         ckp->ckp_secp->fcfs_offset = (size_t)(ckp->ckp_ptr - ckp->ckp_buf);
 382         ckp->ckp_secp->fcfs_size = size;
 383 
 384         /*
 385          * If the data pointer is non-NULL, copy the data to our buffer; else
 386          * the caller is responsible for doing so and updating ckp->ckp_ptr.
 387          */
 388         if (data != NULL) {
 389                 bcopy(data, ckp->ckp_ptr, size);
 390                 ckp->ckp_ptr += size;
 391         }
 392 
 393         ckp->ckp_secp++;
 394         return (ckp->ckp_secs++);
 395 }
 396 
 397 static fcf_stridx_t
 398 fmd_ckpt_string(fmd_ckpt_t *ckp, const char *s)
 399 {
 400         fcf_stridx_t idx = (fcf_stridx_t)(ckp->ckp_strp - ckp->ckp_strs);
 401 
 402         (void) strcpy(ckp->ckp_strp, s);
 403         ckp->ckp_strp += strlen(s) + 1;
 404 
 405         return (idx);
 406 }
 407 
 408 static int
 409 fmd_ckpt_alloc(fmd_ckpt_t *ckp, uint64_t gen)
 410 {
 411         /*
 412          * We've added up all the sections by now: add two more for SECT_NONE
 413          * and SECT_STRTAB, and add the size of the section header table and
 414          * string table to the total size.  We know that the fcf_hdr_t is
 415          * aligned so that that fcf_sec_t's can follow it, and that fcf_sec_t
 416          * is aligned so that any section can follow it, so no extra padding
 417          * bytes need to be allocated between any of these items.
 418          */
 419         ckp->ckp_secs += 2; /* for FCF_SECT_NONE and FCF_SECT_STRTAB */
 420         ckp->ckp_size += sizeof (fcf_sec_t) * ckp->ckp_secs;
 421         ckp->ckp_size += ckp->ckp_strn;
 422 
 423         TRACE((FMD_DBG_CKPT, "alloc fcf buf size %u", ckp->ckp_size));
 424         ckp->ckp_buf = fmd_zalloc(ckp->ckp_size, FMD_NOSLEEP);
 425 
 426         if (ckp->ckp_buf == NULL)
 427                 return (-1); /* errno is set for us */
 428 
 429         ckp->ckp_hdr = (void *)ckp->ckp_buf;
 430 
 431         ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0] = FCF_MAG_MAG0;
 432         ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG1] = FCF_MAG_MAG1;
 433         ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG2] = FCF_MAG_MAG2;
 434         ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG3] = FCF_MAG_MAG3;
 435         ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] = FCF_MODEL_NATIVE;
 436         ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] = FCF_ENCODE_NATIVE;
 437         ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] = FCF_VERSION;
 438 
 439         ckp->ckp_hdr->fcfh_hdrsize = sizeof (fcf_hdr_t);
 440         ckp->ckp_hdr->fcfh_secsize = sizeof (fcf_sec_t);
 441         ckp->ckp_hdr->fcfh_secnum = ckp->ckp_secs;
 442         ckp->ckp_hdr->fcfh_secoff = sizeof (fcf_hdr_t);
 443         ckp->ckp_hdr->fcfh_filesz = ckp->ckp_size;
 444         ckp->ckp_hdr->fcfh_cgen = gen;
 445 
 446         ckp->ckp_secs = 0; /* reset section counter for second pass */
 447         ckp->ckp_secp = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
 448         ckp->ckp_strs = (char *)ckp->ckp_buf + ckp->ckp_size - ckp->ckp_strn;
 449         ckp->ckp_strp = ckp->ckp_strs + 1; /* use first byte as \0 */
 450         ckp->ckp_ptr = (uchar_t *)(ckp->ckp_secp + ckp->ckp_hdr->fcfh_secnum);
 451 
 452         (void) fmd_ckpt_section(ckp, NULL, FCF_SECT_NONE, 0);
 453         return (0);
 454 }
 455 
 456 static int
 457 fmd_ckpt_commit(fmd_ckpt_t *ckp)
 458 {
 459         fcf_sec_t *secbase = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
 460         size_t stroff = ckp->ckp_size - ckp->ckp_strn;
 461 
 462         /*
 463          * Before committing the checkpoint, we assert that fmd_ckpt_t's sizes
 464          * and current pointer locations all add up appropriately.  Any ASSERTs
 465          * which trip here likely indicate an inconsistency in the code for the
 466          * reservation pass and the buffer update pass of the FCF subroutines.
 467          */
 468         ASSERT((size_t)(ckp->ckp_ptr - ckp->ckp_buf) == stroff);
 469         (void) fmd_ckpt_section(ckp, NULL, FCF_SECT_STRTAB, ckp->ckp_strn);
 470         ckp->ckp_ptr += ckp->ckp_strn; /* string table is already filled in */
 471 
 472         ASSERT(ckp->ckp_secs == ckp->ckp_hdr->fcfh_secnum);
 473         ASSERT(ckp->ckp_secp == secbase + ckp->ckp_hdr->fcfh_secnum);
 474         ASSERT(ckp->ckp_ptr == ckp->ckp_buf + ckp->ckp_hdr->fcfh_filesz);
 475 
 476         if (write(ckp->ckp_fd, ckp->ckp_buf, ckp->ckp_size) != ckp->ckp_size ||
 477             fsync(ckp->ckp_fd) != 0 || close(ckp->ckp_fd) != 0)
 478                 return (-1); /* errno is set for us */
 479 
 480         ckp->ckp_fd = -1; /* fd is now closed */
 481         return (rename(ckp->ckp_src, ckp->ckp_dst) != 0);
 482 }
 483 
 484 static void
 485 fmd_ckpt_resv(fmd_ckpt_t *ckp, size_t size, size_t align)
 486 {
 487         if (size != 0) {
 488                 ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, align) + size;
 489                 ckp->ckp_secs++;
 490         }
 491 }
 492 
 493 static void
 494 fmd_ckpt_resv_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
 495 {
 496         ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, _MAX_ALIGNMENT) + bp->buf_size;
 497         ckp->ckp_strn += strlen(bp->buf_name) + 1;
 498         ckp->ckp_secs++;
 499 }
 500 
 501 static void
 502 fmd_ckpt_save_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
 503 {
 504         fcf_buf_t *fcfb = ckp->ckp_arg;
 505 
 506         fcfb->fcfb_name = fmd_ckpt_string(ckp, bp->buf_name);
 507         fcfb->fcfb_data = fmd_ckpt_section(ckp,
 508             bp->buf_data, FCF_SECT_BUFFER, bp->buf_size);
 509 
 510         ckp->ckp_arg = fcfb + 1;
 511 }
 512 
 513 static void
 514 fmd_ckpt_save_event(fmd_ckpt_t *ckp, fmd_event_t *e)
 515 {
 516         fcf_event_t *fcfe = (void *)ckp->ckp_ptr;
 517         fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
 518         fmd_log_t *lp = ep->ev_log;
 519 
 520         fcfe->fcfe_todsec = ep->ev_time.ftv_sec;
 521         fcfe->fcfe_todnsec = ep->ev_time.ftv_nsec;
 522         fcfe->fcfe_major = lp ? major(lp->log_stat.st_dev) : -1U;
 523         fcfe->fcfe_minor = lp ? minor(lp->log_stat.st_dev) : -1U;
 524         fcfe->fcfe_inode = lp ? lp->log_stat.st_ino : -1ULL;
 525         fcfe->fcfe_offset = ep->ev_off;
 526 
 527         ckp->ckp_ptr += sizeof (fcf_event_t);
 528 }
 529 
 530 static void
 531 fmd_ckpt_save_nvlist(fmd_ckpt_t *ckp, nvlist_t *nvl)
 532 {
 533         fcf_nvl_t *fcfn = (void *)ckp->ckp_ptr;
 534         char *nvbuf = (char *)ckp->ckp_ptr + sizeof (fcf_nvl_t);
 535         size_t nvsize = 0;
 536 
 537         (void) nvlist_size(nvl, &nvsize, NV_ENCODE_NATIVE);
 538         fcfn->fcfn_size = (uint64_t)nvsize;
 539 
 540         (void) nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_NATIVE, 0);
 541         ckp->ckp_ptr += sizeof (fcf_nvl_t) + nvsize;
 542 
 543         ckp->ckp_ptr = (uchar_t *)
 544             P2ROUNDUP((uintptr_t)ckp->ckp_ptr, sizeof (uint64_t));
 545 }
 546 
 547 static void
 548 fmd_ckpt_resv_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
 549 {
 550         fmd_ckpt_resv(ckp,
 551             sizeof (fcf_event_t) * sgp->sg_count, sizeof (uint64_t));
 552 
 553         ckp->ckp_strn += strlen(sgp->sg_name) + 1;
 554 }
 555 
 556 static void
 557 fmd_ckpt_save_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
 558 {
 559         fcf_serd_t *fcfd = ckp->ckp_arg;
 560         fcf_secidx_t evsec = FCF_SECT_NONE;
 561         fmd_serd_elem_t *sep;
 562 
 563         if (sgp->sg_count != 0) {
 564                 evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
 565                     sizeof (fcf_event_t) * sgp->sg_count);
 566 
 567                 for (sep = fmd_list_next(&sgp->sg_list);
 568                     sep != NULL; sep = fmd_list_next(sep))
 569                         fmd_ckpt_save_event(ckp, sep->se_event);
 570         }
 571 
 572         fcfd->fcfd_name = fmd_ckpt_string(ckp, sgp->sg_name);
 573         fcfd->fcfd_events = evsec;
 574         fcfd->fcfd_pad = 0;
 575         fcfd->fcfd_n = sgp->sg_n;
 576         fcfd->fcfd_t = sgp->sg_t;
 577 
 578         ckp->ckp_arg = fcfd + 1;
 579 }
 580 
 581 static void
 582 fmd_ckpt_resv_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
 583 {
 584         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
 585         fmd_case_susp_t *cis;
 586         uint_t n;
 587 
 588         if (cip->ci_xprt != NULL)
 589                 return; /* do not checkpoint cases from remote transports */
 590 
 591         n = fmd_buf_hash_count(&cip->ci_bufs);
 592         fmd_buf_hash_apply(&cip->ci_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
 593         fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
 594 
 595         if (cip->ci_principal != NULL)
 596                 fmd_ckpt_resv(ckp, sizeof (fcf_event_t), sizeof (uint64_t));
 597 
 598         fmd_ckpt_resv(ckp,
 599             sizeof (fcf_event_t) * cip->ci_nitems, sizeof (uint64_t));
 600 
 601         if (cip->ci_nsuspects != 0)
 602                 ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, sizeof (uint64_t));
 603 
 604         cip->ci_nvsz = 0; /* compute size of packed suspect nvlist array */
 605 
 606         for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
 607                 size_t nvsize = 0;
 608 
 609                 (void) nvlist_size(cis->cis_nvl, &nvsize, NV_ENCODE_NATIVE);
 610                 cip->ci_nvsz += sizeof (fcf_nvl_t) + nvsize;
 611                 cip->ci_nvsz = P2ROUNDUP(cip->ci_nvsz, sizeof (uint64_t));
 612         }
 613 
 614         fmd_ckpt_resv(ckp, cip->ci_nvsz, sizeof (uint64_t));
 615         fmd_ckpt_resv(ckp, sizeof (fcf_case_t), sizeof (uint32_t));
 616         ckp->ckp_strn += strlen(cip->ci_uuid) + 1;
 617 }
 618 
 619 static void
 620 fmd_ckpt_save_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
 621 {
 622         fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
 623 
 624         fmd_case_item_t *cit;
 625         fmd_case_susp_t *cis;
 626         fcf_case_t fcfc;
 627         uint_t n;
 628 
 629         fcf_secidx_t bufsec = FCF_SECIDX_NONE;
 630         fcf_secidx_t evsec = FCF_SECIDX_NONE;
 631         fcf_secidx_t nvsec = FCF_SECIDX_NONE;
 632         fcf_secidx_t prsec = FCF_SECIDX_NONE;
 633 
 634         if (cip->ci_xprt != NULL)
 635                 return; /* do not checkpoint cases from remote transports */
 636 
 637         if ((n = fmd_buf_hash_count(&cip->ci_bufs)) != 0) {
 638                 size_t size = sizeof (fcf_buf_t) * n;
 639                 fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
 640 
 641                 fmd_buf_hash_apply(&cip->ci_bufs,
 642                     (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
 643 
 644                 bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
 645                 fmd_free(bufs, size);
 646         }
 647 
 648         if (cip->ci_principal != NULL) {
 649                 prsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
 650                     sizeof (fcf_event_t));
 651 
 652                 fmd_ckpt_save_event(ckp, cip->ci_principal);
 653         }
 654 
 655         if (cip->ci_nitems != 0) {
 656                 evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
 657                     sizeof (fcf_event_t) * cip->ci_nitems);
 658 
 659                 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
 660                         fmd_ckpt_save_event(ckp, cit->cit_event);
 661         }
 662 
 663         if (cip->ci_nsuspects != 0) {
 664                 nvsec = fmd_ckpt_section(ckp, NULL,
 665                     FCF_SECT_NVLISTS, cip->ci_nvsz);
 666 
 667                 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
 668                         fmd_ckpt_save_nvlist(ckp, cis->cis_nvl);
 669         }
 670 
 671         fcfc.fcfc_uuid = fmd_ckpt_string(ckp, cip->ci_uuid);
 672         fcfc.fcfc_bufs = bufsec;
 673         fcfc.fcfc_principal = prsec;
 674         fcfc.fcfc_events = evsec;
 675         fcfc.fcfc_suspects = nvsec;
 676 
 677         switch (cip->ci_state) {
 678         case FMD_CASE_UNSOLVED:
 679                 fcfc.fcfc_state = FCF_CASE_UNSOLVED;
 680                 break;
 681         case FMD_CASE_SOLVED:
 682                 fcfc.fcfc_state = FCF_CASE_SOLVED;
 683                 break;
 684         case FMD_CASE_CLOSE_WAIT:
 685                 fcfc.fcfc_state = FCF_CASE_CLOSE_WAIT;
 686                 break;
 687         default:
 688                 fmd_panic("case %p (%s) has invalid state %u",
 689                     (void *)cp, cip->ci_uuid, cip->ci_state);
 690         }
 691 
 692         (void) fmd_ckpt_section(ckp, &fcfc, FCF_SECT_CASE, sizeof (fcf_case_t));
 693 }
 694 
 695 static void
 696 fmd_ckpt_resv_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
 697 {
 698         fmd_case_t *cp;
 699         uint_t n;
 700 
 701         for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
 702                 fmd_ckpt_resv_case(ckp, cp);
 703 
 704         n = fmd_serd_hash_count(&mp->mod_serds);
 705         fmd_serd_hash_apply(&mp->mod_serds,
 706             (fmd_serd_eng_f *)fmd_ckpt_resv_serd, ckp);
 707         fmd_ckpt_resv(ckp, sizeof (fcf_serd_t) * n, sizeof (uint64_t));
 708 
 709         n = fmd_buf_hash_count(&mp->mod_bufs);
 710         fmd_buf_hash_apply(&mp->mod_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
 711         fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
 712 
 713         fmd_ckpt_resv(ckp, sizeof (fcf_module_t), sizeof (uint32_t));
 714         ckp->ckp_strn += strlen(mp->mod_name) + 1;
 715         ckp->ckp_strn += strlen(mp->mod_path) + 1;
 716         ckp->ckp_strn += strlen(mp->mod_info->fmdi_desc) + 1;
 717         ckp->ckp_strn += strlen(mp->mod_info->fmdi_vers) + 1;
 718 }
 719 
 720 static void
 721 fmd_ckpt_save_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
 722 {
 723         fcf_secidx_t bufsec = FCF_SECIDX_NONE;
 724         fcf_module_t fcfm;
 725         fmd_case_t *cp;
 726         uint_t n;
 727 
 728         for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
 729                 fmd_ckpt_save_case(ckp, cp);
 730 
 731         if ((n = fmd_serd_hash_count(&mp->mod_serds)) != 0) {
 732                 size_t size = sizeof (fcf_serd_t) * n;
 733                 fcf_serd_t *serds = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
 734 
 735                 fmd_serd_hash_apply(&mp->mod_serds,
 736                     (fmd_serd_eng_f *)fmd_ckpt_save_serd, ckp);
 737 
 738                 (void) fmd_ckpt_section(ckp, serds, FCF_SECT_SERD, size);
 739                 fmd_free(serds, size);
 740         }
 741 
 742         if ((n = fmd_buf_hash_count(&mp->mod_bufs)) != 0) {
 743                 size_t size = sizeof (fcf_buf_t) * n;
 744                 fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
 745 
 746                 fmd_buf_hash_apply(&mp->mod_bufs,
 747                     (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
 748 
 749                 bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
 750                 fmd_free(bufs, size);
 751         }
 752 
 753         fcfm.fcfm_name = fmd_ckpt_string(ckp, mp->mod_name);
 754         fcfm.fcfm_path = fmd_ckpt_string(ckp, mp->mod_path);
 755         fcfm.fcfm_desc = fmd_ckpt_string(ckp, mp->mod_info->fmdi_desc);
 756         fcfm.fcfm_vers = fmd_ckpt_string(ckp, mp->mod_info->fmdi_vers);
 757         fcfm.fcfm_bufs = bufsec;
 758 
 759         (void) fmd_ckpt_section(ckp, &fcfm,
 760             FCF_SECT_MODULE, sizeof (fcf_module_t));
 761 }
 762 
 763 void
 764 fmd_ckpt_save(fmd_module_t *mp)
 765 {
 766         struct stat64 st;
 767         char path[PATH_MAX];
 768         mode_t dirmode;
 769 
 770         hrtime_t now = gethrtime();
 771         fmd_ckpt_t ckp;
 772         int err;
 773 
 774         ASSERT(fmd_module_locked(mp));
 775 
 776         /*
 777          * If checkpointing is disabled for the module, just return.  We must
 778          * commit the module state anyway to transition pending log events.
 779          */
 780         if (mp->mod_stats->ms_ckpt_save.fmds_value.bool == FMD_B_FALSE) {
 781                 fmd_module_commit(mp);
 782                 return;
 783         }
 784 
 785         if (!(mp->mod_flags & (FMD_MOD_MDIRTY | FMD_MOD_CDIRTY)))
 786                 return; /* no checkpoint is necessary for this module */
 787 
 788         TRACE((FMD_DBG_CKPT, "ckpt save begin %s %llu",
 789             mp->mod_name, mp->mod_gen + 1));
 790 
 791         /*
 792          * If the per-module checkpoint directory isn't found or isn't of type
 793          * directory, move aside whatever is there (if anything) and attempt
 794          * to mkdir(2) a new module checkpoint directory.  If this fails, we
 795          * have no choice but to abort the checkpoint and try again later.
 796          */
 797         if (stat64(mp->mod_ckpt, &st) != 0 || !S_ISDIR(st.st_mode)) {
 798                 (void) snprintf(path, sizeof (path), "%s-", mp->mod_ckpt);
 799                 (void) rename(mp->mod_ckpt, path);
 800                 (void) fmd_conf_getprop(fmd.d_conf, "ckpt.dirmode", &dirmode);
 801 
 802                 if (mkdir(mp->mod_ckpt, dirmode) != 0) {
 803                         fmd_error(EFMD_CKPT_MKDIR,
 804                             "failed to mkdir %s", mp->mod_ckpt);
 805                         return; /* return without clearing dirty bits */
 806                 }
 807         }
 808 
 809         /*
 810          * Create a temporary file to write out the checkpoint into, and create
 811          * a fmd_ckpt_t structure to manage construction of the checkpoint.  We
 812          * then figure out how much space will be required, and allocate it.
 813          */
 814         if (fmd_ckpt_create(&ckp, mp) == -1) {
 815                 fmd_error(EFMD_CKPT_CREATE, "failed to create %s", ckp.ckp_src);
 816                 return;
 817         }
 818 
 819         fmd_ckpt_resv_module(&ckp, mp);
 820 
 821         if (fmd_ckpt_alloc(&ckp, mp->mod_gen + 1) != 0) {
 822                 fmd_error(EFMD_CKPT_NOMEM, "failed to build %s", ckp.ckp_src);
 823                 fmd_ckpt_destroy(&ckp);
 824                 return;
 825         }
 826 
 827         /*
 828          * Fill in the checkpoint content, write it to disk, sync it, and then
 829          * atomically rename it to the destination path.  If this fails, we
 830          * have no choice but to leave all our dirty bits set and return.
 831          */
 832         fmd_ckpt_save_module(&ckp, mp);
 833         err = fmd_ckpt_commit(&ckp);
 834         fmd_ckpt_destroy(&ckp);
 835 
 836         if (err != 0) {
 837                 fmd_error(EFMD_CKPT_COMMIT, "failed to commit %s", ckp.ckp_dst);
 838                 return; /* return without clearing dirty bits */
 839         }
 840 
 841         fmd_module_commit(mp);
 842         TRACE((FMD_DBG_CKPT, "ckpt save end %s", mp->mod_name));
 843 
 844         mp->mod_stats->ms_ckpt_cnt.fmds_value.ui64++;
 845         mp->mod_stats->ms_ckpt_time.fmds_value.ui64 += gethrtime() - now;
 846 
 847         fmd_dprintf(FMD_DBG_CKPT, "saved checkpoint of %s (%llu)\n",
 848             mp->mod_name, mp->mod_gen);
 849 }
 850 
 851 /*
 852  * Utility function to retrieve a pointer to a section's header and verify that
 853  * it is of the expected type or it is a FCF_SECT_NONE reference.
 854  */
 855 static const fcf_sec_t *
 856 fmd_ckpt_secptr(fmd_ckpt_t *ckp, fcf_secidx_t sid, uint_t type)
 857 {
 858         const fcf_sec_t *sp = (void *)(ckp->ckp_buf +
 859             ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * sid);
 860 
 861         return (sid < ckp->ckp_secs && (sp->fcfs_type == type ||
 862             sp->fcfs_type == FCF_SECT_NONE) ? sp : NULL);
 863 }
 864 
 865 /*
 866  * Utility function to retrieve the data pointer for a particular section.  The
 867  * validity of the header values has already been checked by fmd_ckpt_open().
 868  */
 869 static const void *
 870 fmd_ckpt_dataptr(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
 871 {
 872         return (ckp->ckp_buf + sp->fcfs_offset);
 873 }
 874 
 875 /*
 876  * Utility function to retrieve the end of the data region for a particular
 877  * section.  The validity of this value has been confirmed by fmd_ckpt_open().
 878  */
 879 static const void *
 880 fmd_ckpt_datalim(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
 881 {
 882         return (ckp->ckp_buf + sp->fcfs_offset + sp->fcfs_size);
 883 }
 884 
 885 /*
 886  * Utility function to retrieve a string pointer (fcf_stridx_t).  If the string
 887  * index is valid, the string data is returned; otherwise 'defstr' is returned.
 888  */
 889 static const char *
 890 fmd_ckpt_strptr(fmd_ckpt_t *ckp, fcf_stridx_t sid, const char *defstr)
 891 {
 892         return (sid < ckp->ckp_strn ? ckp->ckp_strs + sid : defstr);
 893 }
 894 
 895 static void
 896 fmd_ckpt_restore_events(fmd_ckpt_t *ckp, fcf_secidx_t sid,
 897     void (*func)(void *, fmd_event_t *), void *arg)
 898 {
 899         const fcf_event_t *fcfe;
 900         const fcf_sec_t *sp;
 901         fmd_timeval_t ftv;
 902         fmd_log_t *lp, *errlp;
 903         uint_t i, n;
 904         uint32_t e_maj, e_min;
 905         uint64_t e_ino;
 906 
 907         if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_EVENTS)) == NULL) {
 908                 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
 909                     "invalid link to section %u: expected events\n", sid);
 910         }
 911 
 912         if (sp->fcfs_size == 0)
 913                 return; /* empty events section or type none */
 914 
 915         fcfe = fmd_ckpt_dataptr(ckp, sp);
 916         n = sp->fcfs_size / sp->fcfs_entsize;
 917 
 918         /*
 919          * Hold the reader lock on log pointers to block log rotation during
 920          * the section restore so that we can safely insert refs to d_errlog.
 921          */
 922         (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
 923         errlp = fmd.d_errlog;
 924 
 925         e_maj = major(errlp->log_stat.st_dev);
 926         e_min = minor(errlp->log_stat.st_dev);
 927         e_ino = errlp->log_stat.st_ino;
 928 
 929         for (i = 0; i < n; i++) {
 930                 fmd_event_t *ep;
 931 
 932                 ftv.ftv_sec = fcfe->fcfe_todsec;
 933                 ftv.ftv_nsec = fcfe->fcfe_todnsec;
 934 
 935                 if (e_ino == fcfe->fcfe_inode &&
 936                     e_maj == fcfe->fcfe_major &&
 937                     e_min == fcfe->fcfe_minor)
 938                         lp = errlp;
 939                 else
 940                         lp = NULL;
 941 
 942                 ep = fmd_event_recreate(FMD_EVT_PROTOCOL,
 943                     &ftv, NULL, NULL, lp, fcfe->fcfe_offset, 0);
 944                 fmd_event_hold(ep);
 945                 func(arg, ep);
 946                 fmd_event_rele(ep);
 947 
 948                 fcfe = (fcf_event_t *)((uintptr_t)fcfe + sp->fcfs_entsize);
 949         }
 950 
 951         (void) pthread_rwlock_unlock(&fmd.d_log_lock);
 952 }
 953 
 954 static int
 955 fmd_ckpt_restore_suspects(fmd_ckpt_t *ckp, fmd_case_t *cp, fcf_secidx_t sid)
 956 {
 957         const fcf_nvl_t *fcfn, *endn;
 958         const fcf_sec_t *sp;
 959         nvlist_t *nvl;
 960         int err, i;
 961 
 962         if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_NVLISTS)) == NULL) {
 963                 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
 964                     "invalid link to section %u: expected nvlists\n", sid);
 965         }
 966 
 967         fcfn = fmd_ckpt_dataptr(ckp, sp);
 968         endn = fmd_ckpt_datalim(ckp, sp);
 969 
 970         for (i = 0; fcfn < endn; i++) {
 971                 char *data = (char *)fcfn + sp->fcfs_entsize;
 972                 size_t size = (size_t)fcfn->fcfn_size;
 973 
 974                 if (fcfn->fcfn_size > (size_t)((char *)endn - data)) {
 975                         fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "nvlist %u [%d] "
 976                             "size %u exceeds buffer\n", sid, i, size);
 977                 }
 978 
 979                 if ((err = nvlist_xunpack(data, size, &nvl, &fmd.d_nva)) != 0) {
 980                         fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "failed to "
 981                             "unpack nvlist %u [%d]: %s\n", sid, i,
 982                             fmd_strerror(err));
 983                 }
 984 
 985                 fmd_case_insert_suspect(cp, nvl);
 986 
 987                 size = sp->fcfs_entsize + fcfn->fcfn_size;
 988                 size = P2ROUNDUP(size, sizeof (uint64_t));
 989                 fcfn = (fcf_nvl_t *)((uintptr_t)fcfn + size);
 990         }
 991 
 992         return (i);
 993 }
 994 
 995 static void
 996 fmd_ckpt_restore_bufs(fmd_ckpt_t *ckp, fmd_module_t *mp,
 997     fmd_case_t *cp, fcf_secidx_t sid)
 998 {
 999         const fcf_sec_t *sp, *dsp;
1000         const fcf_buf_t *fcfb;
1001         uint_t i, n;
1002 
1003         if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_BUFS)) == NULL) {
1004                 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1005                     "invalid link to section %u: expected bufs\n", sid);
1006         }
1007 
1008         if (sp->fcfs_size == 0)
1009                 return; /* empty events section or type none */
1010 
1011         fcfb = fmd_ckpt_dataptr(ckp, sp);
1012         n = sp->fcfs_size / sp->fcfs_entsize;
1013 
1014         for (i = 0; i < n; i++) {
1015                 dsp = fmd_ckpt_secptr(ckp, fcfb->fcfb_data, FCF_SECT_BUFFER);
1016 
1017                 if (dsp == NULL) {
1018                         fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "invalid %u "
1019                             "buffer link %u\n", sid, fcfb->fcfb_data);
1020                 }
1021 
1022                 fmd_buf_write((fmd_hdl_t *)mp, cp,
1023                     fmd_ckpt_strptr(ckp, fcfb->fcfb_name, "<CORRUPT>"),
1024                     ckp->ckp_buf + dsp->fcfs_offset, dsp->fcfs_size);
1025 
1026                 fcfb = (fcf_buf_t *)((uintptr_t)fcfb + sp->fcfs_entsize);
1027         }
1028 }
1029 
1030 static void
1031 fmd_ckpt_restore_case(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1032 {
1033         const fcf_case_t *fcfc = fmd_ckpt_dataptr(ckp, sp);
1034         const char *uuid = fmd_ckpt_strptr(ckp, fcfc->fcfc_uuid, NULL);
1035         fmd_case_t *cp;
1036         int n;
1037 
1038         if (uuid == NULL || fcfc->fcfc_state > FCF_CASE_CLOSE_WAIT) {
1039                 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "corrupt %u case uuid "
1040                     "and/or state\n", (uint_t)(sp - ckp->ckp_secp));
1041         }
1042 
1043         fmd_module_lock(mp);
1044 
1045         if ((cp = fmd_case_recreate(mp, NULL,
1046             fcfc->fcfc_state != FCF_CASE_UNSOLVED ? FCF_CASE_SOLVED :
1047             FMD_CASE_UNSOLVED, uuid, NULL)) == NULL) {
1048                 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1049                     "duplicate case uuid: %s\n", uuid);
1050         }
1051 
1052         fmd_ckpt_restore_events(ckp, fcfc->fcfc_principal,
1053             (void (*)(void *, fmd_event_t *))fmd_case_insert_principal, cp);
1054 
1055         fmd_ckpt_restore_events(ckp, fcfc->fcfc_events,
1056             (void (*)(void *, fmd_event_t *))fmd_case_insert_event, cp);
1057 
1058         /*
1059          * Once solved, treat suspects from resource cache as master copy.
1060          *
1061          * If !fmd.d_running, this module must be a builtin, and so we don't
1062          * want to restore suspects or call fmd_case_transition_update() at this
1063          * stage. The suspects will be added later from the resource cache.
1064          * Calling fmd_case_transition("SOLVED") is OK here as the state is
1065          * already solved, so all it does is update the case flags.
1066          */
1067         if (fmd.d_running && (n = ((fmd_case_impl_t *)cp)->ci_nsuspects) == 0)
1068                 n = fmd_ckpt_restore_suspects(ckp, cp, fcfc->fcfc_suspects);
1069 
1070         if (!fmd.d_running)
1071                 fmd_case_transition(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1072         else if (fcfc->fcfc_state == FCF_CASE_SOLVED)
1073                 fmd_case_transition_update(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1074         else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n != 0)
1075                 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_SOLVED);
1076         else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n == 0)
1077                 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
1078 
1079         fmd_module_unlock(mp);
1080         fmd_ckpt_restore_bufs(ckp, mp, cp, fcfc->fcfc_bufs);
1081 }
1082 
1083 static void
1084 fmd_ckpt_restore_serd(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1085 {
1086         const fcf_serd_t *fcfd = fmd_ckpt_dataptr(ckp, sp);
1087         uint_t i, n = sp->fcfs_size / sp->fcfs_entsize;
1088         const fcf_sec_t *esp;
1089         const char *s;
1090 
1091         for (i = 0; i < n; i++) {
1092                 esp = fmd_ckpt_secptr(ckp, fcfd->fcfd_events, FCF_SECT_EVENTS);
1093 
1094                 if (esp == NULL) {
1095                         fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1096                             "invalid events link %u\n", fcfd->fcfd_events);
1097                 }
1098 
1099                 if ((s = fmd_ckpt_strptr(ckp, fcfd->fcfd_name, NULL)) == NULL) {
1100                         fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1101                             "serd name %u is corrupt\n", fcfd->fcfd_name);
1102                 }
1103 
1104                 fmd_serd_create((fmd_hdl_t *)mp, s, fcfd->fcfd_n, fcfd->fcfd_t);
1105                 fmd_module_lock(mp);
1106 
1107                 fmd_ckpt_restore_events(ckp, fcfd->fcfd_events,
1108                     (void (*)(void *, fmd_event_t *))fmd_serd_eng_record,
1109                     fmd_serd_eng_lookup(&mp->mod_serds, s));
1110 
1111                 fmd_module_unlock(mp);
1112                 fcfd = (fcf_serd_t *)((uintptr_t)fcfd + sp->fcfs_entsize);
1113         }
1114 }
1115 
1116 static void
1117 fmd_ckpt_restore_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
1118 {
1119         const fcf_module_t *fcfm = fmd_ckpt_dataptr(ckp, ckp->ckp_modp);
1120         const fcf_sec_t *sp;
1121         uint_t i;
1122 
1123         if (strcmp(mp->mod_name, fmd_ckpt_strptr(ckp, fcfm->fcfm_name, "")) ||
1124             strcmp(mp->mod_path, fmd_ckpt_strptr(ckp, fcfm->fcfm_path, ""))) {
1125                 fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1126                     "checkpoint is not for module %s\n", mp->mod_name);
1127         }
1128 
1129         for (i = 0; i < ckp->ckp_secs; i++) {
1130                 sp = (void *)(ckp->ckp_buf +
1131                     ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
1132 
1133                 switch (sp->fcfs_type) {
1134                 case FCF_SECT_CASE:
1135                         fmd_ckpt_restore_case(ckp, mp, sp);
1136                         break;
1137                 case FCF_SECT_SERD:
1138                         fmd_ckpt_restore_serd(ckp, mp, sp);
1139                         break;
1140                 }
1141         }
1142 
1143         fmd_ckpt_restore_bufs(ckp, mp, NULL, fcfm->fcfm_bufs);
1144         mp->mod_gen = ckp->ckp_hdr->fcfh_cgen;
1145 }
1146 
1147 /*
1148  * Restore a checkpoint for the specified module.  Any errors which occur
1149  * during restore will call fmd_ckpt_error() or trigger an fmd_api_error(),
1150  * either of which will automatically unlock the module and trigger an abort.
1151  */
1152 void
1153 fmd_ckpt_restore(fmd_module_t *mp)
1154 {
1155         fmd_ckpt_t ckp;
1156 
1157         if (mp->mod_stats->ms_ckpt_restore.fmds_value.bool == FMD_B_FALSE)
1158                 return; /* never restore checkpoints for this module */
1159 
1160         TRACE((FMD_DBG_CKPT, "ckpt restore begin %s", mp->mod_name));
1161 
1162         if (fmd_ckpt_open(&ckp, mp) == -1) {
1163                 if (errno != ENOENT)
1164                         fmd_error(EFMD_CKPT_OPEN, "can't open %s", ckp.ckp_src);
1165                 TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1166                 return;
1167         }
1168 
1169         ASSERT(!fmd_module_locked(mp));
1170         fmd_ckpt_restore_module(&ckp, mp);
1171         fmd_ckpt_destroy(&ckp);
1172         fmd_module_clrdirty(mp);
1173 
1174         TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1175         fmd_dprintf(FMD_DBG_CKPT, "restored checkpoint of %s\n", mp->mod_name);
1176 }
1177 
1178 /*
1179  * Delete the module's checkpoint file.  This is used by the ckpt.zero property
1180  * code or by the fmadm reset RPC service path to force a checkpoint delete.
1181  */
1182 void
1183 fmd_ckpt_delete(fmd_module_t *mp)
1184 {
1185         char path[PATH_MAX];
1186 
1187         (void) snprintf(path, sizeof (path),
1188             "%s/%s", mp->mod_ckpt, mp->mod_name);
1189 
1190         TRACE((FMD_DBG_CKPT, "delete %s ckpt", mp->mod_name));
1191 
1192         if (unlink(path) != 0 && errno != ENOENT)
1193                 fmd_error(EFMD_CKPT_DELETE, "failed to delete %s", path);
1194 }
1195 
1196 /*
1197  * Move aside the module's checkpoint file if checkpoint restore has failed.
1198  * We rename the file rather than deleting it in the hopes that someone might
1199  * send it to us for post-mortem analysis of whether we have a checkpoint bug.
1200  */
1201 void
1202 fmd_ckpt_rename(fmd_module_t *mp)
1203 {
1204         char src[PATH_MAX], dst[PATH_MAX];
1205 
1206         (void) snprintf(src, sizeof (src), "%s/%s", mp->mod_ckpt, mp->mod_name);
1207         (void) snprintf(dst, sizeof (dst), "%s-", src);
1208 
1209         TRACE((FMD_DBG_CKPT, "rename %s ckpt", mp->mod_name));
1210 
1211         if (rename(src, dst) != 0 && errno != ENOENT)
1212                 fmd_error(EFMD_CKPT_DELETE, "failed to rename %s", src);
1213 }