1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/fm/protocol.h>
  27 
  28 #include <fmd_api.h>
  29 #include <fmd_subr.h>
  30 #include <fmd_string.h>
  31 #include <fmd_protocol.h>
  32 #include <fmd_module.h>
  33 #include <fmd_error.h>
  34 
  35 static struct {
  36         fmd_stat_t nosub;
  37         fmd_stat_t module;
  38 } self_stats = {
  39         { "nosub", FMD_TYPE_UINT64, "event classes with no subscribers seen" },
  40         { "module", FMD_TYPE_UINT64, "error events received from fmd modules" },
  41 };
  42 
  43 typedef struct self_case {
  44         enum { SC_CLASS, SC_MODULE } sc_kind;
  45         char *sc_name;
  46 } self_case_t;
  47 
  48 static self_case_t *
  49 self_case_create(fmd_hdl_t *hdl, int kind, const char *name)
  50 {
  51         self_case_t *scp = fmd_hdl_alloc(hdl, sizeof (self_case_t), FMD_SLEEP);
  52 
  53         scp->sc_kind = kind;
  54         scp->sc_name = fmd_hdl_strdup(hdl, name, FMD_SLEEP);
  55 
  56         return (scp);
  57 }
  58 
  59 static void
  60 self_case_destroy(fmd_hdl_t *hdl, self_case_t *scp)
  61 {
  62         fmd_hdl_strfree(hdl, scp->sc_name);
  63         fmd_hdl_free(hdl, scp, sizeof (self_case_t));
  64 }
  65 
  66 static fmd_case_t *
  67 self_case_lookup(fmd_hdl_t *hdl, int kind, const char *name)
  68 {
  69         fmd_case_t *cp = NULL;
  70 
  71         while ((cp = fmd_case_next(hdl, cp)) != NULL) {
  72                 self_case_t *scp = fmd_case_getspecific(hdl, cp);
  73                 if (scp->sc_kind == kind && strcmp(scp->sc_name, name) == 0)
  74                         break;
  75         }
  76 
  77         return (cp);
  78 }
  79 
  80 /*ARGSUSED*/
  81 static void
  82 self_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
  83 {
  84         fmd_case_t *cp;
  85         nvlist_t *flt, *mod;
  86         char *name;
  87         int err = 0;
  88 
  89         /*
  90          * If we get an error report from another fmd module, then create a
  91          * case for the module and add the ereport to it.  The error is either
  92          * from fmd_hdl_error() or from fmd_api_error().  If it is the latter,
  93          * fmd_module_error() will send another event of class EFMD_MOD_FAIL
  94          * when the module has failed, at which point we can solve the case.
  95          * We can also close the case on EFMD_MOD_CONF (bad config file).
  96          */
  97         if (strcmp(class, fmd_errclass(EFMD_MODULE)) == 0 &&
  98             nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &mod) == 0 &&
  99             nvlist_lookup_string(mod, FM_FMRI_FMD_NAME, &name) == 0) {
 100 
 101                 if ((cp = self_case_lookup(hdl, SC_MODULE, name)) == NULL) {
 102                         cp = fmd_case_open(hdl,
 103                             self_case_create(hdl, SC_MODULE, name));
 104                 }
 105 
 106                 fmd_case_add_ereport(hdl, cp, ep);
 107                 self_stats.module.fmds_value.ui64++;
 108                 (void) nvlist_lookup_int32(nvl, FMD_ERR_MOD_ERRNO, &err);
 109 
 110                 if (err != EFMD_MOD_FAIL && err != EFMD_MOD_CONF)
 111                         return; /* module is still active, so keep case open */
 112 
 113                 if (fmd_case_solved(hdl, cp))
 114                         return; /* case is already closed but error in _fini */
 115 
 116                 class = err == EFMD_MOD_FAIL ? FMD_FLT_MOD : FMD_FLT_CONF;
 117                 flt = fmd_protocol_fault(class, 100, mod, NULL, NULL, NULL);
 118 
 119                 fmd_case_add_suspect(hdl, cp, flt);
 120                 fmd_case_solve(hdl, cp);
 121 
 122                 return;
 123         }
 124 
 125         /*
 126          * If we get an I/O DDI ereport, drop it for now until the I/O DE is
 127          * implemented and integrated.  Existing drivers in O/N have bugs that
 128          * will trigger these and we don't want this producing FMD_FLT_NOSUB.
 129          */
 130         if (strncmp(class, "ereport.io.ddi.", strlen("ereport.io.ddi.")) == 0)
 131                 return; /* if we got a DDI ereport, drop it for now */
 132 
 133         /*
 134          * If we get any other type of event then it is of a class for which
 135          * there are no subscribers.  Some of these correspond to internal fmd
 136          * errors, which we ignore.  Otherwise we keep one case per class and
 137          * use it to produce a message indicating that something is awry.
 138          */
 139         if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
 140             strcmp(class, FM_LIST_ISOLATED_CLASS) == 0 ||
 141             strcmp(class, FM_LIST_UPDATED_CLASS) == 0 ||
 142             strcmp(class, FM_LIST_RESOLVED_CLASS) == 0 ||
 143             strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
 144             strncmp(class, FM_FAULT_CLASS, strlen(FM_FAULT_CLASS)) == 0 ||
 145             strncmp(class, FM_DEFECT_CLASS, strlen(FM_DEFECT_CLASS)) == 0)
 146                 return; /* if no agents are present just drop list.* */
 147 
 148         if (strncmp(class, FMD_ERR_CLASS, FMD_ERR_CLASS_LEN) == 0)
 149                 return; /* if fmd itself produced the error just drop it */
 150 
 151         if (strncmp(class, FMD_RSRC_CLASS, FMD_RSRC_CLASS_LEN) == 0)
 152                 return; /* if fmd itself produced the event just drop it */
 153 
 154         if (strncmp(class, SYSEVENT_RSRC_CLASS, SYSEVENT_RSRC_CLASS_LEN) == 0)
 155                 return; /* sysvent resources are auto generated by fmd */
 156 
 157         if (self_case_lookup(hdl, SC_CLASS, class) != NULL)
 158                 return; /* case is already open against this class */
 159 
 160         if (strncmp(class, FM_IREPORT_CLASS ".",
 161             sizeof (FM_IREPORT_CLASS)) == 0)
 162                 return; /* no subscriber required for ireport.* */
 163 
 164         cp = fmd_case_open(hdl, self_case_create(hdl, SC_CLASS, class));
 165         fmd_case_add_ereport(hdl, cp, ep);
 166         self_stats.nosub.fmds_value.ui64++;
 167 
 168         flt = fmd_protocol_fault(FMD_FLT_NOSUB, 100, NULL, NULL, NULL, NULL);
 169         (void) nvlist_add_string(flt, "nosub_class", class);
 170         fmd_case_add_suspect(hdl, cp, flt);
 171         fmd_case_solve(hdl, cp);
 172 }
 173 
 174 static void
 175 self_close(fmd_hdl_t *hdl, fmd_case_t *cp)
 176 {
 177         self_case_destroy(hdl, fmd_case_getspecific(hdl, cp));
 178 }
 179 
 180 static const fmd_hdl_ops_t self_ops = {
 181         self_recv,      /* fmdo_recv */
 182         NULL,           /* fmdo_timeout */
 183         self_close,     /* fmdo_close */
 184         NULL,           /* fmdo_stats */
 185         NULL,           /* fmdo_gc */
 186 };
 187 
 188 void
 189 self_init(fmd_hdl_t *hdl)
 190 {
 191         fmd_module_t *mp = (fmd_module_t *)hdl; /* see below */
 192 
 193         fmd_hdl_info_t info = {
 194             "Fault Manager Self-Diagnosis", "1.0", &self_ops, NULL
 195         };
 196 
 197         /*
 198          * Unlike other modules, fmd-self-diagnosis has some special needs that
 199          * fall outside of what we want in the module API.  Manually disable
 200          * checkpointing for this module by tweaking the mod_stats values.
 201          * The self-diagnosis world relates to fmd's running state and modules
 202          * which all change when it restarts, so don't bother w/ checkpointing.
 203          */
 204         (void) pthread_mutex_lock(&mp->mod_stats_lock);
 205         mp->mod_stats->ms_ckpt_save.fmds_value.bool = FMD_B_FALSE;
 206         mp->mod_stats->ms_ckpt_restore.fmds_value.bool = FMD_B_FALSE;
 207         (void) pthread_mutex_unlock(&mp->mod_stats_lock);
 208 
 209         if (fmd_hdl_register(hdl, FMD_API_VERSION, &info) != 0)
 210                 return; /* failed to register with fmd */
 211 
 212         (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (self_stats) /
 213             sizeof (fmd_stat_t), (fmd_stat_t *)&self_stats);
 214 }
 215 
 216 void
 217 self_fini(fmd_hdl_t *hdl)
 218 {
 219         fmd_case_t *cp = NULL;
 220 
 221         while ((cp = fmd_case_next(hdl, cp)) != NULL)
 222                 self_case_destroy(hdl, fmd_case_getspecific(hdl, cp));
 223 }