big-one Wdiff usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c

Print this page

NEX-3166 need to add FMA events for SSD lifespan
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
OS-119 use disk sense data to trigger over-temp fault

Split	Close
Expand all
Collapse all

          --- old/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c
          +++ new/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  25   25   */
  26   26  
  27   27  /*
  28   28   * Disk error transport module
  29   29   *
  30   30   * This transport module is responsible for translating between disk errors
  31   31   * and FMA ereports.  It is a read-only transport module, and checks for the
  32   32   * following failures:
  33   33   *
  34   34   *      - overtemp
  35   35   *      - predictive failure
  36   36   *      - self-test failure
  37   37   *      - solid state media wearout
  38   38   *
  39   39   * These failures are detected via the TOPO_METH_DISK_STATUS method, which
  40   40   * leverages libdiskstatus to do the actual analysis.  This transport module is
  41   41   * in charge of the following tasks:
  42   42   *
  43   43   *      - discovering available devices
  44   44   *      - periodically checking devices
  45   45   *      - managing device addition/removal
  46   46   */
  47   47  
  48   48  #include <ctype.h>
  49   49  #include <fm/fmd_api.h>
  50   50  #include <fm/libdiskstatus.h>
  51   51  #include <fm/libtopo.h>
  52   52  #include <fm/topo_hc.h>
  53   53  #include <fm/topo_mod.h>
  54   54  #include <limits.h>
  55   55  #include <string.h>
  56   56  #include <sys/fm/io/scsi.h>
  57   57  #include <sys/fm/protocol.h>
  58   58  
  59   59  static struct dt_stat {
  60   60          fmd_stat_t dropped;
  61   61  } dt_stats = {
  62   62          { "dropped", FMD_TYPE_UINT64, "number of dropped ereports" }
  63   63  };
  64   64  
  65   65  typedef struct disk_monitor {
  66   66          fmd_hdl_t       *dm_hdl;
  67   67          fmd_xprt_t      *dm_xprt;
  68   68          id_t            dm_timer;
  69   69          hrtime_t        dm_interval;
  70   70          char            *dm_sim_search;
  71   71          char            *dm_sim_file;
  72   72          boolean_t       dm_timer_istopo;
  73   73  } disk_monitor_t;
  74   74  
  75   75  static void
  76   76  dt_post_ereport(fmd_hdl_t *hdl, fmd_xprt_t *xprt, const char *protocol,
  77   77      const char *faultname, uint64_t ena, nvlist_t *detector, nvlist_t *payload)
  78   78  {
  79   79          nvlist_t *nvl;
  80   80          int e = 0;
  81   81          char fullclass[PATH_MAX];
  82   82  
  83   83          (void) snprintf(fullclass, sizeof (fullclass), "%s.io.%s.disk.%s",
  84   84              FM_EREPORT_CLASS, protocol, faultname);
  85   85  
  86   86          if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) == 0) {
  87   87                  e |= nvlist_add_string(nvl, FM_CLASS, fullclass);
  88   88                  e |= nvlist_add_uint8(nvl, FM_VERSION, FM_EREPORT_VERSION);
  89   89                  e |= nvlist_add_uint64(nvl, FM_EREPORT_ENA, ena);
  90   90                  e |= nvlist_add_nvlist(nvl, FM_EREPORT_DETECTOR, detector);
  91   91                  e |= nvlist_merge(nvl, payload, 0);
  92   92  
  93   93                  if (e == 0) {
  94   94                          fmd_xprt_post(hdl, xprt, nvl, 0);
  95   95                  } else {
  96   96                          nvlist_free(nvl);
  97   97                          dt_stats.dropped.fmds_value.ui64++;
  98   98                  }
  99   99          } else {
 100  100                  dt_stats.dropped.fmds_value.ui64++;
 101  101          }
 102  102  }
 103  103  
 104  104  /*
 105  105   * Check a single topo node for failure.  This simply invokes the disk status
 106  106   * method, and generates any ereports as necessary.
 107  107   */
 108  108  static int
 109  109  dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
 110  110  {
 111  111          nvlist_t *result;
 112  112          nvlist_t *fmri, *faults;
 113  113          char *protocol;
 114  114          int err;
 115  115          disk_monitor_t *dmp = arg;
 116  116          nvpair_t *elem;
 117  117          boolean_t fault;
 118  118          nvlist_t *details;
 119  119          char *fmristr;
 120  120          nvlist_t *in = NULL;
 121  121  
 122  122          if (topo_node_resource(node, &fmri, &err) != 0) {
 123  123                  fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
 124  124                      topo_strerror(err));
 125  125                  return (TOPO_WALK_ERR);
 126  126          }
 127  127  
 128  128          if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
 129  129                  nvlist_free(fmri);
 130  130                  return (TOPO_WALK_ERR);
 131  131          }
 132  132  
 133  133          if (dmp->dm_sim_search) {
 134  134                  fmristr = NULL;
 135  135                  if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
 136  136                      strstr(fmristr, dmp->dm_sim_search) != 0)
 137  137                          (void) nvlist_add_string(in, "path", dmp->dm_sim_file);
 138  138                  topo_hdl_strfree(thp, fmristr);
 139  139          }
 140  140  
 141  141          /*
 142  142           * Try to invoke the method.  If this fails (most likely because the
 143  143           * method is not supported), then ignore this node.
 144  144           */
 145  145          if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
 146  146              TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
 147  147                  nvlist_free(fmri);
 148  148                  nvlist_free(in);
 149  149                  return (TOPO_WALK_NEXT);
 150  150          }
 151  151  
 152  152          nvlist_free(in);
 153  153  
 154  154          /*
 155  155           * Check for faults and post ereport(s) if needed
 156  156           */
 157  157          if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
 158  158              nvlist_lookup_string(result, "protocol", &protocol) == 0) {
 159  159                  elem = NULL;
 160  160                  while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {

↓ open down ↓

160 lines elided

↑ open up ↑

 161  161                          if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
 162  162                                  continue;
 163  163  
 164  164                          (void) nvpair_value_boolean_value(elem, &fault);
 165  165                          if (!fault ||
 166  166                              nvlist_lookup_nvlist(result, nvpair_name(elem),
 167  167                              &details) != 0)
 168  168                                  continue;
 169  169  
 170  170                          if (strcmp(nvpair_name(elem),
      171 +                            FM_EREPORT_SCSI_OVERTEMP) == 0 &&
      172 +                            fmd_prop_get_int32(dmp->dm_hdl,
      173 +                            "ignore-overtemp") == FMD_B_TRUE)
      174 +                                continue;
      175 +
      176 +                        if (strcmp(nvpair_name(elem),
 171  177                              FM_EREPORT_SCSI_SSMWEAROUT) == 0 &&
 172  178                              fmd_prop_get_int32(dmp->dm_hdl,
 173  179                              "ignore-ssm-wearout") == FMD_B_TRUE)
 174  180                                  continue;
 175  181  
 176  182                          dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
 177  183                              nvpair_name(elem),
 178  184                              fmd_event_ena_create(dmp->dm_hdl), fmri, details);
 179  185                  }
 180  186          }

 181  187  
 182  188          nvlist_free(result);
 183  189          nvlist_free(fmri);
 184  190  
 185  191          return (TOPO_WALK_NEXT);
 186  192  }
 187  193  
 188  194  /*
 189  195   * Periodic timeout.  Iterates over all hc:// topo nodes, calling
 190  196   * dt_analyze_disk() for each one.
 191  197   */
 192  198  /*ARGSUSED*/
 193  199  static void
 194  200  dt_timeout(fmd_hdl_t *hdl, id_t id, void *data)
 195  201  {
 196  202          topo_hdl_t *thp;
 197  203          topo_walk_t *twp;
 198  204          int err;
 199  205          disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
 200  206  
 201  207          dmp->dm_hdl = hdl;
 202  208  
 203  209          thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
 204  210          if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC, dt_analyze_disk,
 205  211              dmp, &err)) == NULL) {
 206  212                  fmd_hdl_topo_rele(hdl, thp);
 207  213                  fmd_hdl_error(hdl, "failed to get topology: %s\n",
 208  214                      topo_strerror(err));
 209  215                  return;
 210  216          }
 211  217  
 212  218          if (topo_walk_step(twp, TOPO_WALK_CHILD) == TOPO_WALK_ERR) {
 213  219                  topo_walk_fini(twp);
 214  220                  fmd_hdl_topo_rele(hdl, thp);
 215  221                  fmd_hdl_error(hdl, "failed to walk topology\n");
 216  222                  return;
 217  223          }
 218  224  
 219  225          topo_walk_fini(twp);
 220  226          fmd_hdl_topo_rele(hdl, thp);
 221  227  
 222  228          dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, dmp->dm_interval);
 223  229          dmp->dm_timer_istopo = B_FALSE;
 224  230  }
 225  231  
 226  232  /*
 227  233   * Called when the topology may have changed.  We want to examine all disks in
 228  234   * case a new one has been inserted, but we don't want to overwhelm the system
 229  235   * in the event of a flurry of topology changes, as most likely only a small
 230  236   * number of disks are changing.  To avoid this, we set the timer for a small
 231  237   * but non-trivial interval (by default 1 minute), and ignore intervening
 232  238   * changes during this period.  This still gives us a reasonable response time
 233  239   * to newly inserted devices without overwhelming the system if lots of hotplug
 234  240   * activity is going on.
 235  241   */
 236  242  /*ARGSUSED*/
 237  243  static void
 238  244  dt_topo_change(fmd_hdl_t *hdl, topo_hdl_t *thp)
 239  245  {
 240  246          disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
 241  247  
 242  248          if (dmp->dm_timer_istopo)
 243  249                  return;
 244  250

↓ open down ↓

64 lines elided

↑ open up ↑

 245  251          fmd_timer_remove(hdl, dmp->dm_timer);
 246  252          dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
 247  253              fmd_prop_get_int64(hdl, "min-interval"));
 248  254          dmp->dm_timer_istopo = B_TRUE;
 249  255  }
 250  256  
 251  257  static const fmd_prop_t fmd_props[] = {
 252  258          { "interval", FMD_TYPE_TIME, "1h" },
 253  259          { "min-interval", FMD_TYPE_TIME, "1min" },
 254  260          { "simulate", FMD_TYPE_STRING, "" },
      261 +        { "ignore-overtemp", FMD_TYPE_BOOL, "true"},
 255  262          { "ignore-ssm-wearout", FMD_TYPE_BOOL, "false"},
 256  263          { NULL, 0, NULL }
 257  264  };
 258  265  
 259  266  static const fmd_hdl_ops_t fmd_ops = {
 260  267          NULL,                   /* fmdo_recv */
 261  268          dt_timeout,             /* fmdo_timeout */
 262      -        NULL,                   /* fmdo_close */
      269 +        NULL,                   /* fmdo_close */
 263  270          NULL,                   /* fmdo_stats */
 264  271          NULL,                   /* fmdo_gc */
 265  272          NULL,                   /* fmdo_send */
 266  273          dt_topo_change,         /* fmdo_topo_change */
 267  274  };
 268  275  
 269  276  static const fmd_hdl_info_t fmd_info = {
 270  277          "Disk Transport Agent", "1.1", &fmd_ops, fmd_props
 271  278  };
 272  279

 273  280  void
 274  281  _fmd_init(fmd_hdl_t *hdl)
 275  282  {
 276  283          disk_monitor_t *dmp;
 277  284          char *simulate;
 278  285  
 279  286          if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
 280  287                  return;
 281  288  
 282  289          (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
 283  290              sizeof (dt_stats) / sizeof (fmd_stat_t),
 284  291              (fmd_stat_t *)&dt_stats);
 285  292  
 286  293          dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
 287  294          fmd_hdl_setspecific(hdl, dmp);
 288  295  
 289  296          dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
 290  297          dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");
 291  298  
 292  299          /*
 293  300           * Determine if we have the simulate property set.  This property allows
 294  301           * the developer to substitute a faulty device based off all or part of
 295  302           * an FMRI string.  For example, one could do:
 296  303           *
 297  304           *      setprop simulate "bay=4/disk=4  /path/to/sim.so"
 298  305           *
 299  306           * When the transport module encounters an FMRI containing the given
 300  307           * string, then it will open the simulator file instead of the
 301  308           * corresponding device.  This can be any file, but is intended to be a
 302  309           * libdiskstatus simulator shared object, capable of faking up SCSI
 303  310           * responses.
 304  311           *
 305  312           * The property consists of two strings, an FMRI fragment and an
 306  313           * absolute path, separated by whitespace.
 307  314           */
 308  315          simulate = fmd_prop_get_string(hdl, "simulate");
 309  316          if (simulate[0] != '\0') {
 310  317                  const char *sep;
 311  318                  size_t len;
 312  319  
 313  320                  for (sep = simulate; *sep != '\0'; sep++) {
 314  321                          if (isspace(*sep))
 315  322                                  break;
 316  323                  }
 317  324  
 318  325                  if (*sep != '\0') {
 319  326                          len = sep - simulate;
 320  327  
 321  328                          dmp->dm_sim_search = fmd_hdl_alloc(hdl,
 322  329                              len + 1, FMD_SLEEP);
 323  330                          (void) memcpy(dmp->dm_sim_search, simulate, len);
 324  331                          dmp->dm_sim_search[len] = '\0';
 325  332                  }
 326  333  
 327  334                  for (; *sep != '\0'; sep++) {
 328  335                          if (!isspace(*sep))
 329  336                                  break;
 330  337                  }
 331  338  
 332  339                  if (*sep != '\0') {
 333  340                          dmp->dm_sim_file = fmd_hdl_strdup(hdl, sep, FMD_SLEEP);
 334  341                  } else if (dmp->dm_sim_search) {
 335  342                          fmd_hdl_strfree(hdl, dmp->dm_sim_search);
 336  343                          dmp->dm_sim_search = NULL;
 337  344                  }
 338  345          }
 339  346          fmd_prop_free_string(hdl, simulate);
 340  347  
 341  348          /*
 342  349           * Call our initial timer routine.  This will do an initial check of all
 343  350           * the disks, and then start the periodic timeout.
 344  351           */
 345  352          dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, 0);
 346  353  }
 347  354  
 348  355  void
 349  356  _fmd_fini(fmd_hdl_t *hdl)
 350  357  {
 351  358          disk_monitor_t *dmp;
 352  359  
 353  360          dmp = fmd_hdl_getspecific(hdl);
 354  361          if (dmp) {
 355  362                  fmd_xprt_close(hdl, dmp->dm_xprt);
 356  363                  fmd_hdl_strfree(hdl, dmp->dm_sim_search);
 357  364                  fmd_hdl_strfree(hdl, dmp->dm_sim_file);
 358  365                  fmd_hdl_free(hdl, dmp, sizeof (*dmp));
 359  366          }
 360  367  }

↓ open down ↓

88 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX