NEX-10626 Hot spare doesn't replace failed SSD
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5736 implement autoreplace matching based on FRU slot number
NEX-6200 hot spares are not reactivated after reinserting into enclosure
NEX-9403 need to update FRU for spare and l2cache devices
NEX-9404 remove lofi autoreplace support from syseventd
NEX-9409 hotsparing doesn't work for vdevs without FRU
NEX-9424 zfs`vdev_online() needs better notification about state changes
Portions contributed by: Alek Pinchuk <alek@nexenta.com>
Portions contributed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-7397 Hotspare didn't kick in automatically when one of the drive in pool went "Faulty" (is_ssd fix)
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-7397 Hotspare didn't kick in automatically when one of the drive in pool went "Faulty"
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-5753 FMD core dumps
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-5774 fix for NEX-3166 has a tunable typo
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3166 need to add FMA events for SSD lifespan
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability (lint fix)
Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
NEX-5163 backport illumos 6027 EOL zulu (XVR-4000)
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
6027 EOL zulu (XVR-4000)
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Peter Tribble <peter.tribble@gmail.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Dan McDonald <danmcd@omniti.com>
NEX-5162 backport illumos 6507 i386 makecontext(3c) needs to 16-byte align the stack
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
6507 i386 makecontext(3c) needs to 16-byte align the stack
Reviewed by: Gordon Ross <gordon.w.ross@gmail.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Dan McDonald <danmcd@omniti.com>
NEX-5207 attempt to activate spare cores fmd
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-1438 bump slow-io threshold and default to disabled automated response
NEX-941 zfs doesn't replace "UNAVAIL" disk from spares in pool
OS-66 Retired devices may still get attached leading to ndi_devi_online errors
OS-65 New FMA agent is needed to consume diagnosed slow IO
Portions contributed by Marcel Telka.
zfsxx issue #11: support for spare device groups
re #12393 rb3935 Kerberos and smbd disagree about who is our AD server (fix elf runtime attributes check)
re #11612 rb3907 Failing vdev of a mirrored pool should not take zfs operations out of action for extended periods of time.
@@ -16,12 +16,14 @@
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
/*
* The ZFS retire agent is responsible for managing hot spares across all pools.
* When we see a device fault or a device removal, we try to open the associated
@@ -37,10 +39,12 @@
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <libzfs.h>
#include <fm/libtopo.h>
#include <string.h>
+#include <sys/int_fmtio.h>
+#include <devid.h>
typedef struct zfs_retire_repaired {
struct zfs_retire_repaired *zrr_next;
uint64_t zrr_pool;
uint64_t zrr_vdev;
@@ -64,12 +68,14 @@
/*
* Find a pool with a matching GUID.
*/
typedef struct find_cbdata {
+ fmd_hdl_t *cb_hdl;
uint64_t cb_guid;
const char *cb_fru;
+ ddi_devid_t cb_devid;
zpool_handle_t *cb_zhp;
nvlist_t *cb_vdev;
} find_cbdata_t;
static int
@@ -89,46 +95,65 @@
/*
* Find a vdev within a tree with a matching GUID.
*/
static nvlist_t *
-find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, const char *search_fru,
- uint64_t search_guid)
+find_vdev(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, nvlist_t *nv,
+ const char *search_fru, ddi_devid_t search_devid, uint64_t search_guid)
{
uint64_t guid;
nvlist_t **child;
uint_t c, children;
nvlist_t *ret;
- char *fru;
+ char *fru, *devidstr, *path;
+ ddi_devid_t devid;
- if (search_fru != NULL) {
- if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &fru) == 0 &&
- libzfs_fru_compare(zhdl, fru, search_fru))
+ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0)
+ fmd_hdl_debug(hdl, "find_vdev: vdev path: %s", path);
+
+ if (search_fru != NULL &&
+ nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &fru) == 0) {
+ fmd_hdl_debug(hdl, "find_vdev: found fru: %s", fru);
+ if (libzfs_fru_compare(zhdl, fru, search_fru))
return (nv);
- } else {
+ }
+
+ if (search_devid != NULL &&
+ nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devidstr) == 0) {
+ fmd_hdl_debug(hdl, "find_vdev: found devid: %s", devidstr);
+
+ if (devid_str_decode(devidstr, &devid, NULL) == 0) {
+ if (devid_compare(search_devid, devid) == 0) {
+ devid_free(devid);
+ return (nv);
+ }
+
+ devid_free(devid);
+ }
+ }
+
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
guid == search_guid)
return (nv);
- }
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
return (NULL);
for (c = 0; c < children; c++) {
- if ((ret = find_vdev(zhdl, child[c], search_fru,
- search_guid)) != NULL)
+ if ((ret = find_vdev(hdl, zhdl, child[c], search_fru,
+ search_devid, search_guid)) != NULL)
return (ret);
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) != 0)
return (NULL);
for (c = 0; c < children; c++) {
- if ((ret = find_vdev(zhdl, child[c], search_fru,
- search_guid)) != NULL)
+ if ((ret = find_vdev(hdl, zhdl, child[c], search_fru,
+ search_devid, search_guid)) != NULL)
return (ret);
}
return (NULL);
}
@@ -135,12 +160,12 @@
/*
* Given a (pool, vdev) GUID pair, find the matching pool and vdev.
*/
static zpool_handle_t *
-find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
- nvlist_t **vdevp)
+find_by_guid(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, uint64_t pool_guid,
+ uint64_t vdev_guid, nvlist_t **vdevp)
{
find_cbdata_t cb;
zpool_handle_t *zhp;
nvlist_t *config, *nvroot;
@@ -158,11 +183,11 @@
zpool_close(zhp);
return (NULL);
}
if (vdev_guid != 0) {
- if ((*vdevp = find_vdev(zhdl, nvroot, NULL,
+ if ((*vdevp = find_vdev(hdl, zhdl, nvroot, NULL, NULL,
vdev_guid)) == NULL) {
zpool_close(zhp);
return (NULL);
}
}
@@ -179,92 +204,291 @@
config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) != 0) {
zpool_close(zhp);
+ fmd_hdl_debug(cbp->cb_hdl, "search_pool: "
+ "unable to get vdev tree");
return (0);
}
- if ((cbp->cb_vdev = find_vdev(zpool_get_handle(zhp), nvroot,
- cbp->cb_fru, 0)) != NULL) {
+ if ((cbp->cb_vdev = find_vdev(cbp->cb_hdl, zpool_get_handle(zhp),
+ nvroot, cbp->cb_fru, cbp->cb_devid, cbp->cb_guid)) != NULL) {
cbp->cb_zhp = zhp;
return (1);
}
zpool_close(zhp);
return (0);
}
/*
- * Given a FRU FMRI, find the matching pool and vdev.
+ * Given a FRU FMRI, devid, or guid: find the matching pool and vdev.
*/
static zpool_handle_t *
-find_by_fru(libzfs_handle_t *zhdl, const char *fru, nvlist_t **vdevp)
+find_by_anything(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, const char *fru,
+ ddi_devid_t devid, uint64_t guid, nvlist_t **vdevp)
{
find_cbdata_t cb;
+ (void) memset(&cb, 0, sizeof (cb));
+ cb.cb_hdl = hdl;
cb.cb_fru = fru;
+ cb.cb_devid = devid;
+ cb.cb_guid = guid;
cb.cb_zhp = NULL;
+
if (zpool_iter(zhdl, search_pool, &cb) != 1)
return (NULL);
*vdevp = cb.cb_vdev;
return (cb.cb_zhp);
}
/*
- * Given a vdev, attempt to replace it with every known spare until one
- * succeeds.
+ * Create a solved FMD case and add the fault to it
*/
static void
-replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
+generate_fault(fmd_hdl_t *hdl, nvlist_t *vdev, char *faultname)
{
- nvlist_t *config, *nvroot, *replacement;
- nvlist_t **spares;
- uint_t s, nspares;
- char *dev_name;
+ char *devid, *fdevid, *physpath, *s;
+ fmd_case_t *c;
+ fmd_hdl_topo_node_info_t *node;
+ nvlist_t *fault = NULL;
+ uint64_t wd;
- config = zpool_get_config(zhp, NULL);
- if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) != 0)
+ assert(hdl != NULL);
+ assert(vdev != NULL);
+ assert(faultname != NULL);
+
+ if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH,
+ &physpath) != 0 ||
+ nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wd) != 0)
return;
- /*
- * Find out if there are any hot spares available in the pool.
+ if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_DEVID,
+ &devid) == 0) {
+ fdevid = strdup(devid);
+ } else {
+ fdevid = devid_str_from_path(physpath);
+ }
+ if (fdevid == NULL) {
+ fmd_hdl_debug(hdl, "%s: failed to get devid", __func__);
+ return;
+ }
+
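+	/*
+	 * For whole-disk vdevs the stored devid carries a minor-name
+	 * suffix; strip it so the topo lookup below matches the bare disk.
+	 */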
+ if (wd && (s = strrchr(fdevid, '/')) != NULL)
+ *s = '\0';
+
+ c = fmd_case_open(hdl, NULL);
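+	/*
+	 * Prefer the topo node's FRU/resource for the fault payload; fall
+	 * back to the vdev nvlist if the devid is unknown to topo.
+	 */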
+ if ((node = fmd_hdl_topo_node_get_by_devid(hdl, fdevid)) == NULL) {
+ fault = fmd_nvl_create_fault(hdl, faultname, 100, NULL, vdev,
+ NULL);
+ } else {
+ fault = fmd_nvl_create_fault(hdl, faultname, 100,
+ node->resource, node->fru, node->resource);
+ nvlist_free(node->fru);
+ nvlist_free(node->resource);
+ fmd_hdl_free(hdl, node,
+ sizeof (fmd_hdl_topo_node_info_t));
+ }
+ fmd_case_add_suspect(hdl, c, fault);
+ fmd_case_setspecific(hdl, c, fdevid);
+ fmd_case_solve(hdl, c);
+
+ devid_str_free(fdevid);
+ fmd_hdl_debug(hdl, "%s: dispatched %s", __func__, faultname);
+}
+
+/*
+ * Determine if the FRU fields for the spare and the failed device match.
*/
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) != 0)
+static boolean_t
+match_fru(fmd_hdl_t *hdl, char *ffru, nvlist_t *spare)
+{
+ char *sfru;
+ boolean_t ret = B_FALSE;
+
+ if (nvlist_lookup_string(spare, ZPOOL_CONFIG_FRU, &sfru) != 0) {
+ fmd_hdl_debug(hdl, "%s: spare FRU not set", __func__);
+ return (B_FALSE);
+ }
+
+ /* We match on enclosure only at the moment */
+ ret = libzfs_fru_cmp_enclosure(ffru, sfru);
+ if (!ret)
+ fmd_hdl_debug(hdl, "%s: enclosure not matched", __func__);
+
+ return (ret);
+}
+
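+/*
+ * Attach a spare to the failed device.  zpool_vdev_attach() expects a
+ * root nvlist with the spare as its single child.
+ */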
+static boolean_t
+do_replace(zpool_handle_t *zhp, const char *fpath, const char *spath,
+ nvlist_t *spare)
+{
+ nvlist_t *nvroot;
+ boolean_t ret = B_FALSE;
+
+ if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
+ return (B_FALSE);
+
+ if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
+ nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+ &spare, 1) != 0)
+ goto fail;
+
+ ret = (zpool_vdev_attach(zhp, fpath, spath, nvroot, B_TRUE) == 0);
+
+fail:
+ nvlist_free(nvroot);
+ return (ret);
+}
+
+/*
+ * Attempt to replace failed device with spare.
+ *
+ * Spare selection is done in the following order:
+ * - If failed device has sparegroup property set, look for the spares that
+ *   belong to the same sparegroup. If no suitable spare is found, skip
+ * the spares that have sparegroup property set while doing other match types.
+ * - If failed device has FRU set, look for the spares in the same enclosure.
+ * - Finally, try using any available spare.
+ *
+ * Note that all match types do a media-type match first, so that we don't
+ * replace HDD with SSD and vice versa.
+ */
+static void
+replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
+{
+ nvlist_t *config, *nvroot, **spares;
+ uint_t i, nspares;
+ boolean_t uu1, uu2, log;
+ char *devpath;
+ char fdevpath[PATH_MAX]; /* devpath of failed device */
+ char *ffru = NULL; /* FRU of failed device */
+ char fsg[MAXNAMELEN]; /* sparegroup of failed device */
+ boolean_t use_sg = B_FALSE; /* do sparegroup matching */
+ boolean_t done_sg = B_FALSE; /* done sparegroup matching */
+ boolean_t use_fru = B_FALSE; /* do FRU matching */
+ boolean_t done_fru = B_FALSE; /* done FRU matching */
+ boolean_t fssd = B_FALSE; /* failed device is SSD */
+ uint64_t wd;
+
+ if ((config = zpool_get_config(zhp, NULL)) == NULL ||
+ nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0)
return;
- replacement = fmd_nvl_alloc(hdl, FMD_SLEEP);
+ /* Check if there are any hot spares available in the pool */
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
+ &nspares) != 0) {
+ fmd_hdl_debug(hdl, "%s: no spares found", __func__);
+ return;
+ }
- (void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
- VDEV_TYPE_ROOT);
+ if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &devpath) != 0 ||
+ nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wd) != 0 ||
+ nvlist_lookup_boolean_value(vdev, ZPOOL_CONFIG_IS_SSD, &fssd) != 0)
+ return;
+ (void) strlcpy(fdevpath, devpath, sizeof (fdevpath));
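+	/* Whole-disk vdev paths carry a slice suffix ("s0"); strip it */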
+ if (wd)
+ fdevpath[strlen(fdevpath) - 2] = '\0';
- dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
+ /* Spares can't replace log devices */
+ (void) zpool_find_vdev(zhp, fdevpath, &uu1, &uu2, &log, NULL);
+ if (log)
+ return;
+ /* Check if we should do sparegroup matching */
+ if (vdev_get_prop(zhp, fdevpath, VDEV_PROP_SPAREGROUP, fsg,
+ sizeof (fsg)) == 0 && strcmp(fsg, "-") != 0)
+ use_sg = B_TRUE;
+
+ use_fru = (fmd_prop_get_int32(hdl, "fru_compare") == FMD_B_TRUE);
+ /* Disable FRU matching if failed device doesn't have FRU set */
+ if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &ffru) != 0)
+ use_fru = B_FALSE;
+
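+	/*
+	 * Walk the spare list up to three times, relaxing the criteria on
+	 * each pass: sparegroup match, then FRU (enclosure) match, then
+	 * any spare of the same media type.
+	 */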
+again:
+ /* Go through the spares list */
+ for (i = 0; i < nspares; i++) {
+ char sdevpath[PATH_MAX]; /* devpath of spare */
+ char ssg[MAXNAMELEN]; /* sparegroup of spare */
+ boolean_t sssd = B_FALSE; /* spare is SSD */
+ boolean_t ssg_set = B_FALSE;
+
+ if (nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH,
+ &devpath) != 0 ||
+ nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_WHOLE_DISK,
+ &wd) != 0)
+ continue;
+
+ (void) strlcpy(sdevpath, devpath, sizeof (sdevpath));
+ if (wd)
+ sdevpath[strlen(sdevpath) - 2] = '\0';
+
+ /* Don't swap HDD for SSD and vice versa */
+ if (nvlist_lookup_boolean_value(spares[i], ZPOOL_CONFIG_IS_SSD,
+ &sssd) != 0 || fssd != sssd) {
+ continue;
+ }
+
+ /* Get the sparegroup property for the spare */
+ if (vdev_get_prop(zhp, sdevpath, VDEV_PROP_SPAREGROUP, ssg,
+ sizeof (ssg)) == 0 && strcmp(ssg, "-") != 0)
+ ssg_set = B_TRUE;
+
+ if (use_sg) {
+ if (!ssg_set || strcmp(fsg, ssg) != 0)
+ continue;
+			/* Found spare in the same group */
+ if (do_replace(zhp, fdevpath, sdevpath, spares[i]))
+ return;
+ continue;
+ }
+
/*
- * Try to replace each spare, ending when we successfully
- * replace it.
+ * If we tried matching on sparegroup and have not found
+ * any suitable spare, skip all spares with sparegroup
+ * set.
*/
- for (s = 0; s < nspares; s++) {
- char *spare_name;
+ if (done_sg && ssg_set)
+ continue;
- if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
- &spare_name) != 0)
+ if (use_fru) {
+ if (!match_fru(hdl, ffru, spares[i]))
continue;
+ /* Found spare with matching FRU */
+ if (do_replace(zhp, fdevpath, sdevpath, spares[i]))
+ return;
+ continue;
+ }
- (void) nvlist_add_nvlist_array(replacement,
- ZPOOL_CONFIG_CHILDREN, &spares[s], 1);
+ /*
+ * sparegroup and FRU matching was either not used or didn't
+ * find any suitable spares, use the first available one.
+ */
+ if (do_replace(zhp, fdevpath, sdevpath, spares[i])) {
+			/* If we tried intelligent sparing, generate fault */
+ if (done_sg || done_fru) {
+ generate_fault(hdl, vdev,
+ "fault.fs.zfs.vdev.dumb_spared");
+ }
+ return;
+ }
+ }
- if (zpool_vdev_attach(zhp, dev_name, spare_name,
- replacement, B_TRUE) == 0)
- break;
+ if (use_sg) {
+ done_sg = B_TRUE;
+ use_sg = B_FALSE;
+ goto again;
+ } else if (use_fru) {
+ done_fru = B_TRUE;
+ use_fru = B_FALSE;
+ goto again;
}
- free(dev_name);
- nvlist_free(replacement);
+ generate_fault(hdl, vdev, "fault.fs.zfs.vdev.not_spared");
}
/*
* Repair this vdev if we had diagnosed a 'fault.fs.zfs.device' and
* ASRU is now usable. ZFS has found the device to be present and
@@ -345,32 +569,92 @@
zrp->zrr_pool = pool_guid;
zrp->zrr_vdev = vdev_guid;
zdp->zrd_repaired = zrp;
}
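+/*
+ * Return the current state of the vdev with the given GUID, refreshing
+ * the pool's stats first.
+ */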
+static int
+zfs_get_vdev_state(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, zpool_handle_t *zhp,
+ uint64_t vdev_guid, nvlist_t **vdev)
+{
+ nvlist_t *config, *nvroot;
+ vdev_stat_t *vs;
+ uint_t cnt;
+ boolean_t missing;
+
+ if (zpool_refresh_stats(zhp, &missing) != 0 ||
+ missing != B_FALSE) {
+ fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't refresh stats");
+ return (VDEV_STATE_UNKNOWN);
+ }
+
+ config = zpool_get_config(zhp, NULL);
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) != 0) {
+ fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't get vdev tree");
+ return (VDEV_STATE_UNKNOWN);
+ }
+
+ *vdev = find_vdev(hdl, zhdl, nvroot, NULL, NULL, vdev_guid);
+
+ if (nvlist_lookup_uint64_array(*vdev, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &cnt) != 0) {
+ fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't get vdev stats");
+ return (VDEV_STATE_UNKNOWN);
+ }
+
+ return (vs->vs_state);
+}
+
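+/*
+ * Retire or unretire the device at the given /devices path via the
+ * libdevinfo retire interfaces.
+ */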
+int
+zfs_retire_device(fmd_hdl_t *hdl, char *path, boolean_t retire)
+{
+ di_retire_t drt = {0};
+ int err;
+
+ drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort;
+ drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug;
+ drt.rt_hdl = hdl;
+
+ fmd_hdl_debug(hdl, "zfs_retire_device: "
+ "attempting to %sretire %s", retire ? "" : "un", path);
+
+ err = retire ?
+ di_retire_device(path, &drt, 0) :
+ di_unretire_device(path, &drt);
+
+ if (err != 0)
+		fmd_hdl_debug(hdl, "zfs_retire_device: "
+ "di_%sretire_device failed: %d %s",
+ retire ? "" : "un", err, path);
+
+ return (err);
+}
+
/*ARGSUSED*/
static void
zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
const char *class)
{
uint64_t pool_guid, vdev_guid;
zpool_handle_t *zhp;
- nvlist_t *resource, *fault, *fru;
+ nvlist_t *resource, *fault, *fru, *asru;
nvlist_t **faults;
uint_t f, nfaults;
zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl);
libzfs_handle_t *zhdl = zdp->zrd_hdl;
boolean_t fault_device, degrade_device;
boolean_t is_repair;
- char *scheme, *fmri;
+ char *scheme = NULL, *fmri = NULL, *devidstr = NULL, *path = NULL;
+	ddi_devid_t devid = NULL;
nvlist_t *vdev;
char *uuid;
int repair_done = 0;
boolean_t retire;
boolean_t is_disk;
+ boolean_t retire_device = B_FALSE;
vdev_aux_t aux;
- topo_hdl_t *thp;
+ topo_hdl_t *thp = NULL;
int err;
/*
* If this is a resource notifying us of device removal, then simply
* check for an available spare and continue.
@@ -380,11 +664,11 @@
&pool_guid) != 0 ||
nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
&vdev_guid) != 0)
return;
- if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
+ if ((zhp = find_by_guid(hdl, zhdl, pool_guid, vdev_guid,
&vdev)) == NULL)
return;
if (fmd_prop_get_int32(hdl, "spare_on_remove"))
replace_with_spare(hdl, zhp, vdev);
@@ -424,19 +708,33 @@
is_disk = B_FALSE;
if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE,
&retire) == 0 && retire == 0)
continue;
+ if (fmd_nvl_class_match(hdl, fault, "fault.io.disk.slow-io") &&
+ fmd_prop_get_int32(hdl, "slow_io_skip_retire") ==
+ FMD_B_TRUE) {
+ fmd_hdl_debug(hdl, "ignoring slow io fault");
+ continue;
+ }
if (fmd_nvl_class_match(hdl, fault,
"fault.io.disk.ssm-wearout") &&
fmd_prop_get_int32(hdl, "ssm_wearout_skip_retire") ==
FMD_B_TRUE) {
fmd_hdl_debug(hdl, "zfs-retire: ignoring SSM fault");
continue;
}
/*
* While we subscribe to fault.fs.zfs.*, we only take action
* for faults targeting a specific vdev (open failure or SERD
* failure). We also subscribe to fault.io.* events, so that
* faulty disks will be faulted in the ZFS configuration.
@@ -445,48 +743,125 @@
fault_device = B_TRUE;
} else if (fmd_nvl_class_match(hdl, fault,
"fault.fs.zfs.vdev.checksum")) {
degrade_device = B_TRUE;
} else if (fmd_nvl_class_match(hdl, fault,
+ "fault.fs.zfs.vdev.timeout")) {
+ fault_device = B_TRUE;
+ } else if (fmd_nvl_class_match(hdl, fault,
"fault.fs.zfs.device")) {
fault_device = B_FALSE;
- } else if (fmd_nvl_class_match(hdl, fault, "fault.io.*")) {
+ } else if (fmd_nvl_class_match(hdl, fault, "fault.io.disk.*") ||
+ fmd_nvl_class_match(hdl, fault, "fault.io.scsi.*")) {
is_disk = B_TRUE;
fault_device = B_TRUE;
} else {
continue;
}
if (is_disk) {
/*
- * This is a disk fault. Lookup the FRU, convert it to
- * an FMRI string, and attempt to find a matching vdev.
+ * This is a disk fault. Lookup the FRU and ASRU,
+ * convert them to FMRI and devid strings, and attempt
+ * to find a matching vdev. If no vdev is found, the
+ * device might still be retired/unretired.
*/
if (nvlist_lookup_nvlist(fault, FM_FAULT_FRU,
&fru) != 0 ||
nvlist_lookup_string(fru, FM_FMRI_SCHEME,
- &scheme) != 0)
- continue;
+ &scheme) != 0) {
+ fmd_hdl_debug(hdl,
+ "zfs_retire_recv: unable to get FRU");
+ goto nofru;
+ }
- if (strcmp(scheme, FM_FMRI_SCHEME_HC) != 0)
- continue;
+ if (strcmp(scheme, FM_FMRI_SCHEME_HC) != 0) {
+ fmd_hdl_debug(hdl,
+ "zfs_retire_recv: not hc scheme: %s",
+ scheme);
+ goto nofru;
+ }
thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
if (topo_fmri_nvl2str(thp, fru, &fmri, &err) != 0) {
fmd_hdl_topo_rele(hdl, thp);
- continue;
+ fmd_hdl_debug(hdl,
+ "zfs_retire_recv: unable to get FMRI");
+ goto nofru;
}
- zhp = find_by_fru(zhdl, fmri, &vdev);
+ fmd_hdl_debug(hdl, "zfs_retire_recv: got FMRI %s",
+ fmri);
+
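+			/*
+			 * Even without a usable FRU we may still identify
+			 * the vdev by devid, taken from the ASRU below.
+			 */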
+ nofru:
+ if (nvlist_lookup_nvlist(fault, FM_FAULT_ASRU,
+ &asru) != 0 ||
+ nvlist_lookup_string(asru, FM_FMRI_SCHEME,
+ &scheme) != 0) {
+ fmd_hdl_debug(hdl,
+ "zfs_retire_recv: unable to get ASRU");
+ goto nodevid;
+ }
+
+ if (strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
+ fmd_hdl_debug(hdl,
+ "zfs_retire_recv: not dev scheme: %s",
+ scheme);
+ goto nodevid;
+ }
+
+ if (nvlist_lookup_string(asru, FM_FMRI_DEV_ID,
+ &devidstr) != 0) {
+ fmd_hdl_debug(hdl,
+ "zfs_retire_recv: couldn't get devid");
+ goto nodevid;
+ }
+
+ fmd_hdl_debug(hdl, "zfs_retire_recv: got devid %s",
+ devidstr);
+
+ if (devid_str_decode(devidstr, &devid, NULL) != 0) {
+ fmd_hdl_debug(hdl,
+ "zfs_retire_recv: devid_str_decode failed");
+ goto nodevid;
+ }
+
+ if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
+ &path) != 0) {
+ fmd_hdl_debug(hdl,
+ "zfs_retire_recv: couldn't get path, "
+ "won't be able to retire device");
+ goto nodevid;
+ }
+
+ fmd_hdl_debug(hdl, "zfs_retire_recv: got path %s",
+ path);
+
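+			/*
+			 * Look up the pool and vdev using whatever
+			 * identifiers (FRU FMRI, devid) were collected.
+			 */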
+ nodevid:
+ zhp = find_by_anything(hdl, zhdl, fmri, devid, 0,
+ &vdev);
+ if (fmri) {
topo_hdl_strfree(thp, fmri);
fmd_hdl_topo_rele(hdl, thp);
+ }
+ if (devid)
+ devid_free(devid);
- if (zhp == NULL)
+ if (zhp == NULL) {
+ fmd_hdl_debug(hdl, "zfs_retire_recv: no zhp");
+ if (path != NULL)
+ (void) zfs_retire_device(hdl, path,
+ !is_repair);
continue;
+ }
- (void) nvlist_lookup_uint64(vdev,
- ZPOOL_CONFIG_GUID, &vdev_guid);
+ (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+ &vdev_guid);
+
+ fmd_hdl_debug(hdl, "zfs_retire_recv: found vdev GUID: %"
+ PRIx64, vdev_guid);
+
aux = VDEV_AUX_EXTERNAL;
} else {
/*
* This is a ZFS fault. Lookup the resource, and
* attempt to find the matching vdev.
@@ -510,14 +885,18 @@
vdev_guid = 0;
else
continue;
}
- if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
+ if ((zhp = find_by_guid(hdl, zhdl, pool_guid, vdev_guid,
&vdev)) == NULL)
continue;
+ if (fmd_nvl_class_match(hdl, fault,
+ "fault.fs.zfs.vdev.open_failed"))
+ aux = VDEV_AUX_OPEN_FAILED;
+ else
aux = VDEV_AUX_ERR_EXCEEDED;
}
if (vdev_guid == 0) {
/*
@@ -531,29 +910,42 @@
/*
* If this is a repair event, then mark the vdev as repaired and
* continue.
*/
if (is_repair) {
+ if (is_disk && path != NULL &&
+ zfs_retire_device(hdl, path, B_FALSE) != 0)
+ continue;
+
repair_done = 1;
(void) zpool_vdev_clear(zhp, vdev_guid);
zpool_close(zhp);
continue;
}
/*
* Actively fault the device if needed.
*/
- if (fault_device)
+ if (fault_device) {
(void) zpool_vdev_fault(zhp, vdev_guid, aux);
+
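+		/*
+		 * Only retire the backing device if the vdev actually
+		 * transitioned to FAULTED.
+		 */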
+ if (zfs_get_vdev_state(hdl, zhdl, zhp, vdev_guid, &vdev)
+ == VDEV_STATE_FAULTED)
+ retire_device = B_TRUE;
+ }
+
if (degrade_device)
(void) zpool_vdev_degrade(zhp, vdev_guid, aux);
/*
* Attempt to substitute a hot spare.
*/
replace_with_spare(hdl, zhp, vdev);
zpool_close(zhp);
+
+ if (is_disk && retire_device && path != NULL)
+ (void) zfs_retire_device(hdl, path, B_TRUE);
}
if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done &&
nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
fmd_case_uuresolved(hdl, uuid);
@@ -567,16 +959,18 @@
NULL, /* fmdo_gc */
};
static const fmd_prop_t fmd_props[] = {
{ "spare_on_remove", FMD_TYPE_BOOL, "true" },
+ { "slow_io_skip_retire", FMD_TYPE_BOOL, "true"},
{ "ssm_wearout_skip_retire", FMD_TYPE_BOOL, "true"},
+ { "fru_compare", FMD_TYPE_BOOL, "true"},
{ NULL, 0, NULL }
};
static const fmd_hdl_info_t fmd_info = {
- "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
+ "ZFS Retire Agent", "1.1", &fmd_ops, fmd_props
};
void
_fmd_init(fmd_hdl_t *hdl)
{