NEX-10626 Hot spare doesn't replace failed SSD
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5736 implement autoreplace matching based on FRU slot number
NEX-6200 hot spares are not reactivated after reinserting into enclosure
NEX-9403 need to update FRU for spare and l2cache devices
NEX-9404 remove lofi autoreplace support from syseventd
NEX-9409 hotsparing doesn't work for vdevs without FRU
NEX-9424 zfs`vdev_online() needs better notification about state changes
Portions contributed by: Alek Pinchuk <alek@nexenta.com>
Portions contributed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-7397 Hotspare didn't kick in automatically when one of the drive in pool went "Faulty" (is_ssd fix)
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-7397 Hotspare didn't kick in automatically when one of the drive in pool went "Faulty"
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-5753 FMD core dumps
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-5774 fix for NEX-3166 has a tunable typo
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3166 need to add FMA events for SSD lifespan
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability (lint fix)
Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
NEX-5163 backport illumos 6027 EOL zulu (XVR-4000)
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
6027 EOL zulu (XVR-4000)
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Peter Tribble <peter.tribble@gmail.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Dan McDonald <danmcd@omniti.com>
NEX-5162 backport illumos 6507 i386 makecontext(3c) needs to 16-byte align the stack
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
6507 i386 makecontext(3c) needs to 16-byte align the stack
Reviewed by: Gordon Ross <gordon.w.ross@gmail.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Dan McDonald <danmcd@omniti.com>
NEX-5207 attempt to activate spare cores fmd
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-1438 bump slow-io threshold and default to disabled automated response
NEX-941 zfs doesn't replace "UNAVAIL" disk from spares in pool
OS-66 Retired devices may still get attached leading to ndi_devi_online errors
OS-65 New FMA agent is needed to consume diagnosed slow IO
Portions contributed by Marcel Telka.
zfsxx issue #11: support for spare device groups
re #12393 rb3935 Kerberos and smbd disagree about who is our AD server (fix elf runtime attributes check)
re #11612 rb3907 Failing vdev of a mirrored pool should not take zfs operations out of action for extended periods of time.

@@ -16,12 +16,14 @@
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
  */
 
 /*
  * The ZFS retire agent is responsible for managing hot spares across all pools.
  * When we see a device fault or a device removal, we try to open the associated

@@ -37,10 +39,12 @@
 #include <sys/fm/protocol.h>
 #include <sys/fm/fs/zfs.h>
 #include <libzfs.h>
 #include <fm/libtopo.h>
 #include <string.h>
+#include <sys/int_fmtio.h>
+#include <devid.h>
 
 typedef struct zfs_retire_repaired {
         struct zfs_retire_repaired      *zrr_next;
         uint64_t                        zrr_pool;
         uint64_t                        zrr_vdev;

@@ -64,12 +68,14 @@
 
 /*
  * Find a pool with a matching GUID.
  */
 typedef struct find_cbdata {
+        fmd_hdl_t       *cb_hdl;
         uint64_t        cb_guid;
         const char      *cb_fru;
+        ddi_devid_t     cb_devid;
         zpool_handle_t  *cb_zhp;
         nvlist_t        *cb_vdev;
 } find_cbdata_t;
 
 static int

@@ -89,46 +95,65 @@
 
 /*
  * Find a vdev within a tree with a matching GUID.
  */
 static nvlist_t *
-find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, const char *search_fru,
-    uint64_t search_guid)
+find_vdev(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, nvlist_t *nv,
+    const char *search_fru, ddi_devid_t search_devid, uint64_t search_guid)
 {
         uint64_t guid;
         nvlist_t **child;
         uint_t c, children;
         nvlist_t *ret;
-        char *fru;
+        char *fru, *devidstr, *path;
+        ddi_devid_t devid;
 
-        if (search_fru != NULL) {
-                if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &fru) == 0 &&
-                    libzfs_fru_compare(zhdl, fru, search_fru))
+        if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0)
+                fmd_hdl_debug(hdl, "find_vdev: vdev path: %s", path);
+
+        if (search_fru != NULL &&
+            nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &fru) == 0) {
+                fmd_hdl_debug(hdl, "find_vdev: found fru: %s", fru);
+                if (libzfs_fru_compare(zhdl, fru, search_fru))
                         return (nv);
-        } else {
+        }
+
+        if (search_devid != NULL &&
+            nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devidstr) == 0) {
+                fmd_hdl_debug(hdl, "find_vdev: found devid: %s", devidstr);
+
+                if (devid_str_decode(devidstr, &devid, NULL) == 0) {
+                        if (devid_compare(search_devid, devid) == 0) {
+                                devid_free(devid);
+                                return (nv);
+                        }
+
+                        devid_free(devid);
+                }
+        }
+
-                if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
-                    guid == search_guid)
-                        return (nv);
-        }
+        if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+            guid == search_guid)
+                return (nv);
 
         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
             &child, &children) != 0)
                 return (NULL);
 
         for (c = 0; c < children; c++) {
-                if ((ret = find_vdev(zhdl, child[c], search_fru,
-                    search_guid)) != NULL)
+                if ((ret = find_vdev(hdl, zhdl, child[c], search_fru,
+                    search_devid, search_guid)) != NULL)
                         return (ret);
         }
 
         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
             &child, &children) != 0)
                 return (NULL);
 
         for (c = 0; c < children; c++) {
-                if ((ret = find_vdev(zhdl, child[c], search_fru,
-                    search_guid)) != NULL)
+                if ((ret = find_vdev(hdl, zhdl, child[c], search_fru,
+                    search_devid, search_guid)) != NULL)
                         return (ret);
         }
 
         return (NULL);
 }

@@ -135,12 +160,12 @@
 
 /*
  * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
  */
 static zpool_handle_t *
-find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
-    nvlist_t **vdevp)
+find_by_guid(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, uint64_t pool_guid,
+    uint64_t vdev_guid, nvlist_t **vdevp)
 {
         find_cbdata_t cb;
         zpool_handle_t *zhp;
         nvlist_t *config, *nvroot;
 

@@ -158,11 +183,11 @@
                 zpool_close(zhp);
                 return (NULL);
         }
 
         if (vdev_guid != 0) {
-                if ((*vdevp = find_vdev(zhdl, nvroot, NULL,
+                if ((*vdevp = find_vdev(hdl, zhdl, nvroot, NULL, NULL,
                     vdev_guid)) == NULL) {
                         zpool_close(zhp);
                         return (NULL);
                 }
         }

@@ -179,92 +204,291 @@
 
         config = zpool_get_config(zhp, NULL);
         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
             &nvroot) != 0) {
                 zpool_close(zhp);
+                fmd_hdl_debug(cbp->cb_hdl, "search_pool: "
+                    "unable to get vdev tree");
                 return (0);
         }
 
-        if ((cbp->cb_vdev = find_vdev(zpool_get_handle(zhp), nvroot,
-            cbp->cb_fru, 0)) != NULL) {
+        if ((cbp->cb_vdev = find_vdev(cbp->cb_hdl, zpool_get_handle(zhp),
+            nvroot, cbp->cb_fru, cbp->cb_devid, cbp->cb_guid)) != NULL) {
                 cbp->cb_zhp = zhp;
                 return (1);
         }
 
         zpool_close(zhp);
         return (0);
 }
 
 /*
- * Given a FRU FMRI, find the matching pool and vdev.
+ * Given a FRU FMRI, devid, or guid: find the matching pool and vdev.
  */
 static zpool_handle_t *
-find_by_fru(libzfs_handle_t *zhdl, const char *fru, nvlist_t **vdevp)
+find_by_anything(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, const char *fru,
+    ddi_devid_t devid, uint64_t guid, nvlist_t **vdevp)
 {
         find_cbdata_t cb;
 
+        (void) memset(&cb, 0, sizeof (cb));
+        cb.cb_hdl = hdl;
         cb.cb_fru = fru;
+        cb.cb_devid = devid;
+        cb.cb_guid = guid;
         cb.cb_zhp = NULL;
+
         if (zpool_iter(zhdl, search_pool, &cb) != 1)
                 return (NULL);
 
         *vdevp = cb.cb_vdev;
         return (cb.cb_zhp);
 }
 
 /*
- * Given a vdev, attempt to replace it with every known spare until one
- * succeeds.
+ * Create a solved FMD case and add the fault to it
  */
 static void
-replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
+generate_fault(fmd_hdl_t *hdl, nvlist_t *vdev, char *faultname)
 {
-        nvlist_t *config, *nvroot, *replacement;
-        nvlist_t **spares;
-        uint_t s, nspares;
-        char *dev_name;
+        char *devid, *fdevid, *physpath, *s;
+        fmd_case_t *c;
+        fmd_hdl_topo_node_info_t *node;
+        nvlist_t *fault = NULL;
+        uint64_t wd;
 
-        config = zpool_get_config(zhp, NULL);
-        if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-            &nvroot) != 0)
+        assert(hdl != NULL);
+        assert(vdev != NULL);
+        assert(faultname != NULL);
+
+        if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH,
+            &physpath) != 0 ||
+            nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wd) != 0)
                 return;
 
-        /*
-         * Find out if there are any hot spares available in the pool.
+        if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_DEVID,
+            &devid) == 0) {
+                fdevid = strdup(devid);
+        } else {
+                fdevid = devid_str_from_path(physpath);
+        }
+        if (fdevid == NULL) {
+                fmd_hdl_debug(hdl, "%s: failed to get devid", __func__);
+                return;
+        }
+
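+        /* For whole disks, strip the minor name (after the last '/') */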
+        if (wd && (s = strrchr(fdevid, '/')) != NULL)
+                *s = '\0';
+
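+        /* Open a case, add the fault as a suspect, and solve it */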
+        c = fmd_case_open(hdl, NULL);
+        if ((node = fmd_hdl_topo_node_get_by_devid(hdl, fdevid)) == NULL) {
+                fault = fmd_nvl_create_fault(hdl, faultname, 100, NULL, vdev,
+                    NULL);
+        } else {
+                fault = fmd_nvl_create_fault(hdl, faultname, 100,
+                    node->resource, node->fru, node->resource);
+                nvlist_free(node->fru);
+                nvlist_free(node->resource);
+                fmd_hdl_free(hdl, node,
+                    sizeof (fmd_hdl_topo_node_info_t));
+        }
+        fmd_case_add_suspect(hdl, c, fault);
+        fmd_case_setspecific(hdl, c, fdevid);
+        fmd_case_solve(hdl, c);
+
+        devid_str_free(fdevid);
+        fmd_hdl_debug(hdl, "%s: dispatched %s", __func__, faultname);
+}
+
+/*
+ * Determine if the FRU fields for the spare and the failed device match.
-         */
+ */
-        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-            &spares, &nspares) != 0)
+static boolean_t
+match_fru(fmd_hdl_t *hdl, char *ffru, nvlist_t *spare)
+{
+        char *sfru;
+        boolean_t ret = B_FALSE;
+
+        if (nvlist_lookup_string(spare, ZPOOL_CONFIG_FRU, &sfru) != 0) {
+                fmd_hdl_debug(hdl, "%s: spare FRU not set", __func__);
+                return (B_FALSE);
+        }
+
+        /* We match on enclosure only at the moment */
+        ret = libzfs_fru_cmp_enclosure(ffru, sfru);
+        if (!ret)
+                fmd_hdl_debug(hdl, "%s: enclosure not matched", __func__);
+
+        return (ret);
+}
+
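+/*
+ * Wrap the spare in a root vdev nvlist and attach it to the failed device.
+ */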
+static boolean_t
+do_replace(zpool_handle_t *zhp, const char *fpath, const char *spath,
+    nvlist_t *spare)
+{
+        nvlist_t *nvroot;
+        boolean_t ret = B_FALSE;
+
+        if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
+                return (B_FALSE);
+
+        if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
+            nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+            &spare, 1) != 0)
+                goto fail;
+
+        ret = (zpool_vdev_attach(zhp, fpath, spath, nvroot, B_TRUE) == 0);
+
+fail:
+        nvlist_free(nvroot);
+        return (ret);
+}
+
+/*
+ * Attempt to replace failed device with spare.
+ *
+ * Spare selection is done in the following order:
+ * - If failed device has sparegroup property set, look for the spares that
+ *   belongs to the same sparegroup. If no suitable spare is found, skip
+ *   the spares that have sparegroup property set while doing other match types.
+ * - If failed device has FRU set, look for the spares in the same enclosure.
+ * - Finally, try using any available spare.
+ *
+ * Note that all match types do a media-type match first, so that we don't
+ * replace HDD with SSD and vice versa.
+ */
+static void
+replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
+{
+        nvlist_t *config, *nvroot, **spares;
+        uint_t i, nspares;
+        boolean_t uu1, uu2, log;
+        char *devpath;
+        char fdevpath[PATH_MAX];        /* devpath of failed device */
+        char *ffru = NULL;              /* FRU of failed device */
+        char fsg[MAXNAMELEN];           /* sparegroup of failed device */
+        boolean_t use_sg = B_FALSE;     /* do sparegroup matching */
+        boolean_t done_sg = B_FALSE;    /* done sparegroup matching */
+        boolean_t use_fru = B_FALSE;    /* do FRU matching */
+        boolean_t done_fru = B_FALSE;   /* done FRU matching */
+        boolean_t fssd = B_FALSE;       /* failed device is SSD */
+        uint64_t wd;
+
+        if ((config = zpool_get_config(zhp, NULL)) == NULL ||
+            nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0)
                 return;
 
-        replacement = fmd_nvl_alloc(hdl, FMD_SLEEP);
+        /* Check if there are any hot spares available in the pool */
+        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
+            &nspares) != 0) {
+                fmd_hdl_debug(hdl, "%s: no spares found", __func__);
+                return;
+        }
 
-        (void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
-            VDEV_TYPE_ROOT);
+        if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &devpath) != 0 ||
+            nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wd) != 0 ||
+            nvlist_lookup_boolean_value(vdev, ZPOOL_CONFIG_IS_SSD, &fssd) != 0)
+                return;
+        (void) strlcpy(fdevpath, devpath, sizeof (fdevpath));
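+        /* For whole disks, strip the trailing slice suffix from the path */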
+        if (wd)
+                fdevpath[strlen(fdevpath) - 2] = '\0';
 
-        dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
+        /* Spares can't replace log devices */
+        (void) zpool_find_vdev(zhp, fdevpath, &uu1, &uu2, &log, NULL);
+        if (log)
+                return;
 
+        /* Check if we should do sparegroup matching */
+        if (vdev_get_prop(zhp, fdevpath, VDEV_PROP_SPAREGROUP, fsg,
+            sizeof (fsg)) == 0 && strcmp(fsg, "-") != 0)
+                use_sg = B_TRUE;
+
+        use_fru = (fmd_prop_get_int32(hdl, "fru_compare") == FMD_B_TRUE);
+        /* Disable FRU matching if failed device doesn't have FRU set */
+        if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &ffru) != 0)
+                use_fru = B_FALSE;
+
+again:
+        /* Go through the spares list */
+        for (i = 0; i < nspares; i++) {
+                char sdevpath[PATH_MAX];        /* devpath of spare */
+                char ssg[MAXNAMELEN];           /* sparegroup of spare */
+                boolean_t sssd = B_FALSE;       /* spare is SSD */
+                boolean_t ssg_set = B_FALSE;
+
+                if (nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH,
+                    &devpath) != 0 ||
+                    nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_WHOLE_DISK,
+                    &wd) != 0)
+                        continue;
+
+                (void) strlcpy(sdevpath, devpath, sizeof (sdevpath));
+                if (wd)
+                        sdevpath[strlen(sdevpath) - 2] = '\0';
+
+                /* Don't swap HDD for SSD and vice versa */
+                if (nvlist_lookup_boolean_value(spares[i], ZPOOL_CONFIG_IS_SSD,
+                    &sssd) != 0 || fssd != sssd) {
+                        continue;
+                }
+
+                /* Get the sparegroup property for the spare */
+                if (vdev_get_prop(zhp, sdevpath, VDEV_PROP_SPAREGROUP, ssg,
+                    sizeof (ssg)) == 0 && strcmp(ssg, "-") != 0)
+                        ssg_set = B_TRUE;
+
+                if (use_sg) {
+                        if (!ssg_set || strcmp(fsg, ssg) != 0)
+                                continue;
+                        /* Found spare in the same group */
+                        if (do_replace(zhp, fdevpath, sdevpath, spares[i]))
+                                return;
+                        continue;
+                }
+
-        /*
-         * Try to replace each spare, ending when we successfully
-         * replace it.
-         */
+                /*
+                 * If we tried matching on sparegroup and have not found
+                 * any suitable spare, skip all spares with sparegroup
+                 * set.
+                 */
-        for (s = 0; s < nspares; s++) {
-                char *spare_name;
+                if (done_sg && ssg_set)
+                        continue;
 
-                if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
-                    &spare_name) != 0)
-                        continue;
+                if (use_fru) {
+                        if (!match_fru(hdl, ffru, spares[i]))
+                                continue;
+                        /* Found spare with matching FRU */
+                        if (do_replace(zhp, fdevpath, sdevpath, spares[i]))
+                                return;
+                        continue;
+                }
 
-                (void) nvlist_add_nvlist_array(replacement,
-                    ZPOOL_CONFIG_CHILDREN, &spares[s], 1);
+                /*
+                 * Sparegroup and FRU matching were either not used or did
+                 * not find a suitable spare, so use the first available one.
+                 */
+                if (do_replace(zhp, fdevpath, sdevpath, spares[i])) {
+                        /* If we tried intelligent sparing, generate a fault */
+                        if (done_sg || done_fru) {
+                                generate_fault(hdl, vdev,
+                                    "fault.fs.zfs.vdev.dumb_spared");
+                        }
+                        return;
+                }
+        }
 
-                if (zpool_vdev_attach(zhp, dev_name, spare_name,
-                    replacement, B_TRUE) == 0)
-                        break;
+        if (use_sg) {
+                done_sg = B_TRUE;
+                use_sg = B_FALSE;
+                goto again;
+        } else if (use_fru) {
+                done_fru = B_TRUE;
+                use_fru = B_FALSE;
+                goto again;
         }
 
-        free(dev_name);
-        nvlist_free(replacement);
+        generate_fault(hdl, vdev, "fault.fs.zfs.vdev.not_spared");
 }
 
 /*
  * Repair this vdev if we had diagnosed a 'fault.fs.zfs.device' and
  * ASRU is now usable.  ZFS has found the device to be present and

@@ -345,32 +569,92 @@
         zrp->zrr_pool = pool_guid;
         zrp->zrr_vdev = vdev_guid;
         zdp->zrd_repaired = zrp;
 }
 
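+/*
+ * Refresh the pool's stats, look up the vdev by GUID, and return its state.
+ */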
+static int
+zfs_get_vdev_state(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, zpool_handle_t *zhp,
+    uint64_t vdev_guid, nvlist_t **vdev)
+{
+        nvlist_t *config, *nvroot;
+        vdev_stat_t *vs;
+        uint_t cnt;
+        boolean_t missing;
+
+        if (zpool_refresh_stats(zhp, &missing) != 0 ||
+            missing != B_FALSE) {
+                fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't refresh stats");
+                return (VDEV_STATE_UNKNOWN);
+        }
+
+        config = zpool_get_config(zhp, NULL);
+        if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+            &nvroot) != 0) {
+                fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't get vdev tree");
+                return (VDEV_STATE_UNKNOWN);
+        }
+
+        *vdev = find_vdev(hdl, zhdl, nvroot, NULL, NULL, vdev_guid);
+        if (*vdev == NULL) {
+                fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't find vdev");
+                return (VDEV_STATE_UNKNOWN);
+        }
+
+        if (nvlist_lookup_uint64_array(*vdev, ZPOOL_CONFIG_VDEV_STATS,
+            (uint64_t **)&vs, &cnt) != 0) {
+                fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't get vdev stats");
+                return (VDEV_STATE_UNKNOWN);
+        }
+
+        return (vs->vs_state);
+}
+
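+/*
+ * Retire or unretire the device at the given /devices path via libdevinfo.
+ */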
+int
+zfs_retire_device(fmd_hdl_t *hdl, char *path, boolean_t retire)
+{
+        di_retire_t drt = {0};
+        int err;
+
+        drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort;
+        drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug;
+        drt.rt_hdl = hdl;
+
+        fmd_hdl_debug(hdl, "zfs_retire_device: "
+            "attempting to %sretire %s", retire ? "" : "un", path);
+
+        err = retire ?
+            di_retire_device(path, &drt, 0) :
+            di_unretire_device(path, &drt);
+
+        if (err != 0)
+                fmd_hdl_debug(hdl, "zfs_retire_device: "
+                    "di_%sretire_device failed: %d %s",
+                    retire ? "" : "un", err, path);
+
+        return (err);
+}
+
 /*ARGSUSED*/
 static void
 zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
     const char *class)
 {
         uint64_t pool_guid, vdev_guid;
         zpool_handle_t *zhp;
-        nvlist_t *resource, *fault, *fru;
+        nvlist_t *resource, *fault, *fru, *asru;
         nvlist_t **faults;
         uint_t f, nfaults;
         zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl);
         libzfs_handle_t *zhdl = zdp->zrd_hdl;
         boolean_t fault_device, degrade_device;
         boolean_t is_repair;
-        char *scheme, *fmri;
+        char *scheme = NULL, *fmri = NULL, *devidstr = NULL, *path = NULL;
+        ddi_devid_t devid = NULL;
         nvlist_t *vdev;
         char *uuid;
         int repair_done = 0;
         boolean_t retire;
         boolean_t is_disk;
+        boolean_t retire_device = B_FALSE;
         vdev_aux_t aux;
-        topo_hdl_t *thp;
+        topo_hdl_t *thp = NULL;
         int err;
 
         /*
          * If this is a resource notifying us of device removal, then simply
          * check for an available spare and continue.

@@ -380,11 +664,11 @@
                     &pool_guid) != 0 ||
                     nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
                     &vdev_guid) != 0)
                         return;
 
-                if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
+                if ((zhp = find_by_guid(hdl, zhdl, pool_guid, vdev_guid,
                     &vdev)) == NULL)
                         return;
 
                 if (fmd_prop_get_int32(hdl, "spare_on_remove"))
                         replace_with_spare(hdl, zhp, vdev);

@@ -424,19 +708,33 @@
                 is_disk = B_FALSE;
 
                 if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE,
                     &retire) == 0 && retire == 0)
                         continue;
+                if (fmd_nvl_class_match(hdl, fault, "fault.io.disk.slow-io") &&
+                    fmd_prop_get_int32(hdl, "slow_io_skip_retire") ==
+                    FMD_B_TRUE) {
+                        fmd_hdl_debug(hdl, "ignoring slow io fault");
+                        continue;
+                }
 
                 if (fmd_nvl_class_match(hdl, fault,
                     "fault.io.disk.ssm-wearout") &&
                     fmd_prop_get_int32(hdl, "ssm_wearout_skip_retire") ==
                     FMD_B_TRUE) {
                         fmd_hdl_debug(hdl, "zfs-retire: ignoring SSM fault");
                         continue;
                 }
 
                 /*
                  * While we subscribe to fault.fs.zfs.*, we only take action
                  * for faults targeting a specific vdev (open failure or SERD
                  * failure).  We also subscribe to fault.io.* events, so that
                  * faulty disks will be faulted in the ZFS configuration.

@@ -445,48 +743,125 @@
                         fault_device = B_TRUE;
                 } else if (fmd_nvl_class_match(hdl, fault,
                     "fault.fs.zfs.vdev.checksum")) {
                         degrade_device = B_TRUE;
                 } else if (fmd_nvl_class_match(hdl, fault,
+                    "fault.fs.zfs.vdev.timeout")) {
+                        fault_device = B_TRUE;
+                } else if (fmd_nvl_class_match(hdl, fault,
                     "fault.fs.zfs.device")) {
                         fault_device = B_FALSE;
-                } else if (fmd_nvl_class_match(hdl, fault, "fault.io.*")) {
+                } else if (fmd_nvl_class_match(hdl, fault, "fault.io.disk.*") ||
+                    fmd_nvl_class_match(hdl, fault, "fault.io.scsi.*")) {
                         is_disk = B_TRUE;
                         fault_device = B_TRUE;
                 } else {
                         continue;
                 }
 
                 if (is_disk) {
                         /*
-                         * This is a disk fault.  Lookup the FRU, convert it to
-                         * an FMRI string, and attempt to find a matching vdev.
+                         * This is a disk fault.  Lookup the FRU and ASRU,
+                         * convert them to FMRI and devid strings, and attempt
+                         * to find a matching vdev. If no vdev is found, the
+                         * device might still be retired/unretired.
                          */
                         if (nvlist_lookup_nvlist(fault, FM_FAULT_FRU,
                             &fru) != 0 ||
                             nvlist_lookup_string(fru, FM_FMRI_SCHEME,
-                            &scheme) != 0)
-                                continue;
+                            &scheme) != 0) {
+                                fmd_hdl_debug(hdl,
+                                    "zfs_retire_recv: unable to get FRU");
+                                goto nofru;
+                        }
 
-                        if (strcmp(scheme, FM_FMRI_SCHEME_HC) != 0)
-                                continue;
+                        if (strcmp(scheme, FM_FMRI_SCHEME_HC) != 0) {
+                                fmd_hdl_debug(hdl,
+                                    "zfs_retire_recv: not hc scheme: %s",
+                                    scheme);
+                                goto nofru;
+                        }
 
                         thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
                         if (topo_fmri_nvl2str(thp, fru, &fmri, &err) != 0) {
                                 fmd_hdl_topo_rele(hdl, thp);
-                                continue;
+                                fmd_hdl_debug(hdl,
+                                    "zfs_retire_recv: unable to get FMRI");
+                                goto nofru;
                         }
 
-                        zhp = find_by_fru(zhdl, fmri, &vdev);
+                        fmd_hdl_debug(hdl, "zfs_retire_recv: got FMRI %s",
+                            fmri);
+
+                nofru:
+                        if (nvlist_lookup_nvlist(fault, FM_FAULT_ASRU,
+                            &asru) != 0 ||
+                            nvlist_lookup_string(asru, FM_FMRI_SCHEME,
+                            &scheme) != 0) {
+                                fmd_hdl_debug(hdl,
+                                    "zfs_retire_recv: unable to get ASRU");
+                                goto nodevid;
+                        }
+
+                        if (strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
+                                fmd_hdl_debug(hdl,
+                                    "zfs_retire_recv: not dev scheme: %s",
+                                    scheme);
+                                goto nodevid;
+                        }
+
+                        if (nvlist_lookup_string(asru, FM_FMRI_DEV_ID,
+                            &devidstr) != 0) {
+                                fmd_hdl_debug(hdl,
+                                    "zfs_retire_recv: couldn't get devid");
+                                goto nodevid;
+                        }
+
+                        fmd_hdl_debug(hdl, "zfs_retire_recv: got devid %s",
+                            devidstr);
+
+                        if (devid_str_decode(devidstr, &devid, NULL) != 0) {
+                                fmd_hdl_debug(hdl,
+                                    "zfs_retire_recv: devid_str_decode failed");
+                                goto nodevid;
+                        }
+
+                        if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
+                            &path) != 0) {
+                                fmd_hdl_debug(hdl,
+                                    "zfs_retire_recv: couldn't get path, "
+                                    "won't be able to retire device");
+                                goto nodevid;
+                        }
+
+                        fmd_hdl_debug(hdl, "zfs_retire_recv: got path %s",
+                            path);
+
+                nodevid:
+                        zhp = find_by_anything(hdl, zhdl, fmri, devid, 0,
+                            &vdev);
-                        topo_hdl_strfree(thp, fmri);
-                        fmd_hdl_topo_rele(hdl, thp);
+                        if (fmri) {
+                                topo_hdl_strfree(thp, fmri);
+                                fmd_hdl_topo_rele(hdl, thp);
+                                fmri = NULL;
+                        }
+                        if (devid) {
+                                devid_free(devid);
+                                devid = NULL;
+                        }
 
-                        if (zhp == NULL)
+                        if (zhp == NULL) {
+                                fmd_hdl_debug(hdl, "zfs_retire_recv: no zhp");
+                                if (path != NULL)
+                                        (void) zfs_retire_device(hdl, path,
+                                            !is_repair);
                                 continue;
+                        }
 
-                        (void) nvlist_lookup_uint64(vdev,
-                            ZPOOL_CONFIG_GUID, &vdev_guid);
+                        (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
+                            &vdev_guid);
+
+                        fmd_hdl_debug(hdl, "zfs_retire_recv: found vdev GUID: %"
+                            PRIx64, vdev_guid);
+
                         aux = VDEV_AUX_EXTERNAL;
                 } else {
                         /*
                          * This is a ZFS fault.  Lookup the resource, and
                          * attempt to find the matching vdev.

@@ -510,14 +885,18 @@
                                         vdev_guid = 0;
                                 else
                                         continue;
                         }
 
-                        if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
+                        if ((zhp = find_by_guid(hdl, zhdl, pool_guid, vdev_guid,
                             &vdev)) == NULL)
                                 continue;
 
-                        aux = VDEV_AUX_ERR_EXCEEDED;
+                        if (fmd_nvl_class_match(hdl, fault,
+                            "fault.fs.zfs.vdev.open_failed"))
+                                aux = VDEV_AUX_OPEN_FAILED;
+                        else
+                                aux = VDEV_AUX_ERR_EXCEEDED;
                 }
 
                 if (vdev_guid == 0) {
                         /*

@@ -531,29 +910,42 @@
                 /*
                  * If this is a repair event, then mark the vdev as repaired and
                  * continue.
                  */
                 if (is_repair) {
+                        if (is_disk && path != NULL &&
+                            zfs_retire_device(hdl, path, B_FALSE) != 0)
+                                continue;
+
                         repair_done = 1;
                         (void) zpool_vdev_clear(zhp, vdev_guid);
                         zpool_close(zhp);
                         continue;
                 }
 
                 /*
                  * Actively fault the device if needed.
                  */
-                if (fault_device)
+                if (fault_device) {
                         (void) zpool_vdev_fault(zhp, vdev_guid, aux);
+
+                        if (zfs_get_vdev_state(hdl, zhdl, zhp, vdev_guid, &vdev)
+                            == VDEV_STATE_FAULTED)
+                                retire_device = B_TRUE;
+                }
+
                 if (degrade_device)
                         (void) zpool_vdev_degrade(zhp, vdev_guid, aux);
 
                 /*
                  * Attempt to substitute a hot spare.
                  */
                 replace_with_spare(hdl, zhp, vdev);
                 zpool_close(zhp);
+
+                if (is_disk && retire_device && path != NULL)
+                        (void) zfs_retire_device(hdl, path, B_TRUE);
         }
 
         if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done &&
             nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
                 fmd_case_uuresolved(hdl, uuid);

@@ -567,16 +959,18 @@
         NULL,                   /* fmdo_gc */
 };
 
 static const fmd_prop_t fmd_props[] = {
         { "spare_on_remove", FMD_TYPE_BOOL, "true" },
+        { "slow_io_skip_retire", FMD_TYPE_BOOL, "true"},
         { "ssm_wearout_skip_retire", FMD_TYPE_BOOL, "true"},
+        { "fru_compare", FMD_TYPE_BOOL, "true"},
         { NULL, 0, NULL }
 };
 
 static const fmd_hdl_info_t fmd_info = {
-        "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
+        "ZFS Retire Agent", "1.1", &fmd_ops, fmd_props
 };
 
 void
 _fmd_init(fmd_hdl_t *hdl)
 {