NEX-10626 Hot spare doesn't replace failed SSD
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5736 implement autoreplace matching based on FRU slot number
NEX-6200 hot spares are not reactivated after reinserting into enclosure
NEX-9403 need to update FRU for spare and l2cache devices
NEX-9404 remove lofi autoreplace support from syseventd
NEX-9409 hotsparing doesn't work for vdevs without FRU
NEX-9424 zfs`vdev_online() needs better notification about state changes
Portions contributed by: Alek Pinchuk <alek@nexenta.com>
Portions contributed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-7397 Hotspare didn't kick in automatically when one of the drive in pool went "Faulty" (is_ssd fix)
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-7397 Hotspare didn't kick in automatically when one of the drive in pool went "Faulty"
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-5753 FMD core dumps
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-5774 fix for NEX-3166 has a tunable typo
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3166 need to add FMA events for SSD lifespan
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability (lint fix)
Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
NEX-5163 backport illumos 6027 EOL zulu (XVR-4000)
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
6027 EOL zulu (XVR-4000)
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Peter Tribble <peter.tribble@gmail.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Dan McDonald <danmcd@omniti.com>
NEX-5162 backport illumos 6507 i386 makecontext(3c) needs to 16-byte align the stack
Reviewed by: Kevin Crowe <kevin.crowe@nexenta.com>
6507 i386 makecontext(3c) needs to 16-byte align the stack
Reviewed by: Gordon Ross <gordon.w.ross@gmail.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Dan McDonald <danmcd@omniti.com>
NEX-5207 attempt to activate spare cores fmd
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-1438 bump slow-io threshold and default to disabled automated response
NEX-941 zfs doesn't replace "UNAVAIL" disk from spares in pool
OS-66 Retired devices may still get attached leading to ndi_devi_online errors
OS-65 New FMA agent is needed to consume diagnosed slow IO
Portions contributed by Marcel Telka.
zfsxx issue #11: support for spare device groups
re #12393 rb3935 Kerberos and smbd disagree about who is our AD server (fix elf runtime attributes check)
re #11612 rb3907 Failing vdev of a mirrored pool should not take zfs operations out of action for extended periods of time.
*** 16,27 ****
--- 16,29 ----
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
/*
* The ZFS retire agent is responsible for managing hot spares across all pools.
* When we see a device fault or a device removal, we try to open the associated
*** 37,46 ****
--- 39,50 ----
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <libzfs.h>
#include <fm/libtopo.h>
#include <string.h>
+ #include <sys/int_fmtio.h>
+ #include <devid.h>
typedef struct zfs_retire_repaired {
struct zfs_retire_repaired *zrr_next;
uint64_t zrr_pool;
uint64_t zrr_vdev;
*** 64,75 ****
--- 68,81 ----
/*
* Find a pool with a matching GUID.
*/
typedef struct find_cbdata {
+ fmd_hdl_t *cb_hdl;
uint64_t cb_guid;
const char *cb_fru;
+ ddi_devid_t cb_devid;
zpool_handle_t *cb_zhp;
nvlist_t *cb_vdev;
} find_cbdata_t;
static int
*** 89,134 ****
/*
* Find a vdev within a tree with a matching GUID.
*/
static nvlist_t *
! find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, const char *search_fru,
! uint64_t search_guid)
{
uint64_t guid;
nvlist_t **child;
uint_t c, children;
nvlist_t *ret;
! char *fru;
! if (search_fru != NULL) {
! if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &fru) == 0 &&
! libzfs_fru_compare(zhdl, fru, search_fru))
return (nv);
! } else {
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
guid == search_guid)
return (nv);
- }
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
return (NULL);
for (c = 0; c < children; c++) {
! if ((ret = find_vdev(zhdl, child[c], search_fru,
! search_guid)) != NULL)
return (ret);
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) != 0)
return (NULL);
for (c = 0; c < children; c++) {
! if ((ret = find_vdev(zhdl, child[c], search_fru,
! search_guid)) != NULL)
return (ret);
}
return (NULL);
}
--- 95,159 ----
/*
* Find a vdev within a tree with a matching GUID.
*/
static nvlist_t *
! find_vdev(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, nvlist_t *nv,
! const char *search_fru, ddi_devid_t search_devid, uint64_t search_guid)
{
uint64_t guid;
nvlist_t **child;
uint_t c, children;
nvlist_t *ret;
! char *fru, *devidstr, *path;
! ddi_devid_t devid;
! if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0)
! fmd_hdl_debug(hdl, "find_vdev: vdev path: %s", path);
!
! if (search_fru != NULL &&
! nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &fru) == 0) {
! fmd_hdl_debug(hdl, "find_vdev: found fru: %s", fru);
! if (libzfs_fru_compare(zhdl, fru, search_fru))
return (nv);
! }
!
! if (search_devid != NULL &&
! nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devidstr) == 0) {
! fmd_hdl_debug(hdl, "find_vdev: found devid: %s", devidstr);
!
! if (devid_str_decode(devidstr, &devid, NULL) == 0) {
! if (devid_compare(search_devid, devid) == 0) {
! devid_free(devid);
! return (nv);
! }
!
! devid_free(devid);
! }
! }
!
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
guid == search_guid)
return (nv);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0)
return (NULL);
for (c = 0; c < children; c++) {
! if ((ret = find_vdev(hdl, zhdl, child[c], search_fru,
! search_devid, search_guid)) != NULL)
return (ret);
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) != 0)
return (NULL);
for (c = 0; c < children; c++) {
! if ((ret = find_vdev(hdl, zhdl, child[c], search_fru,
! search_devid, search_guid)) != NULL)
return (ret);
}
return (NULL);
}
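
For reference, the devid matching added to find_vdev() above is the standard libdevid decode/compare/free pattern. A minimal standalone sketch follows; it is illustrative only and not part of the change (the helper name and the idea of comparing two string-form devids are assumptions):

#include <devid.h>

/*
 * Return 1 if two string-form devids refer to the same device, else 0.
 * Mirrors the decode/compare/free sequence used in find_vdev() above.
 */
static int
devid_strs_match(char *a_str, char *b_str)
{
	ddi_devid_t a = NULL, b = NULL;
	int match = 0;

	/* devid_str_decode() parses the string form into an opaque devid */
	if (devid_str_decode(a_str, &a, NULL) != 0)
		return (0);
	if (devid_str_decode(b_str, &b, NULL) != 0) {
		devid_free(a);
		return (0);
	}

	/* devid_compare() returns 0 when the two devids match */
	match = (devid_compare(a, b) == 0);

	devid_free(a);
	devid_free(b);
	return (match);
}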
*** 135,146 ****
/*
* Given a (pool, vdev) GUID pair, find the matching pool and vdev.
*/
static zpool_handle_t *
! find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
! nvlist_t **vdevp)
{
find_cbdata_t cb;
zpool_handle_t *zhp;
nvlist_t *config, *nvroot;
--- 160,171 ----
/*
* Given a (pool, vdev) GUID pair, find the matching pool and vdev.
*/
static zpool_handle_t *
! find_by_guid(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, uint64_t pool_guid,
! uint64_t vdev_guid, nvlist_t **vdevp)
{
find_cbdata_t cb;
zpool_handle_t *zhp;
nvlist_t *config, *nvroot;
*** 158,168 ****
zpool_close(zhp);
return (NULL);
}
if (vdev_guid != 0) {
! if ((*vdevp = find_vdev(zhdl, nvroot, NULL,
vdev_guid)) == NULL) {
zpool_close(zhp);
return (NULL);
}
}
--- 183,193 ----
zpool_close(zhp);
return (NULL);
}
if (vdev_guid != 0) {
! if ((*vdevp = find_vdev(hdl, zhdl, nvroot, NULL, NULL,
vdev_guid)) == NULL) {
zpool_close(zhp);
return (NULL);
}
}
*** 179,270 ****
config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) != 0) {
zpool_close(zhp);
return (0);
}
! if ((cbp->cb_vdev = find_vdev(zpool_get_handle(zhp), nvroot,
! cbp->cb_fru, 0)) != NULL) {
cbp->cb_zhp = zhp;
return (1);
}
zpool_close(zhp);
return (0);
}
/*
! * Given a FRU FMRI, find the matching pool and vdev.
*/
static zpool_handle_t *
! find_by_fru(libzfs_handle_t *zhdl, const char *fru, nvlist_t **vdevp)
{
find_cbdata_t cb;
cb.cb_fru = fru;
cb.cb_zhp = NULL;
if (zpool_iter(zhdl, search_pool, &cb) != 1)
return (NULL);
*vdevp = cb.cb_vdev;
return (cb.cb_zhp);
}
/*
! * Given a vdev, attempt to replace it with every known spare until one
! * succeeds.
*/
static void
! replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
{
! nvlist_t *config, *nvroot, *replacement;
! nvlist_t **spares;
! uint_t s, nspares;
! char *dev_name;
! config = zpool_get_config(zhp, NULL);
! if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
! &nvroot) != 0)
return;
! /*
! * Find out if there are any hot spares available in the pool.
*/
! if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
! &spares, &nspares) != 0)
return;
! replacement = fmd_nvl_alloc(hdl, FMD_SLEEP);
! (void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
! VDEV_TYPE_ROOT);
! dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
/*
! * Try to replace each spare, ending when we successfully
! * replace it.
*/
! for (s = 0; s < nspares; s++) {
! char *spare_name;
! if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
! &spare_name) != 0)
continue;
! (void) nvlist_add_nvlist_array(replacement,
! ZPOOL_CONFIG_CHILDREN, &spares[s], 1);
! if (zpool_vdev_attach(zhp, dev_name, spare_name,
! replacement, B_TRUE) == 0)
! break;
}
! free(dev_name);
! nvlist_free(replacement);
}
/*
* Repair this vdev if we had diagnosed a 'fault.fs.zfs.device' and
* ASRU is now usable. ZFS has found the device to be present and
--- 204,494 ----
config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) != 0) {
zpool_close(zhp);
+ fmd_hdl_debug(cbp->cb_hdl, "search_pool: "
+ "unable to get vdev tree");
return (0);
}
! if ((cbp->cb_vdev = find_vdev(cbp->cb_hdl, zpool_get_handle(zhp),
! nvroot, cbp->cb_fru, cbp->cb_devid, cbp->cb_guid)) != NULL) {
cbp->cb_zhp = zhp;
return (1);
}
zpool_close(zhp);
return (0);
}
/*
! * Given a FRU FMRI, devid, or guid: find the matching pool and vdev.
*/
static zpool_handle_t *
! find_by_anything(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, const char *fru,
! ddi_devid_t devid, uint64_t guid, nvlist_t **vdevp)
{
find_cbdata_t cb;
+ (void) memset(&cb, 0, sizeof (cb));
+ cb.cb_hdl = hdl;
cb.cb_fru = fru;
+ cb.cb_devid = devid;
+ cb.cb_guid = guid;
cb.cb_zhp = NULL;
+
if (zpool_iter(zhdl, search_pool, &cb) != 1)
return (NULL);
*vdevp = cb.cb_vdev;
return (cb.cb_zhp);
}
/*
! * Create a solved FMD case and add the fault to it
*/
static void
! generate_fault(fmd_hdl_t *hdl, nvlist_t *vdev, char *faultname)
{
! char *devid, *fdevid, *physpath, *s;
! fmd_case_t *c;
! fmd_hdl_topo_node_info_t *node;
! nvlist_t *fault = NULL;
! uint64_t wd;
! assert(hdl != NULL);
! assert(vdev != NULL);
! assert(faultname != NULL);
!
! if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH,
! &physpath) != 0 ||
! nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wd) != 0)
return;
! if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_DEVID,
! &devid) == 0) {
! fdevid = strdup(devid);
! } else {
! fdevid = devid_str_from_path(physpath);
! }
! if (fdevid == NULL) {
! fmd_hdl_debug(hdl, "%s: failed to get devid", __func__);
! return;
! }
!
! if (wd && (s = strrchr(fdevid, '/')) != NULL)
! *s = '\0';
!
! c = fmd_case_open(hdl, NULL);
! if ((node = fmd_hdl_topo_node_get_by_devid(hdl, fdevid)) == NULL) {
! fault = fmd_nvl_create_fault(hdl, faultname, 100, NULL, vdev,
! NULL);
! } else {
! fault = fmd_nvl_create_fault(hdl, faultname, 100,
! node->resource, node->fru, node->resource);
! nvlist_free(node->fru);
! nvlist_free(node->resource);
! fmd_hdl_free(hdl, node,
! sizeof (fmd_hdl_topo_node_info_t));
! }
! fmd_case_add_suspect(hdl, c, fault);
! fmd_case_setspecific(hdl, c, fdevid);
! fmd_case_solve(hdl, c);
!
! devid_str_free(fdevid);
! fmd_hdl_debug(hdl, "%s: dispatched %s", __func__, faultname);
! }
!
! /*
! * Determine if the FRU fields for the spare and the failed device match.
*/
! static boolean_t
! match_fru(fmd_hdl_t *hdl, char *ffru, nvlist_t *spare)
! {
! char *sfru;
! boolean_t ret = B_FALSE;
!
! if (nvlist_lookup_string(spare, ZPOOL_CONFIG_FRU, &sfru) != 0) {
! fmd_hdl_debug(hdl, "%s: spare FRU not set", __func__);
! return (B_FALSE);
! }
!
! /* We match on enclosure only at the moment */
! ret = libzfs_fru_cmp_enclosure(ffru, sfru);
! if (!ret)
! fmd_hdl_debug(hdl, "%s: enclosure not matched", __func__);
!
! return (ret);
! }
!
! static boolean_t
! do_replace(zpool_handle_t *zhp, const char *fpath, const char *spath,
! nvlist_t *spare)
! {
! nvlist_t *nvroot;
! boolean_t ret = B_FALSE;
!
! if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
! return (B_FALSE);
!
! if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
! nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
! &spare, 1) != 0)
! goto fail;
!
! ret = (zpool_vdev_attach(zhp, fpath, spath, nvroot, B_TRUE) == 0);
!
! fail:
! nvlist_free(nvroot);
! return (ret);
! }
!
! /*
! * Attempt to replace failed device with spare.
! *
! * Spare selection is done in the following order:
! * - If failed device has sparegroup property set, look for the spares that
! * belong to the same sparegroup. If no suitable spare is found, skip
! * the spares that have sparegroup property set while doing other match types.
! * - If failed device has FRU set, look for the spares in the same enclosure.
! * - Finally, try using any available spare.
! *
! * Note that all match types do a media-type match first, so that we don't
! * replace HDD with SSD and vice versa.
! */
! static void
! replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
! {
! nvlist_t *config, *nvroot, **spares;
! uint_t i, nspares;
! boolean_t uu1, uu2, log;
! char *devpath;
! char fdevpath[PATH_MAX]; /* devpath of failed device */
! char *ffru = NULL; /* FRU of failed device */
! char fsg[MAXNAMELEN]; /* sparegroup of failed device */
! boolean_t use_sg = B_FALSE; /* do sparegroup matching */
! boolean_t done_sg = B_FALSE; /* done sparegroup matching */
! boolean_t use_fru = B_FALSE; /* do FRU matching */
! boolean_t done_fru = B_FALSE; /* done FRU matching */
! boolean_t fssd = B_FALSE; /* failed device is SSD */
! uint64_t wd;
!
! if ((config = zpool_get_config(zhp, NULL)) == NULL ||
! nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0)
return;
! /* Check if there are any hot spares available in the pool */
! if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
! &nspares) != 0) {
! fmd_hdl_debug(hdl, "%s: no spares found", __func__);
! return;
! }
! if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &devpath) != 0 ||
! nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wd) != 0 ||
! nvlist_lookup_boolean_value(vdev, ZPOOL_CONFIG_IS_SSD, &fssd) != 0)
! return;
! (void) strlcpy(fdevpath, devpath, sizeof (fdevpath));
! if (wd)
! fdevpath[strlen(fdevpath) - 2] = '\0';
! /* Spares can't replace log devices */
! (void) zpool_find_vdev(zhp, fdevpath, &uu1, &uu2, &log, NULL);
! if (log)
! return;
+ /* Check if we should do sparegroup matching */
+ if (vdev_get_prop(zhp, fdevpath, VDEV_PROP_SPAREGROUP, fsg,
+ sizeof (fsg)) == 0 && strcmp(fsg, "-") != 0)
+ use_sg = B_TRUE;
+
+ use_fru = (fmd_prop_get_int32(hdl, "fru_compare") == FMD_B_TRUE);
+ /* Disable FRU matching if failed device doesn't have FRU set */
+ if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &ffru) != 0)
+ use_fru = B_FALSE;
+
+ again:
+ /* Go through the spares list */
+ for (i = 0; i < nspares; i++) {
+ char sdevpath[PATH_MAX]; /* devpath of spare */
+ char ssg[MAXNAMELEN]; /* sparegroup of spare */
+ boolean_t sssd = B_FALSE; /* spare is SSD */
+ boolean_t ssg_set = B_FALSE;
+
+ if (nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH,
+ &devpath) != 0 ||
+ nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_WHOLE_DISK,
+ &wd) != 0)
+ continue;
+
+ (void) strlcpy(sdevpath, devpath, sizeof (sdevpath));
+ if (wd)
+ sdevpath[strlen(sdevpath) - 2] = '\0';
+
+ /* Don't swap HDD for SSD and vice versa */
+ if (nvlist_lookup_boolean_value(spares[i], ZPOOL_CONFIG_IS_SSD,
+ &sssd) != 0 || fssd != sssd) {
+ continue;
+ }
+
+ /* Get the sparegroup property for the spare */
+ if (vdev_get_prop(zhp, sdevpath, VDEV_PROP_SPAREGROUP, ssg,
+ sizeof (ssg)) == 0 && strcmp(ssg, "-") != 0)
+ ssg_set = B_TRUE;
+
+ if (use_sg) {
+ if (!ssg_set || strcmp(fsg, ssg) != 0)
+ continue;
+ /* Found spare in the same group */
+ if (do_replace(zhp, fdevpath, sdevpath, spares[i]))
+ return;
+ continue;
+ }
+
/*
! * If we tried matching on sparegroup and have not found
! * any suitable spare, skip all spares with sparegroup
! * set.
*/
! if (done_sg && ssg_set)
! continue;
! if (use_fru) {
! if (!match_fru(hdl, ffru, spares[i]))
continue;
+ /* Found spare with matching FRU */
+ if (do_replace(zhp, fdevpath, sdevpath, spares[i]))
+ return;
+ continue;
+ }
! /*
! * Sparegroup and FRU matching were either not used or didn't
! * find any suitable spares; use the first available one.
! */
! if (do_replace(zhp, fdevpath, sdevpath, spares[i])) {
! /* If we tried intelligent sparing, generate fault */
! if (done_sg || done_fru) {
! generate_fault(hdl, vdev,
! "fault.fs.zfs.vdev.dumb_spared");
! }
! return;
! }
! }
! if (use_sg) {
! done_sg = B_TRUE;
! use_sg = B_FALSE;
! goto again;
! } else if (use_fru) {
! done_fru = B_TRUE;
! use_fru = B_FALSE;
! goto again;
}
! generate_fault(hdl, vdev, "fault.fs.zfs.vdev.not_spared");
}
/*
* Repair this vdev if we had diagnosed a 'fault.fs.zfs.device' and
* ASRU is now usable. ZFS has found the device to be present and
*** 345,376 ****
zrp->zrr_pool = pool_guid;
zrp->zrr_vdev = vdev_guid;
zdp->zrd_repaired = zrp;
}
/*ARGSUSED*/
static void
zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
const char *class)
{
uint64_t pool_guid, vdev_guid;
zpool_handle_t *zhp;
! nvlist_t *resource, *fault, *fru;
nvlist_t **faults;
uint_t f, nfaults;
zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl);
libzfs_handle_t *zhdl = zdp->zrd_hdl;
boolean_t fault_device, degrade_device;
boolean_t is_repair;
! char *scheme, *fmri;
nvlist_t *vdev;
char *uuid;
int repair_done = 0;
boolean_t retire;
boolean_t is_disk;
vdev_aux_t aux;
! topo_hdl_t *thp;
int err;
/*
* If this is a resource notifying us of device removal, then simply
* check for an available spare and continue.
--- 569,660 ----
zrp->zrr_pool = pool_guid;
zrp->zrr_vdev = vdev_guid;
zdp->zrd_repaired = zrp;
}
+ static int
+ zfs_get_vdev_state(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, zpool_handle_t *zhp,
+ uint64_t vdev_guid, nvlist_t **vdev)
+ {
+ nvlist_t *config, *nvroot;
+ vdev_stat_t *vs;
+ uint_t cnt;
+ boolean_t missing;
+
+ if (zpool_refresh_stats(zhp, &missing) != 0 ||
+ missing != B_FALSE) {
+ fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't refresh stats");
+ return (VDEV_STATE_UNKNOWN);
+ }
+
+ config = zpool_get_config(zhp, NULL);
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) != 0) {
+ fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't get vdev tree");
+ return (VDEV_STATE_UNKNOWN);
+ }
+
+ *vdev = find_vdev(hdl, zhdl, nvroot, NULL, NULL, vdev_guid);
+ if (*vdev == NULL) {
+ fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't find vdev");
+ return (VDEV_STATE_UNKNOWN);
+ }
+
+ if (nvlist_lookup_uint64_array(*vdev, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &cnt) != 0) {
+ fmd_hdl_debug(hdl, "zfs_get_vdev_state: can't get vdev stats");
+ return (VDEV_STATE_UNKNOWN);
+ }
+
+ return (vs->vs_state);
+ }
+
+ int
+ zfs_retire_device(fmd_hdl_t *hdl, char *path, boolean_t retire)
+ {
+ di_retire_t drt = {0};
+ int err;
+
+ drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort;
+ drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug;
+ drt.rt_hdl = hdl;
+
+ fmd_hdl_debug(hdl, "zfs_retire_device: "
+ "attempting to %sretire %s", retire ? "" : "un", path);
+
+ err = retire ?
+ di_retire_device(path, &drt, 0) :
+ di_unretire_device(path, &drt);
+
+ if (err != 0)
+ fmd_hdl_debug(hdl, "zfs_retire_device: ",
+ "di_%sretire_device failed: %d %s",
+ retire ? "" : "un", err, path);
+
+ return (err);
+ }
+
/*ARGSUSED*/
static void
zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
const char *class)
{
uint64_t pool_guid, vdev_guid;
zpool_handle_t *zhp;
! nvlist_t *resource, *fault, *fru, *asru;
nvlist_t **faults;
uint_t f, nfaults;
zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl);
libzfs_handle_t *zhdl = zdp->zrd_hdl;
boolean_t fault_device, degrade_device;
boolean_t is_repair;
! char *scheme = NULL, *fmri = NULL, *devidstr = NULL, *path = NULL;
! ddi_devid_t devid = NULL;
nvlist_t *vdev;
char *uuid;
int repair_done = 0;
boolean_t retire;
boolean_t is_disk;
+ boolean_t retire_device = B_FALSE;
vdev_aux_t aux;
! topo_hdl_t *thp = NULL;
int err;
/*
* If this is a resource notifying us of device removal, then simply
* check for an available spare and continue.
*** 380,390 ****
&pool_guid) != 0 ||
nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
&vdev_guid) != 0)
return;
! if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
&vdev)) == NULL)
return;
if (fmd_prop_get_int32(hdl, "spare_on_remove"))
replace_with_spare(hdl, zhp, vdev);
--- 664,674 ----
&pool_guid) != 0 ||
nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
&vdev_guid) != 0)
return;
! if ((zhp = find_by_guid(hdl, zhdl, pool_guid, vdev_guid,
&vdev)) == NULL)
return;
if (fmd_prop_get_int32(hdl, "spare_on_remove"))
replace_with_spare(hdl, zhp, vdev);
*** 424,442 ****
--- 708,740 ----
is_disk = B_FALSE;
if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE,
&retire) == 0 && retire == 0)
continue;
+ if (fmd_nvl_class_match(hdl, fault, "fault.io.disk.slow-io") &&
+ fmd_prop_get_int32(hdl, "slow_io_skip_retire") ==
+ FMD_B_TRUE) {
+ fmd_hdl_debug(hdl, "ignoring slow io fault");
+ continue;
+ }
if (fmd_nvl_class_match(hdl, fault,
"fault.io.disk.ssm-wearout") &&
fmd_prop_get_int32(hdl, "ssm_wearout_skip_retire") ==
FMD_B_TRUE) {
fmd_hdl_debug(hdl, "zfs-retire: ignoring SSM fault");
continue;
}
+
/*
* While we subscribe to fault.fs.zfs.*, we only take action
* for faults targeting a specific vdev (open failure or SERD
* failure). We also subscribe to fault.io.* events, so that
* faulty disks will be faulted in the ZFS configuration.
*** 445,492 ****
fault_device = B_TRUE;
} else if (fmd_nvl_class_match(hdl, fault,
"fault.fs.zfs.vdev.checksum")) {
degrade_device = B_TRUE;
} else if (fmd_nvl_class_match(hdl, fault,
"fault.fs.zfs.device")) {
fault_device = B_FALSE;
! } else if (fmd_nvl_class_match(hdl, fault, "fault.io.*")) {
is_disk = B_TRUE;
fault_device = B_TRUE;
} else {
continue;
}
if (is_disk) {
/*
! * This is a disk fault. Lookup the FRU, convert it to
! * an FMRI string, and attempt to find a matching vdev.
*/
if (nvlist_lookup_nvlist(fault, FM_FAULT_FRU,
&fru) != 0 ||
nvlist_lookup_string(fru, FM_FMRI_SCHEME,
! &scheme) != 0)
! continue;
! if (strcmp(scheme, FM_FMRI_SCHEME_HC) != 0)
! continue;
thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
if (topo_fmri_nvl2str(thp, fru, &fmri, &err) != 0) {
fmd_hdl_topo_rele(hdl, thp);
! continue;
}
! zhp = find_by_fru(zhdl, fmri, &vdev);
topo_hdl_strfree(thp, fmri);
fmd_hdl_topo_rele(hdl, thp);
! if (zhp == NULL)
continue;
! (void) nvlist_lookup_uint64(vdev,
! ZPOOL_CONFIG_GUID, &vdev_guid);
aux = VDEV_AUX_EXTERNAL;
} else {
/*
* This is a ZFS fault. Lookup the resource, and
* attempt to find the matching vdev.
--- 743,867 ----
fault_device = B_TRUE;
} else if (fmd_nvl_class_match(hdl, fault,
"fault.fs.zfs.vdev.checksum")) {
degrade_device = B_TRUE;
} else if (fmd_nvl_class_match(hdl, fault,
+ "fault.fs.zfs.vdev.timeout")) {
+ fault_device = B_TRUE;
+ } else if (fmd_nvl_class_match(hdl, fault,
"fault.fs.zfs.device")) {
fault_device = B_FALSE;
! } else if (fmd_nvl_class_match(hdl, fault, "fault.io.disk.*") ||
! fmd_nvl_class_match(hdl, fault, "fault.io.scsi.*")) {
is_disk = B_TRUE;
fault_device = B_TRUE;
} else {
continue;
}
if (is_disk) {
/*
! * This is a disk fault. Lookup the FRU and ASRU,
! * convert them to FMRI and devid strings, and attempt
! * to find a matching vdev. If no vdev is found, the
! * device might still be retired/unretired.
*/
if (nvlist_lookup_nvlist(fault, FM_FAULT_FRU,
&fru) != 0 ||
nvlist_lookup_string(fru, FM_FMRI_SCHEME,
! &scheme) != 0) {
! fmd_hdl_debug(hdl,
! "zfs_retire_recv: unable to get FRU");
! goto nofru;
! }
! if (strcmp(scheme, FM_FMRI_SCHEME_HC) != 0) {
! fmd_hdl_debug(hdl,
! "zfs_retire_recv: not hc scheme: %s",
! scheme);
! goto nofru;
! }
thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
if (topo_fmri_nvl2str(thp, fru, &fmri, &err) != 0) {
fmd_hdl_topo_rele(hdl, thp);
! fmd_hdl_debug(hdl,
! "zfs_retire_recv: unable to get FMRI");
! goto nofru;
}
! fmd_hdl_debug(hdl, "zfs_retire_recv: got FMRI %s",
! fmri);
!
! nofru:
! if (nvlist_lookup_nvlist(fault, FM_FAULT_ASRU,
! &asru) != 0 ||
! nvlist_lookup_string(asru, FM_FMRI_SCHEME,
! &scheme) != 0) {
! fmd_hdl_debug(hdl,
! "zfs_retire_recv: unable to get ASRU");
! goto nodevid;
! }
!
! if (strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
! fmd_hdl_debug(hdl,
! "zfs_retire_recv: not dev scheme: %s",
! scheme);
! goto nodevid;
! }
!
! if (nvlist_lookup_string(asru, FM_FMRI_DEV_ID,
! &devidstr) != 0) {
! fmd_hdl_debug(hdl,
! "zfs_retire_recv: couldn't get devid");
! goto nodevid;
! }
!
! fmd_hdl_debug(hdl, "zfs_retire_recv: got devid %s",
! devidstr);
!
! if (devid_str_decode(devidstr, &devid, NULL) != 0) {
! fmd_hdl_debug(hdl,
! "zfs_retire_recv: devid_str_decode failed");
! goto nodevid;
! }
!
! if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
! &path) != 0) {
! fmd_hdl_debug(hdl,
! "zfs_retire_recv: couldn't get path, "
! "won't be able to retire device");
! goto nodevid;
! }
!
! fmd_hdl_debug(hdl, "zfs_retire_recv: got path %s",
! path);
!
! nodevid:
! zhp = find_by_anything(hdl, zhdl, fmri, devid, 0,
! &vdev);
! if (fmri) {
topo_hdl_strfree(thp, fmri);
fmd_hdl_topo_rele(hdl, thp);
+ }
+ if (devid)
+ devid_free(devid);
! if (zhp == NULL) {
! fmd_hdl_debug(hdl, "zfs_retire_recv: no zhp");
! if (path != NULL)
! (void) zfs_retire_device(hdl, path,
! !is_repair);
continue;
+ }
! (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID,
! &vdev_guid);
!
! fmd_hdl_debug(hdl, "zfs_retire_recv: found vdev GUID: %"
! PRIx64, vdev_guid);
!
aux = VDEV_AUX_EXTERNAL;
} else {
/*
* This is a ZFS fault. Lookup the resource, and
* attempt to find the matching vdev.
*** 510,523 ****
vdev_guid = 0;
else
continue;
}
! if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
&vdev)) == NULL)
continue;
aux = VDEV_AUX_ERR_EXCEEDED;
}
if (vdev_guid == 0) {
/*
--- 885,902 ----
vdev_guid = 0;
else
continue;
}
! if ((zhp = find_by_guid(hdl, zhdl, pool_guid, vdev_guid,
&vdev)) == NULL)
continue;
+ if (fmd_nvl_class_match(hdl, fault,
+ "fault.fs.zfs.vdev.open_failed"))
+ aux = VDEV_AUX_OPEN_FAILED;
+ else
aux = VDEV_AUX_ERR_EXCEEDED;
}
if (vdev_guid == 0) {
/*
*** 531,559 ****
/*
* If this is a repair event, then mark the vdev as repaired and
* continue.
*/
if (is_repair) {
repair_done = 1;
(void) zpool_vdev_clear(zhp, vdev_guid);
zpool_close(zhp);
continue;
}
/*
* Actively fault the device if needed.
*/
! if (fault_device)
(void) zpool_vdev_fault(zhp, vdev_guid, aux);
if (degrade_device)
(void) zpool_vdev_degrade(zhp, vdev_guid, aux);
/*
* Attempt to substitute a hot spare.
*/
replace_with_spare(hdl, zhp, vdev);
zpool_close(zhp);
}
if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done &&
nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
fmd_case_uuresolved(hdl, uuid);
--- 910,951 ----
/*
* If this is a repair event, then mark the vdev as repaired and
* continue.
*/
if (is_repair) {
+ if (is_disk && path != NULL &&
+ zfs_retire_device(hdl, path, B_FALSE) != 0)
+ continue;
+
repair_done = 1;
(void) zpool_vdev_clear(zhp, vdev_guid);
zpool_close(zhp);
continue;
}
/*
* Actively fault the device if needed.
*/
! if (fault_device) {
(void) zpool_vdev_fault(zhp, vdev_guid, aux);
+
+ if (zfs_get_vdev_state(hdl, zhdl, zhp, vdev_guid, &vdev)
+ == VDEV_STATE_FAULTED)
+ retire_device = B_TRUE;
+ }
+
if (degrade_device)
(void) zpool_vdev_degrade(zhp, vdev_guid, aux);
/*
* Attempt to substitute a hot spare.
*/
replace_with_spare(hdl, zhp, vdev);
zpool_close(zhp);
+
+ if (is_disk && retire_device && path != NULL)
+ (void) zfs_retire_device(hdl, path, B_TRUE);
}
if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done &&
nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
fmd_case_uuresolved(hdl, uuid);
*** 567,582 ****
NULL, /* fmdo_gc */
};
static const fmd_prop_t fmd_props[] = {
{ "spare_on_remove", FMD_TYPE_BOOL, "true" },
{ "ssm_wearout_skip_retire", FMD_TYPE_BOOL, "true"},
{ NULL, 0, NULL }
};
static const fmd_hdl_info_t fmd_info = {
! "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
};
void
_fmd_init(fmd_hdl_t *hdl)
{
--- 959,976 ----
NULL, /* fmdo_gc */
};
static const fmd_prop_t fmd_props[] = {
{ "spare_on_remove", FMD_TYPE_BOOL, "true" },
+ { "slow_io_skip_retire", FMD_TYPE_BOOL, "true"},
{ "ssm_wearout_skip_retire", FMD_TYPE_BOOL, "true"},
+ { "fru_compare", FMD_TYPE_BOOL, "true"},
{ NULL, 0, NULL }
};
static const fmd_hdl_info_t fmd_info = {
! "ZFS Retire Agent", "1.1", &fmd_ops, fmd_props
};
void
_fmd_init(fmd_hdl_t *hdl)
{
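
Note on the new tunables: slow_io_skip_retire, ssm_wearout_skip_retire, and fru_compare are ordinary fmd module booleans registered in fmd_props above. Assuming the standard fmd(1M) module configuration mechanism, they could be overridden from the module's .conf file; the snippet below is a sketch under that assumption (file path and values are hypothetical), not part of this change:

# /usr/lib/fm/fmd/plugins/zfs-retire.conf (hypothetical site overrides)
setprop slow_io_skip_retire false
setprop fru_compare true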