Print this page
NEX-6353 The "DKIOCSOLIDSTATE failed, assuming non-SSD media" messages don't provide any useful information
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-5736 implement autoreplace matching based on FRU slot number
NEX-6200 hot spares are not reactivated after reinserting into enclosure
NEX-9403 need to update FRU for spare and l2cache devices
NEX-9404 remove lofi autoreplace support from syseventd
NEX-9409 hotsparing doesn't work for vdevs without FRU
NEX-9424 zfs`vdev_online() needs better notification about state changes
Portions contributed by: Alek Pinchuk <alek@nexenta.com>
Portions contributed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-8065 ZFS doesn't notice when disk vdevs have no write cache
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
6494 ASSERT supported zio_types for file and disk vdevs
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Albert Lee <trisk@omniti.com>
NEX-3984 On-demand TRIM
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Conflicts:
usr/src/common/zfs/zpool_prop.c
usr/src/uts/common/sys/fs/zfs.h
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
usr/src/uts/common/io/scsi/targets/sd.c
usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-2933 tip of nza-kernel hangs during zpool offline
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
NEX-1142 move rwlock to vdev to protect vdev_tsd
not just ldi handle.
This way we serialize open/close, yet allow parallel I/O.
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-1065 Added serialization to avoid race (fix lint)
NEX-1065 Added serialization to avoid race
between ldi notification and I/O path.
Also fixes OS-124, NEX-1051, NEX-1062.
@@ -16,15 +16,16 @@
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
- * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/refcount.h>
@@ -41,60 +42,70 @@
* Virtual device vector for disks.
*/
extern ldi_ident_t zfs_li;
-static void vdev_disk_close(vdev_t *);
+static void vdev_disk_close_impl(vdev_t *, boolean_t);
typedef struct vdev_disk_ldi_cb {
list_node_t lcb_next;
ldi_callback_id_t lcb_id;
} vdev_disk_ldi_cb_t;
-static void
-vdev_disk_alloc(vdev_t *vd)
+static vdev_disk_t *
+vdev_disk_alloc(void)
{
vdev_disk_t *dvd;
- dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
+ dvd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
/*
* Create the LDI event callback list.
*/
list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t),
offsetof(vdev_disk_ldi_cb_t, lcb_next));
+ return (dvd);
}
static void
-vdev_disk_free(vdev_t *vd)
+vdev_disk_free_locked(vdev_t *vd)
{
- vdev_disk_t *dvd = vd->vdev_tsd;
vdev_disk_ldi_cb_t *lcb;
+ vdev_disk_t *dvd = vd->vdev_tsd;
+ ASSERT(rw_lock_held(&vd->vdev_tsd_lock));
+
if (dvd == NULL)
return;
/*
* We have already closed the LDI handle. Clean up the LDI event
* callbacks and free vd->vdev_tsd.
*/
+ vd->vdev_tsd = NULL;
while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) {
list_remove(&dvd->vd_ldi_cbs, lcb);
(void) ldi_ev_remove_callbacks(lcb->lcb_id);
kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t));
}
list_destroy(&dvd->vd_ldi_cbs);
kmem_free(dvd, sizeof (vdev_disk_t));
- vd->vdev_tsd = NULL;
}
+static void
+vdev_disk_free(vdev_t *vd)
+{
+ rw_enter(&vd->vdev_tsd_lock, RW_WRITER);
+ vdev_disk_free_locked(vd);
+ rw_exit(&vd->vdev_tsd_lock);
+}
+
/* ARGSUSED */
static int
vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg,
void *ev_data)
{
vdev_t *vd = (vdev_t *)arg;
- vdev_disk_t *dvd = vd->vdev_tsd;
/*
* Ignore events other than offline.
*/
if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
@@ -106,12 +117,11 @@
*
* We inform vdev_disk_close that it is being called from offline
* notify context so it will defer cleanup of LDI event callbacks and
* freeing of vd->vdev_tsd to the offline finalize or a reopen.
*/
- dvd->vd_ldi_offline = B_TRUE;
- vdev_disk_close(vd);
+ vdev_disk_close_impl(vd, B_TRUE);
/*
* Now that the device is closed, request that the spa_async_thread
* mark the device as REMOVED and notify FMA of the removal.
*/
@@ -240,26 +250,16 @@
dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
vd->vdev_devid_vp = NULL;
}
}
-/*
- * We want to be loud in DEBUG kernels when DKIOCGMEDIAINFOEXT fails, or when
- * even a fallback to DKIOCGMEDIAINFO fails.
- */
-#ifdef DEBUG
-#define VDEV_DEBUG(...) cmn_err(CE_NOTE, __VA_ARGS__)
-#else
-#define VDEV_DEBUG(...) /* Nothing... */
-#endif
-
static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
uint64_t *ashift)
{
spa_t *spa = vd->vdev_spa;
- vdev_disk_t *dvd = vd->vdev_tsd;
+ vdev_disk_t *dvd;
ldi_ev_cookie_t ecookie;
vdev_disk_ldi_cb_t *lcb;
union {
struct dk_minfo_ext ude;
struct dk_minfo ud;
@@ -266,11 +266,11 @@
} dks;
struct dk_minfo_ext *dkmext = &dks.ude;
struct dk_minfo *dkm = &dks.ud;
int error;
dev_t dev;
- int otyp;
+ int otyp, vdev_ssd;
boolean_t validate_devid = B_FALSE;
ddi_devid_t devid;
uint64_t capacity = 0, blksz = 0, pbsize;
/*
@@ -278,34 +278,34 @@
*/
if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
return (SET_ERROR(EINVAL));
}
-
+ rw_enter(&vd->vdev_tsd_lock, RW_WRITER);
+ dvd = vd->vdev_tsd;
/*
* Reopen the device if it's not currently open. Otherwise,
* just update the physical size of the device.
*/
if (dvd != NULL) {
- if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) {
+ ASSERT(vd->vdev_reopening);
/*
- * If we are opening a device in its offline notify
- * context, the LDI handle was just closed. Clean
- * up the LDI event callbacks and free vd->vdev_tsd.
+ * Here vd_lh is protected by vdev_tsd_lock
*/
- vdev_disk_free(vd);
- } else {
- ASSERT(vd->vdev_reopening);
+ ASSERT(dvd->vd_lh != NULL);
+ /* This should not happen, but let's be safe */
+ if (dvd->vd_lh == NULL) {
+ /* No LDI handle to reuse: drop the lock and fail with ENXIO */
+ rw_exit(&vd->vdev_tsd_lock);
+ return (SET_ERROR(ENXIO));
+ }
goto skip_open;
}
- }
-
/*
- * Create vd->vdev_tsd.
+ * Create dvd to be used as vd->vdev_tsd.
*/
- vdev_disk_alloc(vd);
- dvd = vd->vdev_tsd;
+ vd->vdev_tsd = dvd = vdev_disk_alloc();
/*
* When opening a disk device, we want to preserve the user's original
* intent. We always want to open the device by the path the user gave
* us, even if it is one of multiple paths to the same device. But we
@@ -323,12 +323,12 @@
*/
if (vd->vdev_devid != NULL) {
if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
&dvd->vd_minor) != 0) {
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
- vdev_dbgmsg(vd, "vdev_disk_open: invalid "
- "vdev_devid '%s'", vd->vdev_devid);
+ vdev_disk_free_locked(vd);
+ rw_exit(&vd->vdev_tsd_lock);
return (SET_ERROR(EINVAL));
}
}
error = EINVAL; /* presume failure */
@@ -417,12 +417,12 @@
kcred, &dvd->vd_lh, zfs_li);
}
if (error) {
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- vdev_dbgmsg(vd, "vdev_disk_open: failed to open [error=%d]",
- error);
+ vdev_disk_free_locked(vd);
+ rw_exit(&vd->vdev_tsd_lock);
return (error);
}
/*
* Now that the device has been successfully opened, update the devid
@@ -432,12 +432,12 @@
ldi_get_devid(dvd->vd_lh, &devid) == 0) {
if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
char *vd_devid;
vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor);
- vdev_dbgmsg(vd, "vdev_disk_open: update devid from "
- "'%s' to '%s'", vd->vdev_devid, vd_devid);
+ zfs_dbgmsg("vdev %s: update devid from %s, "
+ "to %s", vd->vdev_path, vd->vdev_devid, vd_devid);
spa_strfree(vd->vdev_devid);
vd->vdev_devid = spa_strdup(vd_devid);
ddi_devid_str_free(vd_devid);
}
ddi_devid_free(devid);
@@ -487,17 +487,23 @@
lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
list_insert_tail(&dvd->vd_ldi_cbs, lcb);
(void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
&vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
}
+
+ /* Reset TRIM flag, as underlying device support may have changed */
+ vd->vdev_notrim = B_FALSE;
+
skip_open:
+ ASSERT(dvd != NULL);
/*
* Determine the actual size of the device.
*/
if (ldi_get_size(dvd->vd_lh, psize) != 0) {
vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
- vdev_dbgmsg(vd, "vdev_disk_open: failed to get size");
+ vdev_disk_free_locked(vd);
+ rw_exit(&vd->vdev_tsd_lock);
return (SET_ERROR(EINVAL));
}
*max_psize = *psize;
@@ -510,20 +516,14 @@
capacity = dkmext->dki_capacity - 1;
blksz = dkmext->dki_lbsize;
pbsize = dkmext->dki_pbsize;
} else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO,
(intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) {
- VDEV_DEBUG(
- "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n",
- vd->vdev_path);
capacity = dkm->dki_capacity - 1;
blksz = dkm->dki_lbsize;
pbsize = blksz;
} else {
- VDEV_DEBUG("vdev_disk_open(\"%s\"): "
- "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n",
- vd->vdev_path, error);
pbsize = DEV_BSIZE;
}
*ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;
@@ -546,26 +546,50 @@
*/
(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
FKIOCTL, kcred, NULL);
}
+ if (ldi_ioctl(dvd->vd_lh, DKIOCSOLIDSTATE, (intptr_t)&vdev_ssd,
+ FKIOCTL, kcred, NULL) != 0)
+ vd->vdev_is_ssd = B_FALSE;
+ else
+ vd->vdev_is_ssd = vdev_ssd ? B_TRUE : B_FALSE;
+
/*
+ * We are done with vd_lh and vdev_tsd, release the vdev_tsd_lock
+ */
+ rw_exit(&vd->vdev_tsd_lock);
+
+ /*
* Clear the nowritecache bit, so that on a vdev_reopen() we will
* try again.
*/
vd->vdev_nowritecache = B_FALSE;
+ /*
+ * vdev open has succeeded - reset fault flags if last fault was due
+ * to a failed open since the open fault looks to have been transient
+ */
+ if (vd->vdev_removed || (vd->vdev_faulted &&
+ vd->vdev_label_aux == VDEV_AUX_OPEN_FAILED)) {
+ vd->vdev_faulted = vd->vdev_removed = 0ULL;
+ vd->vdev_label_aux = VDEV_AUX_NONE;
+ }
+
return (0);
}
static void
-vdev_disk_close(vdev_t *vd)
+vdev_disk_close_impl(vdev_t *vd, boolean_t ldi_offline)
{
- vdev_disk_t *dvd = vd->vdev_tsd;
+ vdev_disk_t *dvd;
+ rw_enter(&vd->vdev_tsd_lock, RW_WRITER);
+ dvd = vd->vdev_tsd;
+
if (vd->vdev_reopening || dvd == NULL)
- return;
+ goto out;
if (dvd->vd_minor != NULL) {
ddi_devid_str_free(dvd->vd_minor);
dvd->vd_minor = NULL;
}
@@ -584,42 +608,54 @@
/*
* If we closed the LDI handle due to an offline notify from LDI,
* don't free vd->vdev_tsd or unregister the callbacks here;
* the offline finalize callback or a reopen will take care of it.
*/
- if (dvd->vd_ldi_offline)
- return;
+ if (!ldi_offline)
+ vdev_disk_free_locked(vd);
+out:
+ rw_exit(&vd->vdev_tsd_lock);
+}
- vdev_disk_free(vd);
+static void
+vdev_disk_close(vdev_t *vd)
+{
+ vdev_disk_close_impl(vd, B_FALSE);
}
int
vdev_disk_physio(vdev_t *vd, caddr_t data,
size_t size, uint64_t offset, int flags, boolean_t isdump)
{
- vdev_disk_t *dvd = vd->vdev_tsd;
+ int rc = EIO;
+ vdev_disk_t *dvd;
+ rw_enter(&vd->vdev_tsd_lock, RW_READER);
+ dvd = vd->vdev_tsd;
/*
* If the vdev is closed, it's likely in the REMOVED or FAULTED state.
* Nothing to be done here but return failure.
*/
- if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL))
- return (EIO);
+ if (dvd == NULL || dvd->vd_lh == NULL)
+ goto out;
ASSERT(vd->vdev_ops == &vdev_disk_ops);
/*
* If in the context of an active crash dump, use the ldi_dump(9F)
* call instead of ldi_strategy(9F) as usual.
*/
if (isdump) {
ASSERT3P(dvd, !=, NULL);
- return (ldi_dump(dvd->vd_lh, data, lbtodb(offset),
- lbtodb(size)));
+ rc = ldi_dump(dvd->vd_lh, data, lbtodb(offset), lbtodb(size));
+ goto out;
}
- return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
+ rc = vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags);
+out:
+ rw_exit(&vd->vdev_tsd_lock);
+ return (rc);
}
int
vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
size_t size, uint64_t offset, int flags)
@@ -698,30 +734,34 @@
static void
vdev_disk_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
- vdev_disk_t *dvd = vd->vdev_tsd;
+ vdev_disk_t *dvd;
vdev_buf_t *vb;
struct dk_callback *dkc;
buf_t *bp;
int error;
+ rw_enter(&vd->vdev_tsd_lock, RW_READER);
+ dvd = vd->vdev_tsd;
/*
* If the vdev is closed, it's likely in the REMOVED or FAULTED state.
* Nothing to be done here but return failure.
*/
- if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) {
+ if (dvd == NULL || dvd->vd_lh == NULL) {
zio->io_error = ENXIO;
+ rw_exit(&vd->vdev_tsd_lock);
zio_interrupt(zio);
return;
}
if (zio->io_type == ZIO_TYPE_IOCTL) {
/* XXPOLICY */
if (!vdev_readable(vd)) {
zio->io_error = SET_ERROR(ENXIO);
+ rw_exit(&vd->vdev_tsd_lock);
zio_interrupt(zio);
return;
}
switch (zio->io_cmd) {
@@ -750,21 +790,52 @@
/*
* The ioctl will be done asynchronously,
* and will call vdev_disk_ioctl_done()
* upon completion.
*/
+ rw_exit(&vd->vdev_tsd_lock);
return;
}
zio->io_error = error;
break;
+ case DKIOCFREE:
+ /*
+ * We perform device support checks here instead of
+ * in zio_trim(), as zio_trim() might be invoked on
+ * top of a top-level vdev, whereas vdev_disk_io_start
+ * is guaranteed to be operating on a leaf vdev.
+ */
+ if (vd->vdev_notrim &&
+ spa_get_force_trim(vd->vdev_spa) !=
+ SPA_FORCE_TRIM_ON) {
+ zio->io_error = SET_ERROR(ENOTSUP);
+ break;
+ }
+
+ /*
+ * zio->io_private contains a dkioc_free_list_t
+ * specifying which offsets are to be freed
+ */
+ ASSERT(zio->io_private != NULL);
+ error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
+ (uintptr_t)zio->io_private, FKIOCTL, kcred, NULL);
+
+ if (error == ENOTSUP || error == ENOTTY)
+ vd->vdev_notrim = B_TRUE;
+
+ zio->io_error = error;
+
+ break;
+
default:
zio->io_error = SET_ERROR(ENOTSUP);
}
+ rw_exit(&vd->vdev_tsd_lock);
zio_execute(zio);
return;
}
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
@@ -794,10 +865,11 @@
bp->b_bufsize = zio->io_size;
bp->b_iodone = (int (*)())vdev_disk_io_intr;
/* ldi_strategy() will return non-zero only on programming errors */
VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);
+ rw_exit(&vd->vdev_tsd_lock);
}
static void
vdev_disk_io_done(zio_t *zio)
{
@@ -808,15 +880,20 @@
* the device has been removed. If this is the case, then we trigger an
* asynchronous removal of the device. Otherwise, probe the device and
* make sure it's still accessible.
*/
if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
- vdev_disk_t *dvd = vd->vdev_tsd;
- int state = DKIO_NONE;
+ vdev_disk_t *dvd;
+ int rc = EIO, state = DKIO_NONE;
- if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
- FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
+ rw_enter(&vd->vdev_tsd_lock, RW_READER);
+ dvd = vd->vdev_tsd;
+ if (dvd != NULL && dvd->vd_lh != NULL)
+ rc = ldi_ioctl(dvd->vd_lh, DKIOCSTATE,
+ (intptr_t)&state, FKIOCTL, kcred, NULL);
+ rw_exit(&vd->vdev_tsd_lock);
+ if (rc == 0 && state != DKIO_INSERTED) {
/*
* We post the resource as soon as possible, instead of
* when the async removal actually happens, because the
* DE is using this information to discard previous I/O
* errors.