Print this page
NEX-6832 fcsm module's debug level default should be 0 (cstyle fix)
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-7503 backport illumos #7307
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-7048 COMSTAR MODE_SENSE support is broken
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5428 Backout the 5.0 changes
NEX-2937 Continuous write_same starves all other commands
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-4707 memory leak in stmf_sbd`sbd_attach() on successful property lookup
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
usr/src/uts/common/io/scsi/targets/sd.c
usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-3111 Comstar does not pass cstyle and hdrchk
Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
NEX-3023 Panics and hangs when using write_same and compare_and_write
Review by: Bayard Bell <bayard.bell@nexenta.com>
Review by: Rick McNeal <rick.mcneal@nexenta.com>
Review by: Jean McCormack <jean.mccormack@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>
Related bug: NEX-2723 Kernel panic in xfer_completion code for write_same (0x93) and compare_and_write (0x89)
NEX-1965 Page fault at netbios_first_level_name_decode+0xbb
Support simultaneous compare_and_write operations for VAAI
Bug IDs SUP-505
SUP-1768
SUP-1928
Code Reviewers:
Sarah Jelinek
Jeffry Molanus
Albert Lee
Harold Shaw
NEX-988 itask_lu_[read|write]_time was inadvertently removed by the Illumos 3862 fix
re #12618 rb4053 Creating LU unconditionally enables write cache on backing store device
re #7936 rb3706 Support for COMSTAR/OEM
re #8002 rb3706 Allow setting iSCSI vendor ID via stmf_sbd.conf
re #11454 rb3750 Fix inconsistent vid/pid in stmf
Re #6790 backspace should perform delete on console
VAAI (XXX ATS support for COMSTAR, YYY Block-copy support for COMSTAR)
@@ -19,16 +19,16 @@
* CDDL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
-#include <sys/sysmacros.h>
#include <sys/conf.h>
+#include <sys/list.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/scsi/scsi.h>
@@ -54,10 +54,14 @@
/*
* NOTE: despite its name, this expands to a bare strncmp() result. It is
* therefore 0 (false) when the first 9 characters of the argument are
* "/dev/zvol" and non-zero otherwise.
*/
#define SBD_IS_ZVOL(zvol) (strncmp("/dev/zvol", zvol, 9))
extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
extern void sbd_pgr_reset(sbd_lu_t *sl);
+extern int HardwareAcceleratedLocking;
+extern int HardwareAcceleratedInit;
+extern int HardwareAcceleratedMove;
+extern uint8_t sbd_unmap_enable;
static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
void **result);
static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
@@ -107,13 +111,14 @@
static stmf_lu_provider_t *sbd_lp;
static sbd_lu_t *sbd_lu_list = NULL;
static kmutex_t sbd_lock;
static dev_info_t *sbd_dip;
static uint32_t sbd_lu_count = 0;
+uint8_t sbd_enable_unmap_sync = 0;
/* Global property settings for the logical unit */
-char sbd_vendor_id[] = "SUN ";
+char sbd_vendor_id[] = "NEXENTA ";
char sbd_product_id[] = "COMSTAR ";
char sbd_revision[] = "1.0 ";
char *sbd_mgmt_url = NULL;
uint16_t sbd_mgmt_url_alloc_size = 0;
krwlock_t sbd_global_prop_lock;
@@ -153,11 +158,15 @@
&sbd_cb_ops,
NULL, /* bus_ops */
NULL /* power */
};
-#define SBD_NAME "COMSTAR SBD"
+#ifdef DEBUG
+#define SBD_NAME "COMSTAR SBD+ " __DATE__ " " __TIME__ " DEBUG"
+#else
+#define SBD_NAME "COMSTAR SBD+"
+#endif
static struct modldrv modldrv = {
&mod_driverops,
SBD_NAME,
&sbd_ops
@@ -192,10 +201,18 @@
stmf_free(sbd_lp);
return (EINVAL);
}
mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
+
+ if (HardwareAcceleratedLocking == 0)
+ cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
+ if (HardwareAcceleratedMove == 0)
+ cmn_err(CE_NOTE, "HardwareAcceleratedMove Disabled");
+ if (HardwareAcceleratedInit == 0)
+ cmn_err(CE_NOTE, "HardwareAcceleratedInit Disabled");
+
return (0);
}
int
_fini(void)
@@ -270,19 +287,38 @@
}
/*
* Attach entry point. Only DDI_ATTACH is handled; any other command, or a
* failure to create the "admin" minor node, returns DDI_FAILURE.
* sbd_dip is assigned before the minor node is created, so it stays set
* even when the attach fails.
*/
static int
sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
+ char *prop;
+
switch (cmd) {
case DDI_ATTACH:
sbd_dip = dip;
if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
break;
}
ddi_report_dev(dip);
+ /*
+ * Optional "vendor-id", "product-id" and "revision" string
+ * properties replace the compiled-in global identity strings.
+ * Each format prints the property value followed by an empty
+ * string in a blank-padded field; the snprintf() size then cuts
+ * the result to 8, 16 or 4 characters plus the terminating NUL,
+ * so short values end up space padded and long ones truncated.
+ * The looked-up string is released with ddi_prop_free() once
+ * it has been copied.
+ *
+ * NOTE(review): the sizes 9/17/5 are hard-coded rather than
+ * derived from the arrays; confirm each global is at least
+ * that large.
+ * NOTE(review): no lock is taken while the globals are
+ * rewritten; presumably nothing reads them this early --
+ * confirm.
+ */
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
+ (void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
+ ddi_prop_free(prop);
+ }
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
+ (void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
+ ddi_prop_free(prop);
+ }
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
+ (void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
+ ddi_prop_free(prop);
+ }
+
return (DDI_SUCCESS);
}
return (DDI_FAILURE);
}
@@ -1394,11 +1430,14 @@
* Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
*/
static void
do_unmap_setup(sbd_lu_t *sl)
{
- ASSERT((sl->sl_flags & SL_UNMAP_ENABLED) == 0);
+ if (sbd_unmap_enable == 0) {
+ sl->sl_flags &= ~(SL_UNMAP_ENABLED);
+ return;
+ }
if ((sl->sl_flags & SL_ZFS_META) == 0)
return; /* No UNMAP for you. */
sl->sl_flags |= SL_UNMAP_ENABLED;
@@ -1439,12 +1478,14 @@
lu->lu_new_task = sbd_new_task;
lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
lu->lu_send_status_done = sbd_send_status_done;
lu->lu_task_free = sbd_task_free;
lu->lu_abort = sbd_abort;
+ lu->lu_task_poll = sbd_task_poll;
lu->lu_dbuf_free = sbd_dbuf_free;
lu->lu_ctl = sbd_ctl;
+ lu->lu_task_done = sbd_ats_remove_by_task;
lu->lu_info = sbd_info;
sl->sl_state = STMF_STATE_OFFLINE;
if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
stmf_trace(0, "Failed to register with framework, ret=%llx",
@@ -1453,10 +1494,16 @@
*err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
}
return (EIO);
}
+ /*
+ * Set up the ATS (compare and write) list so that multiple ATS
+ * commands can be handled simultaneously.
+ */
+ list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
+ offsetof(ats_state_t, as_next));
*err_ret = 0;
return (0);
}
int
@@ -1559,10 +1606,11 @@
}
} else {
sl->sl_lu_size = vattr.va_size;
}
}
+
if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
*err_ret = SBD_RET_FILE_SIZE_ERROR;
ret = EINVAL;
goto odf_close_data_and_exit;
}
@@ -1835,11 +1883,11 @@
}
if (slu->slu_write_protected) {
sl->sl_flags |= SL_WRITE_PROTECTED;
}
if (slu->slu_blksize_valid) {
- if (!ISP2(slu->slu_blksize) ||
+ if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
(slu->slu_blksize > (32 * 1024)) ||
(slu->slu_blksize == 0)) {
*err_ret = SBD_RET_INVALID_BLKSIZE;
ret = EINVAL;
goto scm_err_out;
@@ -2995,19 +3043,31 @@
sbd_status_t
sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
uint64_t offset, uint64_t size, uint8_t *buf)
{
- int ret;
+ int ret, ioflag = 0;
long resid;
+ hrtime_t xfer_start;
+ uint8_t op = task->task_cdb[0];
if ((offset + size) > sl->sl_lu_size) {
return (SBD_IO_PAST_EOF);
}
offset += sl->sl_data_offset;
+ /*
+ * Check to see if the command is READ(10), READ(12), or READ(16).
+ * If it is then check for bit 3 being set to indicate if Forced
+ * Unit Access is being requested. If so, the FSYNC flag will be set
+ * on the read.
+ */
+ if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
+ (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
+ ioflag = FSYNC;
+ }
if ((offset + size) > sl->sl_data_readable_size) {
uint64_t store_end;
if (offset > sl->sl_data_readable_size) {
bzero(buf, size);
return (SBD_SUCCESS);
@@ -3015,10 +3075,11 @@
store_end = sl->sl_data_readable_size - offset;
bzero(buf + store_end, size - store_end);
size = store_end;
}
+ xfer_start = gethrtime();
DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
uint8_t *, buf, uint64_t, size, uint64_t, offset,
scsi_task_t *, task);
/*
@@ -3030,15 +3091,18 @@
rw_enter(&sl->sl_access_state_lock, RW_READER);
if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
rw_exit(&sl->sl_access_state_lock);
return (SBD_FAILURE);
}
+
ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
- (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(),
+ (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
&resid);
rw_exit(&sl->sl_access_state_lock);
+ stmf_lu_xfer_done(task, B_TRUE /* read */,
+ (gethrtime() - xfer_start));
DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
uint8_t *, buf, uint64_t, size, uint64_t, offset,
int, ret, scsi_task_t *, task);
over_sl_data_read:
@@ -3057,24 +3121,38 @@
{
int ret;
long resid;
sbd_status_t sret = SBD_SUCCESS;
int ioflag;
+ hrtime_t xfer_start;
+ uint8_t op = task->task_cdb[0];
+ boolean_t fua_bit = B_FALSE;
if ((offset + size) > sl->sl_lu_size) {
return (SBD_IO_PAST_EOF);
}
offset += sl->sl_data_offset;
- if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
- (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
+ /*
+ * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
+ * If it is then check for bit 3 being set to indicate if Forced
+ * Unit Access is being requested. If so, the FSYNC flag will be set
+ * on the write.
+ */
+ if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
+ (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
+ fua_bit = B_TRUE;
+ }
+ if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
+ (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
ioflag = FSYNC;
} else {
ioflag = 0;
}
+ xfer_start = gethrtime();
DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
uint8_t *, buf, uint64_t, size, uint64_t, offset,
scsi_task_t *, task);
/*
@@ -3091,10 +3169,12 @@
ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
(offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
&resid);
rw_exit(&sl->sl_access_state_lock);
+ stmf_lu_xfer_done(task, B_FALSE /* write */,
+ (gethrtime() - xfer_start));
DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
uint8_t *, buf, uint64_t, size, uint64_t, offset,
int, ret, scsi_task_t *, task);
if ((ret == 0) && (resid == 0) &&
@@ -3101,11 +3181,10 @@
(sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
(sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
sret = sbd_flush_data_cache(sl, 1);
}
over_sl_data_write:
-
if ((ret || resid) || (sret != SBD_SUCCESS)) {
return (SBD_FAILURE);
} else if ((offset + size) > sl->sl_data_readable_size) {
uint64_t old_size, new_size;
@@ -3637,10 +3716,11 @@
KM_SLEEP);
(void) strcpy(*comstarprop, ptr);
}
}
out:
+ if (nv != NULL)
nvlist_free(nv);
kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
kmem_free(zc, sizeof (zfs_cmd_t));
(void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
@@ -3687,32 +3767,59 @@
return (rc);
}
/*
* Unmap a region in a volume. Currently only supported for zvols.
+ * The list of extents to be freed is passed in a dkioc_free_list_t
+ * which the caller is responsible for destroying.
+ *
+ * dfl->dfl_flags is overwritten here, before the vnode check:
+ * DF_WAIT_SYNC when the LU has SL_WRITEBACK_CACHE_DISABLE set, 0 otherwise.
+ *
+ * Returns 0 if the list holds no extents, EIO if the LU has no data
+ * vnode, and otherwise whatever the DKIOCFREE ioctl on the data vnode
+ * returns.
+ *
+ * NOTE(review): this function no longer checks SL_ZFS_META itself;
+ * presumably callers only get here when SL_UNMAP_ENABLED is set -- confirm.
*/
int
-sbd_unmap(sbd_lu_t *sl, uint64_t offset, uint64_t length)
+sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
{
vnode_t *vp;
- int unused;
- dkioc_free_t df;
+ int unused, ret;
- /* Right now, we only support UNMAP on zvols. */
- if (!(sl->sl_flags & SL_ZFS_META))
- return (EIO);
+ /* Nothing to do */
+ if (dfl->dfl_num_exts == 0)
+ return (0);
- df.df_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
+ /*
+ * TODO: unmap performance may be improved by not doing the synchronous
+ * removal of the blocks and writing of the metadata. The
+ * transaction is in the zil so the state should be stable.
+ */
+ dfl->dfl_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
DF_WAIT_SYNC : 0;
- df.df_start = offset;
- df.df_length = length;
/* Use the data vnode we have to send a fop_ioctl(). */
vp = sl->sl_data_vp;
if (vp == NULL) {
cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
return (EIO);
}
- return (VOP_IOCTL(vp, DKIOCFREE, (intptr_t)(&df), FKIOCTL, kcred,
- &unused, NULL));
+ ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
+ &unused, NULL);
+
+ return (ret);
+}
+
+/*
+ * Check whether this LU belongs to sbd or to some other LU provider.
+ * Comparing one of the module entry points (lu_new_task) against
+ * sbd's own handler is sufficient.
+ *
+ * Returns 1 if lu->lu_new_task is sbd_new_task, 0 otherwise.
+ */
+int
+sbd_is_valid_lu(stmf_lu_t *lu)
+{
+ if (lu->lu_new_task == sbd_new_task)
+ return (1);
+ return (0);
+}
+/*
+ * Return the sl_data_blocksize_shift field of the sbd_lu_t found in
+ * lu->lu_provider_private.
+ *
+ * NOTE(review): the private pointer is cast without any check;
+ * presumably callers first confirm the LU is sbd's (e.g. with
+ * sbd_is_valid_lu()) -- confirm.
+ */
+uint8_t
+sbd_get_lbasize_shift(stmf_lu_t *lu)
+{
+ sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
+
+ return (sl->sl_data_blocksize_shift);
}