Print this page
NEX-6832 fcsm module's debug level default should be 0 (cstyle fix)
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-7503 backport illumos #7307
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-7048 COMSTAR MODE_SENSE support is broken
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5428 Backout the 5.0 changes
NEX-2937 Continuous write_same starves all other commands
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-4707 memory leak in stmf_sbd`sbd_attach() on successful property lookup
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
    usr/src/uts/common/io/scsi/targets/sd.c
    usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-3111 Comstar does not pass cstyle and hdrchk
        Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
        Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
        Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
NEX-3023 Panics and hangs when using write_same and compare_and_write
Review by: Bayard Bell <bayard.bell@nexenta.com>
Review by: Rick McNeal <rick.mcneal@nexenta.com>
Review by: Jean McCormack <jean.mccormack@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>
Related bug: NEX-2723 Kernel panic in xfer_completion code for write_same (0x93) and compare_and_write (0x89)
NEX-1965 Page fault at netbios_first_level_name_decode+0xbb
Support simultaneous compare_and_write operations for VAAI
Bug IDs SUP-505
                SUP-1768
                SUP-1928
Code Reviewers:
        Sarah Jelinek
        Jeffry Molanus
        Albert Lee
        Harold Shaw
NEX-988 itask_lu_[read|write]_time was inadvertently removed by the Illumos 3862 fix
re #12618 rb4053 Creating LU unconditionally enables write cache on backing store device
re #7936 rb3706 Support for COMSTAR/OEM
re #8002 rb3706 Allow setting iSCSI vendor ID via stmf_sbd.conf
re #11454 rb3750 Fix inconsistent vid/pid in stmf
Re #6790 backspace should perform delete on console
VAAI (XXX ATS support for COMSTAR, YYY Block-copy support for COMSTAR)
        
@@ -19,16 +19,16 @@
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  */
 
-#include <sys/sysmacros.h>
 #include <sys/conf.h>
+#include <sys/list.h>
 #include <sys/file.h>
 #include <sys/ddi.h>
 #include <sys/sunddi.h>
 #include <sys/modctl.h>
 #include <sys/scsi/scsi.h>
@@ -54,10 +54,14 @@
 #define SBD_IS_ZVOL(zvol)       (strncmp("/dev/zvol", zvol, 9))
 
 extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
 extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
 extern void sbd_pgr_reset(sbd_lu_t *sl);
+extern int HardwareAcceleratedLocking;
+extern int HardwareAcceleratedInit;
+extern int HardwareAcceleratedMove;
+extern uint8_t sbd_unmap_enable;
 
 static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
     void **result);
 static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
 static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
@@ -107,13 +111,14 @@
 static stmf_lu_provider_t *sbd_lp;
 static sbd_lu_t         *sbd_lu_list = NULL;
 static kmutex_t         sbd_lock;
 static dev_info_t       *sbd_dip;
 static uint32_t         sbd_lu_count = 0;
+uint8_t sbd_enable_unmap_sync = 0;
 
 /* Global property settings for the logical unit */
-char sbd_vendor_id[]    = "SUN     ";
+char sbd_vendor_id[]    = "NEXENTA ";
 char sbd_product_id[]   = "COMSTAR         ";
 char sbd_revision[]     = "1.0 ";
 char *sbd_mgmt_url = NULL;
 uint16_t sbd_mgmt_url_alloc_size = 0;
 krwlock_t sbd_global_prop_lock;
@@ -153,11 +158,15 @@
         &sbd_cb_ops,
         NULL,                   /* bus_ops */
         NULL                    /* power */
 };
 
-#define SBD_NAME        "COMSTAR SBD"
+#ifdef DEBUG
+#define SBD_NAME        "COMSTAR SBD+ " __DATE__ " " __TIME__ " DEBUG"
+#else
+#define SBD_NAME        "COMSTAR SBD+"
+#endif
 
 static struct modldrv modldrv = {
         &mod_driverops,
         SBD_NAME,
         &sbd_ops
@@ -192,10 +201,18 @@
                 stmf_free(sbd_lp);
                 return (EINVAL);
         }
         mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
         rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
+
+        if (HardwareAcceleratedLocking == 0)
+                cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
+        if (HardwareAcceleratedMove == 0)
+                cmn_err(CE_NOTE, "HardwareAcceleratedMove  Disabled");
+        if (HardwareAcceleratedInit == 0)
+                cmn_err(CE_NOTE, "HardwareAcceleratedInit  Disabled");
+
         return (0);
 }
 
 int
 _fini(void)
@@ -270,19 +287,38 @@
 }
 
 static int
 sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 {
+        char    *prop;
+
         switch (cmd) {
         case DDI_ATTACH:
                 sbd_dip = dip;
 
                 if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
                     DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
                         break;
                 }
                 ddi_report_dev(dip);
+
+                if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+                    DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
+                        (void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
+                        ddi_prop_free(prop);
+                }
+                if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+                    DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
+                        (void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
+                        ddi_prop_free(prop);
+                }
+                if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+                    DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
+                        (void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
+                        ddi_prop_free(prop);
+                }
+
                 return (DDI_SUCCESS);
         }
 
         return (DDI_FAILURE);
 }
@@ -1394,11 +1430,14 @@
  * Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
  */
 static void
 do_unmap_setup(sbd_lu_t *sl)
 {
-        ASSERT((sl->sl_flags & SL_UNMAP_ENABLED) == 0);
+        if (sbd_unmap_enable == 0) {
+                sl->sl_flags &= ~(SL_UNMAP_ENABLED);
+                return;
+        }
 
         if ((sl->sl_flags & SL_ZFS_META) == 0)
                 return; /* No UNMAP for you. */
 
         sl->sl_flags |= SL_UNMAP_ENABLED;
@@ -1439,12 +1478,14 @@
         lu->lu_new_task = sbd_new_task;
         lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
         lu->lu_send_status_done = sbd_send_status_done;
         lu->lu_task_free = sbd_task_free;
         lu->lu_abort = sbd_abort;
+        lu->lu_task_poll = sbd_task_poll;
         lu->lu_dbuf_free = sbd_dbuf_free;
         lu->lu_ctl = sbd_ctl;
+        lu->lu_task_done = sbd_ats_remove_by_task;
         lu->lu_info = sbd_info;
         sl->sl_state = STMF_STATE_OFFLINE;
 
         if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
                 stmf_trace(0, "Failed to register with framework, ret=%llx",
@@ -1453,10 +1494,16 @@
                         *err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
                 }
                 return (EIO);
         }
 
+        /*
+         * Set up the ATS (compare-and-write) I/O list so that multiple
+         * ATS commands can be handled simultaneously.
+         */
+        list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
+            offsetof(ats_state_t, as_next));
         *err_ret = 0;
         return (0);
 }
 
 int
@@ -1559,10 +1606,11 @@
                         }
                 } else {
                         sl->sl_lu_size = vattr.va_size;
                 }
         }
+
         if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
                 *err_ret = SBD_RET_FILE_SIZE_ERROR;
                 ret = EINVAL;
                 goto odf_close_data_and_exit;
         }
@@ -1835,11 +1883,11 @@
         }
         if (slu->slu_write_protected) {
                 sl->sl_flags |= SL_WRITE_PROTECTED;
         }
         if (slu->slu_blksize_valid) {
-                if (!ISP2(slu->slu_blksize) ||
+                if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
                     (slu->slu_blksize > (32 * 1024)) ||
                     (slu->slu_blksize == 0)) {
                         *err_ret = SBD_RET_INVALID_BLKSIZE;
                         ret = EINVAL;
                         goto scm_err_out;
@@ -2995,19 +3043,31 @@
 
 sbd_status_t
 sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
     uint64_t offset, uint64_t size, uint8_t *buf)
 {
-        int ret;
+        int ret, ioflag = 0;
         long resid;
+        hrtime_t xfer_start;
+        uint8_t op = task->task_cdb[0];
 
         if ((offset + size) > sl->sl_lu_size) {
                 return (SBD_IO_PAST_EOF);
         }
 
         offset += sl->sl_data_offset;
 
+        /*
+         * READ(10), READ(12) and READ(16) carry the Forced Unit Access
+         * (FUA) bit in bit 3 of CDB byte 1.  If the initiator set it,
+         * request FSYNC semantics by passing FSYNC as the ioflag to the
+         * vn_rdwr() call below.
+         */
+        if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
+            (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
+                ioflag = FSYNC;
+        }
         if ((offset + size) > sl->sl_data_readable_size) {
                 uint64_t store_end;
                 if (offset > sl->sl_data_readable_size) {
                         bzero(buf, size);
                         return (SBD_SUCCESS);
@@ -3015,10 +3075,11 @@
                 store_end = sl->sl_data_readable_size - offset;
                 bzero(buf + store_end, size - store_end);
                 size = store_end;
         }
 
+        xfer_start = gethrtime();
         DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
             uint8_t *, buf, uint64_t, size, uint64_t, offset,
             scsi_task_t *, task);
 
         /*
@@ -3030,15 +3091,18 @@
         rw_enter(&sl->sl_access_state_lock, RW_READER);
         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
                 rw_exit(&sl->sl_access_state_lock);
                 return (SBD_FAILURE);
         }
+
         ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
-            (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(),
+            (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
             &resid);
         rw_exit(&sl->sl_access_state_lock);
 
+        stmf_lu_xfer_done(task, B_TRUE /* read */,
+            (gethrtime() - xfer_start));
         DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
             uint8_t *, buf, uint64_t, size, uint64_t, offset,
             int, ret, scsi_task_t *, task);
 
 over_sl_data_read:
@@ -3057,24 +3121,38 @@
 {
         int ret;
         long resid;
         sbd_status_t sret = SBD_SUCCESS;
         int ioflag;
+        hrtime_t xfer_start;
+        uint8_t op = task->task_cdb[0];
+        boolean_t fua_bit = B_FALSE;
 
         if ((offset + size) > sl->sl_lu_size) {
                 return (SBD_IO_PAST_EOF);
         }
 
         offset += sl->sl_data_offset;
 
-        if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
-            (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
+        /*
+         * WRITE(10), WRITE(12) and WRITE(16) carry the Forced Unit Access
+         * (FUA) bit in bit 3 of CDB byte 1.  If the initiator set it,
+         * the write is issued with FSYNC, exactly as when the write cache
+         * is disabled with flush-on-disabled-writecache in effect.
+         */
+        if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
+            (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
+                fua_bit = B_TRUE;
+        }
+        if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
+            (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
                 ioflag = FSYNC;
         } else {
                 ioflag = 0;
         }
 
+        xfer_start = gethrtime();
         DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
             uint8_t *, buf, uint64_t, size, uint64_t, offset,
             scsi_task_t *, task);
 
         /*
@@ -3091,10 +3169,12 @@
         ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
             (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
             &resid);
         rw_exit(&sl->sl_access_state_lock);
 
+        stmf_lu_xfer_done(task, B_FALSE /* write */,
+            (gethrtime() - xfer_start));
         DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
             uint8_t *, buf, uint64_t, size, uint64_t, offset,
             int, ret, scsi_task_t *, task);
 
         if ((ret == 0) && (resid == 0) &&
@@ -3101,11 +3181,10 @@
             (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
                 sret = sbd_flush_data_cache(sl, 1);
         }
 over_sl_data_write:
-
         if ((ret || resid) || (sret != SBD_SUCCESS)) {
                 return (SBD_FAILURE);
         } else if ((offset + size) > sl->sl_data_readable_size) {
                 uint64_t old_size, new_size;
 
@@ -3637,10 +3716,11 @@
                             KM_SLEEP);
                         (void) strcpy(*comstarprop, ptr);
                 }
         }
 out:
+        if (nv != NULL)
         nvlist_free(nv);
         kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
         kmem_free(zc, sizeof (zfs_cmd_t));
         (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
 
@@ -3687,32 +3767,59 @@
         return (rc);
 }
 
 /*
  * Unmap a region in a volume.  Currently only supported for zvols.
+ * The list of extents to be freed is passed in a dkioc_free_list_t
+ * which the caller is responsible for destroying.
  */
 int
-sbd_unmap(sbd_lu_t *sl, uint64_t offset, uint64_t length)
+sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
 {
         vnode_t *vp;
-        int unused;
-        dkioc_free_t df;
+        int unused, ret;
 
-        /* Right now, we only support UNMAP on zvols. */
-        if (!(sl->sl_flags & SL_ZFS_META))
-                return (EIO);
+        /* Nothing to do */
+        if (dfl->dfl_num_exts == 0)
+                return (0);
 
-        df.df_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
+        /*
+         * TODO: unmap performance may be improved by not doing the synchronous
+         * removal of the blocks and writing of the metadata.  The
+         * transaction is in the zil so the state should be stable.
+         */
+        dfl->dfl_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
             DF_WAIT_SYNC : 0;
-        df.df_start = offset;
-        df.df_length = length;
 
         /* Use the data vnode we have to send a fop_ioctl(). */
         vp = sl->sl_data_vp;
         if (vp == NULL) {
                 cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
                 return (EIO);
         }
 
-        return (VOP_IOCTL(vp, DKIOCFREE, (intptr_t)(&df), FKIOCTL, kcred,
-            &unused, NULL));
+        ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
+            &unused, NULL);
+
+        return (ret);
+}
+
+/*
+ * Return 1 if this LU was registered by sbd, or 0 if it belongs to
+ * another LU provider.  Comparing one entry point (lu_new_task)
+ * against sbd's own handler is sufficient to tell them apart.
+ */
+int
+sbd_is_valid_lu(stmf_lu_t *lu)
+{
+        if (lu->lu_new_task == sbd_new_task)
+                return (1);
+        return (0);
+}
+
+uint8_t
+sbd_get_lbasize_shift(stmf_lu_t *lu)
+{
+        sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
+
+        return (sl->sl_data_blocksize_shift);
 }