Print this page
NEX-6832 fcsm module's debug level default should be 0 (cstyle fix)
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-7503 backport illumos #7307
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-7048 COMSTAR MODE_SENSE support is broken
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5428 Backout the 5.0 changes
NEX-2937 Continuous write_same starves all other commands
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-4707 memory leak in stmf_sbd`sbd_attach() on successful property lookup
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
    usr/src/uts/common/io/scsi/targets/sd.c
    usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-3111 Comstar does not pass cstyle and hdrchk
        Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
        Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
        Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
NEX-3023 Panics and hangs when using write_same and compare_and_write
Reviewed by: Bayard Bell <bayard.bell@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>
Related bug: NEX-2723 Kernel panic in xfer_completion code for write_same (0x93) and compare_and_write (0x89)
NEX-1965 Page fault at netbios_first_level_name_decode+0xbb
Support simultaneous compare_and_write operations for VAAI
Bug IDs SUP-505
                SUP-1768
                SUP-1928
Code Reviewers:
        Sarah Jelinek
        Jeffry Molanus
        Albert Lee
        Harold Shaw
NEX-988 itask_lu_[read|write]_time was inadvertently removed by the Illumos 3862 fix
re #12618 rb4053 Creating LU unconditionally enables write cache on backing store device
re #7936 rb3706 Support for COMSTAR/OEM
re #8002 rb3706 Allow setting iSCSI vendor ID via stmf_sbd.conf
re #11454 rb3750 Fix inconsistent vid/pid in stmf
Re #6790 backspace should perform delete on console
VAAI (XXX ATS support for COMSTAR, YYY Block-copy support for COMSTAR)
        
*** 19,34 ****
   * CDDL HEADER END
   */
  
  /*
   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
-  * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
   * Copyright (c) 2013 by Delphix. All rights reserved.
   */
  
- #include <sys/sysmacros.h>
  #include <sys/conf.h>
  #include <sys/file.h>
  #include <sys/ddi.h>
  #include <sys/sunddi.h>
  #include <sys/modctl.h>
  #include <sys/scsi/scsi.h>
--- 19,34 ----
   * CDDL HEADER END
   */
  
  /*
   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2013 by Delphix. All rights reserved.
+  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
   */
  
  #include <sys/conf.h>
+ #include <sys/list.h>
  #include <sys/file.h>
  #include <sys/ddi.h>
  #include <sys/sunddi.h>
  #include <sys/modctl.h>
  #include <sys/scsi/scsi.h>
*** 54,63 ****
--- 54,67 ----
  #define SBD_IS_ZVOL(zvol)       (strncmp("/dev/zvol", zvol, 9))
  
  extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
  extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
  extern void sbd_pgr_reset(sbd_lu_t *sl);
+ extern int HardwareAcceleratedLocking;
+ extern int HardwareAcceleratedInit;
+ extern int HardwareAcceleratedMove;
+ extern uint8_t sbd_unmap_enable;
  
  static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
      void **result);
  static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
  static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
*** 107,119 ****
  static stmf_lu_provider_t *sbd_lp;
  static sbd_lu_t         *sbd_lu_list = NULL;
  static kmutex_t         sbd_lock;
  static dev_info_t       *sbd_dip;
  static uint32_t         sbd_lu_count = 0;
  
  /* Global property settings for the logical unit */
! char sbd_vendor_id[]    = "SUN     ";
  char sbd_product_id[]   = "COMSTAR         ";
  char sbd_revision[]     = "1.0 ";
  char *sbd_mgmt_url = NULL;
  uint16_t sbd_mgmt_url_alloc_size = 0;
  krwlock_t sbd_global_prop_lock;
--- 111,124 ----
  static stmf_lu_provider_t *sbd_lp;
  static sbd_lu_t         *sbd_lu_list = NULL;
  static kmutex_t         sbd_lock;
  static dev_info_t       *sbd_dip;
  static uint32_t         sbd_lu_count = 0;
+ uint8_t sbd_enable_unmap_sync = 0;
  
  /* Global property settings for the logical unit */
! char sbd_vendor_id[]    = "NEXENTA ";
  char sbd_product_id[]   = "COMSTAR         ";
  char sbd_revision[]     = "1.0 ";
  char *sbd_mgmt_url = NULL;
  uint16_t sbd_mgmt_url_alloc_size = 0;
  krwlock_t sbd_global_prop_lock;
*** 153,163 ****
          &sbd_cb_ops,
          NULL,                   /* bus_ops */
          NULL                    /* power */
  };
  
! #define SBD_NAME        "COMSTAR SBD"
  
  static struct modldrv modldrv = {
          &mod_driverops,
          SBD_NAME,
          &sbd_ops
--- 158,172 ----
          &sbd_cb_ops,
          NULL,                   /* bus_ops */
          NULL                    /* power */
  };
  
! #ifdef DEBUG
! #define SBD_NAME        "COMSTAR SBD+ " __DATE__ " " __TIME__ " DEBUG"
! #else
! #define SBD_NAME        "COMSTAR SBD+"
! #endif
  
  static struct modldrv modldrv = {
          &mod_driverops,
          SBD_NAME,
          &sbd_ops
*** 192,201 ****
--- 201,218 ----
                  stmf_free(sbd_lp);
                  return (EINVAL);
          }
          mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
          rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
+ 
+         if (HardwareAcceleratedLocking == 0)
+                 cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
+         if (HardwareAcceleratedMove == 0)
+                 cmn_err(CE_NOTE, "HardwareAcceleratedMove  Disabled");
+         if (HardwareAcceleratedInit == 0)
+                 cmn_err(CE_NOTE, "HardwareAcceleratedInit  Disabled");
+ 
          return (0);
  }
  
  int
  _fini(void)
*** 270,288 ****
--- 287,324 ----
  }
  
  static int
  sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
  {
+         char    *prop;
+ 
          switch (cmd) {
          case DDI_ATTACH:
                  sbd_dip = dip;
  
                  if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
                      DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
                          break;
                  }
                  ddi_report_dev(dip);
+ 
+                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+                     DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
+                         (void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
+                         ddi_prop_free(prop);
+                 }
+                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+                     DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
+                         (void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
+                         ddi_prop_free(prop);
+                 }
+                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+                     DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
+                         (void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
+                         ddi_prop_free(prop);
+                 }
+ 
                  return (DDI_SUCCESS);
          }
  
          return (DDI_FAILURE);
  }
*** 1394,1404 ****
   * Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
   */
  static void
  do_unmap_setup(sbd_lu_t *sl)
  {
!         ASSERT((sl->sl_flags & SL_UNMAP_ENABLED) == 0);
  
          if ((sl->sl_flags & SL_ZFS_META) == 0)
                  return; /* No UNMAP for you. */
  
          sl->sl_flags |= SL_UNMAP_ENABLED;
--- 1430,1443 ----
   * Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
   */
  static void
  do_unmap_setup(sbd_lu_t *sl)
  {
!         if (sbd_unmap_enable == 0) {
!                 sl->sl_flags &= ~(SL_UNMAP_ENABLED);
!                 return;
!         }
  
          if ((sl->sl_flags & SL_ZFS_META) == 0)
                  return; /* No UNMAP for you. */
  
          sl->sl_flags |= SL_UNMAP_ENABLED;
*** 1439,1450 ****
--- 1478,1491 ----
          lu->lu_new_task = sbd_new_task;
          lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
          lu->lu_send_status_done = sbd_send_status_done;
          lu->lu_task_free = sbd_task_free;
          lu->lu_abort = sbd_abort;
+         lu->lu_task_poll = sbd_task_poll;
          lu->lu_dbuf_free = sbd_dbuf_free;
          lu->lu_ctl = sbd_ctl;
+         lu->lu_task_done = sbd_ats_remove_by_task;
          lu->lu_info = sbd_info;
          sl->sl_state = STMF_STATE_OFFLINE;
  
          if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
                  stmf_trace(0, "Failed to register with framework, ret=%llx",
*** 1453,1462 ****
--- 1494,1509 ----
                          *err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
                  }
                  return (EIO);
          }
  
+         /*
+          * setup the ATS (compare and write) lists to handle multiple
+          * ATS commands simultaneously
+          */
+         list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
+             offsetof(ats_state_t, as_next));
          *err_ret = 0;
          return (0);
  }
  
  int
*** 1559,1568 ****
--- 1606,1616 ----
                          }
                  } else {
                          sl->sl_lu_size = vattr.va_size;
                  }
          }
+ 
          if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
                  *err_ret = SBD_RET_FILE_SIZE_ERROR;
                  ret = EINVAL;
                  goto odf_close_data_and_exit;
          }
*** 1835,1845 ****
          }
          if (slu->slu_write_protected) {
                  sl->sl_flags |= SL_WRITE_PROTECTED;
          }
          if (slu->slu_blksize_valid) {
!                 if (!ISP2(slu->slu_blksize) ||
                      (slu->slu_blksize > (32 * 1024)) ||
                      (slu->slu_blksize == 0)) {
                          *err_ret = SBD_RET_INVALID_BLKSIZE;
                          ret = EINVAL;
                          goto scm_err_out;
--- 1883,1893 ----
          }
          if (slu->slu_write_protected) {
                  sl->sl_flags |= SL_WRITE_PROTECTED;
          }
          if (slu->slu_blksize_valid) {
!                 if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
                      (slu->slu_blksize > (32 * 1024)) ||
                      (slu->slu_blksize == 0)) {
                          *err_ret = SBD_RET_INVALID_BLKSIZE;
                          ret = EINVAL;
                          goto scm_err_out;
*** 2995,3013 ****
  
  sbd_status_t
  sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
      uint64_t offset, uint64_t size, uint8_t *buf)
  {
!         int ret;
          long resid;
  
          if ((offset + size) > sl->sl_lu_size) {
                  return (SBD_IO_PAST_EOF);
          }
  
          offset += sl->sl_data_offset;
  
          if ((offset + size) > sl->sl_data_readable_size) {
                  uint64_t store_end;
                  if (offset > sl->sl_data_readable_size) {
                          bzero(buf, size);
                          return (SBD_SUCCESS);
--- 3043,3073 ----
  
  sbd_status_t
  sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
      uint64_t offset, uint64_t size, uint8_t *buf)
  {
!         int ret, ioflag = 0;
          long resid;
+         hrtime_t xfer_start;
+         uint8_t op = task->task_cdb[0];
  
          if ((offset + size) > sl->sl_lu_size) {
                  return (SBD_IO_PAST_EOF);
          }
  
          offset += sl->sl_data_offset;
  
+         /*
+          * Check to see if the command is READ(10), READ(12), or READ(16).
+          * If it is then check for bit 3 being set to indicate if Forced
+          * Unit Access is being requested. If so, the FSYNC flag will be set
+          * on the read.
+          */
+         if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
+             (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
+                 ioflag = FSYNC;
+         }
          if ((offset + size) > sl->sl_data_readable_size) {
                  uint64_t store_end;
                  if (offset > sl->sl_data_readable_size) {
                          bzero(buf, size);
                          return (SBD_SUCCESS);
*** 3015,3024 ****
--- 3075,3085 ----
                  store_end = sl->sl_data_readable_size - offset;
                  bzero(buf + store_end, size - store_end);
                  size = store_end;
          }
  
+         xfer_start = gethrtime();
          DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
              uint8_t *, buf, uint64_t, size, uint64_t, offset,
              scsi_task_t *, task);
  
          /*
*** 3030,3044 ****
          rw_enter(&sl->sl_access_state_lock, RW_READER);
          if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
                  rw_exit(&sl->sl_access_state_lock);
                  return (SBD_FAILURE);
          }
          ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
!             (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(),
              &resid);
          rw_exit(&sl->sl_access_state_lock);
  
          DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
              uint8_t *, buf, uint64_t, size, uint64_t, offset,
              int, ret, scsi_task_t *, task);
  
  over_sl_data_read:
--- 3091,3108 ----
          rw_enter(&sl->sl_access_state_lock, RW_READER);
          if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
                  rw_exit(&sl->sl_access_state_lock);
                  return (SBD_FAILURE);
          }
+ 
          ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
!             (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
              &resid);
          rw_exit(&sl->sl_access_state_lock);
  
+         stmf_lu_xfer_done(task, B_TRUE /* read */,
+             (gethrtime() - xfer_start));
          DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
              uint8_t *, buf, uint64_t, size, uint64_t, offset,
              int, ret, scsi_task_t *, task);
  
  over_sl_data_read:
*** 3057,3080 ****
  {
          int ret;
          long resid;
          sbd_status_t sret = SBD_SUCCESS;
          int ioflag;
  
          if ((offset + size) > sl->sl_lu_size) {
                  return (SBD_IO_PAST_EOF);
          }
  
          offset += sl->sl_data_offset;
  
!         if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
!             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
                  ioflag = FSYNC;
          } else {
                  ioflag = 0;
          }
  
          DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
              uint8_t *, buf, uint64_t, size, uint64_t, offset,
              scsi_task_t *, task);
  
          /*
--- 3121,3158 ----
  {
          int ret;
          long resid;
          sbd_status_t sret = SBD_SUCCESS;
          int ioflag;
+         hrtime_t xfer_start;
+         uint8_t op = task->task_cdb[0];
+         boolean_t fua_bit = B_FALSE;
  
          if ((offset + size) > sl->sl_lu_size) {
                  return (SBD_IO_PAST_EOF);
          }
  
          offset += sl->sl_data_offset;
  
!         /*
!          * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
!          * If it is then check for bit 3 being set to indicate if Forced
!          * Unit Access is being requested. If so, the FSYNC flag will be set
!          * on the write.
!          */
!         if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
!             (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
!                 fua_bit = B_TRUE;
!         }
!         if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
!             (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
                  ioflag = FSYNC;
          } else {
                  ioflag = 0;
          }
  
+         xfer_start = gethrtime();
          DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
              uint8_t *, buf, uint64_t, size, uint64_t, offset,
              scsi_task_t *, task);
  
          /*
*** 3091,3100 ****
--- 3169,3180 ----
          ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
              (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
              &resid);
          rw_exit(&sl->sl_access_state_lock);
  
+         stmf_lu_xfer_done(task, B_FALSE /* write */,
+             (gethrtime() - xfer_start));
          DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
              uint8_t *, buf, uint64_t, size, uint64_t, offset,
              int, ret, scsi_task_t *, task);
  
          if ((ret == 0) && (resid == 0) &&
*** 3101,3111 ****
              (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
              (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
                  sret = sbd_flush_data_cache(sl, 1);
          }
  over_sl_data_write:
- 
          if ((ret || resid) || (sret != SBD_SUCCESS)) {
                  return (SBD_FAILURE);
          } else if ((offset + size) > sl->sl_data_readable_size) {
                  uint64_t old_size, new_size;
  
--- 3181,3190 ----
*** 3637,3646 ****
--- 3716,3726 ----
                              KM_SLEEP);
                          (void) strcpy(*comstarprop, ptr);
                  }
          }
  out:
+         if (nv != NULL)
                  nvlist_free(nv);
          kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
          kmem_free(zc, sizeof (zfs_cmd_t));
          (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
  
*** 3687,3718 ****
          return (rc);
  }
  
  /*
   * Unmap a region in a volume.  Currently only supported for zvols.
   */
  int
! sbd_unmap(sbd_lu_t *sl, uint64_t offset, uint64_t length)
  {
          vnode_t *vp;
!         int unused;
!         dkioc_free_t df;
  
!         /* Right now, we only support UNMAP on zvols. */
!         if (!(sl->sl_flags & SL_ZFS_META))
!                 return (EIO);
  
!         df.df_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
              DF_WAIT_SYNC : 0;
-         df.df_start = offset;
-         df.df_length = length;
  
          /* Use the data vnode we have to send a fop_ioctl(). */
          vp = sl->sl_data_vp;
          if (vp == NULL) {
                  cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
                  return (EIO);
          }
  
!         return (VOP_IOCTL(vp, DKIOCFREE, (intptr_t)(&df), FKIOCTL, kcred,
!             &unused, NULL));
  }
--- 3767,3825 ----
          return (rc);
  }
  
  /*
   * Unmap a region in a volume.  Currently only supported for zvols.
+  * The list of extents to be freed is passed in a dkioc_free_list_t
+  * which the caller is responsible for destroying.
   */
  int
! sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
  {
          vnode_t *vp;
!         int unused, ret;
  
!         /* Nothing to do */
!         if (dfl->dfl_num_exts == 0)
!                 return (0);
  
!         /*
!          * TODO: unmap performance may be improved by not doing the synchronous
!          * removal of the blocks and writing of the metadata.  The
!          * transaction is in the zil so the state should be stable.
!          */
!         dfl->dfl_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
              DF_WAIT_SYNC : 0;
  
          /* Use the data vnode we have to send a fop_ioctl(). */
          vp = sl->sl_data_vp;
          if (vp == NULL) {
                  cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
                  return (EIO);
          }
  
!         ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
!             &unused, NULL);
! 
!         return (ret);
! }
! 
! /*
!  * Check if this lu belongs to sbd or some other lu
!  * provider. A simple check for one of the module
!  * entry points is sufficient.
!  */
! int
! sbd_is_valid_lu(stmf_lu_t *lu)
! {
!         if (lu->lu_new_task == sbd_new_task)
!                 return (1);
!         return (0);
! }
! 
! uint8_t
! sbd_get_lbasize_shift(stmf_lu_t *lu)
! {
!         sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
! 
!         return (sl->sl_data_blocksize_shift);
  }