Print this page
re #7364 rb2201 "hddisco" hangs after unplugging both cables from JBOD (and NMS too)
re #8346 rb2639 KT disk failures
re #8346 rb2639 KT disk failures
re #10443 rb3479 3.1.3 crash: BAD TRAP: type=e (#pf Page fault)
@@ -235,10 +235,11 @@
uint32_t *control, pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl);
static void mptsas_watch(void *arg);
static void mptsas_watchsubr(mptsas_t *mpt);
static void mptsas_cmd_timeout(mptsas_t *mpt, uint16_t devhdl);
+static void mptsas_kill_target(mptsas_t *mpt, mptsas_target_t *ptgt);
static void mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd);
static int mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply,
uint8_t *data, uint32_t request_size, uint32_t reply_size,
uint32_t data_size, uint32_t direction, uint8_t *dataout,
@@ -345,18 +346,14 @@
static int mptsas_get_target_device_info(mptsas_t *mpt, uint32_t page_address,
uint16_t *handle, mptsas_target_t **pptgt);
static void mptsas_update_phymask(mptsas_t *mpt);
-static int mptsas_send_sep(mptsas_t *mpt, mptsas_target_t *ptgt,
- uint32_t *status, uint8_t cmd);
static dev_info_t *mptsas_get_dip_from_dev(dev_t dev,
mptsas_phymask_t *phymask);
static mptsas_target_t *mptsas_addr_to_ptgt(mptsas_t *mpt, char *addr,
mptsas_phymask_t phymask);
-static int mptsas_set_led_status(mptsas_t *mpt, mptsas_target_t *ptgt,
- uint32_t slotstatus);
/*
* Enumeration / DR functions
*/
@@ -463,10 +460,18 @@
/*
* Tunable timeout value for Inquiry VPD page 0x83
* By default the value is 30 seconds.
*/
int mptsas_inq83_retry_timeout = 30;
+/*
+ * Maximum number of command timeouts (0 - 255) considered acceptable.
+ * While a target's timeout count stays at or below this value, each
+ * timeout is handled by mptsas_cmd_timeout(); once the count exceeds
+ * it, the count is cleared and mptsas_kill_target() is called instead.
+ */
+int mptsas_timeout_threshold = 2;
+/*
+ * Timeouts exceeding threshold within this period are considered excessive.
+ * The period is accumulated in mptsas_scsi_watchdog_tick steps while a
+ * target has a non-zero timeout count; when the accumulated period
+ * exceeds this value, both the period and the count are reset to zero.
+ */
+int mptsas_timeout_interval = 30;
/*
* This is used to allocate memory for message frame storage, not for
* data I/O DMA. All message frames must be stored in the first 4G of
* physical memory.
@@ -2616,17 +2621,14 @@
}
static void
mptsas_alloc_reply_args(mptsas_t *mpt)
{
- if (mpt->m_replyh_args != NULL) {
- kmem_free(mpt->m_replyh_args, sizeof (m_replyh_arg_t)
- * mpt->m_max_replies);
- mpt->m_replyh_args = NULL;
- }
+ if (mpt->m_replyh_args == NULL) { /* allocate once; keep existing array */
mpt->m_replyh_args = kmem_zalloc(sizeof (m_replyh_arg_t) *
mpt->m_max_replies, KM_SLEEP);
+ } /* NOTE(review): assumes m_max_replies is unchanged on later calls */
}
static int
mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd)
{
@@ -4824,10 +4826,16 @@
* just let taskq resolve ack action
* and ack would be sent in taskq thread
*/
NDBG20(("send mptsas_handle_event_sync success"));
}
+
+ if (mpt->m_in_reset) {
+ NDBG20(("dropping event received during reset"));
+ return;
+ }
+
if ((ddi_taskq_dispatch(mpt->m_event_taskq, mptsas_handle_event,
(void *)args, DDI_NOSLEEP)) != DDI_SUCCESS) {
mptsas_log(mpt, CE_WARN, "No memory available"
"for dispatch taskq");
/*
@@ -5761,12 +5769,18 @@
}
ASSERT(parent);
handle_topo_change:
mutex_enter(&mpt->m_mutex);
-
+ /*
+ * If HBA is being reset, don't perform operations depending
+ * on the IOC. We must free the topo list, however.
+ */
+ if (!mpt->m_in_reset)
mptsas_handle_topo_change(topo_node, parent);
+ else
+ NDBG20(("skipping topo change received during reset"));
save_node = topo_node;
topo_node = topo_node->next;
ASSERT(save_node);
kmem_free(save_node, sizeof (mptsas_topo_change_list_t));
mutex_exit(&mpt->m_mutex);
@@ -6052,14 +6066,10 @@
break;
}
}
mutex_enter(&mpt->m_mutex);
- if (mptsas_set_led_status(mpt, ptgt, 0) != DDI_SUCCESS) {
- NDBG14(("mptsas: clear LED for tgt %x failed",
- ptgt->m_slot_num));
- }
if (rval == DDI_SUCCESS) {
mptsas_tgt_free(&mpt->m_active->m_tgttbl,
ptgt->m_sas_wwn, ptgt->m_phymask);
ptgt = NULL;
} else {
@@ -6988,10 +6998,18 @@
replyh_arg = (m_replyh_arg_t *)args;
rfm = replyh_arg->rfm;
mpt = replyh_arg->mpt;
mutex_enter(&mpt->m_mutex);
+ /*
+ * If HBA is being reset, drop incoming event.
+ */
+ if (mpt->m_in_reset) {
+ NDBG20(("dropping event received prior to reset"));
+ mutex_exit(&mpt->m_mutex);
+ return;
+ }
eventreply = (pMpi2EventNotificationReply_t)
(mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr));
event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
@@ -8515,10 +8533,20 @@
reason = CMD_RESET;
stat = STAT_DEV_RESET;
switch (tasktype) {
case MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET:
if (Tgt(cmd) == target) {
+ if (cmd->cmd_tgt_addr->m_timeout < 0) {
+ /*
+ * The target's timeout has expired
+ * (m_timeout < 0): report CMD_TIMEOUT
+ * and add STAT_TIMEOUT for target drivers.
+ */
+ reason = CMD_TIMEOUT;
+ stat |= STAT_TIMEOUT;
+ }
+
NDBG25(("mptsas_flush_target discovered non-"
"NULL cmd in slot %d, tasktype 0x%x", slot,
tasktype));
mptsas_dump_cmd(mpt, cmd);
mptsas_remove_cmd(mpt, cmd);
@@ -8702,10 +8730,18 @@
mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET);
mptsas_doneq_add(mpt, cmd);
mutex_enter(&mpt->m_tx_waitq_mutex);
}
mutex_exit(&mpt->m_tx_waitq_mutex);
+
+ /*
+ * Drain the taskqs prior to reallocating resources.
+ */
+ mutex_exit(&mpt->m_mutex);
+ ddi_taskq_wait(mpt->m_event_taskq);
+ ddi_taskq_wait(mpt->m_dr_taskq);
+ mutex_enter(&mpt->m_mutex);
}
/*
* set pkt_reason and OR in pkt_statistics flag
*/
@@ -9382,12 +9418,28 @@
continue;
}
ptgt->m_timeout -= mptsas_scsi_watchdog_tick;
+ if (ptgt->m_timeout_count > 0) {
+ ptgt->m_timeout_interval +=
+ mptsas_scsi_watchdog_tick;
+ }
+ if (ptgt->m_timeout_interval > mptsas_timeout_interval) {
+ ptgt->m_timeout_interval = 0;
+ ptgt->m_timeout_count = 0;
+ }
+
if (ptgt->m_timeout < 0) {
+ ptgt->m_timeout_count++;
+ if (ptgt->m_timeout_count >
+ mptsas_timeout_threshold) {
+ ptgt->m_timeout_count = 0;
+ mptsas_kill_target(mpt, ptgt);
+ } else {
mptsas_cmd_timeout(mpt, ptgt->m_devhdl);
+ }
ptgt = (mptsas_target_t *)mptsas_hash_traverse(
&mpt->m_active->m_tgttbl, MPTSAS_HASH_NEXT);
continue;
}
@@ -9425,10 +9477,44 @@
"recovery failed!", devhdl);
}
}
/*
+ * Offline a target that is causing too many command timeouts.
+ *
+ * Builds an MPTSAS_DR_EVENT_OFFLINE_TARGET topology-change node for the
+ * target and dispatches mptsas_handle_dr() on the DR taskq with it, faking
+ * a topology change for that device.
+ *
+ *	mpt	HBA soft state that owns the target and the DR taskq
+ *	ptgt	target whose timeout count exceeded mptsas_timeout_threshold
+ *
+ * Once dispatched, the node is freed by the DR taskq handler.  If the
+ * dispatch fails the node is freed here and the failure is only logged;
+ * the target is left as it was.
+ */
+static void
+mptsas_kill_target(mptsas_t *mpt, mptsas_target_t *ptgt)
+{
+	mptsas_topo_change_list_t *topo_node = NULL;
+
+	NDBG29(("mptsas_kill_target: target=%d", ptgt->m_devhdl));
+	mptsas_log(mpt, CE_WARN, "timeout threshold exceeded for "
+	    "Target %d", ptgt->m_devhdl);
+
+	/*
+	 * NOTE(review): KM_SLEEP may block; confirm that is acceptable in
+	 * the context this is called from.
+	 */
+	topo_node = kmem_zalloc(sizeof (mptsas_topo_change_list_t), KM_SLEEP);
+	topo_node->mpt = mpt;
+	topo_node->un.phymask = ptgt->m_phymask;
+	topo_node->event = MPTSAS_DR_EVENT_OFFLINE_TARGET;
+	topo_node->devhdl = ptgt->m_devhdl;
+	if (ptgt->m_deviceinfo & DEVINFO_DIRECT_ATTACHED)
+		topo_node->flags = MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE;
+	else
+		topo_node->flags = MPTSAS_TOPO_FLAG_EXPANDER_ATTACHED_DEVICE;
+	topo_node->object = NULL;
+
+	/*
+	 * Launch DR taskq to fake topology change
+	 */
+	if ((ddi_taskq_dispatch(mpt->m_dr_taskq,
+	    mptsas_handle_dr, (void *)topo_node,
+	    DDI_NOSLEEP)) != DDI_SUCCESS) {
+		/*
+		 * The handler never ran, so nobody else will release the
+		 * node; free it here to avoid leaking it.
+		 */
+		kmem_free(topo_node, sizeof (mptsas_topo_change_list_t));
+		mptsas_log(mpt, CE_NOTE, "mptsas start taskq "
+		    "for fake offline event failed. \n");
+	}
+}
+
+/*
* Device / Hotplug control
*/
static int
mptsas_scsi_quiesce(dev_info_t *dip)
{
@@ -11324,70 +11410,12 @@
mutex_exit(&mpt->m_mutex);
}
if (iport_flag) {
status = scsi_hba_ioctl(dev, cmd, data, mode, credp, rval);
- if (status != 0) {
goto out;
}
- /*
- * The following code control the OK2RM LED, it doesn't affect
- * the ioctl return status.
- */
- if ((cmd == DEVCTL_DEVICE_ONLINE) ||
- (cmd == DEVCTL_DEVICE_OFFLINE)) {
- if (ndi_dc_allochdl((void *)data, &dcp) !=
- NDI_SUCCESS) {
- goto out;
- }
- addr = ndi_dc_getaddr(dcp);
- ptgt = mptsas_addr_to_ptgt(mpt, addr, phymask);
- if (ptgt == NULL) {
- NDBG14(("mptsas_ioctl led control: tgt %s not "
- "found", addr));
- ndi_dc_freehdl(dcp);
- goto out;
- }
- mutex_enter(&mpt->m_mutex);
- if (cmd == DEVCTL_DEVICE_ONLINE) {
- ptgt->m_tgt_unconfigured = 0;
- } else if (cmd == DEVCTL_DEVICE_OFFLINE) {
- ptgt->m_tgt_unconfigured = 1;
- }
- slotstatus = 0;
-#ifdef MPTSAS_GET_LED
- /*
- * The get led status can't get a valid/reasonable
- * state, so ignore the get led status, and write the
- * required value directly
- */
- if (mptsas_get_led_status(mpt, ptgt, &slotstatus) !=
- DDI_SUCCESS) {
- NDBG14(("mptsas_ioctl: get LED for tgt %s "
- "failed %x", addr, slotstatus));
- slotstatus = 0;
- }
- NDBG14(("mptsas_ioctl: LED status %x for %s",
- slotstatus, addr));
-#endif
- if (cmd == DEVCTL_DEVICE_OFFLINE) {
- slotstatus |=
- MPI2_SEP_REQ_SLOTSTATUS_REQUEST_REMOVE;
- } else {
- slotstatus &=
- ~MPI2_SEP_REQ_SLOTSTATUS_REQUEST_REMOVE;
- }
- if (mptsas_set_led_status(mpt, ptgt, slotstatus) !=
- DDI_SUCCESS) {
- NDBG14(("mptsas_ioctl: set LED for tgt %s "
- "failed %x", addr, slotstatus));
- }
- mutex_exit(&mpt->m_mutex);
- ndi_dc_freehdl(dcp);
- }
- goto out;
- }
switch (cmd) {
case MPTIOCTL_UPDATE_FLASH:
if (ddi_copyin((void *)data, &flashdata,
sizeof (struct mptsas_update_flash), mode)) {
status = EFAULT;
@@ -13836,14 +13864,10 @@
(void) ddi_prop_free(old_guid);
if ((!MDI_PI_IS_ONLINE(*pip)) &&
(!MDI_PI_IS_STANDBY(*pip)) &&
(ptgt->m_tgt_unconfigured == 0)) {
rval = mdi_pi_online(*pip, 0);
- mutex_enter(&mpt->m_mutex);
- (void) mptsas_set_led_status(mpt, ptgt,
- 0);
- mutex_exit(&mpt->m_mutex);
} else {
rval = DDI_SUCCESS;
}
if (rval != DDI_SUCCESS) {
mptsas_log(mpt, CE_WARN, "path:target: "
@@ -14093,19 +14117,10 @@
mdi_rtn = MDI_FAILURE;
goto virt_create_done;
}
NDBG20(("new path:%s onlining,", MDI_PI(*pip)->pi_addr));
mdi_rtn = mdi_pi_online(*pip, 0);
- if (mdi_rtn == MDI_SUCCESS) {
- mutex_enter(&mpt->m_mutex);
- if (mptsas_set_led_status(mpt, ptgt, 0) !=
- DDI_SUCCESS) {
- NDBG14(("mptsas: clear LED for slot %x "
- "failed", ptgt->m_slot_num));
- }
- mutex_exit(&mpt->m_mutex);
- }
if (mdi_rtn == MDI_NOT_SUPPORTED) {
mdi_rtn = MDI_FAILURE;
}
virt_create_done:
if (*pip && mdi_rtn != MDI_SUCCESS) {
@@ -14455,19 +14470,10 @@
/*
* Try to online the new node
*/
ndi_rtn = ndi_devi_online(*lun_dip, NDI_ONLINE_ATTACH);
}
- if (ndi_rtn == NDI_SUCCESS) {
- mutex_enter(&mpt->m_mutex);
- if (mptsas_set_led_status(mpt, ptgt, 0) !=
- DDI_SUCCESS) {
- NDBG14(("mptsas: clear LED for tgt %x "
- "failed", ptgt->m_slot_num));
- }
- mutex_exit(&mpt->m_mutex);
- }
/*
* If success set rtn flag, else unwire alloc'd lun
*/
if (ndi_rtn != NDI_SUCCESS) {
@@ -15356,83 +15362,10 @@
ptgt = mptsas_phy_to_tgt(mpt, (int)phymask, phynum);
}
return (ptgt);
}
-#ifdef MPTSAS_GET_LED
-static int
-mptsas_get_led_status(mptsas_t *mpt, mptsas_target_t *ptgt,
- uint32_t *slotstatus)
-{
- return (mptsas_send_sep(mpt, ptgt, slotstatus,
- MPI2_SEP_REQ_ACTION_READ_STATUS));
-}
-#endif
-static int
-mptsas_set_led_status(mptsas_t *mpt, mptsas_target_t *ptgt, uint32_t slotstatus)
-{
- NDBG14(("mptsas_ioctl: set LED status %x for slot %x",
- slotstatus, ptgt->m_slot_num));
- return (mptsas_send_sep(mpt, ptgt, &slotstatus,
- MPI2_SEP_REQ_ACTION_WRITE_STATUS));
-}
-/*
- * send sep request, use enclosure/slot addressing
- */
-static int mptsas_send_sep(mptsas_t *mpt, mptsas_target_t *ptgt,
- uint32_t *status, uint8_t act)
-{
- Mpi2SepRequest_t req;
- Mpi2SepReply_t rep;
- int ret;
-
- ASSERT(mutex_owned(&mpt->m_mutex));
-
- bzero(&req, sizeof (req));
- bzero(&rep, sizeof (rep));
-
- /* Do nothing for RAID volumes */
- if (ptgt->m_phymask == 0) {
- NDBG14(("mptsas_send_sep: Skip RAID volumes"));
- return (DDI_FAILURE);
- }
-
- req.Function = MPI2_FUNCTION_SCSI_ENCLOSURE_PROCESSOR;
- req.Action = act;
- req.Flags = MPI2_SEP_REQ_FLAGS_ENCLOSURE_SLOT_ADDRESS;
- req.EnclosureHandle = LE_16(ptgt->m_enclosure);
- req.Slot = LE_16(ptgt->m_slot_num);
- if (act == MPI2_SEP_REQ_ACTION_WRITE_STATUS) {
- req.SlotStatus = LE_32(*status);
- }
- ret = mptsas_do_passthru(mpt, (uint8_t *)&req, (uint8_t *)&rep, NULL,
- sizeof (req), sizeof (rep), NULL, 0, NULL, 0, 60, FKIOCTL);
- if (ret != 0) {
- mptsas_log(mpt, CE_NOTE, "mptsas_send_sep: passthru SEP "
- "Processor Request message error %d", ret);
- return (DDI_FAILURE);
- }
- /* do passthrough success, check the ioc status */
- if (LE_16(rep.IOCStatus) != MPI2_IOCSTATUS_SUCCESS) {
- if ((LE_16(rep.IOCStatus) & MPI2_IOCSTATUS_MASK) ==
- MPI2_IOCSTATUS_INVALID_FIELD) {
- mptsas_log(mpt, CE_NOTE, "send sep act %x: Not "
- "supported action, loginfo %x", act,
- LE_32(rep.IOCLogInfo));
- return (DDI_FAILURE);
- }
- mptsas_log(mpt, CE_NOTE, "send_sep act %x: ioc "
- "status:%x", act, LE_16(rep.IOCStatus));
- return (DDI_FAILURE);
- }
- if (act != MPI2_SEP_REQ_ACTION_WRITE_STATUS) {
- *status = LE_32(rep.SlotStatus);
- }
-
- return (DDI_SUCCESS);
-}
-
int
mptsas_dma_addr_create(mptsas_t *mpt, ddi_dma_attr_t dma_attr,
ddi_dma_handle_t *dma_hdp, ddi_acc_handle_t *acc_hdp, caddr_t *dma_memp,
uint32_t alloc_size, ddi_dma_cookie_t *cookiep)
{