Print this page
re #7364 rb2201 "hddisco" hangs after unplugging both cables from JBOD (and NMS too)
re #8346 rb2639 KT disk failures
re #8346 rb2639 KT disk failures
re #10443 rb3479 3.1.3 crash: BAD TRAP: type=e (#pf Page fault)

*** 235,244 **** --- 235,245 ---- uint32_t *control, pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl); static void mptsas_watch(void *arg); static void mptsas_watchsubr(mptsas_t *mpt); static void mptsas_cmd_timeout(mptsas_t *mpt, uint16_t devhdl); + static void mptsas_kill_target(mptsas_t *mpt, mptsas_target_t *ptgt); static void mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd); static int mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply, uint8_t *data, uint32_t request_size, uint32_t reply_size, uint32_t data_size, uint32_t direction, uint8_t *dataout,
*** 345,362 **** static int mptsas_get_target_device_info(mptsas_t *mpt, uint32_t page_address, uint16_t *handle, mptsas_target_t **pptgt); static void mptsas_update_phymask(mptsas_t *mpt); - static int mptsas_send_sep(mptsas_t *mpt, mptsas_target_t *ptgt, - uint32_t *status, uint8_t cmd); static dev_info_t *mptsas_get_dip_from_dev(dev_t dev, mptsas_phymask_t *phymask); static mptsas_target_t *mptsas_addr_to_ptgt(mptsas_t *mpt, char *addr, mptsas_phymask_t phymask); - static int mptsas_set_led_status(mptsas_t *mpt, mptsas_target_t *ptgt, - uint32_t slotstatus); /* * Enumeration / DR functions */ --- 346,359 ----
*** 463,472 **** --- 460,477 ---- /* * Tunable timeout value for Inquiry VPD page 0x83 * By default the value is 30 seconds. */ int mptsas_inq83_retry_timeout = 30; + /* + * Maximum number of command timeouts (0 - 255) considered acceptable. + */ + int mptsas_timeout_threshold = 2; + /* + * Timeouts exceeding threshold within this period are considered excessive. + */ + int mptsas_timeout_interval = 30; /* * This is used to allocate memory for message frame storage, not for * data I/O DMA. All message frames must be stored in the first 4G of * physical memory.
*** 2616,2632 **** } static void mptsas_alloc_reply_args(mptsas_t *mpt) { ! if (mpt->m_replyh_args != NULL) { ! kmem_free(mpt->m_replyh_args, sizeof (m_replyh_arg_t) ! * mpt->m_max_replies); ! mpt->m_replyh_args = NULL; ! } mpt->m_replyh_args = kmem_zalloc(sizeof (m_replyh_arg_t) * mpt->m_max_replies, KM_SLEEP); } static int mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd) { --- 2621,2634 ---- } static void mptsas_alloc_reply_args(mptsas_t *mpt) { ! if (mpt->m_replyh_args == NULL) { mpt->m_replyh_args = kmem_zalloc(sizeof (m_replyh_arg_t) * mpt->m_max_replies, KM_SLEEP); + } } static int mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd) {
*** 4824,4833 **** --- 4826,4841 ---- * just let taskq resolve ack action * and ack would be sent in taskq thread */ NDBG20(("send mptsas_handle_event_sync success")); } + + if (mpt->m_in_reset) { + NDBG20(("dropping event received during reset")); + return; + } + if ((ddi_taskq_dispatch(mpt->m_event_taskq, mptsas_handle_event, (void *)args, DDI_NOSLEEP)) != DDI_SUCCESS) { mptsas_log(mpt, CE_WARN, "No memory available" "for dispatch taskq"); /*
*** 5761,5772 **** } ASSERT(parent); handle_topo_change: mutex_enter(&mpt->m_mutex); ! mptsas_handle_topo_change(topo_node, parent); save_node = topo_node; topo_node = topo_node->next; ASSERT(save_node); kmem_free(save_node, sizeof (mptsas_topo_change_list_t)); mutex_exit(&mpt->m_mutex); --- 5769,5786 ---- } ASSERT(parent); handle_topo_change: mutex_enter(&mpt->m_mutex); ! /* ! * If HBA is being reset, don't perform operations depending ! * on the IOC. We must free the topo list, however. ! */ ! if (!mpt->m_in_reset) mptsas_handle_topo_change(topo_node, parent); + else + NDBG20(("skipping topo change received during reset")); save_node = topo_node; topo_node = topo_node->next; ASSERT(save_node); kmem_free(save_node, sizeof (mptsas_topo_change_list_t)); mutex_exit(&mpt->m_mutex);
*** 6052,6065 **** break; } } mutex_enter(&mpt->m_mutex); - if (mptsas_set_led_status(mpt, ptgt, 0) != DDI_SUCCESS) { - NDBG14(("mptsas: clear LED for tgt %x failed", - ptgt->m_slot_num)); - } if (rval == DDI_SUCCESS) { mptsas_tgt_free(&mpt->m_active->m_tgttbl, ptgt->m_sas_wwn, ptgt->m_phymask); ptgt = NULL; } else { --- 6066,6075 ----
*** 6988,6997 **** --- 6998,7015 ---- replyh_arg = (m_replyh_arg_t *)args; rfm = replyh_arg->rfm; mpt = replyh_arg->mpt; mutex_enter(&mpt->m_mutex); + /* + * If HBA is being reset, drop incoming event. + */ + if (mpt->m_in_reset) { + NDBG20(("dropping event received prior to reset")); + mutex_exit(&mpt->m_mutex); + return; + } eventreply = (pMpi2EventNotificationReply_t) (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr)); event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
*** 8515,8524 **** --- 8533,8552 ---- reason = CMD_RESET; stat = STAT_DEV_RESET; switch (tasktype) { case MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET: if (Tgt(cmd) == target) { + if (cmd->cmd_tgt_addr->m_timeout < 0) { + /* + * When timeout requested, propagate + * proper reason and statistics to + * target drivers. + */ + reason = CMD_TIMEOUT; + stat |= STAT_TIMEOUT; + } + NDBG25(("mptsas_flush_target discovered non-" "NULL cmd in slot %d, tasktype 0x%x", slot, tasktype)); mptsas_dump_cmd(mpt, cmd); mptsas_remove_cmd(mpt, cmd);
*** 8702,8711 **** --- 8730,8747 ---- mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET); mptsas_doneq_add(mpt, cmd); mutex_enter(&mpt->m_tx_waitq_mutex); } mutex_exit(&mpt->m_tx_waitq_mutex); + + /* + * Drain the taskqs prior to reallocating resources. + */ + mutex_exit(&mpt->m_mutex); + ddi_taskq_wait(mpt->m_event_taskq); + ddi_taskq_wait(mpt->m_dr_taskq); + mutex_enter(&mpt->m_mutex); } /* * set pkt_reason and OR in pkt_statistics flag */
*** 9382,9393 **** --- 9418,9445 ---- continue; } ptgt->m_timeout -= mptsas_scsi_watchdog_tick; + if (ptgt->m_timeout_count > 0) { + ptgt->m_timeout_interval += + mptsas_scsi_watchdog_tick; + } + if (ptgt->m_timeout_interval > mptsas_timeout_interval) { + ptgt->m_timeout_interval = 0; + ptgt->m_timeout_count = 0; + } + if (ptgt->m_timeout < 0) { + ptgt->m_timeout_count++; + if (ptgt->m_timeout_count > + mptsas_timeout_threshold) { + ptgt->m_timeout_count = 0; + mptsas_kill_target(mpt, ptgt); + } else { mptsas_cmd_timeout(mpt, ptgt->m_devhdl); + } ptgt = (mptsas_target_t *)mptsas_hash_traverse( &mpt->m_active->m_tgttbl, MPTSAS_HASH_NEXT); continue; }
*** 9425,9434 ****
--- 9477,9535 ----
  		    "recovery failed!", devhdl);
  	}
  }
  
  /*
+  * Force a target that keeps timing out offline.
+  *
+  * Logs a warning, builds a topology-change node describing an
+  * MPTSAS_DR_EVENT_OFFLINE_TARGET event for the target's device handle and
+  * dispatches it to mptsas_handle_dr() on the DR taskq, faking the event a
+  * real topology change would have produced.
+  *
+  *	mpt	HBA instance whose DR taskq receives the event
+  *	ptgt	target to offline; m_phymask and m_devhdl are copied into the
+  *		node and m_deviceinfo selects the attachment flag
+  *
+  * NOTE(review): the node is allocated with KM_SLEEP; confirm that every
+  * caller runs in a context where blocking is permitted.
+  */
+ static void
+ mptsas_kill_target(mptsas_t *mpt, mptsas_target_t *ptgt)
+ {
+ 	mptsas_topo_change_list_t *topo_node = NULL;
+
+ 	NDBG29(("mptsas_tgt_kill: target=%d", ptgt->m_devhdl));
+ 	mptsas_log(mpt, CE_WARN, "timeout threshold exceeded for "
+ 	    "Target %d", ptgt->m_devhdl);
+
+ 	topo_node = kmem_zalloc(sizeof (mptsas_topo_change_list_t), KM_SLEEP);
+ 	topo_node->mpt = mpt;
+ 	topo_node->un.phymask = ptgt->m_phymask;
+ 	topo_node->event = MPTSAS_DR_EVENT_OFFLINE_TARGET;
+ 	topo_node->devhdl = ptgt->m_devhdl;
+ 	if (ptgt->m_deviceinfo & DEVINFO_DIRECT_ATTACHED)
+ 		topo_node->flags = MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE;
+ 	else
+ 		topo_node->flags = MPTSAS_TOPO_FLAG_EXPANDER_ATTACHED_DEVICE;
+ 	topo_node->object = NULL;
+
+ 	/*
+ 	 * Launch DR taskq to fake topology change.  On success the node is
+ 	 * handed to mptsas_handle_dr(); on failure nothing else holds a
+ 	 * reference to it, so release it here rather than leak it.
+ 	 */
+ 	if ((ddi_taskq_dispatch(mpt->m_dr_taskq,
+ 	    mptsas_handle_dr, (void *)topo_node,
+ 	    DDI_NOSLEEP)) != DDI_SUCCESS) {
+ 		kmem_free(topo_node, sizeof (mptsas_topo_change_list_t));
+ 		mptsas_log(mpt, CE_NOTE, "mptsas start taskq "
+ 		    "for fake offline event failed. \n");
+ 	}
+ }
+
+ /*
   * Device / Hotplug control
   */
  static int
  mptsas_scsi_quiesce(dev_info_t *dip)
  {
*** 11324,11393 **** mutex_exit(&mpt->m_mutex); } if (iport_flag) { status = scsi_hba_ioctl(dev, cmd, data, mode, credp, rval); - if (status != 0) { goto out; } - /* - * The following code control the OK2RM LED, it doesn't affect - * the ioctl return status. - */ - if ((cmd == DEVCTL_DEVICE_ONLINE) || - (cmd == DEVCTL_DEVICE_OFFLINE)) { - if (ndi_dc_allochdl((void *)data, &dcp) != - NDI_SUCCESS) { - goto out; - } - addr = ndi_dc_getaddr(dcp); - ptgt = mptsas_addr_to_ptgt(mpt, addr, phymask); - if (ptgt == NULL) { - NDBG14(("mptsas_ioctl led control: tgt %s not " - "found", addr)); - ndi_dc_freehdl(dcp); - goto out; - } - mutex_enter(&mpt->m_mutex); - if (cmd == DEVCTL_DEVICE_ONLINE) { - ptgt->m_tgt_unconfigured = 0; - } else if (cmd == DEVCTL_DEVICE_OFFLINE) { - ptgt->m_tgt_unconfigured = 1; - } - slotstatus = 0; - #ifdef MPTSAS_GET_LED - /* - * The get led status can't get a valid/reasonable - * state, so ignore the get led status, and write the - * required value directly - */ - if (mptsas_get_led_status(mpt, ptgt, &slotstatus) != - DDI_SUCCESS) { - NDBG14(("mptsas_ioctl: get LED for tgt %s " - "failed %x", addr, slotstatus)); - slotstatus = 0; - } - NDBG14(("mptsas_ioctl: LED status %x for %s", - slotstatus, addr)); - #endif - if (cmd == DEVCTL_DEVICE_OFFLINE) { - slotstatus |= - MPI2_SEP_REQ_SLOTSTATUS_REQUEST_REMOVE; - } else { - slotstatus &= - ~MPI2_SEP_REQ_SLOTSTATUS_REQUEST_REMOVE; - } - if (mptsas_set_led_status(mpt, ptgt, slotstatus) != - DDI_SUCCESS) { - NDBG14(("mptsas_ioctl: set LED for tgt %s " - "failed %x", addr, slotstatus)); - } - mutex_exit(&mpt->m_mutex); - ndi_dc_freehdl(dcp); - } - goto out; - } switch (cmd) { case MPTIOCTL_UPDATE_FLASH: if (ddi_copyin((void *)data, &flashdata, sizeof (struct mptsas_update_flash), mode)) { status = EFAULT; --- 11410,11421 ----
*** 13836,13849 **** (void) ddi_prop_free(old_guid); if ((!MDI_PI_IS_ONLINE(*pip)) && (!MDI_PI_IS_STANDBY(*pip)) && (ptgt->m_tgt_unconfigured == 0)) { rval = mdi_pi_online(*pip, 0); - mutex_enter(&mpt->m_mutex); - (void) mptsas_set_led_status(mpt, ptgt, - 0); - mutex_exit(&mpt->m_mutex); } else { rval = DDI_SUCCESS; } if (rval != DDI_SUCCESS) { mptsas_log(mpt, CE_WARN, "path:target: " --- 13864,13873 ----
*** 14093,14111 **** mdi_rtn = MDI_FAILURE; goto virt_create_done; } NDBG20(("new path:%s onlining,", MDI_PI(*pip)->pi_addr)); mdi_rtn = mdi_pi_online(*pip, 0); - if (mdi_rtn == MDI_SUCCESS) { - mutex_enter(&mpt->m_mutex); - if (mptsas_set_led_status(mpt, ptgt, 0) != - DDI_SUCCESS) { - NDBG14(("mptsas: clear LED for slot %x " - "failed", ptgt->m_slot_num)); - } - mutex_exit(&mpt->m_mutex); - } if (mdi_rtn == MDI_NOT_SUPPORTED) { mdi_rtn = MDI_FAILURE; } virt_create_done: if (*pip && mdi_rtn != MDI_SUCCESS) { --- 14117,14126 ----
*** 14455,14473 **** /* * Try to online the new node */ ndi_rtn = ndi_devi_online(*lun_dip, NDI_ONLINE_ATTACH); } - if (ndi_rtn == NDI_SUCCESS) { - mutex_enter(&mpt->m_mutex); - if (mptsas_set_led_status(mpt, ptgt, 0) != - DDI_SUCCESS) { - NDBG14(("mptsas: clear LED for tgt %x " - "failed", ptgt->m_slot_num)); - } - mutex_exit(&mpt->m_mutex); - } /* * If success set rtn flag, else unwire alloc'd lun */ if (ndi_rtn != NDI_SUCCESS) { --- 14470,14479 ----
*** 15356,15438 **** ptgt = mptsas_phy_to_tgt(mpt, (int)phymask, phynum); } return (ptgt); } - #ifdef MPTSAS_GET_LED - static int - mptsas_get_led_status(mptsas_t *mpt, mptsas_target_t *ptgt, - uint32_t *slotstatus) - { - return (mptsas_send_sep(mpt, ptgt, slotstatus, - MPI2_SEP_REQ_ACTION_READ_STATUS)); - } - #endif - static int - mptsas_set_led_status(mptsas_t *mpt, mptsas_target_t *ptgt, uint32_t slotstatus) - { - NDBG14(("mptsas_ioctl: set LED status %x for slot %x", - slotstatus, ptgt->m_slot_num)); - return (mptsas_send_sep(mpt, ptgt, &slotstatus, - MPI2_SEP_REQ_ACTION_WRITE_STATUS)); - } - /* - * send sep request, use enclosure/slot addressing - */ - static int mptsas_send_sep(mptsas_t *mpt, mptsas_target_t *ptgt, - uint32_t *status, uint8_t act) - { - Mpi2SepRequest_t req; - Mpi2SepReply_t rep; - int ret; - - ASSERT(mutex_owned(&mpt->m_mutex)); - - bzero(&req, sizeof (req)); - bzero(&rep, sizeof (rep)); - - /* Do nothing for RAID volumes */ - if (ptgt->m_phymask == 0) { - NDBG14(("mptsas_send_sep: Skip RAID volumes")); - return (DDI_FAILURE); - } - - req.Function = MPI2_FUNCTION_SCSI_ENCLOSURE_PROCESSOR; - req.Action = act; - req.Flags = MPI2_SEP_REQ_FLAGS_ENCLOSURE_SLOT_ADDRESS; - req.EnclosureHandle = LE_16(ptgt->m_enclosure); - req.Slot = LE_16(ptgt->m_slot_num); - if (act == MPI2_SEP_REQ_ACTION_WRITE_STATUS) { - req.SlotStatus = LE_32(*status); - } - ret = mptsas_do_passthru(mpt, (uint8_t *)&req, (uint8_t *)&rep, NULL, - sizeof (req), sizeof (rep), NULL, 0, NULL, 0, 60, FKIOCTL); - if (ret != 0) { - mptsas_log(mpt, CE_NOTE, "mptsas_send_sep: passthru SEP " - "Processor Request message error %d", ret); - return (DDI_FAILURE); - } - /* do passthrough success, check the ioc status */ - if (LE_16(rep.IOCStatus) != MPI2_IOCSTATUS_SUCCESS) { - if ((LE_16(rep.IOCStatus) & MPI2_IOCSTATUS_MASK) == - MPI2_IOCSTATUS_INVALID_FIELD) { - mptsas_log(mpt, CE_NOTE, "send sep act %x: Not " - "supported action, loginfo %x", act, - LE_32(rep.IOCLogInfo)); - return 
(DDI_FAILURE); - } - mptsas_log(mpt, CE_NOTE, "send_sep act %x: ioc " - "status:%x", act, LE_16(rep.IOCStatus)); - return (DDI_FAILURE); - } - if (act != MPI2_SEP_REQ_ACTION_WRITE_STATUS) { - *status = LE_32(rep.SlotStatus); - } - - return (DDI_SUCCESS); - } - int mptsas_dma_addr_create(mptsas_t *mpt, ddi_dma_attr_t dma_attr, ddi_dma_handle_t *dma_hdp, ddi_acc_handle_t *acc_hdp, caddr_t *dma_memp, uint32_t alloc_size, ddi_dma_cookie_t *cookiep) { --- 15362,15371 ----