Print this page
re #7364 rb2201 "hddisco" hangs after unplugging both cables from JBOD (and NMS too)
re #8346 rb2639 KT disk failures
re #8346 rb2639 KT disk failures
re #10443 rb3479 3.1.3 crash: BAD TRAP: type=e (#pf Page fault)
        
@@ -235,10 +235,11 @@
     uint32_t *control, pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl);
 
 static void mptsas_watch(void *arg);
 static void mptsas_watchsubr(mptsas_t *mpt);
 static void mptsas_cmd_timeout(mptsas_t *mpt, uint16_t devhdl);
+static void mptsas_kill_target(mptsas_t *mpt, mptsas_target_t *ptgt);
 
 static void mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd);
 static int mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply,
     uint8_t *data, uint32_t request_size, uint32_t reply_size,
     uint32_t data_size, uint32_t direction, uint8_t *dataout,
@@ -345,18 +346,14 @@
 
 static int mptsas_get_target_device_info(mptsas_t *mpt, uint32_t page_address,
     uint16_t *handle, mptsas_target_t **pptgt);
 static void mptsas_update_phymask(mptsas_t *mpt);
 
-static int mptsas_send_sep(mptsas_t *mpt, mptsas_target_t *ptgt,
-    uint32_t *status, uint8_t cmd);
 static dev_info_t *mptsas_get_dip_from_dev(dev_t dev,
     mptsas_phymask_t *phymask);
 static mptsas_target_t *mptsas_addr_to_ptgt(mptsas_t *mpt, char *addr,
     mptsas_phymask_t phymask);
-static int mptsas_set_led_status(mptsas_t *mpt, mptsas_target_t *ptgt,
-    uint32_t slotstatus);
 
 
 /*
  * Enumeration / DR functions
  */
@@ -463,10 +460,18 @@
 /*
  * Tunable timeout value for Inquiry VPD page 0x83
  * By default the value is 30 seconds.
  */
 int mptsas_inq83_retry_timeout = 30;
+/*
+ * Maximum number of command timeouts (0 - 255) considered acceptable.
+ */
+int mptsas_timeout_threshold = 2;
+/*
+ * Timeouts exceeding threshold within this period are considered excessive.
+ */
+int mptsas_timeout_interval = 30;
 
 /*
  * This is used to allocate memory for message frame storage, not for
  * data I/O DMA. All message frames must be stored in the first 4G of
  * physical memory.
@@ -2616,17 +2621,14 @@
 }
 
 static void
 mptsas_alloc_reply_args(mptsas_t *mpt)
 {
-        if (mpt->m_replyh_args != NULL) {
-                kmem_free(mpt->m_replyh_args, sizeof (m_replyh_arg_t)
-                    * mpt->m_max_replies);
-                mpt->m_replyh_args = NULL;
-        }
+        if (mpt->m_replyh_args == NULL) {       /* no array yet: allocate */
         mpt->m_replyh_args = kmem_zalloc(sizeof (m_replyh_arg_t) *
             mpt->m_max_replies, KM_SLEEP);
+        }
 }
 
 static int
 mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd)
 {
@@ -4824,10 +4826,16 @@
                          * just let taskq resolve ack action
                          * and ack would be sent in taskq thread
                          */
                         NDBG20(("send mptsas_handle_event_sync success"));
                 }
+
+                if (mpt->m_in_reset) {
+                        NDBG20(("dropping event received during reset"));
+                        return;
+                }
+
                 if ((ddi_taskq_dispatch(mpt->m_event_taskq, mptsas_handle_event,
                     (void *)args, DDI_NOSLEEP)) != DDI_SUCCESS) {
                         mptsas_log(mpt, CE_WARN, "No memory available"
                         "for dispatch taskq");
                         /*
@@ -5761,12 +5769,18 @@
                 }
                 ASSERT(parent);
 handle_topo_change:
 
                 mutex_enter(&mpt->m_mutex);
-
+                /*
+                 * If HBA is being reset, don't perform operations depending
+                 * on the IOC. We must free the topo list, however.
+                 */
+                if (!mpt->m_in_reset)
                 mptsas_handle_topo_change(topo_node, parent);
+                else
+                        NDBG20(("skipping topo change received during reset"));
                 save_node = topo_node;
                 topo_node = topo_node->next;
                 ASSERT(save_node);
                 kmem_free(save_node, sizeof (mptsas_topo_change_list_t));
                 mutex_exit(&mpt->m_mutex);
@@ -6052,14 +6066,10 @@
                                 break;
                         }
                 }
 
                 mutex_enter(&mpt->m_mutex);
-                if (mptsas_set_led_status(mpt, ptgt, 0) != DDI_SUCCESS) {
-                        NDBG14(("mptsas: clear LED for tgt %x failed",
-                            ptgt->m_slot_num));
-                }
                 if (rval == DDI_SUCCESS) {
                         mptsas_tgt_free(&mpt->m_active->m_tgttbl,
                             ptgt->m_sas_wwn, ptgt->m_phymask);
                         ptgt = NULL;
                 } else {
@@ -6988,10 +6998,18 @@
         replyh_arg = (m_replyh_arg_t *)args;
         rfm = replyh_arg->rfm;
         mpt = replyh_arg->mpt;
 
         mutex_enter(&mpt->m_mutex);
+        /*
+         * If HBA is being reset, drop incoming event.
+         */
+        if (mpt->m_in_reset) {
+                NDBG20(("dropping event received prior to reset"));
+                mutex_exit(&mpt->m_mutex);
+                return;
+        }
 
         eventreply = (pMpi2EventNotificationReply_t)
             (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr));
         event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
 
@@ -8515,10 +8533,20 @@
                 reason = CMD_RESET;
                 stat = STAT_DEV_RESET;
                 switch (tasktype) {
                 case MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET:
                         if (Tgt(cmd) == target) {
+                                if (cmd->cmd_tgt_addr->m_timeout < 0) {
+                                        /*
+                                         * When timeout requested, propagate
+                                         * proper reason and statistics to
+                                         * target drivers.
+                                         */
+                                        reason = CMD_TIMEOUT;
+                                        stat |= STAT_TIMEOUT;
+                                }
+                                
                                 NDBG25(("mptsas_flush_target discovered non-"
                                     "NULL cmd in slot %d, tasktype 0x%x", slot,
                                     tasktype));
                                 mptsas_dump_cmd(mpt, cmd);
                                 mptsas_remove_cmd(mpt, cmd);
@@ -8702,10 +8730,18 @@
                 mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET);
                 mptsas_doneq_add(mpt, cmd);
                 mutex_enter(&mpt->m_tx_waitq_mutex);
         }
         mutex_exit(&mpt->m_tx_waitq_mutex);
+
+        /*
+         * Drain the taskqs prior to reallocating resources.
+         */
+        mutex_exit(&mpt->m_mutex);
+        ddi_taskq_wait(mpt->m_event_taskq);
+        ddi_taskq_wait(mpt->m_dr_taskq);
+        mutex_enter(&mpt->m_mutex);
 }
 
 /*
  * set pkt_reason and OR in pkt_statistics flag
  */
@@ -9382,12 +9418,28 @@
                                 continue;
                         }
 
                         ptgt->m_timeout -= mptsas_scsi_watchdog_tick;
 
+                        if (ptgt->m_timeout_count > 0) {
+                                ptgt->m_timeout_interval +=
+                                    mptsas_scsi_watchdog_tick;
+                        }
+                        if (ptgt->m_timeout_interval > mptsas_timeout_interval) {
+                                ptgt->m_timeout_interval = 0;
+                                ptgt->m_timeout_count = 0;
+                        }
+
                         if (ptgt->m_timeout < 0) {
+                                ptgt->m_timeout_count++;
+                                if (ptgt->m_timeout_count >
+                                    mptsas_timeout_threshold) {
+                                        ptgt->m_timeout_count = 0;
+                                        mptsas_kill_target(mpt, ptgt);
+                                } else {
                                 mptsas_cmd_timeout(mpt, ptgt->m_devhdl);
+                                }
                                 ptgt = (mptsas_target_t *)mptsas_hash_traverse(
                                     &mpt->m_active->m_tgttbl, MPTSAS_HASH_NEXT);
                                 continue;
                         }
 
@@ -9425,10 +9477,44 @@
                     "recovery failed!", devhdl);
         }
 }
 
 /*
+ * Queue a fake offline event for a target with excessive command timeouts.
+ */
+static void
+mptsas_kill_target(mptsas_t *mpt, mptsas_target_t *ptgt)
+{
+        mptsas_topo_change_list_t       *topo_node = NULL;
+
+        NDBG29(("mptsas_kill_target: target=%d", ptgt->m_devhdl));
+        mptsas_log(mpt, CE_WARN, "timeout threshold exceeded for "
+            "Target %d", ptgt->m_devhdl);
+
+        topo_node = kmem_zalloc(sizeof (mptsas_topo_change_list_t), KM_SLEEP);
+        topo_node->mpt = mpt;
+        topo_node->un.phymask = ptgt->m_phymask;
+        topo_node->event = MPTSAS_DR_EVENT_OFFLINE_TARGET;
+        topo_node->devhdl = ptgt->m_devhdl;
+        if (ptgt->m_deviceinfo & DEVINFO_DIRECT_ATTACHED)
+                topo_node->flags = MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE;
+        else
+                topo_node->flags = MPTSAS_TOPO_FLAG_EXPANDER_ATTACHED_DEVICE;
+        topo_node->object = NULL;
+
+        /*
+         * Fake a topology change via the DR taskq; free the node on failure.
+         */
+        if ((ddi_taskq_dispatch(mpt->m_dr_taskq, mptsas_handle_dr,
+            (void *)topo_node, DDI_NOSLEEP)) != DDI_SUCCESS) {
+                kmem_free(topo_node, sizeof (mptsas_topo_change_list_t));
+                mptsas_log(mpt, CE_NOTE, "mptsas start taskq "
+                    "for fake offline event failed. \n");
+        }
+}
+
+/*
  * Device / Hotplug control
  */
 static int
 mptsas_scsi_quiesce(dev_info_t *dip)
 {
@@ -11324,70 +11410,12 @@
                 mutex_exit(&mpt->m_mutex);
         }
 
         if (iport_flag) {
                 status = scsi_hba_ioctl(dev, cmd, data, mode, credp, rval);
-                if (status != 0) {
                         goto out;
                 }
-                /*
-                 * The following code control the OK2RM LED, it doesn't affect
-                 * the ioctl return status.
-                 */
-                if ((cmd == DEVCTL_DEVICE_ONLINE) ||
-                    (cmd == DEVCTL_DEVICE_OFFLINE)) {
-                        if (ndi_dc_allochdl((void *)data, &dcp) !=
-                            NDI_SUCCESS) {
-                                goto out;
-                        }
-                        addr = ndi_dc_getaddr(dcp);
-                        ptgt = mptsas_addr_to_ptgt(mpt, addr, phymask);
-                        if (ptgt == NULL) {
-                                NDBG14(("mptsas_ioctl led control: tgt %s not "
-                                    "found", addr));
-                                ndi_dc_freehdl(dcp);
-                                goto out;
-                        }
-                        mutex_enter(&mpt->m_mutex);
-                        if (cmd == DEVCTL_DEVICE_ONLINE) {
-                                ptgt->m_tgt_unconfigured = 0;
-                        } else if (cmd == DEVCTL_DEVICE_OFFLINE) {
-                                ptgt->m_tgt_unconfigured = 1;
-                        }
-                        slotstatus = 0;
-#ifdef MPTSAS_GET_LED
-                        /*
-                         * The get led status can't get a valid/reasonable
-                         * state, so ignore the get led status, and write the
-                         * required value directly
-                         */
-                        if (mptsas_get_led_status(mpt, ptgt, &slotstatus) !=
-                            DDI_SUCCESS) {
-                                NDBG14(("mptsas_ioctl: get LED for tgt %s "
-                                    "failed %x", addr, slotstatus));
-                                slotstatus = 0;
-                        }
-                        NDBG14(("mptsas_ioctl: LED status %x for %s",
-                            slotstatus, addr));
-#endif
-                        if (cmd == DEVCTL_DEVICE_OFFLINE) {
-                                slotstatus |=
-                                    MPI2_SEP_REQ_SLOTSTATUS_REQUEST_REMOVE;
-                        } else {
-                                slotstatus &=
-                                    ~MPI2_SEP_REQ_SLOTSTATUS_REQUEST_REMOVE;
-                        }
-                        if (mptsas_set_led_status(mpt, ptgt, slotstatus) !=
-                            DDI_SUCCESS) {
-                                NDBG14(("mptsas_ioctl: set LED for tgt %s "
-                                    "failed %x", addr, slotstatus));
-                        }
-                        mutex_exit(&mpt->m_mutex);
-                        ndi_dc_freehdl(dcp);
-                }
-                goto out;
-        }
         switch (cmd) {
                 case MPTIOCTL_UPDATE_FLASH:
                         if (ddi_copyin((void *)data, &flashdata,
                                 sizeof (struct mptsas_update_flash), mode)) {
                                 status = EFAULT;
@@ -13836,14 +13864,10 @@
                                 (void) ddi_prop_free(old_guid);
                                 if ((!MDI_PI_IS_ONLINE(*pip)) &&
                                     (!MDI_PI_IS_STANDBY(*pip)) &&
                                     (ptgt->m_tgt_unconfigured == 0)) {
                                         rval = mdi_pi_online(*pip, 0);
-                                        mutex_enter(&mpt->m_mutex);
-                                        (void) mptsas_set_led_status(mpt, ptgt,
-                                            0);
-                                        mutex_exit(&mpt->m_mutex);
                                 } else {
                                         rval = DDI_SUCCESS;
                                 }
                                 if (rval != DDI_SUCCESS) {
                                         mptsas_log(mpt, CE_WARN, "path:target: "
@@ -14093,19 +14117,10 @@
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 }
                 NDBG20(("new path:%s onlining,", MDI_PI(*pip)->pi_addr));
                 mdi_rtn = mdi_pi_online(*pip, 0);
-                if (mdi_rtn == MDI_SUCCESS) {
-                        mutex_enter(&mpt->m_mutex);
-                        if (mptsas_set_led_status(mpt, ptgt, 0) !=
-                            DDI_SUCCESS) {
-                                NDBG14(("mptsas: clear LED for slot %x "
-                                    "failed", ptgt->m_slot_num));
-                        }
-                        mutex_exit(&mpt->m_mutex);
-                }
                 if (mdi_rtn == MDI_NOT_SUPPORTED) {
                         mdi_rtn = MDI_FAILURE;
                 }
 virt_create_done:
                 if (*pip && mdi_rtn != MDI_SUCCESS) {
@@ -14455,19 +14470,10 @@
                         /*
                          * Try to online the new node
                          */
                         ndi_rtn = ndi_devi_online(*lun_dip, NDI_ONLINE_ATTACH);
                 }
-                if (ndi_rtn == NDI_SUCCESS) {
-                        mutex_enter(&mpt->m_mutex);
-                        if (mptsas_set_led_status(mpt, ptgt, 0) !=
-                            DDI_SUCCESS) {
-                                NDBG14(("mptsas: clear LED for tgt %x "
-                                    "failed", ptgt->m_slot_num));
-                        }
-                        mutex_exit(&mpt->m_mutex);
-                }
 
                 /*
                  * If success set rtn flag, else unwire alloc'd lun
                  */
                 if (ndi_rtn != NDI_SUCCESS) {
@@ -15356,83 +15362,10 @@
                 ptgt = mptsas_phy_to_tgt(mpt, (int)phymask, phynum);
         }
         return (ptgt);
 }
 
-#ifdef MPTSAS_GET_LED
-static int
-mptsas_get_led_status(mptsas_t *mpt, mptsas_target_t *ptgt,
-    uint32_t *slotstatus)
-{
-        return (mptsas_send_sep(mpt, ptgt, slotstatus,
-            MPI2_SEP_REQ_ACTION_READ_STATUS));
-}
-#endif
-static int
-mptsas_set_led_status(mptsas_t *mpt, mptsas_target_t *ptgt, uint32_t slotstatus)
-{
-        NDBG14(("mptsas_ioctl: set LED status %x for slot %x",
-            slotstatus, ptgt->m_slot_num));
-        return (mptsas_send_sep(mpt, ptgt, &slotstatus,
-            MPI2_SEP_REQ_ACTION_WRITE_STATUS));
-}
-/*
- *  send sep request, use enclosure/slot addressing
- */
-static int mptsas_send_sep(mptsas_t *mpt, mptsas_target_t *ptgt,
-    uint32_t *status, uint8_t act)
-{
-        Mpi2SepRequest_t        req;
-        Mpi2SepReply_t          rep;
-        int                     ret;
-
-        ASSERT(mutex_owned(&mpt->m_mutex));
-
-        bzero(&req, sizeof (req));
-        bzero(&rep, sizeof (rep));
-
-        /* Do nothing for RAID volumes */
-        if (ptgt->m_phymask == 0) {
-                NDBG14(("mptsas_send_sep: Skip RAID volumes"));
-                return (DDI_FAILURE);
-        }
-
-        req.Function = MPI2_FUNCTION_SCSI_ENCLOSURE_PROCESSOR;
-        req.Action = act;
-        req.Flags = MPI2_SEP_REQ_FLAGS_ENCLOSURE_SLOT_ADDRESS;
-        req.EnclosureHandle = LE_16(ptgt->m_enclosure);
-        req.Slot = LE_16(ptgt->m_slot_num);
-        if (act == MPI2_SEP_REQ_ACTION_WRITE_STATUS) {
-                req.SlotStatus = LE_32(*status);
-        }
-        ret = mptsas_do_passthru(mpt, (uint8_t *)&req, (uint8_t *)&rep, NULL,
-            sizeof (req), sizeof (rep), NULL, 0, NULL, 0, 60, FKIOCTL);
-        if (ret != 0) {
-                mptsas_log(mpt, CE_NOTE, "mptsas_send_sep: passthru SEP "
-                    "Processor Request message error %d", ret);
-                return (DDI_FAILURE);
-        }
-        /* do passthrough success, check the ioc status */
-        if (LE_16(rep.IOCStatus) != MPI2_IOCSTATUS_SUCCESS) {
-                if ((LE_16(rep.IOCStatus) & MPI2_IOCSTATUS_MASK) ==
-                    MPI2_IOCSTATUS_INVALID_FIELD) {
-                        mptsas_log(mpt, CE_NOTE, "send sep act %x: Not "
-                            "supported action, loginfo %x", act,
-                            LE_32(rep.IOCLogInfo));
-                        return (DDI_FAILURE);
-                }
-                mptsas_log(mpt, CE_NOTE, "send_sep act %x: ioc "
-                    "status:%x", act, LE_16(rep.IOCStatus));
-                return (DDI_FAILURE);
-        }
-        if (act != MPI2_SEP_REQ_ACTION_WRITE_STATUS) {
-                *status = LE_32(rep.SlotStatus);
-        }
-
-        return (DDI_SUCCESS);
-}
-
 int
 mptsas_dma_addr_create(mptsas_t *mpt, ddi_dma_attr_t dma_attr,
     ddi_dma_handle_t *dma_hdp, ddi_acc_handle_t *acc_hdp, caddr_t *dma_memp,
     uint32_t alloc_size, ddi_dma_cookie_t *cookiep)
 {