Print this page
re #7364 rb2201 "hddisco" hangs after unplugging both cables from JBOD (and NMS too)
re #8346 rb2639 KT disk failures
re #8346 rb2639 KT disk failures
re #10443 rb3479 3.1.3 crash: BAD TRAP: type=e (#pf Page fault)
        
*** 235,244 ****
--- 235,245 ----
      uint32_t *control, pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl);
  
  static void mptsas_watch(void *arg);
  static void mptsas_watchsubr(mptsas_t *mpt);
  static void mptsas_cmd_timeout(mptsas_t *mpt, uint16_t devhdl);
+ static void mptsas_kill_target(mptsas_t *mpt, mptsas_target_t *ptgt);
  
  static void mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd);
  static int mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply,
      uint8_t *data, uint32_t request_size, uint32_t reply_size,
      uint32_t data_size, uint32_t direction, uint8_t *dataout,
*** 345,362 ****
  
  static int mptsas_get_target_device_info(mptsas_t *mpt, uint32_t page_address,
      uint16_t *handle, mptsas_target_t **pptgt);
  static void mptsas_update_phymask(mptsas_t *mpt);
  
- static int mptsas_send_sep(mptsas_t *mpt, mptsas_target_t *ptgt,
-     uint32_t *status, uint8_t cmd);
  static dev_info_t *mptsas_get_dip_from_dev(dev_t dev,
      mptsas_phymask_t *phymask);
  static mptsas_target_t *mptsas_addr_to_ptgt(mptsas_t *mpt, char *addr,
      mptsas_phymask_t phymask);
- static int mptsas_set_led_status(mptsas_t *mpt, mptsas_target_t *ptgt,
-     uint32_t slotstatus);
  
  
  /*
   * Enumeration / DR functions
   */
--- 346,359 ----
*** 463,472 ****
--- 460,477 ----
  /*
   * Tunable timeout value for Inquiry VPD page 0x83
   * By default the value is 30 seconds.
   */
  int mptsas_inq83_retry_timeout = 30;
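+ /*
+  * Maximum number of command timeouts (0 - 255) tolerated for a target
+  * within mptsas_timeout_interval. One timeout beyond this threshold
+  * causes the target to be taken offline.
+  */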
+ int mptsas_timeout_threshold = 2;
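+ /*
+  * Length of the window over which a target's command timeouts are
+  * counted, in the same units as mptsas_scsi_watchdog_tick. More than
+  * mptsas_timeout_threshold timeouts within one window are considered
+  * excessive; the count starts over once the window has elapsed.
+  */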
+ int mptsas_timeout_interval = 30;
  
  /*
   * This is used to allocate memory for message frame storage, not for
   * data I/O DMA. All message frames must be stored in the first 4G of
   * physical memory.
*** 2616,2632 ****
  }
  
  static void
  mptsas_alloc_reply_args(mptsas_t *mpt)
  {
!         if (mpt->m_replyh_args != NULL) {
!                 kmem_free(mpt->m_replyh_args, sizeof (m_replyh_arg_t)
!                     * mpt->m_max_replies);
!                 mpt->m_replyh_args = NULL;
!         }
          mpt->m_replyh_args = kmem_zalloc(sizeof (m_replyh_arg_t) *
              mpt->m_max_replies, KM_SLEEP);
  }
  
  static int
  mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd)
  {
--- 2621,2634 ----
  }
  
  /*
   * Make sure the array of reply handler arguments exists.
   *
   * Allocates a zeroed array of mpt->m_max_replies m_replyh_arg_t entries
   * the first time it is called. If mpt->m_replyh_args is already set, the
   * existing array is kept as it is: it is not freed, zeroed or resized.
   * The allocation uses KM_SLEEP.
   *
   * NOTE(review): if m_max_replies can differ between calls, the retained
   * array keeps its original size -- confirm no caller relies on a resize.
   */
  static void
  mptsas_alloc_reply_args(mptsas_t *mpt)
  {
!         if (mpt->m_replyh_args == NULL) {
                  mpt->m_replyh_args = kmem_zalloc(sizeof (m_replyh_arg_t) *
                      mpt->m_max_replies, KM_SLEEP);
+         }
  }
  
  static int
  mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd)
  {
*** 4824,4833 ****
--- 4826,4841 ----
                           * just let taskq resolve ack action
                           * and ack would be sent in taskq thread
                           */
                          NDBG20(("send mptsas_handle_event_sync success"));
                  }
+ 
+                 if (mpt->m_in_reset) {
+                         NDBG20(("dropping event received during reset"));
+                         return;
+                 }
+ 
                  if ((ddi_taskq_dispatch(mpt->m_event_taskq, mptsas_handle_event,
                      (void *)args, DDI_NOSLEEP)) != DDI_SUCCESS) {
                          mptsas_log(mpt, CE_WARN, "No memory available"
                          "for dispatch taskq");
                          /*
*** 5761,5772 ****
                  }
                  ASSERT(parent);
  handle_topo_change:
  
                  mutex_enter(&mpt->m_mutex);
! 
                  mptsas_handle_topo_change(topo_node, parent);
                  save_node = topo_node;
                  topo_node = topo_node->next;
                  ASSERT(save_node);
                  kmem_free(save_node, sizeof (mptsas_topo_change_list_t));
                  mutex_exit(&mpt->m_mutex);
--- 5769,5786 ----
                  }
                  ASSERT(parent);
  handle_topo_change:
  
                  mutex_enter(&mpt->m_mutex);
!                 /*
!                  * If HBA is being reset, don't perform operations depending
!                  * on the IOC. We must free the topo list, however.
!                  */
!                 if (!mpt->m_in_reset)
                          mptsas_handle_topo_change(topo_node, parent);
+                 else
+                         NDBG20(("skipping topo change received during reset"));
                  save_node = topo_node;
                  topo_node = topo_node->next;
                  ASSERT(save_node);
                  kmem_free(save_node, sizeof (mptsas_topo_change_list_t));
                  mutex_exit(&mpt->m_mutex);
*** 6052,6065 ****
                                  break;
                          }
                  }
  
                  mutex_enter(&mpt->m_mutex);
-                 if (mptsas_set_led_status(mpt, ptgt, 0) != DDI_SUCCESS) {
-                         NDBG14(("mptsas: clear LED for tgt %x failed",
-                             ptgt->m_slot_num));
-                 }
                  if (rval == DDI_SUCCESS) {
                          mptsas_tgt_free(&mpt->m_active->m_tgttbl,
                              ptgt->m_sas_wwn, ptgt->m_phymask);
                          ptgt = NULL;
                  } else {
--- 6066,6075 ----
*** 6988,6997 ****
--- 6998,7015 ----
          replyh_arg = (m_replyh_arg_t *)args;
          rfm = replyh_arg->rfm;
          mpt = replyh_arg->mpt;
  
          mutex_enter(&mpt->m_mutex);
+         /*
+          * If HBA is being reset, drop incoming event.
+          */
+         if (mpt->m_in_reset) {
+                 NDBG20(("dropping event received prior to reset"));
+                 mutex_exit(&mpt->m_mutex);
+                 return;
+         }
  
          eventreply = (pMpi2EventNotificationReply_t)
              (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr));
          event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
  
*** 8515,8524 ****
--- 8533,8552 ----
                  reason = CMD_RESET;
                  stat = STAT_DEV_RESET;
                  switch (tasktype) {
                  case MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET:
                          if (Tgt(cmd) == target) {
+                                 if (cmd->cmd_tgt_addr->m_timeout < 0) {
+                                         /*
+                                          * When timeout requested, propagate
+                                          * proper reason and statistics to
+                                          * target drivers.
+                                          */
+                                         reason = CMD_TIMEOUT;
+                                         stat |= STAT_TIMEOUT;
+                                 }
+                                 
                                  NDBG25(("mptsas_flush_target discovered non-"
                                      "NULL cmd in slot %d, tasktype 0x%x", slot,
                                      tasktype));
                                  mptsas_dump_cmd(mpt, cmd);
                                  mptsas_remove_cmd(mpt, cmd);
*** 8702,8711 ****
--- 8730,8747 ----
                  mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET);
                  mptsas_doneq_add(mpt, cmd);
                  mutex_enter(&mpt->m_tx_waitq_mutex);
          }
          mutex_exit(&mpt->m_tx_waitq_mutex);
+ 
+         /*
+          * Drain the taskqs prior to reallocating resources.
+          */
+         mutex_exit(&mpt->m_mutex);
+         ddi_taskq_wait(mpt->m_event_taskq);
+         ddi_taskq_wait(mpt->m_dr_taskq);
+         mutex_enter(&mpt->m_mutex);
  }
  
  /*
   * set pkt_reason and OR in pkt_statistics flag
   */
*** 9382,9393 ****
--- 9418,9445 ----
                                  continue;
                          }
  
                          ptgt->m_timeout -= mptsas_scsi_watchdog_tick;
  
+                         if (ptgt->m_timeout_count > 0) {
+                                 ptgt->m_timeout_interval +=
+                                     mptsas_scsi_watchdog_tick;
+                         }
+                         if (ptgt->m_timeout_interval > mptsas_timeout_interval) {
+                                 ptgt->m_timeout_interval = 0;
+                                 ptgt->m_timeout_count = 0;
+                         }
+ 
                          if (ptgt->m_timeout < 0) {
+                                 ptgt->m_timeout_count++;
+                                 if (ptgt->m_timeout_count >
+                                     mptsas_timeout_threshold) {
+                                         ptgt->m_timeout_count = 0;
+                                         mptsas_kill_target(mpt, ptgt);
+                                 } else {
                                          mptsas_cmd_timeout(mpt, ptgt->m_devhdl);
+                                 }
                                  ptgt = (mptsas_target_t *)mptsas_hash_traverse(
                                      &mpt->m_active->m_tgttbl, MPTSAS_HASH_NEXT);
                                  continue;
                          }
  
*** 9425,9434 ****
--- 9477,9520 ----
                      "recovery failed!", devhdl);
          }
  }
  
  /*
+  * Take a target that is causing too many command timeouts offline.
+  *
+  * Logs a warning, builds a MPTSAS_DR_EVENT_OFFLINE_TARGET topology change
+  * node describing the target, and dispatches it to the DR taskq
+  * (mptsas_handle_dr) to fake the topology change.
+  *
+  *      mpt     HBA instance that owns the target and the DR taskq
+  *      ptgt    target to offline; its device handle, phymask and device
+  *              info are copied into the node
+  *
+  * Once dispatched, the node belongs to the taskq handler, which is
+  * expected to free it. If the dispatch fails, the node is freed here and
+  * a note is logged; the target is left as it was.
+  */
+ static void
+ mptsas_kill_target(mptsas_t *mpt, mptsas_target_t *ptgt)
+ {
+         mptsas_topo_change_list_t       *topo_node;
+ 
+         NDBG29(("mptsas_kill_target: target=%d", ptgt->m_devhdl));
+         mptsas_log(mpt, CE_WARN, "timeout threshold exceeded for "
+             "Target %d", ptgt->m_devhdl);
+ 
+         /*
+          * NOTE(review): KM_SLEEP may block. This function appears to be
+          * called from the watchdog path -- confirm sleeping is safe there.
+          */
+         topo_node = kmem_zalloc(sizeof (mptsas_topo_change_list_t), KM_SLEEP);
+         topo_node->mpt = mpt;
+         topo_node->un.phymask = ptgt->m_phymask;
+         topo_node->event = MPTSAS_DR_EVENT_OFFLINE_TARGET;
+         topo_node->devhdl = ptgt->m_devhdl;
+         if (ptgt->m_deviceinfo & DEVINFO_DIRECT_ATTACHED)
+                 topo_node->flags = MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE;
+         else
+                 topo_node->flags = MPTSAS_TOPO_FLAG_EXPANDER_ATTACHED_DEVICE;
+         topo_node->object = NULL;
+ 
+         /*
+          * Launch DR taskq to fake topology change
+          */
+         if ((ddi_taskq_dispatch(mpt->m_dr_taskq,
+             mptsas_handle_dr, (void *)topo_node,
+             DDI_NOSLEEP)) != DDI_SUCCESS) {
+                 /*
+                  * The handler will never run, so nothing else can free
+                  * the node; release it here to avoid leaking it.
+                  */
+                 kmem_free(topo_node, sizeof (mptsas_topo_change_list_t));
+                 mptsas_log(mpt, CE_NOTE, "mptsas start taskq "
+                     "for fake offline event failed. \n");
+         }
+ }
+ 
+ /*
   * Device / Hotplug control
   */
  static int
  mptsas_scsi_quiesce(dev_info_t *dip)
  {
*** 11324,11393 ****
                  mutex_exit(&mpt->m_mutex);
          }
  
          if (iport_flag) {
                  status = scsi_hba_ioctl(dev, cmd, data, mode, credp, rval);
-                 if (status != 0) {
                          goto out;
                  }
-                 /*
-                  * The following code control the OK2RM LED, it doesn't affect
-                  * the ioctl return status.
-                  */
-                 if ((cmd == DEVCTL_DEVICE_ONLINE) ||
-                     (cmd == DEVCTL_DEVICE_OFFLINE)) {
-                         if (ndi_dc_allochdl((void *)data, &dcp) !=
-                             NDI_SUCCESS) {
-                                 goto out;
-                         }
-                         addr = ndi_dc_getaddr(dcp);
-                         ptgt = mptsas_addr_to_ptgt(mpt, addr, phymask);
-                         if (ptgt == NULL) {
-                                 NDBG14(("mptsas_ioctl led control: tgt %s not "
-                                     "found", addr));
-                                 ndi_dc_freehdl(dcp);
-                                 goto out;
-                         }
-                         mutex_enter(&mpt->m_mutex);
-                         if (cmd == DEVCTL_DEVICE_ONLINE) {
-                                 ptgt->m_tgt_unconfigured = 0;
-                         } else if (cmd == DEVCTL_DEVICE_OFFLINE) {
-                                 ptgt->m_tgt_unconfigured = 1;
-                         }
-                         slotstatus = 0;
- #ifdef MPTSAS_GET_LED
-                         /*
-                          * The get led status can't get a valid/reasonable
-                          * state, so ignore the get led status, and write the
-                          * required value directly
-                          */
-                         if (mptsas_get_led_status(mpt, ptgt, &slotstatus) !=
-                             DDI_SUCCESS) {
-                                 NDBG14(("mptsas_ioctl: get LED for tgt %s "
-                                     "failed %x", addr, slotstatus));
-                                 slotstatus = 0;
-                         }
-                         NDBG14(("mptsas_ioctl: LED status %x for %s",
-                             slotstatus, addr));
- #endif
-                         if (cmd == DEVCTL_DEVICE_OFFLINE) {
-                                 slotstatus |=
-                                     MPI2_SEP_REQ_SLOTSTATUS_REQUEST_REMOVE;
-                         } else {
-                                 slotstatus &=
-                                     ~MPI2_SEP_REQ_SLOTSTATUS_REQUEST_REMOVE;
-                         }
-                         if (mptsas_set_led_status(mpt, ptgt, slotstatus) !=
-                             DDI_SUCCESS) {
-                                 NDBG14(("mptsas_ioctl: set LED for tgt %s "
-                                     "failed %x", addr, slotstatus));
-                         }
-                         mutex_exit(&mpt->m_mutex);
-                         ndi_dc_freehdl(dcp);
-                 }
-                 goto out;
-         }
          switch (cmd) {
                  case MPTIOCTL_UPDATE_FLASH:
                          if (ddi_copyin((void *)data, &flashdata,
                                  sizeof (struct mptsas_update_flash), mode)) {
                                  status = EFAULT;
--- 11410,11421 ----
*** 13836,13849 ****
                                  (void) ddi_prop_free(old_guid);
                                  if ((!MDI_PI_IS_ONLINE(*pip)) &&
                                      (!MDI_PI_IS_STANDBY(*pip)) &&
                                      (ptgt->m_tgt_unconfigured == 0)) {
                                          rval = mdi_pi_online(*pip, 0);
-                                         mutex_enter(&mpt->m_mutex);
-                                         (void) mptsas_set_led_status(mpt, ptgt,
-                                             0);
-                                         mutex_exit(&mpt->m_mutex);
                                  } else {
                                          rval = DDI_SUCCESS;
                                  }
                                  if (rval != DDI_SUCCESS) {
                                          mptsas_log(mpt, CE_WARN, "path:target: "
--- 13864,13873 ----
*** 14093,14111 ****
                          mdi_rtn = MDI_FAILURE;
                          goto virt_create_done;
                  }
                  NDBG20(("new path:%s onlining,", MDI_PI(*pip)->pi_addr));
                  mdi_rtn = mdi_pi_online(*pip, 0);
-                 if (mdi_rtn == MDI_SUCCESS) {
-                         mutex_enter(&mpt->m_mutex);
-                         if (mptsas_set_led_status(mpt, ptgt, 0) !=
-                             DDI_SUCCESS) {
-                                 NDBG14(("mptsas: clear LED for slot %x "
-                                     "failed", ptgt->m_slot_num));
-                         }
-                         mutex_exit(&mpt->m_mutex);
-                 }
                  if (mdi_rtn == MDI_NOT_SUPPORTED) {
                          mdi_rtn = MDI_FAILURE;
                  }
  virt_create_done:
                  if (*pip && mdi_rtn != MDI_SUCCESS) {
--- 14117,14126 ----
*** 14455,14473 ****
                          /*
                           * Try to online the new node
                           */
                          ndi_rtn = ndi_devi_online(*lun_dip, NDI_ONLINE_ATTACH);
                  }
-                 if (ndi_rtn == NDI_SUCCESS) {
-                         mutex_enter(&mpt->m_mutex);
-                         if (mptsas_set_led_status(mpt, ptgt, 0) !=
-                             DDI_SUCCESS) {
-                                 NDBG14(("mptsas: clear LED for tgt %x "
-                                     "failed", ptgt->m_slot_num));
-                         }
-                         mutex_exit(&mpt->m_mutex);
-                 }
  
                  /*
                   * If success set rtn flag, else unwire alloc'd lun
                   */
                  if (ndi_rtn != NDI_SUCCESS) {
--- 14470,14479 ----
*** 15356,15438 ****
                  ptgt = mptsas_phy_to_tgt(mpt, (int)phymask, phynum);
          }
          return (ptgt);
  }
  
- #ifdef MPTSAS_GET_LED
- static int
- mptsas_get_led_status(mptsas_t *mpt, mptsas_target_t *ptgt,
-     uint32_t *slotstatus)
- {
-         return (mptsas_send_sep(mpt, ptgt, slotstatus,
-             MPI2_SEP_REQ_ACTION_READ_STATUS));
- }
- #endif
- static int
- mptsas_set_led_status(mptsas_t *mpt, mptsas_target_t *ptgt, uint32_t slotstatus)
- {
-         NDBG14(("mptsas_ioctl: set LED status %x for slot %x",
-             slotstatus, ptgt->m_slot_num));
-         return (mptsas_send_sep(mpt, ptgt, &slotstatus,
-             MPI2_SEP_REQ_ACTION_WRITE_STATUS));
- }
- /*
-  *  send sep request, use enclosure/slot addressing
-  */
- static int mptsas_send_sep(mptsas_t *mpt, mptsas_target_t *ptgt,
-     uint32_t *status, uint8_t act)
- {
-         Mpi2SepRequest_t        req;
-         Mpi2SepReply_t          rep;
-         int                     ret;
- 
-         ASSERT(mutex_owned(&mpt->m_mutex));
- 
-         bzero(&req, sizeof (req));
-         bzero(&rep, sizeof (rep));
- 
-         /* Do nothing for RAID volumes */
-         if (ptgt->m_phymask == 0) {
-                 NDBG14(("mptsas_send_sep: Skip RAID volumes"));
-                 return (DDI_FAILURE);
-         }
- 
-         req.Function = MPI2_FUNCTION_SCSI_ENCLOSURE_PROCESSOR;
-         req.Action = act;
-         req.Flags = MPI2_SEP_REQ_FLAGS_ENCLOSURE_SLOT_ADDRESS;
-         req.EnclosureHandle = LE_16(ptgt->m_enclosure);
-         req.Slot = LE_16(ptgt->m_slot_num);
-         if (act == MPI2_SEP_REQ_ACTION_WRITE_STATUS) {
-                 req.SlotStatus = LE_32(*status);
-         }
-         ret = mptsas_do_passthru(mpt, (uint8_t *)&req, (uint8_t *)&rep, NULL,
-             sizeof (req), sizeof (rep), NULL, 0, NULL, 0, 60, FKIOCTL);
-         if (ret != 0) {
-                 mptsas_log(mpt, CE_NOTE, "mptsas_send_sep: passthru SEP "
-                     "Processor Request message error %d", ret);
-                 return (DDI_FAILURE);
-         }
-         /* do passthrough success, check the ioc status */
-         if (LE_16(rep.IOCStatus) != MPI2_IOCSTATUS_SUCCESS) {
-                 if ((LE_16(rep.IOCStatus) & MPI2_IOCSTATUS_MASK) ==
-                     MPI2_IOCSTATUS_INVALID_FIELD) {
-                         mptsas_log(mpt, CE_NOTE, "send sep act %x: Not "
-                             "supported action, loginfo %x", act,
-                             LE_32(rep.IOCLogInfo));
-                         return (DDI_FAILURE);
-                 }
-                 mptsas_log(mpt, CE_NOTE, "send_sep act %x: ioc "
-                     "status:%x", act, LE_16(rep.IOCStatus));
-                 return (DDI_FAILURE);
-         }
-         if (act != MPI2_SEP_REQ_ACTION_WRITE_STATUS) {
-                 *status = LE_32(rep.SlotStatus);
-         }
- 
-         return (DDI_SUCCESS);
- }
- 
  int
  mptsas_dma_addr_create(mptsas_t *mpt, ddi_dma_attr_t dma_attr,
      ddi_dma_handle_t *dma_hdp, ddi_acc_handle_t *acc_hdp, caddr_t *dma_memp,
      uint32_t alloc_size, ddi_dma_cookie_t *cookiep)
  {
--- 15362,15371 ----