NEX-6018 Return of the walking dead idm_refcnt_wait_ref comstar threads
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-5428 Backout the 5.0 changes
NEX-2937 Continuous write_same starves all other commands
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-5602 cpqary3: add support for more hp gen9 smart array controllers
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Marcel Telka <marcel.telka@nexenta.com>
NEX-1533 fcinfo hba-port doesn't get correct supported speeds and connection speed for 16Gb target ports
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-4532 STC comstar FC test causes panic on 5.0
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3856 panic occurs in module "fct" due to a NULL pointer dereference
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-2787 Multiple comstar / fibre channel / qlt threads stuck waiting on locks with a spinning interrupt thread
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>
@@ -111,11 +111,27 @@
static uint32_t rportid_table_size = FCT_HASH_TABLE_SIZE;
static int max_cached_ncmds = FCT_MAX_CACHED_CMDS;
static fct_i_local_port_t *fct_iport_list = NULL;
static kmutex_t fct_global_mutex;
uint32_t fct_rscn_options = RSCN_OPTION_VERIFY;
+/*
+ * This keeps fibre channel from hanging if syseventd is not
+ * working correctly and the sysevent queue fills. It is a tunable
+ * that lets the user force event logging to always happen
+ * (DDI_SLEEP); by default (0) events are logged with DDI_NOSLEEP
+ * and may be dropped when the queue is full.
+ */
+static uint8_t fct_force_log = 0; /* use DDI_SLEEP on ddi_log_sysevent */
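+/*
+ * Illustrative sketch only (not part of the original change): as a driver
+ * tunable this would normally be set from /etc/system, e.g.
+ *
+ *   set fct:fct_force_log = 1
+ *
+ * or patched at run time with mdb -kw, assuming the symbol is visible to
+ * those mechanisms. A non-zero value makes the ddi_log_sysevent() calls
+ * below use DDI_SLEEP instead of DDI_NOSLEEP.
+ */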
+/*
+ * For use during core examination. These counts are normally low
+ * since they are only bumped during port operations. If a customer
+ * core shows very high values without a correspondingly long uptime,
+ * something is most likely wrong with the environment.
+ */
+int fct_els_cnt = 0;
+int fct_abort_cnt = 0;
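+/*
+ * Example core examination (a sketch; the dump file names are
+ * illustrative):
+ *
+ *   # mdb unix.0 vmcore.0
+ *   > fct_els_cnt/D
+ *   > fct_abort_cnt/D
+ *
+ * The /D format prints each counter as a 32-bit decimal value.
+ */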
+
int
_init(void)
{
int ret;
@@ -1186,11 +1202,15 @@
/* fct_cmds for SCSI traffic */
iport->iport_total_alloced_ncmds = 0;
iport->iport_cached_ncmds = 0;
port->port_fca_fcp_cmd_size =
(port->port_fca_fcp_cmd_size + 7) & ~7;
- iport->iport_cached_cmdlist = NULL;
+ list_create(&iport->iport_cached_cmdlist, sizeof (fct_i_cmd_t),
+ offsetof(fct_i_cmd_t, icmd_node));
+ list_create(&iport->iport_abort_queue, sizeof (fct_i_cmd_t),
+ offsetof(fct_i_cmd_t, icmd_node));
+
mutex_init(&iport->iport_cached_cmd_lock, NULL, MUTEX_DRIVER, NULL);
/* Initialize cmd slots */
iport->iport_cmd_slots = (fct_cmd_slot_t *)kmem_zalloc(
port->port_max_xchges * sizeof (fct_cmd_slot_t), KM_SLEEP);
@@ -1275,11 +1295,11 @@
fct_status_t
fct_deregister_local_port(fct_local_port_t *port)
{
fct_i_local_port_t *iport;
- fct_i_cmd_t *icmd, *next_icmd;
+ fct_i_cmd_t *icmd;
int ndx;
iport = (fct_i_local_port_t *)port->port_fct_private;
if ((iport->iport_state != FCT_STATE_OFFLINE) ||
@@ -1320,13 +1340,13 @@
/*
* At this time, there should be no outstanding and pending
* I/Os, so we can just release resources.
*/
ASSERT(iport->iport_total_alloced_ncmds == iport->iport_cached_ncmds);
- for (icmd = iport->iport_cached_cmdlist; icmd; icmd = next_icmd) {
- next_icmd = icmd->icmd_next;
- fct_free(icmd->icmd_cmd);
+ while (!list_is_empty(&iport->iport_cached_cmdlist)) {
+ icmd = list_remove_head(&iport->iport_cached_cmdlist);
+ fct_free(icmd->icmd_cmd);
}
mutex_destroy(&iport->iport_cached_cmd_lock);
kmem_free(iport->iport_cmd_slots, port->port_max_xchges *
sizeof (fct_cmd_slot_t));
kmem_free(iport->iport_rp_slots, port->port_max_logins *
@@ -1475,22 +1495,23 @@
rw_enter(&irp->irp_lock, RW_WRITER);
if ((irp->irp_flags & IRP_IN_DISCOVERY_QUEUE) == 0) {
logging_out = 0;
goto ilo_done;
}
- if ((irp->irp_els_list == NULL) && (irp->irp_deregister_timer)) {
+ if (list_is_empty(&irp->irp_els_list) && (irp->irp_deregister_timer)) {
if (force_implicit && irp->irp_nonfcp_xchg_count) {
logging_out = 0;
} else {
logging_out = 1;
}
goto ilo_done;
}
- if (irp->irp_els_list) {
+ if (!list_is_empty(&irp->irp_els_list)) {
fct_i_cmd_t *icmd;
/* Last session affecting ELS should be a LOGO */
- for (icmd = irp->irp_els_list; icmd; icmd = icmd->icmd_next) {
+ for (icmd = list_head(&irp->irp_els_list); icmd;
+ icmd = list_next(&irp->irp_els_list, icmd)) {
uint8_t op = (ICMD_TO_ELS(icmd))->els_req_payload[0];
if (op == ELS_OP_LOGO) {
if (force_implicit) {
if (icmd->icmd_flags & ICMD_IMPLICIT)
logging_out = 1;
@@ -1610,11 +1631,12 @@
return (1);
}
fct_cmd_t *
fct_scsi_task_alloc(fct_local_port_t *port, uint16_t rp_handle,
- uint32_t rportid, uint8_t *lun, uint16_t cdb_length, uint16_t task_ext)
+ uint32_t rportid, uint8_t *lun, uint16_t cdb_length,
+ uint16_t task_ext)
{
fct_cmd_t *cmd;
fct_i_cmd_t *icmd;
fct_i_local_port_t *iport =
(fct_i_local_port_t *)port->port_fct_private;
@@ -1658,12 +1680,12 @@
"login was not done. portid=%x, rp=%p", rp->rp_id, rp);
return (NULL);
}
mutex_enter(&iport->iport_cached_cmd_lock);
- if ((icmd = iport->iport_cached_cmdlist) != NULL) {
- iport->iport_cached_cmdlist = icmd->icmd_next;
+ if (!list_is_empty(&iport->iport_cached_cmdlist)) {
+ icmd = list_remove_head(&iport->iport_cached_cmdlist);
iport->iport_cached_ncmds--;
cmd = icmd->icmd_cmd;
} else {
icmd = NULL;
}
@@ -1678,11 +1700,11 @@
"memory, port=%p", port);
return (NULL);
}
icmd = (fct_i_cmd_t *)cmd->cmd_fct_private;
- icmd->icmd_next = NULL;
+ list_link_init(&icmd->icmd_node);
cmd->cmd_port = port;
atomic_inc_32(&iport->iport_total_alloced_ncmds);
}
/*
@@ -1882,20 +1904,14 @@
*/
void
fct_post_to_discovery_queue(fct_i_local_port_t *iport,
fct_i_remote_port_t *irp, fct_i_cmd_t *icmd)
{
- fct_i_cmd_t **p;
-
ASSERT(!MUTEX_HELD(&iport->iport_worker_lock));
if (icmd) {
- icmd->icmd_next = NULL;
- for (p = &irp->irp_els_list; *p != NULL;
- p = &((*p)->icmd_next))
- ;
-
- *p = icmd;
+ list_insert_tail(&irp->irp_els_list, icmd);
+ fct_els_cnt++;
atomic_or_32(&icmd->icmd_flags, ICMD_IN_IRP_QUEUE);
}
mutex_enter(&iport->iport_worker_lock);
if ((irp->irp_flags & IRP_IN_DISCOVERY_QUEUE) == 0) {
@@ -2123,12 +2139,11 @@
/* Free the cmd */
if (cmd->cmd_type == FCT_CMD_FCP_XCHG) {
if (iport->iport_cached_ncmds < max_cached_ncmds) {
icmd->icmd_flags = 0;
mutex_enter(&iport->iport_cached_cmd_lock);
- icmd->icmd_next = iport->iport_cached_cmdlist;
- iport->iport_cached_cmdlist = icmd;
+ list_insert_head(&iport->iport_cached_cmdlist, icmd);
iport->iport_cached_ncmds++;
mutex_exit(&iport->iport_cached_cmd_lock);
} else {
atomic_dec_32(&iport->iport_total_alloced_ncmds);
fct_free(cmd);
@@ -2827,12 +2842,12 @@
atomic_and_32(&icmd->icmd_flags, ~ICMD_KNOWN_TO_FCA);
/* For non-FCP, the rest of the work is done by the terminator */
/* For FCP stuff just call stmf */
if (cmd->cmd_type == FCT_CMD_FCP_XCHG) {
- stmf_task_lport_aborted((scsi_task_t *)cmd->cmd_specific,
- s, STMF_IOF_LPORT_DONE);
+ stmf_task_lport_aborted_unlocked(
+ (scsi_task_t *)cmd->cmd_specific, s, STMF_IOF_LPORT_DONE);
}
}
/*
* FCA drivers will use it, when they want to abort some FC transactions
@@ -2901,23 +2916,69 @@
*((uint16_t *)(p+4)) = BE_16(cmd->cmd_oxid);
*((uint16_t *)(p+6)) = BE_16(cmd->cmd_rxid);
p[10] = p[11] = 0xff;
}
+/*
+ * fct_cmd_unlink_els -- remove icmd from ELS queue
+ *
+ * The commands are found via the slot array of active commands and will be
+ * terminated shortly after being removed.
+ */
void
+fct_cmd_unlink_els(fct_i_remote_port_t *irp, fct_i_cmd_t *icmd)
+{
+ ASSERT(rw_write_held(&irp->irp_lock));
+ if (icmd->icmd_node.list_next) {
+ /*
+ * Command is on two queues. Determine which queue and
+ * handle appropriately.
+ */
+ if (icmd->icmd_flags & ICMD_IN_IRP_QUEUE) {
+ /*
+ * If the command is active on the IRP queue it
+ * will be freed during command termination
+ * processing. Unfortunately the ELS processing will
+ * peek at the command and possibly panic if it's
+ * been freed already. Remove it from the ELS
+ * queue to avoid that.
+ */
+ if (icmd->icmd_flags & ICMD_SESSION_AFFECTING)
+ atomic_dec_16(&irp->irp_sa_elses_count);
+ else
+ atomic_dec_16(&irp->irp_nsa_elses_count);
+ atomic_and_32(&icmd->icmd_flags, ~ICMD_IN_IRP_QUEUE);
+ list_remove(&irp->irp_els_list, icmd);
+ }
+ /*
+ * There's an else case here, but the processing is handled
+ * in fct_check_solcmd_queue(). In this case the command
+ * is on the solicited queue and will be marked as aborted.
+ * During command termination processing the command will be
+ * marked as complete, but not freed. The freeing of the memory
+ * is done in fct_check_solcmd_queue(). If that routine, which
+ * holds the appropriate lock, is run first it will remove the
+ * command from the abort queue so that no memory access
+ * is done after the command has been freed.
+ */
+ }
+}
+
+void
fct_handle_rcvd_abts(fct_cmd_t *cmd)
{
char info[FCT_INFO_LEN];
fct_local_port_t *port = cmd->cmd_port;
fct_i_local_port_t *iport =
(fct_i_local_port_t *)port->port_fct_private;
fct_i_cmd_t *icmd = (fct_i_cmd_t *)cmd->cmd_fct_private;
fct_i_remote_port_t *irp;
- fct_cmd_t *c = NULL;
+ fct_cmd_t *c = NULL, *term_cmd;
fct_i_cmd_t *ic = NULL;
int found = 0;
int i;
+ fct_status_t term_val;
icmd->icmd_start_time = ddi_get_lbolt();
icmd->icmd_flags |= ICMD_KNOWN_TO_FCA;
rw_enter(&iport->iport_lock, RW_WRITER);
@@ -2996,27 +3057,27 @@
fct_cmd_free(cmd);
}
return;
}
+ fct_cmd_unlink_els(irp, ic);
+
/* Check if this is an ABTS retry */
if (c->cmd_link && (ic->icmd_flags & ICMD_ABTS_RECEIVED)) {
/* Kill this abts. */
- fct_q_for_termination_lock_held(iport, icmd, FCT_ABORTED);
- if (IS_WORKER_SLEEPING(iport))
- cv_signal(&iport->iport_worker_cv);
- mutex_exit(&iport->iport_worker_lock);
- rw_exit(&irp->irp_lock);
- rw_exit(&iport->iport_lock);
- return;
- }
+ term_cmd = icmd->icmd_cmd;
+ term_val = FCT_ABORTED;
+ } else {
c->cmd_link = cmd;
atomic_or_32(&ic->icmd_flags, ICMD_ABTS_RECEIVED);
cmd->cmd_link = c;
+ term_cmd = c;
+ term_val = FCT_ABTS_RECEIVED;
+ }
mutex_exit(&iport->iport_worker_lock);
rw_exit(&irp->irp_lock);
- fct_queue_cmd_for_termination(c, FCT_ABTS_RECEIVED);
+ fct_queue_cmd_for_termination(term_cmd, term_val);
rw_exit(&iport->iport_lock);
}
void
fct_queue_cmd_for_termination(fct_cmd_t *cmd, fct_status_t s)
@@ -3043,11 +3104,10 @@
void
fct_q_for_termination_lock_held(fct_i_local_port_t *iport, fct_i_cmd_t *icmd,
fct_status_t s)
{
uint32_t old, new;
- fct_i_cmd_t **ppicmd;
do {
old = icmd->icmd_flags;
if (old & ICMD_BEING_ABORTED)
return;
@@ -3055,16 +3115,12 @@
} while (atomic_cas_32(&icmd->icmd_flags, old, new) != old);
icmd->icmd_start_time = ddi_get_lbolt();
icmd->icmd_cmd->cmd_comp_status = s;
- icmd->icmd_next = NULL;
- for (ppicmd = &(iport->iport_abort_queue); *ppicmd != NULL;
- ppicmd = &((*ppicmd)->icmd_next))
- ;
-
- *ppicmd = icmd;
+ list_insert_tail(&iport->iport_abort_queue, icmd);
+ fct_abort_cnt++;
}
/*
* For those cmds, for which we called fca_abort but it has not yet completed,
* reset the FCA_ABORT_CALLED flag, so that abort can be called again.
@@ -3239,11 +3295,11 @@
disc_action_t
fct_cmd_terminator(fct_i_local_port_t *iport)
{
char info[FCT_INFO_LEN];
clock_t endtime;
- fct_i_cmd_t **ppicmd;
+ fct_i_cmd_t *next;
fct_i_cmd_t *icmd;
fct_cmd_t *cmd;
fct_local_port_t *port = iport->iport_port;
disc_action_t ret = DISC_ACTION_NO_WORK;
fct_status_t abort_ret;
@@ -3254,23 +3310,23 @@
/* Let's limit each run to 20ms max. */
endtime = ddi_get_lbolt() + drv_usectohz(20000);
/* Start from where we left off last time */
if (iport->iport_ppicmd_term) {
- ppicmd = iport->iport_ppicmd_term;
+ icmd = iport->iport_ppicmd_term;
iport->iport_ppicmd_term = NULL;
} else {
- ppicmd = &iport->iport_abort_queue;
+ icmd = list_head(&iport->iport_abort_queue);
}
/*
* Once a command gets on the discovery queue, this is the only thread
* which can access it. So no need for the lock here.
*/
mutex_exit(&iport->iport_worker_lock);
- while ((icmd = *ppicmd) != NULL) {
+ while (icmd) {
cmd = icmd->icmd_cmd;
/* Always remember that cmd->cmd_rp can be NULL */
if ((icmd->icmd_flags & (ICMD_KNOWN_TO_FCA |
ICMD_FCA_ABORT_CALLED)) == ICMD_KNOWN_TO_FCA) {
@@ -3299,11 +3355,11 @@
iport->iport_port,
STMF_RFLAG_FATAL_ERROR |
STMF_RFLAG_RESET, info);
mutex_enter(&iport->iport_worker_lock);
- iport->iport_ppicmd_term = ppicmd;
+ iport->iport_ppicmd_term = icmd;
return (DISC_ACTION_DELAY_RESCAN);
}
atomic_and_32(&icmd->icmd_flags,
~ICMD_FCA_ABORT_CALLED);
} else if ((flags & FCT_IOF_FORCE_FCA_DONE) ||
@@ -3327,13 +3383,14 @@
fct_done = 1;
else
fct_done = 0;
if ((fca_done || cmd_implicit) && fct_done) {
mutex_enter(&iport->iport_worker_lock);
- ASSERT(*ppicmd == icmd);
- *ppicmd = (*ppicmd)->icmd_next;
+ next = list_next(&iport->iport_abort_queue, icmd);
+ list_remove(&iport->iport_abort_queue, icmd);
mutex_exit(&iport->iport_worker_lock);
+
if ((cmd->cmd_type == FCT_CMD_RCVD_ELS) ||
(cmd->cmd_type == FCT_CMD_RCVD_ABTS)) {
/* Free the cmd */
fct_cmd_free(cmd);
} else if (cmd->cmd_type == FCT_CMD_SOL_ELS) {
@@ -3405,21 +3462,25 @@
st);
(void) fct_port_shutdown(port,
STMF_RFLAG_FATAL_ERROR | STMF_RFLAG_RESET,
info);
}
- ppicmd = &((*ppicmd)->icmd_next);
+ mutex_enter(&iport->iport_worker_lock);
+ next = list_next(&iport->iport_abort_queue, icmd);
+ mutex_exit(&iport->iport_worker_lock);
}
if (ddi_get_lbolt() > endtime) {
mutex_enter(&iport->iport_worker_lock);
- iport->iport_ppicmd_term = ppicmd;
+ iport->iport_ppicmd_term = next;
return (DISC_ACTION_DELAY_RESCAN);
+ } else {
+ icmd = next;
}
}
mutex_enter(&iport->iport_worker_lock);
- if (iport->iport_abort_queue)
+ if (!list_is_empty(&iport->iport_abort_queue))
return (DISC_ACTION_DELAY_RESCAN);
if (ret == DISC_ACTION_NO_WORK)
return (DISC_ACTION_RESCAN);
return (ret);
}
@@ -3430,10 +3491,11 @@
void
fct_log_local_port_event(fct_local_port_t *port, char *subclass)
{
nvlist_t *attr_list;
int port_instance;
+ int rc, sleep = DDI_SLEEP;
if (!fct_dip)
return;
port_instance = ddi_get_instance(fct_dip);
@@ -3450,12 +3512,19 @@
if (nvlist_add_byte_array(attr_list, "port-wwn",
port->port_pwwn, 8) != DDI_SUCCESS) {
goto error;
}
- (void) ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
- subclass, attr_list, NULL, DDI_SLEEP);
+ if (fct_force_log == 0) {
+ sleep = DDI_NOSLEEP;
+ }
+ rc = ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
+ subclass, attr_list, NULL, sleep);
+ if (rc != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s: sysevent queue full, event lost", __func__);
+ goto error;
+ }
nvlist_free(attr_list);
return;
error:
@@ -3469,10 +3538,11 @@
fct_log_remote_port_event(fct_local_port_t *port, char *subclass,
uint8_t *rp_pwwn, uint32_t rp_id)
{
nvlist_t *attr_list;
int port_instance;
+ int rc, sleep = DDI_SLEEP;
if (!fct_dip)
return;
port_instance = ddi_get_instance(fct_dip);
@@ -3499,12 +3569,19 @@
if (nvlist_add_uint32(attr_list, "target-port-id",
rp_id) != DDI_SUCCESS) {
goto error;
}
- (void) ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
- subclass, attr_list, NULL, DDI_SLEEP);
+ if (fct_force_log == 0) {
+ sleep = DDI_NOSLEEP;
+ }
+ rc = ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
+ subclass, attr_list, NULL, sleep);
+ if (rc != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s: event dropped", __func__);
+ goto error;
+ }
nvlist_free(attr_list);
return;
error: