Print this page
NEX-10223 Return of Fibre Channel ports stuck in offline state and unable to clear
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-4532 STC comstar FC test causes panic on 5.0
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3856 panic is occurred in module "fct" due to a NULL pointer dereference
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3277 Panic of both nodes in failover time (FC clients)
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-2787 Multiple comstar / fibre channel / qlt threads stuck waiting on locks with a spinning interrupt thread
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>
@@ -18,10 +18,11 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/file.h>
@@ -144,11 +145,11 @@
}
/*
* We process cmd aborting in the end
*/
- if (iport->iport_abort_queue) {
+ if (!list_is_empty(&iport->iport_abort_queue)) {
suggested_action |= fct_cmd_terminator(iport);
}
/*
* Check cmd max/free
@@ -619,15 +620,19 @@
mutex_enter(&iport->iport_worker_lock);
}
}
}
/* Find out if we need to do PLOGI at all */
+ rw_enter(&iport->iport_lock, RW_READER);
if (iport->iport_nrps_login) {
iport->iport_li_state++;
atomic_and_32(&iport->iport_flags,
~IPORT_ALLOW_UNSOL_FLOGI);
+ rw_exit(&iport->iport_lock);
goto check_state_again;
+ } else {
+ rw_exit(&iport->iport_lock);
}
if ((ddi_get_lbolt() >= iport->iport_li_cmd_timeout) &&
(!fct_lport_has_bigger_wwn(iport))) {
/* Cant wait forever */
stmf_trace(iport->iport_alias, "N2N: Remote port is "
@@ -907,10 +912,12 @@
fct_queue_cmd_for_termination(cmd,
FCT_ALLOC_FAILURE);
return;
}
irp = (fct_i_remote_port_t *)rp->rp_fct_private;
+ list_create(&irp->irp_els_list, sizeof (fct_i_cmd_t),
+ offsetof(fct_i_cmd_t, icmd_node));
rw_init(&irp->irp_lock, 0, RW_DRIVER, 0);
irp->irp_rp = rp;
irp->irp_portid = cmd->cmd_rportid;
rp->rp_port = port;
rp->rp_id = cmd->cmd_rportid;
@@ -985,10 +992,18 @@
atomic_or_32(&icmd->icmd_flags, ICMD_IMPLICIT_CMD_HAS_RESOURCE);
}
atomic_inc_16(&irp->irp_nonfcp_xchg_count);
/*
+ * The iport_lock is currently held as a Reader lock; protocol
+ * dictates that to modify iport_nrps_login the lock must be held
+ * as a Writer.
+ */
+ rw_exit(&iport->iport_lock);
+ rw_enter(&iport->iport_lock, RW_WRITER);
+
+ /*
* Grab the remote port lock while we modify the port state.
* we should not drop the fca port lock (as a reader) until we
* modify the remote port state.
*/
rw_enter(&irp->irp_lock, RW_WRITER);
@@ -1055,16 +1070,18 @@
if (cmd->cmd_rp != rp) {
skipped++;
continue;
}
if (cmd->cmd_type & ttc) {
- if (cmd->cmd_type == FCT_CMD_FCP_XCHG)
+ if (cmd->cmd_type == FCT_CMD_FCP_XCHG) {
fct_queue_scsi_task_for_termination(cmd,
FCT_ABORTED);
- else
+ } else {
+ fct_cmd_unlink_els(irp, icmd);
fct_q_for_termination_lock_held(iport, icmd,
FCT_ABORTED);
+ }
cleaned++;
} else {
skipped++;
}
}
@@ -1091,12 +1108,11 @@
fct_dequeue_els(fct_i_remote_port_t *irp)
{
fct_i_cmd_t *icmd;
rw_enter(&irp->irp_lock, RW_WRITER);
- icmd = irp->irp_els_list;
- irp->irp_els_list = icmd->icmd_next;
+ icmd = list_remove_head(&irp->irp_els_list);
atomic_and_32(&icmd->icmd_flags, ~ICMD_IN_IRP_QUEUE);
rw_exit(&irp->irp_lock);
}
fct_status_t
@@ -1187,16 +1203,18 @@
fct_status_t ret = FCT_SUCCESS;
fct_i_local_port_t *iport = PORT_TO_IPORT(port);
fct_i_remote_port_t *irp = RP_TO_IRP(rp);
if (irp->irp_snn) {
- kmem_free(irp->irp_snn, strlen(irp->irp_snn) + 1);
+ kmem_free(irp->irp_snn, irp->irp_snn_len);
irp->irp_snn = NULL;
+ irp->irp_snn_len = 0;
}
if (irp->irp_spn) {
- kmem_free(irp->irp_spn, strlen(irp->irp_spn) + 1);
+ kmem_free(irp->irp_spn, irp->irp_spn_len);
irp->irp_spn = NULL;
+ irp->irp_spn_len = 0;
}
if ((ret = port->port_deregister_remote_port(port, rp)) !=
FCT_SUCCESS) {
return (ret);
@@ -1247,11 +1265,11 @@
fct_i_remote_port_t *irp = *pirp;
disc_action_t ret = DISC_ACTION_NO_WORK;
int do_deregister = 0;
int irp_deregister_timer = 0;
- if (irp->irp_els_list) {
+ if (!list_is_empty(&irp->irp_els_list)) {
ret |= fct_process_els(iport, irp);
}
irp_deregister_timer = irp->irp_deregister_timer;
if (irp_deregister_timer) {
@@ -1261,16 +1279,16 @@
ret |= DISC_ACTION_DELAY_RESCAN;
}
}
suggested_action |= ret;
- if (irp->irp_els_list == NULL) {
+ if (list_is_empty(&irp->irp_els_list)) {
mutex_exit(&iport->iport_worker_lock);
rw_enter(&iport->iport_lock, RW_WRITER);
rw_enter(&irp->irp_lock, RW_WRITER);
mutex_enter(&iport->iport_worker_lock);
- if (irp->irp_els_list == NULL) {
+ if (list_is_empty(&irp->irp_els_list)) {
if (!irp_deregister_timer ||
(do_deregister &&
!irp->irp_sa_elses_count &&
!irp->irp_nsa_elses_count &&
!irp->irp_fcp_xchg_count &&
@@ -1514,14 +1532,18 @@
}
atomic_dec_16(&irp->irp_sa_elses_count);
if (ret == FCT_SUCCESS) {
if (cmd_type == FCT_CMD_RCVD_ELS) {
+ rw_enter(&iport->iport_lock, RW_WRITER);
+ rw_enter(&irp->irp_lock, RW_WRITER);
atomic_or_32(&irp->irp_flags, IRP_PLOGI_DONE);
atomic_inc_32(&iport->iport_nrps_login);
if (irp->irp_deregister_timer)
irp->irp_deregister_timer = 0;
+ rw_exit(&irp->irp_lock);
+ rw_exit(&iport->iport_lock);
}
if (icmd_flags & ICMD_IMPLICIT) {
DTRACE_FC_5(rport__login__end,
fct_cmd_t, cmd,
fct_local_port_t, port,
@@ -2000,12 +2022,12 @@
}
disc_action_t
fct_process_els(fct_i_local_port_t *iport, fct_i_remote_port_t *irp)
{
- fct_i_cmd_t *cmd_to_abort = NULL;
- fct_i_cmd_t **ppcmd, *icmd;
+ list_t cmd_to_abort;
+ fct_i_cmd_t *next, *icmd;
fct_cmd_t *cmd;
fct_els_t *els;
int dq;
disc_action_t ret = DISC_ACTION_NO_WORK;
uint8_t op;
@@ -2022,18 +2044,20 @@
* NOTE: There is a side effect, if a sa ELS (non PLOGI) is received
* while a PLOGI is pending, it will kill itself and the PLOGI.
* which is probably ok.
*/
rw_enter(&irp->irp_lock, RW_WRITER);
- ppcmd = &irp->irp_els_list;
- while ((*ppcmd) != NULL) {
+ icmd = list_head(&irp->irp_els_list);
+ list_create(&cmd_to_abort, sizeof (fct_i_cmd_t),
+ offsetof(fct_i_cmd_t, icmd_node));
+ while (icmd != NULL) {
int special_prli_cond = 0;
dq = 0;
- els = (fct_els_t *)((*ppcmd)->icmd_cmd)->cmd_specific;
+ els = (fct_els_t *)(icmd->icmd_cmd)->cmd_specific;
- if (((*ppcmd)->icmd_cmd->cmd_type == FCT_CMD_RCVD_ELS) &&
+ if ((icmd->icmd_cmd->cmd_type == FCT_CMD_RCVD_ELS) &&
(els->els_req_payload[0] == ELS_OP_PRLI) &&
(irp->irp_flags & IRP_SOL_PLOGI_IN_PROGRESS)) {
/*
* The initiator sent a PRLI right after responding
* to PLOGI and we have not yet finished processing
@@ -2041,20 +2065,20 @@
* as the initiator may not retry it.
*/
special_prli_cond = 1;
}
- if ((*ppcmd)->icmd_flags & ICMD_BEING_ABORTED) {
+ if (icmd->icmd_flags & ICMD_BEING_ABORTED) {
dq = 1;
} else if (irp->irp_sa_elses_count > 1) {
dq = 1;
/* This els might have set the CLEANUP flag */
atomic_and_32(&irp->irp_flags, ~IRP_SESSION_CLEANUP);
stmf_trace(iport->iport_alias, "Killing ELS %x cond 1",
els->els_req_payload[0]);
} else if (irp->irp_sa_elses_count &&
- (((*ppcmd)->icmd_flags & ICMD_SESSION_AFFECTING) == 0)) {
+ ((icmd->icmd_flags & ICMD_SESSION_AFFECTING) == 0)) {
stmf_trace(iport->iport_alias, "Killing ELS %x cond 2",
els->els_req_payload[0]);
dq = 1;
} else if (((irp->irp_flags & IRP_PLOGI_DONE) == 0) &&
(els->els_req_payload[0] != ELS_OP_PLOGI) &&
@@ -2063,39 +2087,34 @@
stmf_trace(iport->iport_alias, "Killing ELS %x cond 3",
els->els_req_payload[0]);
dq = 1;
}
+ next = list_next(&irp->irp_els_list, icmd);
if (dq) {
- fct_i_cmd_t *c = (*ppcmd)->icmd_next;
-
- if ((*ppcmd)->icmd_flags & ICMD_SESSION_AFFECTING)
+ list_remove(&irp->irp_els_list, icmd);
+ if (icmd->icmd_flags & ICMD_SESSION_AFFECTING)
atomic_dec_16(&irp->irp_sa_elses_count);
else
atomic_dec_16(&irp->irp_nsa_elses_count);
- (*ppcmd)->icmd_next = cmd_to_abort;
- cmd_to_abort = *ppcmd;
- *ppcmd = c;
- } else {
- ppcmd = &((*ppcmd)->icmd_next);
+ list_insert_head(&cmd_to_abort, icmd);
}
+ icmd = next;
}
rw_exit(&irp->irp_lock);
- while (cmd_to_abort) {
- fct_i_cmd_t *c = cmd_to_abort->icmd_next;
+ while (!list_is_empty(&cmd_to_abort)) {
+ fct_i_cmd_t *c = list_remove_head(&cmd_to_abort);
- atomic_and_32(&cmd_to_abort->icmd_flags, ~ICMD_IN_IRP_QUEUE);
- fct_queue_cmd_for_termination(cmd_to_abort->icmd_cmd,
- FCT_ABORTED);
- cmd_to_abort = c;
+ atomic_and_32(&c->icmd_flags, ~ICMD_IN_IRP_QUEUE);
+ fct_queue_cmd_for_termination(c->icmd_cmd, FCT_ABORTED);
}
/*
* pick from the top of the queue
*/
- icmd = irp->irp_els_list;
+ icmd = list_head(&irp->irp_els_list);
if (icmd == NULL) {
/*
* The cleanup took care of everything.
*/
@@ -2177,20 +2196,26 @@
bcopy(els->els_resp_payload + 20, irp->irp_rp->rp_pwwn, 8);
bcopy(els->els_resp_payload + 28, irp->irp_rp->rp_nwwn, 8);
stmf_wwn_to_devid_desc((scsi_devid_desc_t *)irp->irp_id,
irp->irp_rp->rp_pwwn, PROTOCOL_FIBRE_CHANNEL);
+ rw_enter(&iport->iport_lock, RW_WRITER);
+ rw_enter(&irp->irp_lock, RW_WRITER);
atomic_or_32(&irp->irp_flags, IRP_PLOGI_DONE);
atomic_inc_32(&iport->iport_nrps_login);
if (irp->irp_deregister_timer) {
irp->irp_deregister_timer = 0;
irp->irp_dereg_count = 0;
}
+ rw_exit(&irp->irp_lock);
+ rw_exit(&iport->iport_lock);
}
if (irp && (els->els_req_payload[0] == ELS_OP_PLOGI)) {
+ rw_enter(&irp->irp_lock, RW_WRITER);
atomic_and_32(&irp->irp_flags, ~IRP_SOL_PLOGI_IN_PROGRESS);
+ rw_exit(&irp->irp_lock);
}
atomic_or_32(&icmd->icmd_flags, ICMD_CMD_COMPLETE);
stmf_trace(iport->iport_alias, "Sol ELS %x (%s) completed with "
"status %llx, did/%x", op, FCT_ELS_NAME(op),
icmd->icmd_cmd->cmd_comp_status, icmd->icmd_cmd->cmd_rportid);
@@ -2216,16 +2241,15 @@
num_to_release = (total + 1 - max_active) / 2;
mutex_exit(&iport->iport_worker_lock);
for (ndx = 0; ndx < num_to_release; ndx++) {
mutex_enter(&iport->iport_cached_cmd_lock);
- icmd = iport->iport_cached_cmdlist;
- if (icmd == NULL) {
+ if (list_is_empty(&iport->iport_cached_cmdlist)) {
mutex_exit(&iport->iport_cached_cmd_lock);
break;
}
- iport->iport_cached_cmdlist = icmd->icmd_next;
+ icmd = list_remove_head(&iport->iport_cached_cmdlist);
iport->iport_cached_ncmds--;
mutex_exit(&iport->iport_cached_cmd_lock);
atomic_dec_32(&iport->iport_total_alloced_ncmds);
fct_free(icmd->icmd_cmd);
}
@@ -2290,10 +2314,21 @@
}
prev_icmd->icmd_solcmd_next = next_icmd;
}
icmd->icmd_cb = NULL;
+
+ /*
+ * If the command has non-zero icmd_node pointers
+ * it means it's been linked onto the iport_abort_queue.
+ * Since the iport_worker_lock is held the command
+ * can be removed before it's freed.
+ */
+ if (icmd->icmd_node.list_next != NULL) {
+ list_remove(&iport->iport_abort_queue, icmd);
+ }
+
mutex_exit(&iport->iport_worker_lock);
fct_cmd_free(icmd->icmd_cmd);
mutex_enter(&iport->iport_worker_lock);
} else {
/*
@@ -2426,14 +2461,15 @@
/*
* Release previous resource, then allocate needed resource
*/
sn = query_irp->irp_snn;
if (sn) {
- kmem_free(sn, strlen(sn) + 1);
+ kmem_free(sn, query_irp->irp_snn_len);
}
query_irp->irp_snn = NULL;
+ query_irp->irp_snn_len = 0;
sn = kmem_zalloc(snlen + 1, KM_SLEEP);
(void) strncpy(sn, (char *)
ICMD_TO_CT(icmd)->ct_resp_payload + 17, snlen);
if (strlen(sn) != snlen) {
stmf_trace(ICMD_TO_IPORT(icmd)->iport_alias,
@@ -2444,10 +2480,12 @@
/*
* Update symbolic node name
*/
query_irp->irp_snn = sn;
+ if (sn != NULL)
+ query_irp->irp_snn_len = snlen + 1;
if ((query_irp->irp_flags & IRP_SCSI_SESSION_STARTED) &&
(query_irp->irp_session)) {
query_irp->irp_session->ss_rport_alias =
query_irp->irp_snn;
}
@@ -2654,13 +2692,15 @@
if (query_irp) {
spnlen = resp[16];
if (spnlen > 0) {
if (query_irp->irp_spn) {
kmem_free(query_irp->irp_spn,
- strlen(query_irp->irp_spn) + 1);
+ query_irp->irp_spn_len);
}
- query_irp->irp_spn = kmem_zalloc(spnlen + 1, KM_SLEEP);
+ query_irp->irp_spn_len = spnlen + 1;
+ query_irp->irp_spn = kmem_zalloc(
+ query_irp->irp_spn_len, KM_SLEEP);
(void) strncpy(query_irp->irp_spn,
(char *)resp + 17, spnlen);
}
}
rw_exit(&iport->iport_lock);