Print this page
NEX-10223 Return of Fibre Channel ports stuck in offline state and unable to clear
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-4532 STC comstar FC test causes panic on 5.0
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3856 panic is occurred in module "fct" due to a NULL pointer dereference
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3277 Panic of both nodes in failover time (FC clients)
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-2787 Multiple comstar / fibre channel / qlt threads stuck waiting on locks with a spinning interrupt thread
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>
*** 18,27 ****
--- 18,28 ----
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/file.h>
*** 144,154 ****
}
/*
* We process cmd aborting in the end
*/
! if (iport->iport_abort_queue) {
suggested_action |= fct_cmd_terminator(iport);
}
/*
* Check cmd max/free
--- 145,155 ----
}
/*
* We process cmd aborting in the end
*/
! if (!list_is_empty(&iport->iport_abort_queue)) {
suggested_action |= fct_cmd_terminator(iport);
}
/*
* Check cmd max/free
*** 619,633 ****
--- 620,638 ----
mutex_enter(&iport->iport_worker_lock);
}
}
}
/* Find out if we need to do PLOGI at all */
+ rw_enter(&iport->iport_lock, RW_READER);
if (iport->iport_nrps_login) {
iport->iport_li_state++;
atomic_and_32(&iport->iport_flags,
~IPORT_ALLOW_UNSOL_FLOGI);
+ rw_exit(&iport->iport_lock);
goto check_state_again;
+ } else {
+ rw_exit(&iport->iport_lock);
}
if ((ddi_get_lbolt() >= iport->iport_li_cmd_timeout) &&
(!fct_lport_has_bigger_wwn(iport))) {
/* Cant wait forever */
stmf_trace(iport->iport_alias, "N2N: Remote port is "
*** 907,916 ****
--- 912,923 ----
fct_queue_cmd_for_termination(cmd,
FCT_ALLOC_FAILURE);
return;
}
irp = (fct_i_remote_port_t *)rp->rp_fct_private;
+ list_create(&irp->irp_els_list, sizeof (fct_i_cmd_t),
+ offsetof(fct_i_cmd_t, icmd_node));
rw_init(&irp->irp_lock, 0, RW_DRIVER, 0);
irp->irp_rp = rp;
irp->irp_portid = cmd->cmd_rportid;
rp->rp_port = port;
rp->rp_id = cmd->cmd_rportid;
*** 985,994 ****
--- 992,1009 ----
atomic_or_32(&icmd->icmd_flags, ICMD_IMPLICIT_CMD_HAS_RESOURCE);
}
atomic_inc_16(&irp->irp_nonfcp_xchg_count);
/*
+ * The iport_lock is currently held as a Reader lock; protocol
+ * dictates that to modify iport_nrps_login the lock must be held
+ * as a Writer.
+ */
+ rw_exit(&iport->iport_lock);
+ rw_enter(&iport->iport_lock, RW_WRITER);
+
+ /*
* Grab the remote port lock while we modify the port state.
* we should not drop the fca port lock (as a reader) until we
* modify the remote port state.
*/
rw_enter(&irp->irp_lock, RW_WRITER);
*** 1055,1070 ****
if (cmd->cmd_rp != rp) {
skipped++;
continue;
}
if (cmd->cmd_type & ttc) {
! if (cmd->cmd_type == FCT_CMD_FCP_XCHG)
fct_queue_scsi_task_for_termination(cmd,
FCT_ABORTED);
! else
fct_q_for_termination_lock_held(iport, icmd,
FCT_ABORTED);
cleaned++;
} else {
skipped++;
}
}
--- 1070,1087 ----
if (cmd->cmd_rp != rp) {
skipped++;
continue;
}
if (cmd->cmd_type & ttc) {
! if (cmd->cmd_type == FCT_CMD_FCP_XCHG) {
fct_queue_scsi_task_for_termination(cmd,
FCT_ABORTED);
! } else {
! fct_cmd_unlink_els(irp, icmd);
fct_q_for_termination_lock_held(iport, icmd,
FCT_ABORTED);
+ }
cleaned++;
} else {
skipped++;
}
}
*** 1091,1102 ****
fct_dequeue_els(fct_i_remote_port_t *irp)
{
fct_i_cmd_t *icmd;
rw_enter(&irp->irp_lock, RW_WRITER);
! icmd = irp->irp_els_list;
! irp->irp_els_list = icmd->icmd_next;
atomic_and_32(&icmd->icmd_flags, ~ICMD_IN_IRP_QUEUE);
rw_exit(&irp->irp_lock);
}
fct_status_t
--- 1108,1118 ----
fct_dequeue_els(fct_i_remote_port_t *irp)
{
fct_i_cmd_t *icmd;
rw_enter(&irp->irp_lock, RW_WRITER);
! icmd = list_remove_head(&irp->irp_els_list);
atomic_and_32(&icmd->icmd_flags, ~ICMD_IN_IRP_QUEUE);
rw_exit(&irp->irp_lock);
}
fct_status_t
*** 1187,1202 ****
fct_status_t ret = FCT_SUCCESS;
fct_i_local_port_t *iport = PORT_TO_IPORT(port);
fct_i_remote_port_t *irp = RP_TO_IRP(rp);
if (irp->irp_snn) {
! kmem_free(irp->irp_snn, strlen(irp->irp_snn) + 1);
irp->irp_snn = NULL;
}
if (irp->irp_spn) {
! kmem_free(irp->irp_spn, strlen(irp->irp_spn) + 1);
irp->irp_spn = NULL;
}
if ((ret = port->port_deregister_remote_port(port, rp)) !=
FCT_SUCCESS) {
return (ret);
--- 1203,1220 ----
fct_status_t ret = FCT_SUCCESS;
fct_i_local_port_t *iport = PORT_TO_IPORT(port);
fct_i_remote_port_t *irp = RP_TO_IRP(rp);
if (irp->irp_snn) {
! kmem_free(irp->irp_snn, irp->irp_snn_len);
irp->irp_snn = NULL;
+ irp->irp_snn_len = 0;
}
if (irp->irp_spn) {
! kmem_free(irp->irp_spn, irp->irp_spn_len);
irp->irp_spn = NULL;
+ irp->irp_spn_len = 0;
}
if ((ret = port->port_deregister_remote_port(port, rp)) !=
FCT_SUCCESS) {
return (ret);
*** 1247,1257 ****
fct_i_remote_port_t *irp = *pirp;
disc_action_t ret = DISC_ACTION_NO_WORK;
int do_deregister = 0;
int irp_deregister_timer = 0;
! if (irp->irp_els_list) {
ret |= fct_process_els(iport, irp);
}
irp_deregister_timer = irp->irp_deregister_timer;
if (irp_deregister_timer) {
--- 1265,1275 ----
fct_i_remote_port_t *irp = *pirp;
disc_action_t ret = DISC_ACTION_NO_WORK;
int do_deregister = 0;
int irp_deregister_timer = 0;
! if (!list_is_empty(&irp->irp_els_list)) {
ret |= fct_process_els(iport, irp);
}
irp_deregister_timer = irp->irp_deregister_timer;
if (irp_deregister_timer) {
*** 1261,1276 ****
ret |= DISC_ACTION_DELAY_RESCAN;
}
}
suggested_action |= ret;
! if (irp->irp_els_list == NULL) {
mutex_exit(&iport->iport_worker_lock);
rw_enter(&iport->iport_lock, RW_WRITER);
rw_enter(&irp->irp_lock, RW_WRITER);
mutex_enter(&iport->iport_worker_lock);
! if (irp->irp_els_list == NULL) {
if (!irp_deregister_timer ||
(do_deregister &&
!irp->irp_sa_elses_count &&
!irp->irp_nsa_elses_count &&
!irp->irp_fcp_xchg_count &&
--- 1279,1294 ----
ret |= DISC_ACTION_DELAY_RESCAN;
}
}
suggested_action |= ret;
! if (list_is_empty(&irp->irp_els_list)) {
mutex_exit(&iport->iport_worker_lock);
rw_enter(&iport->iport_lock, RW_WRITER);
rw_enter(&irp->irp_lock, RW_WRITER);
mutex_enter(&iport->iport_worker_lock);
! if (list_is_empty(&irp->irp_els_list)) {
if (!irp_deregister_timer ||
(do_deregister &&
!irp->irp_sa_elses_count &&
!irp->irp_nsa_elses_count &&
!irp->irp_fcp_xchg_count &&
*** 1514,1527 ****
--- 1532,1549 ----
}
atomic_dec_16(&irp->irp_sa_elses_count);
if (ret == FCT_SUCCESS) {
if (cmd_type == FCT_CMD_RCVD_ELS) {
+ rw_enter(&iport->iport_lock, RW_WRITER);
+ rw_enter(&irp->irp_lock, RW_WRITER);
atomic_or_32(&irp->irp_flags, IRP_PLOGI_DONE);
atomic_inc_32(&iport->iport_nrps_login);
if (irp->irp_deregister_timer)
irp->irp_deregister_timer = 0;
+ rw_exit(&irp->irp_lock);
+ rw_exit(&iport->iport_lock);
}
if (icmd_flags & ICMD_IMPLICIT) {
DTRACE_FC_5(rport__login__end,
fct_cmd_t, cmd,
fct_local_port_t, port,
*** 2000,2011 ****
}
disc_action_t
fct_process_els(fct_i_local_port_t *iport, fct_i_remote_port_t *irp)
{
! fct_i_cmd_t *cmd_to_abort = NULL;
! fct_i_cmd_t **ppcmd, *icmd;
fct_cmd_t *cmd;
fct_els_t *els;
int dq;
disc_action_t ret = DISC_ACTION_NO_WORK;
uint8_t op;
--- 2022,2033 ----
}
disc_action_t
fct_process_els(fct_i_local_port_t *iport, fct_i_remote_port_t *irp)
{
! list_t cmd_to_abort;
! fct_i_cmd_t *next, *icmd;
fct_cmd_t *cmd;
fct_els_t *els;
int dq;
disc_action_t ret = DISC_ACTION_NO_WORK;
uint8_t op;
*** 2022,2039 ****
* NOTE: There is a side effect, if a sa ELS (non PLOGI) is received
* while a PLOGI is pending, it will kill itself and the PLOGI.
* which is probably ok.
*/
rw_enter(&irp->irp_lock, RW_WRITER);
! ppcmd = &irp->irp_els_list;
! while ((*ppcmd) != NULL) {
int special_prli_cond = 0;
dq = 0;
! els = (fct_els_t *)((*ppcmd)->icmd_cmd)->cmd_specific;
! if (((*ppcmd)->icmd_cmd->cmd_type == FCT_CMD_RCVD_ELS) &&
(els->els_req_payload[0] == ELS_OP_PRLI) &&
(irp->irp_flags & IRP_SOL_PLOGI_IN_PROGRESS)) {
/*
* The initiator sent a PRLI right after responding
* to PLOGI and we have not yet finished processing
--- 2044,2063 ----
* NOTE: There is a side effect, if a sa ELS (non PLOGI) is received
* while a PLOGI is pending, it will kill itself and the PLOGI.
* which is probably ok.
*/
rw_enter(&irp->irp_lock, RW_WRITER);
! icmd = list_head(&irp->irp_els_list);
! list_create(&cmd_to_abort, sizeof (fct_i_cmd_t),
! offsetof(fct_i_cmd_t, icmd_node));
! while (icmd != NULL) {
int special_prli_cond = 0;
dq = 0;
! els = (fct_els_t *)(icmd->icmd_cmd)->cmd_specific;
! if ((icmd->icmd_cmd->cmd_type == FCT_CMD_RCVD_ELS) &&
(els->els_req_payload[0] == ELS_OP_PRLI) &&
(irp->irp_flags & IRP_SOL_PLOGI_IN_PROGRESS)) {
/*
* The initiator sent a PRLI right after responding
* to PLOGI and we have not yet finished processing
*** 2041,2060 ****
* as the initiator may not retry it.
*/
special_prli_cond = 1;
}
! if ((*ppcmd)->icmd_flags & ICMD_BEING_ABORTED) {
dq = 1;
} else if (irp->irp_sa_elses_count > 1) {
dq = 1;
/* This els might have set the CLEANUP flag */
atomic_and_32(&irp->irp_flags, ~IRP_SESSION_CLEANUP);
stmf_trace(iport->iport_alias, "Killing ELS %x cond 1",
els->els_req_payload[0]);
} else if (irp->irp_sa_elses_count &&
! (((*ppcmd)->icmd_flags & ICMD_SESSION_AFFECTING) == 0)) {
stmf_trace(iport->iport_alias, "Killing ELS %x cond 2",
els->els_req_payload[0]);
dq = 1;
} else if (((irp->irp_flags & IRP_PLOGI_DONE) == 0) &&
(els->els_req_payload[0] != ELS_OP_PLOGI) &&
--- 2065,2084 ----
* as the initiator may not retry it.
*/
special_prli_cond = 1;
}
! if (icmd->icmd_flags & ICMD_BEING_ABORTED) {
dq = 1;
} else if (irp->irp_sa_elses_count > 1) {
dq = 1;
/* This els might have set the CLEANUP flag */
atomic_and_32(&irp->irp_flags, ~IRP_SESSION_CLEANUP);
stmf_trace(iport->iport_alias, "Killing ELS %x cond 1",
els->els_req_payload[0]);
} else if (irp->irp_sa_elses_count &&
! ((icmd->icmd_flags & ICMD_SESSION_AFFECTING) == 0)) {
stmf_trace(iport->iport_alias, "Killing ELS %x cond 2",
els->els_req_payload[0]);
dq = 1;
} else if (((irp->irp_flags & IRP_PLOGI_DONE) == 0) &&
(els->els_req_payload[0] != ELS_OP_PLOGI) &&
*** 2063,2101 ****
stmf_trace(iport->iport_alias, "Killing ELS %x cond 3",
els->els_req_payload[0]);
dq = 1;
}
if (dq) {
! fct_i_cmd_t *c = (*ppcmd)->icmd_next;
!
! if ((*ppcmd)->icmd_flags & ICMD_SESSION_AFFECTING)
atomic_dec_16(&irp->irp_sa_elses_count);
else
atomic_dec_16(&irp->irp_nsa_elses_count);
! (*ppcmd)->icmd_next = cmd_to_abort;
! cmd_to_abort = *ppcmd;
! *ppcmd = c;
! } else {
! ppcmd = &((*ppcmd)->icmd_next);
}
}
rw_exit(&irp->irp_lock);
! while (cmd_to_abort) {
! fct_i_cmd_t *c = cmd_to_abort->icmd_next;
! atomic_and_32(&cmd_to_abort->icmd_flags, ~ICMD_IN_IRP_QUEUE);
! fct_queue_cmd_for_termination(cmd_to_abort->icmd_cmd,
! FCT_ABORTED);
! cmd_to_abort = c;
}
/*
* pick from the top of the queue
*/
! icmd = irp->irp_els_list;
if (icmd == NULL) {
/*
* The cleanup took care of everything.
*/
--- 2087,2120 ----
stmf_trace(iport->iport_alias, "Killing ELS %x cond 3",
els->els_req_payload[0]);
dq = 1;
}
+ next = list_next(&irp->irp_els_list, icmd);
if (dq) {
! list_remove(&irp->irp_els_list, icmd);
! if (icmd->icmd_flags & ICMD_SESSION_AFFECTING)
atomic_dec_16(&irp->irp_sa_elses_count);
else
atomic_dec_16(&irp->irp_nsa_elses_count);
! list_insert_head(&cmd_to_abort, icmd);
}
+ icmd = next;
}
rw_exit(&irp->irp_lock);
! while (!list_is_empty(&cmd_to_abort)) {
! fct_i_cmd_t *c = list_remove_head(&cmd_to_abort);
! atomic_and_32(&c->icmd_flags, ~ICMD_IN_IRP_QUEUE);
! fct_queue_cmd_for_termination(c->icmd_cmd, FCT_ABORTED);
}
/*
* pick from the top of the queue
*/
! icmd = list_head(&irp->irp_els_list);
if (icmd == NULL) {
/*
* The cleanup took care of everything.
*/
*** 2177,2196 ****
--- 2196,2221 ----
bcopy(els->els_resp_payload + 20, irp->irp_rp->rp_pwwn, 8);
bcopy(els->els_resp_payload + 28, irp->irp_rp->rp_nwwn, 8);
stmf_wwn_to_devid_desc((scsi_devid_desc_t *)irp->irp_id,
irp->irp_rp->rp_pwwn, PROTOCOL_FIBRE_CHANNEL);
+ rw_enter(&iport->iport_lock, RW_WRITER);
+ rw_enter(&irp->irp_lock, RW_WRITER);
atomic_or_32(&irp->irp_flags, IRP_PLOGI_DONE);
atomic_inc_32(&iport->iport_nrps_login);
if (irp->irp_deregister_timer) {
irp->irp_deregister_timer = 0;
irp->irp_dereg_count = 0;
}
+ rw_exit(&irp->irp_lock);
+ rw_exit(&iport->iport_lock);
}
if (irp && (els->els_req_payload[0] == ELS_OP_PLOGI)) {
+ rw_enter(&irp->irp_lock, RW_WRITER);
atomic_and_32(&irp->irp_flags, ~IRP_SOL_PLOGI_IN_PROGRESS);
+ rw_exit(&irp->irp_lock);
}
atomic_or_32(&icmd->icmd_flags, ICMD_CMD_COMPLETE);
stmf_trace(iport->iport_alias, "Sol ELS %x (%s) completed with "
"status %llx, did/%x", op, FCT_ELS_NAME(op),
icmd->icmd_cmd->cmd_comp_status, icmd->icmd_cmd->cmd_rportid);
*** 2216,2231 ****
num_to_release = (total + 1 - max_active) / 2;
mutex_exit(&iport->iport_worker_lock);
for (ndx = 0; ndx < num_to_release; ndx++) {
mutex_enter(&iport->iport_cached_cmd_lock);
! icmd = iport->iport_cached_cmdlist;
! if (icmd == NULL) {
mutex_exit(&iport->iport_cached_cmd_lock);
break;
}
! iport->iport_cached_cmdlist = icmd->icmd_next;
iport->iport_cached_ncmds--;
mutex_exit(&iport->iport_cached_cmd_lock);
atomic_dec_32(&iport->iport_total_alloced_ncmds);
fct_free(icmd->icmd_cmd);
}
--- 2241,2255 ----
num_to_release = (total + 1 - max_active) / 2;
mutex_exit(&iport->iport_worker_lock);
for (ndx = 0; ndx < num_to_release; ndx++) {
mutex_enter(&iport->iport_cached_cmd_lock);
! if (list_is_empty(&iport->iport_cached_cmdlist)) {
mutex_exit(&iport->iport_cached_cmd_lock);
break;
}
! icmd = list_remove_head(&iport->iport_cached_cmdlist);
iport->iport_cached_ncmds--;
mutex_exit(&iport->iport_cached_cmd_lock);
atomic_dec_32(&iport->iport_total_alloced_ncmds);
fct_free(icmd->icmd_cmd);
}
*** 2290,2299 ****
--- 2314,2334 ----
}
prev_icmd->icmd_solcmd_next = next_icmd;
}
icmd->icmd_cb = NULL;
+
+ /*
+ * If the command has non-zero icmd_node pointers
+ * it means it's been linked onto the iport_abort_queue.
+ * Since the iport_worker_lock is held the command
+ * can be removed before it's freed.
+ */
+ if (icmd->icmd_node.list_next != NULL) {
+ list_remove(&iport->iport_abort_queue, icmd);
+ }
+
mutex_exit(&iport->iport_worker_lock);
fct_cmd_free(icmd->icmd_cmd);
mutex_enter(&iport->iport_worker_lock);
} else {
/*
*** 2426,2439 ****
/*
* Release previous resource, then allocate needed resource
*/
sn = query_irp->irp_snn;
if (sn) {
! kmem_free(sn, strlen(sn) + 1);
}
query_irp->irp_snn = NULL;
sn = kmem_zalloc(snlen + 1, KM_SLEEP);
(void) strncpy(sn, (char *)
ICMD_TO_CT(icmd)->ct_resp_payload + 17, snlen);
if (strlen(sn) != snlen) {
stmf_trace(ICMD_TO_IPORT(icmd)->iport_alias,
--- 2461,2475 ----
/*
* Release previous resource, then allocate needed resource
*/
sn = query_irp->irp_snn;
if (sn) {
! kmem_free(sn, query_irp->irp_snn_len);
}
query_irp->irp_snn = NULL;
+ query_irp->irp_snn_len = 0;
sn = kmem_zalloc(snlen + 1, KM_SLEEP);
(void) strncpy(sn, (char *)
ICMD_TO_CT(icmd)->ct_resp_payload + 17, snlen);
if (strlen(sn) != snlen) {
stmf_trace(ICMD_TO_IPORT(icmd)->iport_alias,
*** 2444,2453 ****
--- 2480,2491 ----
/*
* Update symbolic node name
*/
query_irp->irp_snn = sn;
+ if (sn != NULL)
+ query_irp->irp_snn_len = snlen + 1;
if ((query_irp->irp_flags & IRP_SCSI_SESSION_STARTED) &&
(query_irp->irp_session)) {
query_irp->irp_session->ss_rport_alias =
query_irp->irp_snn;
}
*** 2654,2666 ****
if (query_irp) {
spnlen = resp[16];
if (spnlen > 0) {
if (query_irp->irp_spn) {
kmem_free(query_irp->irp_spn,
! strlen(query_irp->irp_spn) + 1);
}
! query_irp->irp_spn = kmem_zalloc(spnlen + 1, KM_SLEEP);
(void) strncpy(query_irp->irp_spn,
(char *)resp + 17, spnlen);
}
}
rw_exit(&iport->iport_lock);
--- 2692,2706 ----
if (query_irp) {
spnlen = resp[16];
if (spnlen > 0) {
if (query_irp->irp_spn) {
kmem_free(query_irp->irp_spn,
! query_irp->irp_spn_len);
}
! query_irp->irp_spn_len = spnlen + 1;
! query_irp->irp_spn = kmem_zalloc(
! query_irp->irp_spn_len, KM_SLEEP);
(void) strncpy(query_irp->irp_spn,
(char *)resp + 17, spnlen);
}
}
rw_exit(&iport->iport_lock);