Print this page
NEX-10223 Return of Fibre Channel ports stuck in offline state and unable to clear
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-4532 STC comstar FC test causes panic on 5.0
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3856 panic occurred in module "fct" due to a NULL pointer dereference
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3277 Panic of both nodes at failover time (FC clients)
        Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
        Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-2787 Multiple comstar / fibre channel / qlt threads stuck waiting on locks with a spinning interrupt thread
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>

*** 18,27 **** --- 18,28 ---- * * CDDL HEADER END */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. */ #include <sys/sysmacros.h> #include <sys/conf.h> #include <sys/file.h>
*** 144,154 **** } /* * We process cmd aborting in the end */ ! if (iport->iport_abort_queue) { suggested_action |= fct_cmd_terminator(iport); } /* * Check cmd max/free --- 145,155 ---- } /* * We process cmd aborting in the end */ ! if (!list_is_empty(&iport->iport_abort_queue)) { suggested_action |= fct_cmd_terminator(iport); } /* * Check cmd max/free
*** 619,633 **** --- 620,638 ---- mutex_enter(&iport->iport_worker_lock); } } } /* Find out if we need to do PLOGI at all */ + rw_enter(&iport->iport_lock, RW_READER); if (iport->iport_nrps_login) { iport->iport_li_state++; atomic_and_32(&iport->iport_flags, ~IPORT_ALLOW_UNSOL_FLOGI); + rw_exit(&iport->iport_lock); goto check_state_again; + } else { + rw_exit(&iport->iport_lock); } if ((ddi_get_lbolt() >= iport->iport_li_cmd_timeout) && (!fct_lport_has_bigger_wwn(iport))) { /* Cant wait forever */ stmf_trace(iport->iport_alias, "N2N: Remote port is "
*** 907,916 **** --- 912,923 ---- fct_queue_cmd_for_termination(cmd, FCT_ALLOC_FAILURE); return; } irp = (fct_i_remote_port_t *)rp->rp_fct_private; + list_create(&irp->irp_els_list, sizeof (fct_i_cmd_t), + offsetof(fct_i_cmd_t, icmd_node)); rw_init(&irp->irp_lock, 0, RW_DRIVER, 0); irp->irp_rp = rp; irp->irp_portid = cmd->cmd_rportid; rp->rp_port = port; rp->rp_id = cmd->cmd_rportid;
*** 985,994 **** --- 992,1009 ---- atomic_or_32(&icmd->icmd_flags, ICMD_IMPLICIT_CMD_HAS_RESOURCE); } atomic_inc_16(&irp->irp_nonfcp_xchg_count); /* + * The iport_lock is currently held as a Reader lock, protocol + * dictates that to modify iport_nrps_login the lock must be held + * as a Writer. + */ + rw_exit(&iport->iport_lock); + rw_enter(&iport->iport_lock, RW_WRITER); + + /* * Grab the remote port lock while we modify the port state. * we should not drop the fca port lock (as a reader) until we * modify the remote port state. */ rw_enter(&irp->irp_lock, RW_WRITER);
*** 1055,1070 **** if (cmd->cmd_rp != rp) { skipped++; continue; } if (cmd->cmd_type & ttc) { ! if (cmd->cmd_type == FCT_CMD_FCP_XCHG) fct_queue_scsi_task_for_termination(cmd, FCT_ABORTED); ! else fct_q_for_termination_lock_held(iport, icmd, FCT_ABORTED); cleaned++; } else { skipped++; } } --- 1070,1087 ---- if (cmd->cmd_rp != rp) { skipped++; continue; } if (cmd->cmd_type & ttc) { ! if (cmd->cmd_type == FCT_CMD_FCP_XCHG) { fct_queue_scsi_task_for_termination(cmd, FCT_ABORTED); ! } else { ! fct_cmd_unlink_els(irp, icmd); fct_q_for_termination_lock_held(iport, icmd, FCT_ABORTED); + } cleaned++; } else { skipped++; } }
*** 1091,1102 **** fct_dequeue_els(fct_i_remote_port_t *irp) { fct_i_cmd_t *icmd; rw_enter(&irp->irp_lock, RW_WRITER); ! icmd = irp->irp_els_list; ! irp->irp_els_list = icmd->icmd_next; atomic_and_32(&icmd->icmd_flags, ~ICMD_IN_IRP_QUEUE); rw_exit(&irp->irp_lock); } fct_status_t --- 1108,1118 ---- fct_dequeue_els(fct_i_remote_port_t *irp) { fct_i_cmd_t *icmd; rw_enter(&irp->irp_lock, RW_WRITER); ! icmd = list_remove_head(&irp->irp_els_list); atomic_and_32(&icmd->icmd_flags, ~ICMD_IN_IRP_QUEUE); rw_exit(&irp->irp_lock); } fct_status_t
*** 1187,1202 **** fct_status_t ret = FCT_SUCCESS; fct_i_local_port_t *iport = PORT_TO_IPORT(port); fct_i_remote_port_t *irp = RP_TO_IRP(rp); if (irp->irp_snn) { ! kmem_free(irp->irp_snn, strlen(irp->irp_snn) + 1); irp->irp_snn = NULL; } if (irp->irp_spn) { ! kmem_free(irp->irp_spn, strlen(irp->irp_spn) + 1); irp->irp_spn = NULL; } if ((ret = port->port_deregister_remote_port(port, rp)) != FCT_SUCCESS) { return (ret); --- 1203,1220 ---- fct_status_t ret = FCT_SUCCESS; fct_i_local_port_t *iport = PORT_TO_IPORT(port); fct_i_remote_port_t *irp = RP_TO_IRP(rp); if (irp->irp_snn) { ! kmem_free(irp->irp_snn, irp->irp_snn_len); irp->irp_snn = NULL; + irp->irp_snn_len = 0; } if (irp->irp_spn) { ! kmem_free(irp->irp_spn, irp->irp_spn_len); irp->irp_spn = NULL; + irp->irp_spn_len = 0; } if ((ret = port->port_deregister_remote_port(port, rp)) != FCT_SUCCESS) { return (ret);
*** 1247,1257 **** fct_i_remote_port_t *irp = *pirp; disc_action_t ret = DISC_ACTION_NO_WORK; int do_deregister = 0; int irp_deregister_timer = 0; ! if (irp->irp_els_list) { ret |= fct_process_els(iport, irp); } irp_deregister_timer = irp->irp_deregister_timer; if (irp_deregister_timer) { --- 1265,1275 ---- fct_i_remote_port_t *irp = *pirp; disc_action_t ret = DISC_ACTION_NO_WORK; int do_deregister = 0; int irp_deregister_timer = 0; ! if (!list_is_empty(&irp->irp_els_list)) { ret |= fct_process_els(iport, irp); } irp_deregister_timer = irp->irp_deregister_timer; if (irp_deregister_timer) {
*** 1261,1276 **** ret |= DISC_ACTION_DELAY_RESCAN; } } suggested_action |= ret; ! if (irp->irp_els_list == NULL) { mutex_exit(&iport->iport_worker_lock); rw_enter(&iport->iport_lock, RW_WRITER); rw_enter(&irp->irp_lock, RW_WRITER); mutex_enter(&iport->iport_worker_lock); ! if (irp->irp_els_list == NULL) { if (!irp_deregister_timer || (do_deregister && !irp->irp_sa_elses_count && !irp->irp_nsa_elses_count && !irp->irp_fcp_xchg_count && --- 1279,1294 ---- ret |= DISC_ACTION_DELAY_RESCAN; } } suggested_action |= ret; ! if (list_is_empty(&irp->irp_els_list)) { mutex_exit(&iport->iport_worker_lock); rw_enter(&iport->iport_lock, RW_WRITER); rw_enter(&irp->irp_lock, RW_WRITER); mutex_enter(&iport->iport_worker_lock); ! if (list_is_empty(&irp->irp_els_list)) { if (!irp_deregister_timer || (do_deregister && !irp->irp_sa_elses_count && !irp->irp_nsa_elses_count && !irp->irp_fcp_xchg_count &&
*** 1514,1527 **** --- 1532,1549 ---- } atomic_dec_16(&irp->irp_sa_elses_count); if (ret == FCT_SUCCESS) { if (cmd_type == FCT_CMD_RCVD_ELS) { + rw_enter(&iport->iport_lock, RW_WRITER); + rw_enter(&irp->irp_lock, RW_WRITER); atomic_or_32(&irp->irp_flags, IRP_PLOGI_DONE); atomic_inc_32(&iport->iport_nrps_login); if (irp->irp_deregister_timer) irp->irp_deregister_timer = 0; + rw_exit(&irp->irp_lock); + rw_exit(&iport->iport_lock); } if (icmd_flags & ICMD_IMPLICIT) { DTRACE_FC_5(rport__login__end, fct_cmd_t, cmd, fct_local_port_t, port,
*** 2000,2011 **** } disc_action_t fct_process_els(fct_i_local_port_t *iport, fct_i_remote_port_t *irp) { ! fct_i_cmd_t *cmd_to_abort = NULL; ! fct_i_cmd_t **ppcmd, *icmd; fct_cmd_t *cmd; fct_els_t *els; int dq; disc_action_t ret = DISC_ACTION_NO_WORK; uint8_t op; --- 2022,2033 ---- } disc_action_t fct_process_els(fct_i_local_port_t *iport, fct_i_remote_port_t *irp) { ! list_t cmd_to_abort; ! fct_i_cmd_t *next, *icmd; fct_cmd_t *cmd; fct_els_t *els; int dq; disc_action_t ret = DISC_ACTION_NO_WORK; uint8_t op;
*** 2022,2039 **** * NOTE: There is a side effect, if a sa ELS (non PLOGI) is received * while a PLOGI is pending, it will kill itself and the PLOGI. * which is probably ok. */ rw_enter(&irp->irp_lock, RW_WRITER); ! ppcmd = &irp->irp_els_list; ! while ((*ppcmd) != NULL) { int special_prli_cond = 0; dq = 0; ! els = (fct_els_t *)((*ppcmd)->icmd_cmd)->cmd_specific; ! if (((*ppcmd)->icmd_cmd->cmd_type == FCT_CMD_RCVD_ELS) && (els->els_req_payload[0] == ELS_OP_PRLI) && (irp->irp_flags & IRP_SOL_PLOGI_IN_PROGRESS)) { /* * The initiator sent a PRLI right after responding * to PLOGI and we have not yet finished processing --- 2044,2063 ---- * NOTE: There is a side effect, if a sa ELS (non PLOGI) is received * while a PLOGI is pending, it will kill itself and the PLOGI. * which is probably ok. */ rw_enter(&irp->irp_lock, RW_WRITER); ! icmd = list_head(&irp->irp_els_list); ! list_create(&cmd_to_abort, sizeof (fct_i_cmd_t), ! offsetof(fct_i_cmd_t, icmd_node)); ! while (icmd != NULL) { int special_prli_cond = 0; dq = 0; ! els = (fct_els_t *)(icmd->icmd_cmd)->cmd_specific; ! if ((icmd->icmd_cmd->cmd_type == FCT_CMD_RCVD_ELS) && (els->els_req_payload[0] == ELS_OP_PRLI) && (irp->irp_flags & IRP_SOL_PLOGI_IN_PROGRESS)) { /* * The initiator sent a PRLI right after responding * to PLOGI and we have not yet finished processing
*** 2041,2060 **** * as the initiator may not retry it. */ special_prli_cond = 1; } ! if ((*ppcmd)->icmd_flags & ICMD_BEING_ABORTED) { dq = 1; } else if (irp->irp_sa_elses_count > 1) { dq = 1; /* This els might have set the CLEANUP flag */ atomic_and_32(&irp->irp_flags, ~IRP_SESSION_CLEANUP); stmf_trace(iport->iport_alias, "Killing ELS %x cond 1", els->els_req_payload[0]); } else if (irp->irp_sa_elses_count && ! (((*ppcmd)->icmd_flags & ICMD_SESSION_AFFECTING) == 0)) { stmf_trace(iport->iport_alias, "Killing ELS %x cond 2", els->els_req_payload[0]); dq = 1; } else if (((irp->irp_flags & IRP_PLOGI_DONE) == 0) && (els->els_req_payload[0] != ELS_OP_PLOGI) && --- 2065,2084 ---- * as the initiator may not retry it. */ special_prli_cond = 1; } ! if (icmd->icmd_flags & ICMD_BEING_ABORTED) { dq = 1; } else if (irp->irp_sa_elses_count > 1) { dq = 1; /* This els might have set the CLEANUP flag */ atomic_and_32(&irp->irp_flags, ~IRP_SESSION_CLEANUP); stmf_trace(iport->iport_alias, "Killing ELS %x cond 1", els->els_req_payload[0]); } else if (irp->irp_sa_elses_count && ! ((icmd->icmd_flags & ICMD_SESSION_AFFECTING) == 0)) { stmf_trace(iport->iport_alias, "Killing ELS %x cond 2", els->els_req_payload[0]); dq = 1; } else if (((irp->irp_flags & IRP_PLOGI_DONE) == 0) && (els->els_req_payload[0] != ELS_OP_PLOGI) &&
*** 2063,2101 **** stmf_trace(iport->iport_alias, "Killing ELS %x cond 3", els->els_req_payload[0]); dq = 1; } if (dq) { ! fct_i_cmd_t *c = (*ppcmd)->icmd_next; ! ! if ((*ppcmd)->icmd_flags & ICMD_SESSION_AFFECTING) atomic_dec_16(&irp->irp_sa_elses_count); else atomic_dec_16(&irp->irp_nsa_elses_count); ! (*ppcmd)->icmd_next = cmd_to_abort; ! cmd_to_abort = *ppcmd; ! *ppcmd = c; ! } else { ! ppcmd = &((*ppcmd)->icmd_next); } } rw_exit(&irp->irp_lock); ! while (cmd_to_abort) { ! fct_i_cmd_t *c = cmd_to_abort->icmd_next; ! atomic_and_32(&cmd_to_abort->icmd_flags, ~ICMD_IN_IRP_QUEUE); ! fct_queue_cmd_for_termination(cmd_to_abort->icmd_cmd, ! FCT_ABORTED); ! cmd_to_abort = c; } /* * pick from the top of the queue */ ! icmd = irp->irp_els_list; if (icmd == NULL) { /* * The cleanup took care of everything. */ --- 2087,2120 ---- stmf_trace(iport->iport_alias, "Killing ELS %x cond 3", els->els_req_payload[0]); dq = 1; } + next = list_next(&irp->irp_els_list, icmd); if (dq) { ! list_remove(&irp->irp_els_list, icmd); ! if (icmd->icmd_flags & ICMD_SESSION_AFFECTING) atomic_dec_16(&irp->irp_sa_elses_count); else atomic_dec_16(&irp->irp_nsa_elses_count); ! list_insert_head(&cmd_to_abort, icmd); } + icmd = next; } rw_exit(&irp->irp_lock); ! while (!list_is_empty(&cmd_to_abort)) { ! fct_i_cmd_t *c = list_remove_head(&cmd_to_abort); ! atomic_and_32(&c->icmd_flags, ~ICMD_IN_IRP_QUEUE); ! fct_queue_cmd_for_termination(c->icmd_cmd, FCT_ABORTED); } /* * pick from the top of the queue */ ! icmd = list_head(&irp->irp_els_list); if (icmd == NULL) { /* * The cleanup took care of everything. */
*** 2177,2196 **** --- 2196,2221 ---- bcopy(els->els_resp_payload + 20, irp->irp_rp->rp_pwwn, 8); bcopy(els->els_resp_payload + 28, irp->irp_rp->rp_nwwn, 8); stmf_wwn_to_devid_desc((scsi_devid_desc_t *)irp->irp_id, irp->irp_rp->rp_pwwn, PROTOCOL_FIBRE_CHANNEL); + rw_enter(&iport->iport_lock, RW_WRITER); + rw_enter(&irp->irp_lock, RW_WRITER); atomic_or_32(&irp->irp_flags, IRP_PLOGI_DONE); atomic_inc_32(&iport->iport_nrps_login); if (irp->irp_deregister_timer) { irp->irp_deregister_timer = 0; irp->irp_dereg_count = 0; } + rw_exit(&irp->irp_lock); + rw_exit(&iport->iport_lock); } if (irp && (els->els_req_payload[0] == ELS_OP_PLOGI)) { + rw_enter(&irp->irp_lock, RW_WRITER); atomic_and_32(&irp->irp_flags, ~IRP_SOL_PLOGI_IN_PROGRESS); + rw_exit(&irp->irp_lock); } atomic_or_32(&icmd->icmd_flags, ICMD_CMD_COMPLETE); stmf_trace(iport->iport_alias, "Sol ELS %x (%s) completed with " "status %llx, did/%x", op, FCT_ELS_NAME(op), icmd->icmd_cmd->cmd_comp_status, icmd->icmd_cmd->cmd_rportid);
*** 2216,2231 **** num_to_release = (total + 1 - max_active) / 2; mutex_exit(&iport->iport_worker_lock); for (ndx = 0; ndx < num_to_release; ndx++) { mutex_enter(&iport->iport_cached_cmd_lock); ! icmd = iport->iport_cached_cmdlist; ! if (icmd == NULL) { mutex_exit(&iport->iport_cached_cmd_lock); break; } ! iport->iport_cached_cmdlist = icmd->icmd_next; iport->iport_cached_ncmds--; mutex_exit(&iport->iport_cached_cmd_lock); atomic_dec_32(&iport->iport_total_alloced_ncmds); fct_free(icmd->icmd_cmd); } --- 2241,2255 ---- num_to_release = (total + 1 - max_active) / 2; mutex_exit(&iport->iport_worker_lock); for (ndx = 0; ndx < num_to_release; ndx++) { mutex_enter(&iport->iport_cached_cmd_lock); ! if (list_is_empty(&iport->iport_cached_cmdlist)) { mutex_exit(&iport->iport_cached_cmd_lock); break; } ! icmd = list_remove_head(&iport->iport_cached_cmdlist); iport->iport_cached_ncmds--; mutex_exit(&iport->iport_cached_cmd_lock); atomic_dec_32(&iport->iport_total_alloced_ncmds); fct_free(icmd->icmd_cmd); }
*** 2290,2299 **** --- 2314,2334 ---- } prev_icmd->icmd_solcmd_next = next_icmd; } icmd->icmd_cb = NULL; + + /* + * If the command has none-zero icmd_node pointers + * it means it's been linked onto the iport_abort_queue. + * Since the iport_worker_lock is held the command + * can be removed before it's freed. + */ + if (icmd->icmd_node.list_next != NULL) { + list_remove(&iport->iport_abort_queue, icmd); + } + mutex_exit(&iport->iport_worker_lock); fct_cmd_free(icmd->icmd_cmd); mutex_enter(&iport->iport_worker_lock); } else { /*
*** 2426,2439 **** /* * Release previous resource, then allocate needed resource */ sn = query_irp->irp_snn; if (sn) { ! kmem_free(sn, strlen(sn) + 1); } query_irp->irp_snn = NULL; sn = kmem_zalloc(snlen + 1, KM_SLEEP); (void) strncpy(sn, (char *) ICMD_TO_CT(icmd)->ct_resp_payload + 17, snlen); if (strlen(sn) != snlen) { stmf_trace(ICMD_TO_IPORT(icmd)->iport_alias, --- 2461,2475 ---- /* * Release previous resource, then allocate needed resource */ sn = query_irp->irp_snn; if (sn) { ! kmem_free(sn, query_irp->irp_snn_len); } query_irp->irp_snn = NULL; + query_irp->irp_snn_len = 0; sn = kmem_zalloc(snlen + 1, KM_SLEEP); (void) strncpy(sn, (char *) ICMD_TO_CT(icmd)->ct_resp_payload + 17, snlen); if (strlen(sn) != snlen) { stmf_trace(ICMD_TO_IPORT(icmd)->iport_alias,
*** 2444,2453 **** --- 2480,2491 ---- /* * Update symbolic node name */ query_irp->irp_snn = sn; + if (sn != NULL) + query_irp->irp_snn_len = snlen + 1; if ((query_irp->irp_flags & IRP_SCSI_SESSION_STARTED) && (query_irp->irp_session)) { query_irp->irp_session->ss_rport_alias = query_irp->irp_snn; }
*** 2654,2666 **** if (query_irp) { spnlen = resp[16]; if (spnlen > 0) { if (query_irp->irp_spn) { kmem_free(query_irp->irp_spn, ! strlen(query_irp->irp_spn) + 1); } ! query_irp->irp_spn = kmem_zalloc(spnlen + 1, KM_SLEEP); (void) strncpy(query_irp->irp_spn, (char *)resp + 17, spnlen); } } rw_exit(&iport->iport_lock); --- 2692,2706 ---- if (query_irp) { spnlen = resp[16]; if (spnlen > 0) { if (query_irp->irp_spn) { kmem_free(query_irp->irp_spn, ! query_irp->irp_spn_len); } ! query_irp->irp_spn_len = spnlen + 1; ! query_irp->irp_spn = kmem_zalloc( ! query_irp->irp_spn_len, KM_SLEEP); (void) strncpy(query_irp->irp_spn, (char *)resp + 17, spnlen); } } rw_exit(&iport->iport_lock);