Print this page
NEX-10223 Return of Fibre Channel ports stuck in offline state and unable to clear
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
NEX-4532 STC comstar FC test causes panic on 5.0
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3856 panic is occurred in module "fct" due to a NULL pointer dereference
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
NEX-3277 Panic of both nodes in failover time (FC clients)
        Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
        Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
NEX-2787 Multiple comstar / fibre channel / qlt threads stuck waiting on locks with a spinning interrupt thread
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Approved by: Jean McCormack <jean.mccormack@nexenta.com>

@@ -18,10 +18,11 @@
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <sys/sysmacros.h>
 #include <sys/conf.h>
 #include <sys/file.h>

@@ -144,11 +145,11 @@
                 }
 
                 /*
                  * We process cmd aborting in the end
                  */
-                if (iport->iport_abort_queue) {
+                if (!list_is_empty(&iport->iport_abort_queue)) {
                         suggested_action |= fct_cmd_terminator(iport);
                 }
 
                 /*
                  * Check cmd max/free

@@ -619,15 +620,19 @@
                                         mutex_enter(&iport->iport_worker_lock);
                                 }
                         }
                 }
                 /* Find out if we need to do PLOGI at all */
+                rw_enter(&iport->iport_lock, RW_READER);
                 if (iport->iport_nrps_login) {
                         iport->iport_li_state++;
                         atomic_and_32(&iport->iport_flags,
                             ~IPORT_ALLOW_UNSOL_FLOGI);
+                        rw_exit(&iport->iport_lock);
                         goto check_state_again;
+                } else {
+                        rw_exit(&iport->iport_lock);
                 }
                 if ((ddi_get_lbolt() >= iport->iport_li_cmd_timeout) &&
                     (!fct_lport_has_bigger_wwn(iport))) {
                         /* Cant wait forever */
                         stmf_trace(iport->iport_alias, "N2N: Remote port is "

@@ -907,10 +912,12 @@
                                 fct_queue_cmd_for_termination(cmd,
                                     FCT_ALLOC_FAILURE);
                                 return;
                         }
                         irp = (fct_i_remote_port_t *)rp->rp_fct_private;
+                        list_create(&irp->irp_els_list, sizeof (fct_i_cmd_t),
+                            offsetof(fct_i_cmd_t, icmd_node));
                         rw_init(&irp->irp_lock, 0, RW_DRIVER, 0);
                         irp->irp_rp = rp;
                         irp->irp_portid = cmd->cmd_rportid;
                         rp->rp_port = port;
                         rp->rp_id = cmd->cmd_rportid;

@@ -985,10 +992,18 @@
                 atomic_or_32(&icmd->icmd_flags, ICMD_IMPLICIT_CMD_HAS_RESOURCE);
         }
         atomic_inc_16(&irp->irp_nonfcp_xchg_count);
 
         /*
+         * The iport_lock is currently held as a reader. The locking protocol
+         * requires it to be held as a writer in order to modify
+         * iport_nrps_login, so drop it and re-acquire it as a writer.
+         */
+        rw_exit(&iport->iport_lock);
+        rw_enter(&iport->iport_lock, RW_WRITER);
+
+        /*
          * Grab the remote port lock while we modify the port state.
          * we should not drop the fca port lock (as a reader) until we
          * modify the remote port state.
          */
         rw_enter(&irp->irp_lock, RW_WRITER);

@@ -1055,16 +1070,18 @@
                 if (cmd->cmd_rp != rp) {
                         skipped++;
                         continue;
                 }
                 if (cmd->cmd_type & ttc) {
-                        if (cmd->cmd_type == FCT_CMD_FCP_XCHG)
+                        if (cmd->cmd_type == FCT_CMD_FCP_XCHG) {
                                 fct_queue_scsi_task_for_termination(cmd,
                                     FCT_ABORTED);
-                        else
+                        } else {
+                                fct_cmd_unlink_els(irp, icmd);
                                 fct_q_for_termination_lock_held(iport, icmd,
                                     FCT_ABORTED);
+                        }
                         cleaned++;
                 } else {
                         skipped++;
                 }
         }

@@ -1091,12 +1108,11 @@
 fct_dequeue_els(fct_i_remote_port_t *irp)
 {
         fct_i_cmd_t *icmd;
 
         rw_enter(&irp->irp_lock, RW_WRITER);
-        icmd = irp->irp_els_list;
-        irp->irp_els_list = icmd->icmd_next;
+        icmd = list_remove_head(&irp->irp_els_list);
         atomic_and_32(&icmd->icmd_flags, ~ICMD_IN_IRP_QUEUE);
         rw_exit(&irp->irp_lock);
 }
 
 fct_status_t

@@ -1187,16 +1203,18 @@
         fct_status_t             ret   = FCT_SUCCESS;
         fct_i_local_port_t      *iport = PORT_TO_IPORT(port);
         fct_i_remote_port_t     *irp   = RP_TO_IRP(rp);
 
         if (irp->irp_snn) {
-                kmem_free(irp->irp_snn, strlen(irp->irp_snn) + 1);
+                kmem_free(irp->irp_snn, irp->irp_snn_len);
                 irp->irp_snn = NULL;
+                irp->irp_snn_len = 0;
         }
         if (irp->irp_spn) {
-                kmem_free(irp->irp_spn, strlen(irp->irp_spn) + 1);
+                kmem_free(irp->irp_spn, irp->irp_spn_len);
                 irp->irp_spn = NULL;
+                irp->irp_spn_len = 0;
         }
 
         if ((ret = port->port_deregister_remote_port(port, rp)) !=
             FCT_SUCCESS) {
                 return (ret);

@@ -1247,11 +1265,11 @@
                 fct_i_remote_port_t *irp = *pirp;
                 disc_action_t ret = DISC_ACTION_NO_WORK;
                 int do_deregister = 0;
                 int irp_deregister_timer = 0;
 
-                if (irp->irp_els_list) {
+                if (!list_is_empty(&irp->irp_els_list)) {
                         ret |= fct_process_els(iport, irp);
                 }
 
                 irp_deregister_timer = irp->irp_deregister_timer;
                 if (irp_deregister_timer) {

@@ -1261,16 +1279,16 @@
                                 ret |= DISC_ACTION_DELAY_RESCAN;
                         }
                 }
                 suggested_action |= ret;
 
-                if (irp->irp_els_list == NULL) {
+                if (list_is_empty(&irp->irp_els_list)) {
                         mutex_exit(&iport->iport_worker_lock);
                         rw_enter(&iport->iport_lock, RW_WRITER);
                         rw_enter(&irp->irp_lock, RW_WRITER);
                         mutex_enter(&iport->iport_worker_lock);
-                        if (irp->irp_els_list == NULL) {
+                        if (list_is_empty(&irp->irp_els_list)) {
                                 if (!irp_deregister_timer ||
                                     (do_deregister &&
                                     !irp->irp_sa_elses_count &&
                                     !irp->irp_nsa_elses_count &&
                                     !irp->irp_fcp_xchg_count &&

@@ -1514,14 +1532,18 @@
         }
         atomic_dec_16(&irp->irp_sa_elses_count);
 
         if (ret == FCT_SUCCESS) {
                 if (cmd_type == FCT_CMD_RCVD_ELS) {
+                        rw_enter(&iport->iport_lock, RW_WRITER);
+                        rw_enter(&irp->irp_lock, RW_WRITER);
                         atomic_or_32(&irp->irp_flags, IRP_PLOGI_DONE);
                         atomic_inc_32(&iport->iport_nrps_login);
                         if (irp->irp_deregister_timer)
                                 irp->irp_deregister_timer = 0;
+                        rw_exit(&irp->irp_lock);
+                        rw_exit(&iport->iport_lock);
                 }
                 if (icmd_flags & ICMD_IMPLICIT) {
                         DTRACE_FC_5(rport__login__end,
                             fct_cmd_t, cmd,
                             fct_local_port_t, port,

@@ -2000,12 +2022,12 @@
 }
 
 disc_action_t
 fct_process_els(fct_i_local_port_t *iport, fct_i_remote_port_t *irp)
 {
-        fct_i_cmd_t     *cmd_to_abort = NULL;
-        fct_i_cmd_t     **ppcmd, *icmd;
+        list_t          cmd_to_abort;
+        fct_i_cmd_t     *next, *icmd;
         fct_cmd_t       *cmd;
         fct_els_t       *els;
         int             dq;
         disc_action_t   ret = DISC_ACTION_NO_WORK;
         uint8_t         op;

@@ -2022,18 +2044,20 @@
          * NOTE: There is a side effect, if a sa ELS (non PLOGI) is received
          * while a PLOGI is pending, it will kill itself and the PLOGI.
          * which is probably ok.
          */
         rw_enter(&irp->irp_lock, RW_WRITER);
-        ppcmd = &irp->irp_els_list;
-        while ((*ppcmd) != NULL) {
+        icmd = list_head(&irp->irp_els_list);
+        list_create(&cmd_to_abort, sizeof (fct_i_cmd_t),
+            offsetof(fct_i_cmd_t, icmd_node));
+        while (icmd != NULL) {
                 int special_prli_cond = 0;
                 dq = 0;
 
-                els = (fct_els_t *)((*ppcmd)->icmd_cmd)->cmd_specific;
+                els = (fct_els_t *)(icmd->icmd_cmd)->cmd_specific;
 
-                if (((*ppcmd)->icmd_cmd->cmd_type == FCT_CMD_RCVD_ELS) &&
+                if ((icmd->icmd_cmd->cmd_type == FCT_CMD_RCVD_ELS) &&
                     (els->els_req_payload[0] == ELS_OP_PRLI) &&
                     (irp->irp_flags & IRP_SOL_PLOGI_IN_PROGRESS)) {
                         /*
                          * The initiator sent a PRLI right after responding
                          * to PLOGI and we have not yet finished processing

@@ -2041,20 +2065,20 @@
                          * as the initiator may not retry it.
                          */
                         special_prli_cond = 1;
                 }
 
-                if ((*ppcmd)->icmd_flags & ICMD_BEING_ABORTED) {
+                if (icmd->icmd_flags & ICMD_BEING_ABORTED) {
                         dq = 1;
                 } else if (irp->irp_sa_elses_count > 1) {
                         dq = 1;
                         /* This els might have set the CLEANUP flag */
                         atomic_and_32(&irp->irp_flags, ~IRP_SESSION_CLEANUP);
                         stmf_trace(iport->iport_alias, "Killing ELS %x cond 1",
                             els->els_req_payload[0]);
                 } else if (irp->irp_sa_elses_count &&
-                    (((*ppcmd)->icmd_flags & ICMD_SESSION_AFFECTING) == 0)) {
+                    ((icmd->icmd_flags & ICMD_SESSION_AFFECTING) == 0)) {
                         stmf_trace(iport->iport_alias, "Killing ELS %x cond 2",
                             els->els_req_payload[0]);
                         dq = 1;
                 } else if (((irp->irp_flags & IRP_PLOGI_DONE) == 0) &&
                     (els->els_req_payload[0] != ELS_OP_PLOGI) &&

@@ -2063,39 +2087,34 @@
                         stmf_trace(iport->iport_alias, "Killing ELS %x cond 3",
                             els->els_req_payload[0]);
                         dq = 1;
                 }
 
+                next = list_next(&irp->irp_els_list, icmd);
                 if (dq) {
-                        fct_i_cmd_t *c = (*ppcmd)->icmd_next;
-
-                        if ((*ppcmd)->icmd_flags & ICMD_SESSION_AFFECTING)
+                        list_remove(&irp->irp_els_list, icmd);
+                        if (icmd->icmd_flags & ICMD_SESSION_AFFECTING)
                                 atomic_dec_16(&irp->irp_sa_elses_count);
                         else
                                 atomic_dec_16(&irp->irp_nsa_elses_count);
-                        (*ppcmd)->icmd_next = cmd_to_abort;
-                        cmd_to_abort = *ppcmd;
-                        *ppcmd = c;
-                } else {
-                        ppcmd = &((*ppcmd)->icmd_next);
+                        list_insert_head(&cmd_to_abort, icmd);
                 }
+                icmd = next;
         }
         rw_exit(&irp->irp_lock);
 
-        while (cmd_to_abort) {
-                fct_i_cmd_t *c = cmd_to_abort->icmd_next;
+        while (!list_is_empty(&cmd_to_abort)) {
+                fct_i_cmd_t *c = list_remove_head(&cmd_to_abort);
 
-                atomic_and_32(&cmd_to_abort->icmd_flags, ~ICMD_IN_IRP_QUEUE);
-                fct_queue_cmd_for_termination(cmd_to_abort->icmd_cmd,
-                    FCT_ABORTED);
-                cmd_to_abort = c;
+                atomic_and_32(&c->icmd_flags, ~ICMD_IN_IRP_QUEUE);
+                fct_queue_cmd_for_termination(c->icmd_cmd, FCT_ABORTED);
         }
 
         /*
          * pick from the top of the queue
          */
-        icmd = irp->irp_els_list;
+        icmd = list_head(&irp->irp_els_list);
         if (icmd == NULL) {
                 /*
                  * The cleanup took care of everything.
                  */
 

@@ -2177,20 +2196,26 @@
                 bcopy(els->els_resp_payload + 20, irp->irp_rp->rp_pwwn, 8);
                 bcopy(els->els_resp_payload + 28, irp->irp_rp->rp_nwwn, 8);
 
                 stmf_wwn_to_devid_desc((scsi_devid_desc_t *)irp->irp_id,
                     irp->irp_rp->rp_pwwn, PROTOCOL_FIBRE_CHANNEL);
+                rw_enter(&iport->iport_lock, RW_WRITER);
+                rw_enter(&irp->irp_lock, RW_WRITER);
                 atomic_or_32(&irp->irp_flags, IRP_PLOGI_DONE);
                 atomic_inc_32(&iport->iport_nrps_login);
                 if (irp->irp_deregister_timer) {
                         irp->irp_deregister_timer = 0;
                         irp->irp_dereg_count = 0;
                 }
+                rw_exit(&irp->irp_lock);
+                rw_exit(&iport->iport_lock);
         }
 
         if (irp && (els->els_req_payload[0] == ELS_OP_PLOGI)) {
+                rw_enter(&irp->irp_lock, RW_WRITER);
                 atomic_and_32(&irp->irp_flags, ~IRP_SOL_PLOGI_IN_PROGRESS);
+                rw_exit(&irp->irp_lock);
         }
         atomic_or_32(&icmd->icmd_flags, ICMD_CMD_COMPLETE);
         stmf_trace(iport->iport_alias, "Sol ELS %x (%s) completed with "
             "status %llx, did/%x", op, FCT_ELS_NAME(op),
             icmd->icmd_cmd->cmd_comp_status, icmd->icmd_cmd->cmd_rportid);

@@ -2216,16 +2241,15 @@
         num_to_release = (total + 1 - max_active) / 2;
 
         mutex_exit(&iport->iport_worker_lock);
         for (ndx = 0; ndx < num_to_release; ndx++) {
                 mutex_enter(&iport->iport_cached_cmd_lock);
-                icmd = iport->iport_cached_cmdlist;
-                if (icmd == NULL) {
+                if (list_is_empty(&iport->iport_cached_cmdlist)) {
                         mutex_exit(&iport->iport_cached_cmd_lock);
                         break;
                 }
-                iport->iport_cached_cmdlist = icmd->icmd_next;
+                icmd = list_remove_head(&iport->iport_cached_cmdlist);
                 iport->iport_cached_ncmds--;
                 mutex_exit(&iport->iport_cached_cmd_lock);
                 atomic_dec_32(&iport->iport_total_alloced_ncmds);
                 fct_free(icmd->icmd_cmd);
         }

@@ -2290,10 +2314,21 @@
                                 }
                                 prev_icmd->icmd_solcmd_next = next_icmd;
                         }
 
                         icmd->icmd_cb = NULL;
+
+                        /*
+                         * If the command has non-NULL icmd_node pointers,
+                         * it has been linked onto the iport_abort_queue.
+                         * Since the iport_worker_lock is held, the command
+                         * can be removed from that queue before it is freed.
+                         */
+                        if (icmd->icmd_node.list_next != NULL) {
+                                list_remove(&iport->iport_abort_queue, icmd);
+                        }
+
                         mutex_exit(&iport->iport_worker_lock);
                         fct_cmd_free(icmd->icmd_cmd);
                         mutex_enter(&iport->iport_worker_lock);
                 } else {
                         /*

@@ -2426,14 +2461,15 @@
                 /*
                  * Release previous resource, then allocate needed resource
                  */
                 sn = query_irp->irp_snn;
                 if (sn) {
-                        kmem_free(sn, strlen(sn) + 1);
+                        kmem_free(sn, query_irp->irp_snn_len);
                 }
 
                 query_irp->irp_snn = NULL;
+                query_irp->irp_snn_len = 0;
                 sn = kmem_zalloc(snlen + 1, KM_SLEEP);
                 (void) strncpy(sn, (char *)
                     ICMD_TO_CT(icmd)->ct_resp_payload + 17, snlen);
                 if (strlen(sn) != snlen) {
                         stmf_trace(ICMD_TO_IPORT(icmd)->iport_alias,

@@ -2444,10 +2480,12 @@
 
                 /*
                  * Update symbolic node name
                  */
                 query_irp->irp_snn = sn;
+                if (sn != NULL)
+                        query_irp->irp_snn_len = snlen + 1;
                 if ((query_irp->irp_flags & IRP_SCSI_SESSION_STARTED) &&
                     (query_irp->irp_session)) {
                         query_irp->irp_session->ss_rport_alias =
                             query_irp->irp_snn;
                 }

@@ -2654,13 +2692,15 @@
         if (query_irp) {
                 spnlen = resp[16];
                 if (spnlen > 0) {
                         if (query_irp->irp_spn) {
                                 kmem_free(query_irp->irp_spn,
-                                    strlen(query_irp->irp_spn) + 1);
+                                    query_irp->irp_spn_len);
                         }
-                        query_irp->irp_spn = kmem_zalloc(spnlen + 1, KM_SLEEP);
+                        query_irp->irp_spn_len = spnlen + 1;
+                        query_irp->irp_spn = kmem_zalloc(
+                            query_irp->irp_spn_len, KM_SLEEP);
                         (void) strncpy(query_irp->irp_spn,
                             (char *)resp + 17, spnlen);
                 }
         }
         rw_exit(&iport->iport_lock);