Print this page
MFV: illumos-joyent@61dc3dec4f82a3e13e94609a0a83d5f66c64e760
OS-6846 want i40e multi-group support
OS-7372 i40e_alloc_ring_mem() unwinds when it shouldn't
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Author: Ryan Zezeski <rpz@joyent.com>
NEX-13226 xvv710 25Gb NIC panics system under load
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-7822 40Gb Intel XL710 NIC performance data
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-6977 Ericsson hangs on reboot with Intel XL710 NICs
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
@@ -8,11 +8,11 @@
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
- * Copyright (c) 2017, Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
* Copyright 2017 Tegile Systems, Inc. All rights reserved.
*/
/*
* -------------------------
@@ -227,16 +227,24 @@
reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
}
+/*
+ * The next two functions enable/disable the reception of interrupts
+ * on the given vector. Only vectors 1..N are programmed by these
+ * functions; vector 0 is special and handled by a different register,
+ * so callers must pass a vector greater than zero (this is asserted).
+ * We must subtract one from the vector when indexing PFINT_DYN_CTLN
+ * because i40e implicitly adds one to the register index, i.e.
+ * PFINT_DYN_CTLN[N] controls interrupt vector N + 1. See section
+ * 10.2.2.10.13 for more details.
+ */
static void
i40e_intr_io_enable(i40e_t *i40e, int vector)
{
uint32_t reg;
i40e_hw_t *hw = &i40e->i40e_hw_space;
+ ASSERT3S(vector, >, 0);
reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
(I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
}
@@ -245,10 +253,11 @@
i40e_intr_io_disable(i40e_t *i40e, int vector)
{
uint32_t reg;
i40e_hw_t *hw = &i40e->i40e_hw_space;
+ ASSERT3S(vector, >, 0);
reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
}
/*
@@ -373,53 +382,113 @@
i40e_intr_adminq_disable(i40e);
}
/*
- * Enable all of the queues and set the corresponding LNKLSTN registers. Note
- * that we always enable queues as interrupt sources, even though we don't
- * enable the MSI-X interrupt vectors.
+ * Set the head of the interrupt linked list. The PFINT_LNKLSTN[N]
+ * register actually refers to the 'N + 1' interrupt vector. E.g.,
+ * PFINT_LNKLSTN[0] refers to interrupt vector 1.
+ *
+ * Note that the 'vector' argument is used directly as the register
+ * index N, so callers pass the interrupt vector minus one. 'queue' is
+ * the index of the first queue on the list and is always recorded as
+ * an Rx queue.
*/
static void
-i40e_intr_init_queue_msix(i40e_t *i40e)
+i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue)
{
- i40e_hw_t *hw = &i40e->i40e_hw_space;
uint32_t reg;
- int i;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
- /*
- * Map queues to MSI-X interrupts. Queue i is mapped to vector i + 1.
- * Note that we skip the ITR logic for the moment, just to make our
- * lives as explicit and simple as possible.
+ reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+ I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg);
+ DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg);
+}
+
+/*
+ * Set the QINT_RQCTL[queue] register. The next queue is always the Tx
+ * queue associated with this Rx queue. Unlike PFINT_LNKLSTN, the
+ * vector should be the actual vector this queue is on -- i.e., it
+ * should be equal to itrq_rx_intrvec.
*/
- for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
- i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
+static void
+i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue)
+{
+ uint32_t reg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
- reg = (i << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
- (I40E_QUEUE_TYPE_RX <<
- I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
- I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg);
+ ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec);
- reg =
- (itrq->itrq_rx_intrvec << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
+ reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
(I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
- (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+ (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
(I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
I40E_QINT_RQCTL_CAUSE_ENA_MASK;
- I40E_WRITE_REG(hw, I40E_QINT_RQCTL(i), reg);
+ I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
+ DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg);
+}
- reg =
- (itrq->itrq_tx_intrvec << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
- (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
- (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
+/*
+ * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is
+ * either the Rx queue of another TRQP, or EOL.
+ *
+ * 'vector' must be the actual vector this queue is on, i.e. equal to
+ * itrq_tx_intrvec (this is asserted). 'next_queue' is written as-is
+ * into the next-queue index field and the next-queue type is always
+ * programmed as Rx; the caller passes I40E_QUEUE_TYPE_EOL as
+ * 'next_queue' when there is no further queue on this vector.
+ */
+static void
+i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue)
+{
+ uint32_t reg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec);
+
+ reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
+ (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
+ (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
(I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) |
I40E_QINT_TQCTL_CAUSE_ENA_MASK;
- I40E_WRITE_REG(hw, I40E_QINT_TQCTL(i), reg);
+ I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg);
+ DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg);
+}
+
+/*
+ * Program the interrupt linked list. Each vector has a linked list of
+ * queues which act as event sources for that vector. When one of
+ * those sources has an event the associated interrupt vector is
+ * fired. This mapping must match the mapping found in
+ * i40e_map_intrs_to_vectors().
+ *
+ * Queue qidx is chained to queue 'qidx + intr_count'. Valid queue
+ * indexes are 0 through i40e_num_trqpairs - 1, so the chain is
+ * terminated with EOL as soon as the next index reaches
+ * i40e_num_trqpairs.
+ *
+ * See section 7.5.3 for more information about the configuration of
+ * the interrupt linked list.
+ */
+static void
+i40e_intr_init_queue_msix(i40e_t *i40e)
+{
+ uint_t intr_count;
+
+ /*
+ * The 0th vector is for 'Other Interrupts' only (subject to
+ * change in the future).
+ */
+ intr_count = i40e->i40e_intr_count - 1;
+
+ for (uint_t vec = 0; vec < intr_count; vec++) {
+ boolean_t head = B_TRUE;
+
+ for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs;
+ qidx += intr_count) {
+ uint_t next_qidx = qidx + intr_count;
+
+ next_qidx = (next_qidx >= i40e->i40e_num_trqpairs) ?
+ I40E_QUEUE_TYPE_EOL : next_qidx;
+
+ if (head) {
+ i40e_set_lnklstn(i40e, vec, qidx);
+ head = B_FALSE;
}
+ i40e_set_rqctl(i40e, vec + 1, qidx);
+ i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx);
+ }
+ }
}
/*
* Set up a single queue to share the admin queue interrupt in the non-MSI-X
* world. Note we do not enable the queue as an interrupt cause at this time. We
@@ -586,15 +655,22 @@
if (ret != I40E_SUCCESS)
break;
opcode = LE_16(evt.desc.opcode);
switch (opcode) {
- case i40e_aqc_opc_get_link_status:
- mutex_enter(&i40e->i40e_general_lock);
- i40e_link_check(i40e);
- mutex_exit(&i40e->i40e_general_lock);
- break;
+ /*
+ * Disable link checks for NEX-6977. With the fibers unplugged
+ * we can end up receiving too many link check interrupts,
+ * saturating one CPU for each link. This can cause system hangs
+ * at boot or shutdown when the system is running single-threaded.
+ *
+ * case i40e_aqc_opc_get_link_status:
+ * mutex_enter(&i40e->i40e_general_lock);
+ * i40e_link_check(i40e);
+ * mutex_exit(&i40e->i40e_general_lock);
+ * break;
+ */
default:
/*
* Longer term we'll want to enable other causes here
* and get these cleaned up and doing something.
*/
@@ -602,35 +678,30 @@
}
}
}
static void
-i40e_intr_rx_work(i40e_t *i40e, int queue)
+i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
{
mblk_t *mp = NULL;
- i40e_trqpair_t *itrq;
- ASSERT(queue < i40e->i40e_num_trqpairs);
- itrq = &i40e->i40e_trqpairs[queue];
-
mutex_enter(&itrq->itrq_rx_lock);
if (!itrq->itrq_intr_poll)
mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
mutex_exit(&itrq->itrq_rx_lock);
- if (mp != NULL) {
+ if (mp == NULL)
+ return;
+
mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
itrq->itrq_rxgen);
- }
}
+/* ARGSUSED */
static void
-i40e_intr_tx_work(i40e_t *i40e, int queue)
+i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
{
- i40e_trqpair_t *itrq;
-
- itrq = &i40e->i40e_trqpairs[queue];
i40e_tx_recycle_ring(itrq);
}
/*
* At the moment, the only 'other' interrupt on ICR0 that we handle is the
@@ -663,16 +734,22 @@
I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);
i40e_intr_adminq_enable(i40e);
}
+/*
+ * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of
+ * the MSI-X interrupt sequence.
+ */
uint_t
i40e_intr_msix(void *arg1, void *arg2)
{
i40e_t *i40e = (i40e_t *)arg1;
- int vector_idx = (int)(uintptr_t)arg2;
+ uint_t vector_idx = (uint_t)(uintptr_t)arg2;
+ ASSERT3U(vector_idx, <, i40e->i40e_intr_count);
+
/*
* When using MSI-X interrupts, vector 0 is always reserved for the
* adminq at this time. Though longer term, we'll want to also bridge
* some I/O to them.
*/
@@ -679,22 +756,42 @@
if (vector_idx == 0) {
i40e_intr_other_work(i40e);
return (DDI_INTR_CLAIMED);
}
- i40e_intr_rx_work(i40e, vector_idx - 1);
- i40e_intr_tx_work(i40e, vector_idx - 1);
- i40e_intr_io_enable(i40e, vector_idx);
+ ASSERT3U(vector_idx, >, 0);
+ /*
+ * We determine the queue indexes via simple arithmetic (as
+ * opposed to keeping explicit state like a bitmap). While
+ * convenient, it does mean that i40e_map_intrs_to_vectors(),
+ * i40e_intr_init_queue_msix(), and this function must be
+ * modified as a unit.
+ *
+ * We subtract 1 from the vector to offset the addition we
+ * performed during i40e_map_intrs_to_vectors().
+ */
+ for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs;
+ i += (i40e->i40e_intr_count - 1)) {
+ i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
+
+ ASSERT3U(i, <, i40e->i40e_num_trqpairs);
+ ASSERT3P(itrq, !=, NULL);
+ i40e_intr_rx_work(i40e, itrq);
+ i40e_intr_tx_work(i40e, itrq);
+ }
+
+ i40e_intr_io_enable(i40e, vector_idx);
return (DDI_INTR_CLAIMED);
}
static uint_t
i40e_intr_notx(i40e_t *i40e, boolean_t shared)
{
i40e_hw_t *hw = &i40e->i40e_hw_space;
uint32_t reg;
+ i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0];
int ret = DDI_INTR_CLAIMED;
if (shared == B_TRUE) {
mutex_enter(&i40e->i40e_general_lock);
if (i40e->i40e_state & I40E_SUSPENDED) {
@@ -720,14 +817,14 @@
if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
i40e_intr_adminq_work(i40e);
if (reg & I40E_INTR_NOTX_RX_MASK)
- i40e_intr_rx_work(i40e, 0);
+ i40e_intr_rx_work(i40e, itrq);
if (reg & I40E_INTR_NOTX_TX_MASK)
- i40e_intr_tx_work(i40e, 0);
+ i40e_intr_tx_work(i40e, itrq);
done:
i40e_intr_adminq_enable(i40e);
return (ret);