Print this page
MFV: illumos-joyent@61dc3dec4f82a3e13e94609a0a83d5f66c64e760
OS-6846 want i40e multi-group support
OS-7372 i40e_alloc_ring_mem() unwinds when it shouldn't
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Author: Ryan Zezeski <rpz@joyent.com>
NEX-13226 xvv710 25Gb NIC panics system under load
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-7822 40Gb Intel XL710 NIC performance data
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-6977 Ericsson hangs on reboot with Intel XL710 NICs
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>

*** 8,18 **** * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* ! * Copyright (c) 2017, Joyent, Inc. * Copyright 2017 Tegile Systems, Inc. All rights reserved. */ /* * ------------------------- --- 8,18 ---- * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* ! * Copyright 2018 Joyent, Inc. * Copyright 2017 Tegile Systems, Inc. All rights reserved. */ /* * -------------------------
*** 227,242 **** --- 227,250 ---- reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg); } + /* + * The next two functions enable/disable the reception of interrupts + * on the given vector. Only vectors 1..N are programmed by these + * functions; vector 0 is special and handled by a different register. + * We must subtract one from the vector because i40e implicitly adds + * one to the vector value. See section 10.2.2.10.13 for more details. + */ static void i40e_intr_io_enable(i40e_t *i40e, int vector) { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; + ASSERT3S(vector, >, 0); reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg); }
*** 245,254 **** --- 253,263 ---- i40e_intr_io_disable(i40e_t *i40e, int vector) { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; + ASSERT3S(vector, >, 0); reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg); } /*
*** 373,425 **** i40e_intr_adminq_disable(i40e); } /* ! * Enable all of the queues and set the corresponding LNKLSTN registers. Note ! * that we always enable queues as interrupt sources, even though we don't ! * enable the MSI-X interrupt vectors. */ static void ! i40e_intr_init_queue_msix(i40e_t *i40e) { - i40e_hw_t *hw = &i40e->i40e_hw_space; uint32_t reg; ! int i; ! /* ! * Map queues to MSI-X interrupts. Queue i is mapped to vector i + 1. ! * Note that we skip the ITR logic for the moment, just to make our ! * lives as explicit and simple as possible. */ ! for (i = 0; i < i40e->i40e_num_trqpairs; i++) { ! i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i]; ! reg = (i << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) | ! (I40E_QUEUE_TYPE_RX << ! I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); ! I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg); ! reg = ! (itrq->itrq_rx_intrvec << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | ! (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | I40E_QINT_RQCTL_CAUSE_ENA_MASK; ! I40E_WRITE_REG(hw, I40E_QINT_RQCTL(i), reg); ! reg = ! (itrq->itrq_tx_intrvec << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | ! (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | ! (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) | I40E_QINT_TQCTL_CAUSE_ENA_MASK; ! I40E_WRITE_REG(hw, I40E_QINT_TQCTL(i), reg); } } /* * Set up a single queue to share the admin queue interrupt in the non-MSI-X * world. Note we do not enable the queue as an interrupt cause at this time. We --- 382,494 ---- i40e_intr_adminq_disable(i40e); } /* ! * Set the head of the interrupt linked list. The PFINT_LNKLSTN[N] ! * register actually refers to the 'N + 1' interrupt vector. E.g., ! * PFINT_LNKLSTN[0] refers to interrupt vector 1. */ static void ! i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue) { uint32_t reg; ! 
i40e_hw_t *hw = &i40e->i40e_hw_space; ! reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) | ! (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); ! ! I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg); ! DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg); ! } ! ! /* ! * Set the QINT_RQCTL[queue] register. The next queue is always the Tx ! * queue associated with this Rx queue. Unlike PFINT_LNKLSTN, the ! * vector should be the actual vector this queue is on -- i.e., it ! * should be equal to itrq_rx_intrvec. */ ! static void ! i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue) ! { ! uint32_t reg; ! i40e_hw_t *hw = &i40e->i40e_hw_space; ! ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec); ! reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | ! (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | I40E_QINT_RQCTL_CAUSE_ENA_MASK; ! I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); ! DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg); ! } ! /* ! * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is ! * either the Rx queue of another TRQP, or EOL. ! */ ! static void ! i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue) ! { ! uint32_t reg; ! i40e_hw_t *hw = &i40e->i40e_hw_space; ! ! ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec); ! ! reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | ! (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | ! (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) | I40E_QINT_TQCTL_CAUSE_ENA_MASK; ! I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg); ! DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg); ! } ! ! /* ! * Program the interrupt linked list. Each vector has a linked list of ! * queues which act as event sources for that vector. When one of ! 
* those sources has an event the associated interrupt vector is ! * fired. This mapping must match the mapping found in ! * i40e_map_intrs_to_vectors(). ! * ! * See section 7.5.3 for more information about the configuration of ! * the interrupt linked list. ! */ ! static void ! i40e_intr_init_queue_msix(i40e_t *i40e) ! { ! uint_t intr_count; ! ! /* ! * The 0th vector is for 'Other Interrupts' only (subject to ! * change in the future). ! */ ! intr_count = i40e->i40e_intr_count - 1; ! ! for (uint_t vec = 0; vec < intr_count; vec++) { ! boolean_t head = B_TRUE; ! ! for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs; ! qidx += intr_count) { ! uint_t next_qidx = qidx + intr_count; ! ! next_qidx = (next_qidx > i40e->i40e_num_trqpairs) ? ! I40E_QUEUE_TYPE_EOL : next_qidx; ! ! if (head) { ! i40e_set_lnklstn(i40e, vec, qidx); ! head = B_FALSE; } + i40e_set_rqctl(i40e, vec + 1, qidx); + i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx); + } + } } /* * Set up a single queue to share the admin queue interrupt in the non-MSI-X * world. Note we do not enable the queue as an interrupt cause at this time. We
*** 586,600 **** if (ret != I40E_SUCCESS) break; opcode = LE_16(evt.desc.opcode); switch (opcode) { ! case i40e_aqc_opc_get_link_status: ! mutex_enter(&i40e->i40e_general_lock); ! i40e_link_check(i40e); ! mutex_exit(&i40e->i40e_general_lock); ! break; default: /* * Longer term we'll want to enable other causes here * and get these cleaned up and doing something. */ --- 655,676 ---- if (ret != I40E_SUCCESS) break; opcode = LE_16(evt.desc.opcode); switch (opcode) { ! /* ! * Disable link checks for NEX-6977. With the fibers unplugged ! * we can end up receiving too many link check interrupts, ! * saturating one CPU for each link. This can cause system hangs ! * at boot or shutdown when the system is running single-threaded. ! * ! * case i40e_aqc_opc_get_link_status: ! * mutex_enter(&i40e->i40e_general_lock); ! * i40e_link_check(i40e); ! * mutex_exit(&i40e->i40e_general_lock); ! * break; ! */ default: /* * Longer term we'll want to enable other causes here * and get these cleaned up and doing something. */
*** 602,636 **** } } } static void ! i40e_intr_rx_work(i40e_t *i40e, int queue) { mblk_t *mp = NULL; - i40e_trqpair_t *itrq; - ASSERT(queue < i40e->i40e_num_trqpairs); - itrq = &i40e->i40e_trqpairs[queue]; - mutex_enter(&itrq->itrq_rx_lock); if (!itrq->itrq_intr_poll) mp = i40e_ring_rx(itrq, I40E_POLL_NULL); mutex_exit(&itrq->itrq_rx_lock); ! if (mp != NULL) { mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp, itrq->itrq_rxgen); - } } static void ! i40e_intr_tx_work(i40e_t *i40e, int queue) { - i40e_trqpair_t *itrq; - - itrq = &i40e->i40e_trqpairs[queue]; i40e_tx_recycle_ring(itrq); } /* * At the moment, the only 'other' interrupt on ICR0 that we handle is the --- 678,707 ---- } } } static void ! i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq) { mblk_t *mp = NULL; mutex_enter(&itrq->itrq_rx_lock); if (!itrq->itrq_intr_poll) mp = i40e_ring_rx(itrq, I40E_POLL_NULL); mutex_exit(&itrq->itrq_rx_lock); ! if (mp == NULL) ! return; ! mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp, itrq->itrq_rxgen); } + /* ARGSUSED */ static void ! i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq) { i40e_tx_recycle_ring(itrq); } /* * At the moment, the only 'other' interrupt on ICR0 that we handle is the
*** 663,678 **** I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg); i40e_intr_adminq_enable(i40e); } uint_t i40e_intr_msix(void *arg1, void *arg2) { i40e_t *i40e = (i40e_t *)arg1; ! int vector_idx = (int)(uintptr_t)arg2; /* * When using MSI-X interrupts, vector 0 is always reserved for the * adminq at this time. Though longer term, we'll want to also bridge * some I/O to them. */ --- 734,755 ---- I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg); i40e_intr_adminq_enable(i40e); } + /* + * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of + * the MSI-X interrupt sequence. + */ uint_t i40e_intr_msix(void *arg1, void *arg2) { i40e_t *i40e = (i40e_t *)arg1; ! uint_t vector_idx = (uint_t)(uintptr_t)arg2; + ASSERT3U(vector_idx, <, i40e->i40e_intr_count); + /* * When using MSI-X interrupts, vector 0 is always reserved for the * adminq at this time. Though longer term, we'll want to also bridge * some I/O to them. */
*** 679,700 **** if (vector_idx == 0) { i40e_intr_other_work(i40e); return (DDI_INTR_CLAIMED); } ! i40e_intr_rx_work(i40e, vector_idx - 1); ! i40e_intr_tx_work(i40e, vector_idx - 1); ! i40e_intr_io_enable(i40e, vector_idx); return (DDI_INTR_CLAIMED); } static uint_t i40e_intr_notx(i40e_t *i40e, boolean_t shared) { i40e_hw_t *hw = &i40e->i40e_hw_space; uint32_t reg; int ret = DDI_INTR_CLAIMED; if (shared == B_TRUE) { mutex_enter(&i40e->i40e_general_lock); if (i40e->i40e_state & I40E_SUSPENDED) { --- 756,797 ---- if (vector_idx == 0) { i40e_intr_other_work(i40e); return (DDI_INTR_CLAIMED); } ! ASSERT3U(vector_idx, >, 0); + /* + * We determine the queue indexes via simple arithmetic (as + * opposed to keeping explicit state like a bitmap). While + * convenient, it does mean that i40e_map_intrs_to_vectors(), + * i40e_intr_init_queue_msix(), and this function must be + * modified as a unit. + * + * We subtract 1 from the vector to offset the addition we + * performed during i40e_map_intrs_to_vectors(). + */ + for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs; + i += (i40e->i40e_intr_count - 1)) { + i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i]; + + ASSERT3U(i, <, i40e->i40e_num_trqpairs); + ASSERT3P(itrq, !=, NULL); + i40e_intr_rx_work(i40e, itrq); + i40e_intr_tx_work(i40e, itrq); + } + + i40e_intr_io_enable(i40e, vector_idx); return (DDI_INTR_CLAIMED); } static uint_t i40e_intr_notx(i40e_t *i40e, boolean_t shared) { i40e_hw_t *hw = &i40e->i40e_hw_space; uint32_t reg; + i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0]; int ret = DDI_INTR_CLAIMED; if (shared == B_TRUE) { mutex_enter(&i40e->i40e_general_lock); if (i40e->i40e_state & I40E_SUSPENDED) {
*** 720,733 **** if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) i40e_intr_adminq_work(i40e); if (reg & I40E_INTR_NOTX_RX_MASK) ! i40e_intr_rx_work(i40e, 0); if (reg & I40E_INTR_NOTX_TX_MASK) ! i40e_intr_tx_work(i40e, 0); done: i40e_intr_adminq_enable(i40e); return (ret); --- 817,830 ---- if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) i40e_intr_adminq_work(i40e); if (reg & I40E_INTR_NOTX_RX_MASK) ! i40e_intr_rx_work(i40e, itrq); if (reg & I40E_INTR_NOTX_TX_MASK) ! i40e_intr_tx_work(i40e, itrq); done: i40e_intr_adminq_enable(i40e); return (ret);