1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 * Copyright 2017 Tegile Systems, Inc. All rights reserved.
15 */
16
17 /*
18 * -------------------------
19 * Interrupt Handling Theory
20 * -------------------------
21 *
22 * There are a couple different sets of interrupts that we need to worry about:
23 *
24 * - Interrupts from receive queues
25 * - Interrupts from transmit queues
26 * - 'Other Interrupts', such as the administrative queue
27 *
28 * 'Other Interrupts' are asynchronous events such as a link status change event
29 * being posted to the administrative queue, unrecoverable ECC errors, and more.
30 * If we have something being posted to the administrative queue, then we go
31 * through and process it, because it's generally enabled as a separate logical
32 * interrupt. Note, we may need to do more here eventually. To re-enable the
33 * interrupts from the 'Other Interrupts' section, we need to clear the PBA and
34 * write ENA to PFINT_ICR0.
35 *
36 * Interrupts from the transmit and receive queues indicates that our requests
37 * have been processed. In the rx case, it means that we have data that we
38 * should take a look at and send up the stack. In the tx case, it means that
39 * data which we got from MAC has now been sent out on the wire and we can free
40 * the associated data. Most of the logic for acting upon the presence of this
 * data can be found in i40e_transceiver.c which handles all of the DMA, rx, and
42 * tx operations. This file is dedicated to handling and dealing with interrupt
43 * processing.
44 *
45 * All devices supported by this driver support three kinds of interrupts:
46 *
47 * o Extended Message Signaled Interrupts (MSI-X)
48 * o Message Signaled Interrupts (MSI)
49 * o Legacy PCI interrupts (INTx)
50 *
51 * Generally speaking the hardware logically handles MSI and INTx the same and
52 * restricts us to only using a single interrupt, which isn't the interesting
53 * case. With MSI-X available, each physical function of the device provides the
54 * opportunity for multiple interrupts which is what we'll focus on.
55 *
56 * --------------------
57 * Interrupt Management
58 * --------------------
59 *
60 * By default, the admin queue, which consists of the asynchronous other
61 * interrupts is always bound to MSI-X vector zero. Next, we spread out all of
62 * the other interrupts that we have available to us over the remaining
63 * interrupt vectors.
64 *
65 * This means that there may be multiple queues, both tx and rx, which are
66 * mapped to the same interrupt. When the interrupt fires, we'll have to check
67 * all of them for servicing, before we go through and indicate that the
68 * interrupt is claimed.
69 *
70 * The hardware provides the means of mapping various queues to MSI-X interrupts
 * by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These
72 * registers can also be used to enable and disable whether or not the queue is
73 * a source of interrupts. As part of this, the hardware requires that we
74 * maintain a linked list of queues for each interrupt vector. While it may seem
 * like this is only there for the purposes of ITRs, that's not the case. The
76 * first queue must be programmed in I40E_QINT_LNKLSTN(%vector) register. Each
77 * queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL
78 * register.
79 *
80 * Finally, the individual interrupt vector itself has the ability to be enabled
81 * and disabled. The overall interrupt is controlled through the
82 * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt
83 * as a whole.
84 *
85 * Note that this means that both the individual queue and the interrupt as a
86 * whole can be toggled and re-enabled.
87 *
88 * -------------------
89 * Non-MSIX Management
90 * -------------------
91 *
92 * We may have a case where the Operating System is unable to actually allocate
93 * any MSI-X to the system. In such a world, there is only one transmit/receive
94 * queue pair and it is bound to the same interrupt with index zero. The
95 * hardware doesn't allow us access to additional interrupt vectors in these
96 * modes. Note that technically we could support more transmit/receive queues if
97 * we wanted.
98 *
99 * In this world, because the interrupts for the admin queue and traffic are
100 * mixed together, we have to consult ICR0 to determine what has occurred. The
101 * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which
102 * allows us to set a specific bit in ICR0. There are up to seven such bits;
103 * however, we only use the bit 0 and 1 for the rx and tx queue respectively.
104 * These are contained by the I40E_INTR_NOTX_{R|T}X_QUEUE and
105 * I40E_INTR_NOTX_{R|T}X_MASK registers respectively.
106 *
107 * Unfortunately, these corresponding queue bits have no corresponding entry in
108 * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we
109 * end up enabling it on the queue registers rather than on the MSI-X registers.
110 * In the MSI-X world, because they can be enabled and disabled, this is
111 * different and the queues can always be enabled and disabled, but the
112 * interrupts themselves are toggled (ignoring the question of interrupt
113 * blanking for polling on rings).
114 *
115 * Finally, we still have to set up the interrupt linked list, but the list is
116 * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to
117 * one of the other MSI-X registers.
118 *
119 * --------------------
120 * Interrupt Moderation
121 * --------------------
122 *
123 * The XL710 hardware has three different interrupt moderation registers per
124 * interrupt. Unsurprisingly, we use these for:
125 *
126 * o RX interrupts
127 * o TX interrupts
128 * o 'Other interrupts' (link status change, admin queue, etc.)
129 *
130 * By default, we throttle 'other interrupts' the most, then TX interrupts, and
131 * then RX interrupts. The default values for these were based on trying to
132 * reason about both the importance and frequency of events. Generally speaking
133 * 'other interrupts' are not very frequent and they're not important for the
134 * I/O data path in and of itself (though they may indicate issues with the I/O
135 * data path).
136 *
137 * On the flip side, when we're not polling, RX interrupts are very important.
138 * The longer we wait for them, the more latency that we inject into the system.
139 * However, if we allow interrupts to occur too frequently, we risk a few
140 * problems:
141 *
142 * 1) Abusing system resources. Without proper interrupt blanking and polling,
143 * we can see upwards of 200k-300k interrupts per second on the system.
144 *
145 * 2) Not enough data coalescing to enable polling. In other words, the more
146 * data that we allow to build up, the more likely we'll be able to enable
147 * polling mode and allowing us to better handle bulk data.
148 *
149 * In-between the 'other interrupts' and the TX interrupts we have the
150 * reclamation of TX buffers. This operation is not quite as important as we
151 * generally size the ring large enough that we should be able to reclaim a
152 * substantial amount of the descriptors that we have used per interrupt. So
153 * while it's important that this interrupt occur, we don't necessarily need it
154 * firing as frequently as RX; it doesn't, on its own, induce additional latency
155 * into the system.
156 *
157 * Based on all this we currently assign static ITR values for the system. While
158 * we could move to a dynamic system (the hardware supports that), we'd want to
159 * make sure that we're seeing problems from this that we believe would be
160 * generally helped by the added complexity.
161 *
162 * Based on this, the default values that we have allow for the following
163 * interrupt thresholds:
164 *
165 * o 20k interrupts/s for RX
166 * o 5k interrupts/s for TX
 * o 2k interrupts/s for 'Other Interrupts'
168 */
169
170 #include "i40e_sw.h"
171
172 #define I40E_INTR_NOTX_QUEUE 0
173 #define I40E_INTR_NOTX_INTR 0
174 #define I40E_INTR_NOTX_RX_QUEUE 0
175 #define I40E_INTR_NOTX_RX_MASK (1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT)
176 #define I40E_INTR_NOTX_TX_QUEUE 1
177 #define I40E_INTR_NOTX_TX_MASK (1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT)
178
179 void
180 i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
181 {
182 int i;
183 i40e_hw_t *hw = &i40e->i40e_hw_space;
184
185 VERIFY3U(val, <=, I40E_MAX_ITR);
186 VERIFY3U(itr, <, I40E_ITR_INDEX_NONE);
187
188 /*
189 * No matter the interrupt mode, the ITR for other interrupts is always
190 * on interrupt zero and the same is true if we're not using MSI-X.
191 */
192 if (itr == I40E_ITR_INDEX_OTHER ||
193 i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
194 I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val);
195 return;
196 }
197
198 for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
199 I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i), val);
200 }
201 }
202
203 /*
204 * Re-enable the adminq. Note that the adminq doesn't have a traditional queue
205 * associated with it from an interrupt perspective and just lives on ICR0.
206 * However when MSI-X interrupts are not being used, then this also enables and
207 * disables those interrupts.
208 */
209 static void
210 i40e_intr_adminq_enable(i40e_t *i40e)
211 {
212 i40e_hw_t *hw = &i40e->i40e_hw_space;
213 uint32_t reg;
214
215 reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
216 I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
217 (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
218 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
219 i40e_flush(hw);
220 }
221
222 static void
223 i40e_intr_adminq_disable(i40e_t *i40e)
224 {
225 i40e_hw_t *hw = &i40e->i40e_hw_space;
226 uint32_t reg;
227
228 reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
229 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
230 }
231
232 /*
233 * The next two functions enable/disable the reception of interrupts
234 * on the given vector. Only vectors 1..N are programmed by these
235 * functions; vector 0 is special and handled by a different register.
236 * We must subtract one from the vector because i40e implicitly adds
237 * one to the vector value. See section 10.2.2.10.13 for more details.
238 */
239 static void
240 i40e_intr_io_enable(i40e_t *i40e, int vector)
241 {
242 uint32_t reg;
243 i40e_hw_t *hw = &i40e->i40e_hw_space;
244
245 ASSERT3S(vector, >, 0);
246 reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
247 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
248 (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
249 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
250 }
251
252 static void
253 i40e_intr_io_disable(i40e_t *i40e, int vector)
254 {
255 uint32_t reg;
256 i40e_hw_t *hw = &i40e->i40e_hw_space;
257
258 ASSERT3S(vector, >, 0);
259 reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
260 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
261 }
262
263 /*
264 * When MSI-X interrupts are being used, then we can enable the actual
265 * interrupts themselves. However, when they are not, we instead have to turn
266 * towards the queue's CAUSE_ENA bit and enable that.
267 */
268 void
269 i40e_intr_io_enable_all(i40e_t *i40e)
270 {
271 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
272 int i;
273
274 for (i = 1; i < i40e->i40e_intr_count; i++) {
275 i40e_intr_io_enable(i40e, i);
276 }
277 } else {
278 uint32_t reg;
279 i40e_hw_t *hw = &i40e->i40e_hw_space;
280
281 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
282 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
283 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
284
285 reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
286 reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
287 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
288 }
289 }
290
291 /*
292 * When MSI-X interrupts are being used, then we can disable the actual
293 * interrupts themselves. However, when they are not, we instead have to turn
294 * towards the queue's CAUSE_ENA bit and disable that.
295 */
296 void
297 i40e_intr_io_disable_all(i40e_t *i40e)
298 {
299 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
300 int i;
301
302 for (i = 1; i < i40e->i40e_intr_count; i++) {
303 i40e_intr_io_disable(i40e, i);
304 }
305 } else {
306 uint32_t reg;
307 i40e_hw_t *hw = &i40e->i40e_hw_space;
308
309 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
310 reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
311 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
312
313 reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
314 reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
315 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
316 }
317 }
318
319 /*
320 * As part of disabling the tx and rx queue's we're technically supposed to
321 * remove the linked list entries. The simplest way is to clear the LNKLSTN
322 * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF).
323 *
324 * Note all of the FM register access checks are performed by the caller.
325 */
326 void
327 i40e_intr_io_clear_cause(i40e_t *i40e)
328 {
329 int i;
330 i40e_hw_t *hw = &i40e->i40e_hw_space;
331
332 if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
333 uint32_t reg;
334 reg = I40E_QUEUE_TYPE_EOL;
335 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);
336 return;
337 }
338
339 for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
340 uint32_t reg;
341 #ifdef DEBUG
342 /*
343 * Verify that the interrupt in question is disabled. This is a
344 * prerequisite of modifying the data in question.
345 */
346 reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
347 VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
348 #endif
349 reg = I40E_QUEUE_TYPE_EOL;
350 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg);
351 }
352
353 i40e_flush(hw);
354 }
355
/*
 * Finalize interrupt handling. Mostly this disables the admin queue.
 * On DEBUG builds, first verify that the I/O interrupt teardown routines
 * (i40e_intr_io_disable_all() and i40e_intr_io_clear_cause()) have already
 * run: every I/O vector must be masked and every linked list emptied.
 */
void
i40e_intr_chip_fini(i40e_t *i40e)
{
#ifdef DEBUG
	int i;
	uint32_t reg;

	i40e_hw_t *hw = &i40e->i40e_hw_space;

	/*
	 * Take a look and verify that all other interrupts have been disabled
	 * and the interrupt linked lists have been zeroed.
	 */
	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
		for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
			/* INTENA must be clear: vector i is masked. */
			reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
			VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);

			/* The list head must be EOL: the list is empty. */
			reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i));
			VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
		}
	}
#endif

	i40e_intr_adminq_disable(i40e);
}
385
386 /*
387 * Set the head of the interrupt linked list. The PFINT_LNKLSTN[N]
388 * register actually refers to the 'N + 1' interrupt vector. E.g.,
389 * PFINT_LNKLSTN[0] refers to interrupt vector 1.
390 */
391 static void
392 i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue)
393 {
394 uint32_t reg;
395 i40e_hw_t *hw = &i40e->i40e_hw_space;
396
397 reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
398 (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
399
400 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg);
401 DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg);
402 }
403
404 /*
405 * Set the QINT_RQCTL[queue] register. The next queue is always the Tx
406 * queue associated with this Rx queue. Unlike PFINT_LNKLSTN, the
407 * vector should be the actual vector this queue is on -- i.e., it
408 * should be equal to itrq_rx_intrvec.
409 */
410 static void
411 i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue)
412 {
413 uint32_t reg;
414 i40e_hw_t *hw = &i40e->i40e_hw_space;
415
416 ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec);
417
418 reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
419 (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
420 (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
421 (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
422 I40E_QINT_RQCTL_CAUSE_ENA_MASK;
423
424 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
425 DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg);
426 }
427
428 /*
429 * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is
430 * either the Rx queue of another TRQP, or EOL.
431 */
432 static void
433 i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue)
434 {
435 uint32_t reg;
436 i40e_hw_t *hw = &i40e->i40e_hw_space;
437
438 ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec);
439
440 reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
441 (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
442 (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
443 (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) |
444 I40E_QINT_TQCTL_CAUSE_ENA_MASK;
445
446 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg);
447 DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg);
448 }
449
450 /*
451 * Program the interrupt linked list. Each vector has a linked list of
452 * queues which act as event sources for that vector. When one of
453 * those sources has an event the associated interrupt vector is
454 * fired. This mapping must match the mapping found in
455 * i40e_map_intrs_to_vectors().
456 *
457 * See section 7.5.3 for more information about the configuration of
458 * the interrupt linked list.
459 */
460 static void
461 i40e_intr_init_queue_msix(i40e_t *i40e)
462 {
463 uint_t intr_count;
464
465 /*
466 * The 0th vector is for 'Other Interrupts' only (subject to
467 * change in the future).
468 */
469 intr_count = i40e->i40e_intr_count - 1;
470
471 for (uint_t vec = 0; vec < intr_count; vec++) {
472 boolean_t head = B_TRUE;
473
474 for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs;
475 qidx += intr_count) {
476 uint_t next_qidx = qidx + intr_count;
477
478 next_qidx = (next_qidx > i40e->i40e_num_trqpairs) ?
479 I40E_QUEUE_TYPE_EOL : next_qidx;
480
481 if (head) {
482 i40e_set_lnklstn(i40e, vec, qidx);
483 head = B_FALSE;
484 }
485
486 i40e_set_rqctl(i40e, vec + 1, qidx);
487 i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx);
488 }
489 }
490 }
491
/*
 * Set up a single queue to share the admin queue interrupt in the non-MSI-X
 * world. Note we do not enable the queue as an interrupt cause at this time. We
 * don't have any other vector of control here, unlike with the MSI-X interrupt
 * case.
 */
static void
i40e_intr_init_queue_shared(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t reg;

	VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
	    i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);

	/*
	 * Root the linked list at the Rx side of the shared queue. NOTE:
	 * the FIRSTQ_TYPE shift below comes from the LNKLSTN register
	 * definition even though we write LNKLST0 — presumably the two
	 * registers share the same bit layout; confirm against the datasheet.
	 */
	reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) |
	    (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);

	/*
	 * Rx queue: report on MSI-X 0 index I40E_INTR_NOTX_RX_QUEUE (ICR0
	 * queue bit 0) and chain to the Tx side of the same queue. CAUSE_ENA
	 * is deliberately not set here; see the block comment above.
	 */
	reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
	    (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
	    (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) |
	    (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
	    (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);

	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);

	/*
	 * Tx queue: report on MSI-X 0 index I40E_INTR_NOTX_TX_QUEUE (ICR0
	 * queue bit 1) and terminate the linked list with EOL.
	 */
	reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
	    (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
	    (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) |
	    (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
	    (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);

	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
}
527
528 /*
529 * Enable the specified queue as a valid source of interrupts. Note, this should
530 * only be used as part of the GLDv3's interrupt blanking routines. The debug
531 * build assertions are specific to that.
532 */
533 void
534 i40e_intr_rx_queue_enable(i40e_trqpair_t *itrq)
535 {
536 uint32_t reg;
537 uint_t queue = itrq->itrq_index;
538 i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space;
539
540 ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
541 ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs);
542
543 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
544 ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK);
545 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
546 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
547 }
548
549 /*
550 * Disable the specified queue as a valid source of interrupts. Note, this
551 * should only be used as part of the GLDv3's interrupt blanking routines. The
552 * debug build assertions are specific to that.
553 */
554 void
555 i40e_intr_rx_queue_disable(i40e_trqpair_t *itrq)
556 {
557 uint32_t reg;
558 uint_t queue = itrq->itrq_index;
559 i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space;
560
561 ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
562 ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs);
563
564 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
565 ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==,
566 I40E_QINT_RQCTL_CAUSE_ENA_MASK);
567 reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
568 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
569 }
570
/*
 * Start up the various chip's interrupt handling. We not only configure the
 * adminq here, but we also go through and configure all of the actual queues,
 * the interrupt linked lists, and others.
 */
void
i40e_intr_chip_init(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t reg;

	/*
	 * Ensure that all non adminq interrupts are disabled at the chip level.
	 */
	i40e_intr_io_disable_all(i40e);

	/*
	 * Mask every ICR0 cause, then read ICR0 once to acknowledge anything
	 * already pending. NOTE(review): reading ICR0 is presumed to clear
	 * latched causes -- confirm against the datasheet.
	 */
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0);
	(void) I40E_READ_REG(hw, I40E_PFINT_ICR0);

	/*
	 * Always enable all of the other-class interrupts to be on their own
	 * ITR. This only needs to be set on interrupt zero, which has its own
	 * special setting.
	 */
	reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT;
	I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg);

	/*
	 * Enable interrupt types we expect to receive. At the moment, this
	 * is limited to the adminq; however, we'll want to review 11.2.2.9.22
	 * for more types here as we add support for detecting them, handling
	 * them, and resetting the device as appropriate.
	 */
	reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);

	/*
	 * Always set the interrupt linked list to empty. We'll come back and
	 * change this if MSI-X are actually on the scene.
	 */
	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL);

	i40e_intr_adminq_enable(i40e);

	/*
	 * Set up all of the queues and map them to interrupts based on the bit
	 * assignments.
	 */
	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
		i40e_intr_init_queue_msix(i40e);
	} else {
		i40e_intr_init_queue_shared(i40e);
	}

	/*
	 * Finally set all of the default ITRs for the interrupts. Note that the
	 * queues will have been set up above.
	 */
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr);
}
633
/*
 * Drain the admin receive queue (ARQ): repeatedly pull events off of it via
 * i40e_clean_arq_element() until the hardware reports nothing remaining.
 * Events are currently acknowledged but otherwise ignored; see the switch
 * below for the (disabled) link-status handling.
 */
static void
i40e_intr_adminq_work(i40e_t *i40e)
{
	struct i40e_hw *hw = &i40e->i40e_hw_space;
	struct i40e_arq_event_info evt;
	uint16_t remain = 1;

	/* The event buffer is backed by the pre-allocated i40e_aqbuf. */
	bzero(&evt, sizeof (struct i40e_arq_event_info));
	evt.buf_len = I40E_ADMINQ_BUFSZ;
	evt.msg_buf = i40e->i40e_aqbuf;

	/* 'remain' is updated by i40e_clean_arq_element() on each pass. */
	while (remain != 0) {
		enum i40e_status_code ret;
		uint16_t opcode;

		/*
		 * At the moment, the only error code that seems to be returned
		 * is one saying that there's no work. In such a case we leave
		 * this be.
		 */
		ret = i40e_clean_arq_element(hw, &evt, &remain);
		if (ret != I40E_SUCCESS)
			break;

		/* The descriptor's opcode is little-endian on the wire. */
		opcode = LE_16(evt.desc.opcode);
		switch (opcode) {
		/*
		 * Disable link checks for NEX-6977. With the fibers unplugged
		 * we can end up receiving too many link check interrupts,
		 * saturating one CPU for each link. This can cause system hangs
		 * at boot or shutdown when the system is running single-threaded.
		 *
		 * case i40e_aqc_opc_get_link_status:
		 * mutex_enter(&i40e->i40e_general_lock);
		 * i40e_link_check(i40e);
		 * mutex_exit(&i40e->i40e_general_lock);
		 * break;
		 */
		default:
			/*
			 * Longer term we'll want to enable other causes here
			 * and get these cleaned up and doing something.
			 */
			break;
		}
	}
}
681
682 static void
683 i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
684 {
685 mblk_t *mp = NULL;
686
687 mutex_enter(&itrq->itrq_rx_lock);
688 if (!itrq->itrq_intr_poll)
689 mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
690 mutex_exit(&itrq->itrq_rx_lock);
691
692 if (mp == NULL)
693 return;
694
695 mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
696 itrq->itrq_rxgen);
697 }
698
699 /* ARGSUSED */
700 static void
701 i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
702 {
703 i40e_tx_recycle_ring(itrq);
704 }
705
706 /*
707 * At the moment, the only 'other' interrupt on ICR0 that we handle is the
708 * adminq. We should go through and support the other notifications at some
709 * point.
710 */
711 static void
712 i40e_intr_other_work(i40e_t *i40e)
713 {
714 struct i40e_hw *hw = &i40e->i40e_hw_space;
715 uint32_t reg;
716
717 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
718 if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
719 DDI_FM_OK) {
720 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
721 atomic_or_32(&i40e->i40e_state, I40E_ERROR);
722 return;
723 }
724
725 if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
726 i40e_intr_adminq_work(i40e);
727
728 /*
729 * Make sure that the adminq interrupt is not masked and then explicitly
730 * enable the adminq and thus the other interrupt.
731 */
732 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
733 reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
734 I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);
735
736 i40e_intr_adminq_enable(i40e);
737 }
738
739 /*
740 * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of
741 * the MSI-X interrupt sequence.
742 */
743 uint_t
744 i40e_intr_msix(void *arg1, void *arg2)
745 {
746 i40e_t *i40e = (i40e_t *)arg1;
747 uint_t vector_idx = (uint_t)(uintptr_t)arg2;
748
749 ASSERT3U(vector_idx, <, i40e->i40e_intr_count);
750
751 /*
752 * When using MSI-X interrupts, vector 0 is always reserved for the
753 * adminq at this time. Though longer term, we'll want to also bridge
754 * some I/O to them.
755 */
756 if (vector_idx == 0) {
757 i40e_intr_other_work(i40e);
758 return (DDI_INTR_CLAIMED);
759 }
760
761 ASSERT3U(vector_idx, >, 0);
762
763 /*
764 * We determine the queue indexes via simple arithmetic (as
765 * opposed to keeping explicit state like a bitmap). While
766 * conveinent, it does mean that i40e_map_intrs_to_vectors(),
767 * i40e_intr_init_queue_msix(), and this function must be
768 * modified as a unit.
769 *
770 * We subtract 1 from the vector to offset the addition we
771 * performed during i40e_map_intrs_to_vectors().
772 */
773 for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs;
774 i += (i40e->i40e_intr_count - 1)) {
775 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
776
777 ASSERT3U(i, <, i40e->i40e_num_trqpairs);
778 ASSERT3P(itrq, !=, NULL);
779 i40e_intr_rx_work(i40e, itrq);
780 i40e_intr_tx_work(i40e, itrq);
781 }
782
783 i40e_intr_io_enable(i40e, vector_idx);
784 return (DDI_INTR_CLAIMED);
785 }
786
787 static uint_t
788 i40e_intr_notx(i40e_t *i40e, boolean_t shared)
789 {
790 i40e_hw_t *hw = &i40e->i40e_hw_space;
791 uint32_t reg;
792 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0];
793 int ret = DDI_INTR_CLAIMED;
794
795 if (shared == B_TRUE) {
796 mutex_enter(&i40e->i40e_general_lock);
797 if (i40e->i40e_state & I40E_SUSPENDED) {
798 mutex_exit(&i40e->i40e_general_lock);
799 return (DDI_INTR_UNCLAIMED);
800 }
801 mutex_exit(&i40e->i40e_general_lock);
802 }
803
804 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
805 if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
806 DDI_FM_OK) {
807 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
808 atomic_or_32(&i40e->i40e_state, I40E_ERROR);
809 return (DDI_INTR_CLAIMED);
810 }
811
812 if (reg == 0) {
813 if (shared == B_TRUE)
814 ret = DDI_INTR_UNCLAIMED;
815 goto done;
816 }
817
818 if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
819 i40e_intr_adminq_work(i40e);
820
821 if (reg & I40E_INTR_NOTX_RX_MASK)
822 i40e_intr_rx_work(i40e, itrq);
823
824 if (reg & I40E_INTR_NOTX_TX_MASK)
825 i40e_intr_tx_work(i40e, itrq);
826
827 done:
828 i40e_intr_adminq_enable(i40e);
829 return (ret);
830
831 }
832
833 /* ARGSUSED */
834 uint_t
835 i40e_intr_msi(void *arg1, void *arg2)
836 {
837 i40e_t *i40e = (i40e_t *)arg1;
838
839 return (i40e_intr_notx(i40e, B_FALSE));
840 }
841
842 /* ARGSUSED */
843 uint_t
844 i40e_intr_legacy(void *arg1, void *arg2)
845 {
846 i40e_t *i40e = (i40e_t *)arg1;
847
848 return (i40e_intr_notx(i40e, B_TRUE));
849 }