Print this page
MFV: illumos-joyent@61dc3dec4f82a3e13e94609a0a83d5f66c64e760
OS-6846 want i40e multi-group support
OS-7372 i40e_alloc_ring_mem() unwinds when it shouldn't
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Author: Ryan Zezeski <rpz@joyent.com>
NEX-13226 XXV710 25Gb NIC panics system under load
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-7822 40Gb Intel XL710 NIC performance data
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-6977 Ericsson hangs on reboot with Intel XL710 NICs
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
   1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright (c) 2017, Joyent, Inc.
  14  * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
  15  */
  16 
  17 /*
  18  * -------------------------
  19  * Interrupt Handling Theory
  20  * -------------------------
  21  *
  22  * There are a couple different sets of interrupts that we need to worry about:
  23  *
  24  *   - Interrupts from receive queues
  25  *   - Interrupts from transmit queues
  26  *   - 'Other Interrupts', such as the administrative queue
  27  *
  28  * 'Other Interrupts' are asynchronous events such as a link status change event
  29  * being posted to the administrative queue, unrecoverable ECC errors, and more.
  30  * If we have something being posted to the administrative queue, then we go
  31  * through and process it, because it's generally enabled as a separate logical
  32  * interrupt. Note, we may need to do more here eventually. To re-enable the
  33  * interrupts from the 'Other Interrupts' section, we need to clear the PBA and


 212         i40e_hw_t *hw = &i40e->i40e_hw_space;
 213         uint32_t reg;
 214 
 215         reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
 216             I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
 217             (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
 218         I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
 219         i40e_flush(hw);
 220 }
 221 
 222 static void
 223 i40e_intr_adminq_disable(i40e_t *i40e)
 224 {
 225         i40e_hw_t *hw = &i40e->i40e_hw_space;
 226         uint32_t reg;
 227 
 228         reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
 229         I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
 230 }
 231 







 232 static void
 233 i40e_intr_io_enable(i40e_t *i40e, int vector)
 234 {
 235         uint32_t reg;
 236         i40e_hw_t *hw = &i40e->i40e_hw_space;
 237 

 238         reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
 239             I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
 240             (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
 241         I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
 242 }
 243 
 244 static void
 245 i40e_intr_io_disable(i40e_t *i40e, int vector)
 246 {
 247         uint32_t reg;
 248         i40e_hw_t *hw = &i40e->i40e_hw_space;
 249 

 250         reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
 251         I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
 252 }
 253 
 254 /*
 255  * When MSI-X interrupts are being used, then we can enable the actual
 256  * interrupts themselves. However, when they are not, we instead have to turn
 257  * towards the queue's CAUSE_ENA bit and enable that.
 258  */
 259 void
 260 i40e_intr_io_enable_all(i40e_t *i40e)
 261 {
 262         if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
 263                 int i;
 264 
 265                 for (i = 1; i < i40e->i40e_intr_count; i++) {
 266                         i40e_intr_io_enable(i40e, i);
 267                 }
 268         } else {
 269                 uint32_t reg;


 358 
 359         /*
 360          * Take a look and verify that all other interrupts have been disabled
 361          * and the interrupt linked lists have been zeroed.
 362          */
 363         if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
 364                 for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
 365                         reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
 366                         VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
 367 
 368                         reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i));
 369                         VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
 370                 }
 371         }
 372 #endif
 373 
 374         i40e_intr_adminq_disable(i40e);
 375 }
 376 
 377 /*
 378  * Enable all of the queues and set the corresponding LNKLSTN registers. Note
 379  * that we always enable queues as interrupt sources, even though we don't
 380  * enable the MSI-X interrupt vectors.
 381  */
 382 static void
 383 i40e_intr_init_queue_msix(i40e_t *i40e)
 384 {
 385         i40e_hw_t *hw = &i40e->i40e_hw_space;
 386         uint32_t reg;
 387         int i;
 388 
 389         /*
 390          * Map queues to MSI-X interrupts. Queue i is mapped to vector i + 1.
 391          * Note that we skip the ITR logic for the moment, just to make our
 392          * lives as explicit and simple as possible.








 393          */
 394         for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
 395                 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];



 396 
 397                 reg = (i << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
 398                     (I40E_QUEUE_TYPE_RX <<
 399                     I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
 400                 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg);
 401 
 402                 reg =
 403                     (itrq->itrq_rx_intrvec << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
 404                     (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
 405                     (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
 406                     (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
 407                     I40E_QINT_RQCTL_CAUSE_ENA_MASK;
 408 
 409                 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(i), reg);


 410 
 411                 reg =
 412                     (itrq->itrq_tx_intrvec << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
 413                     (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
 414                     (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |











 415                     (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) |
 416                     I40E_QINT_TQCTL_CAUSE_ENA_MASK;
 417 
 418                 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(i), reg);





































 419         }
 420 




 421 }
 422 
 423 /*
 424  * Set up a single queue to share the admin queue interrupt in the non-MSI-X
 425  * world. Note we do not enable the queue as an interrupt cause at this time. We
 426  * don't have any other vector of control here, unlike with the MSI-X interrupt
 427  * case.
 428  */
 429 static void
 430 i40e_intr_init_queue_shared(i40e_t *i40e)
 431 {
 432         i40e_hw_t *hw = &i40e->i40e_hw_space;
 433         uint32_t reg;
 434 
 435         VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
 436             i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);
 437 
 438         reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) |
 439             (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
 440         I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);


 571 
 572         bzero(&evt, sizeof (struct i40e_arq_event_info));
 573         evt.buf_len = I40E_ADMINQ_BUFSZ;
 574         evt.msg_buf = i40e->i40e_aqbuf;
 575 
 576         while (remain != 0) {
 577                 enum i40e_status_code ret;
 578                 uint16_t opcode;
 579 
 580                 /*
 581                  * At the moment, the only error code that seems to be returned
 582                  * is one saying that there's no work. In such a case we leave
 583                  * this be.
 584                  */
 585                 ret = i40e_clean_arq_element(hw, &evt, &remain);
 586                 if (ret != I40E_SUCCESS)
 587                         break;
 588 
 589                 opcode = LE_16(evt.desc.opcode);
 590                 switch (opcode) {
 591                 case i40e_aqc_opc_get_link_status:
 592                         mutex_enter(&i40e->i40e_general_lock);
 593                         i40e_link_check(i40e);
 594                         mutex_exit(&i40e->i40e_general_lock);
 595                         break;







 596                 default:
 597                         /*
 598                          * Longer term we'll want to enable other causes here
 599                          * and get these cleaned up and doing something.
 600                          */
 601                         break;
 602                 }
 603         }
 604 }
 605 
 606 static void
 607 i40e_intr_rx_work(i40e_t *i40e, int queue)
 608 {
 609         mblk_t *mp = NULL;
 610         i40e_trqpair_t *itrq;
 611 
 612         ASSERT(queue < i40e->i40e_num_trqpairs);
 613         itrq = &i40e->i40e_trqpairs[queue];
 614 
 615         mutex_enter(&itrq->itrq_rx_lock);
 616         if (!itrq->itrq_intr_poll)
 617                 mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
 618         mutex_exit(&itrq->itrq_rx_lock);
 619 
 620         if (mp != NULL) {


 621                 mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
 622                     itrq->itrq_rxgen);
 623         }
 624 }
 625 

 626 static void
 627 i40e_intr_tx_work(i40e_t *i40e, int queue)
 628 {
 629         i40e_trqpair_t *itrq;
 630 
 631         itrq = &i40e->i40e_trqpairs[queue];
 632         i40e_tx_recycle_ring(itrq);
 633 }
 634 
/*
 * At the moment, the only 'other' interrupt on ICR0 that we handle is the
 * adminq. We should go through and support the other notifications at some
 * point.
 */
static void
i40e_intr_other_work(i40e_t *i40e)
{
	struct i40e_hw *hw = &i40e->i40e_hw_space;
	uint32_t reg;

	/* Fetch the pending 'other interrupt' causes from ICR0. */
	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
	/*
	 * If the register access itself failed, report the fault via FMA,
	 * latch the error state, and bail without touching more registers.
	 */
	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
		return;
	}

	if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
		i40e_intr_adminq_work(i40e);

	/*
	 * Make sure that the adminq interrupt is not masked and then explicitly
	 * enable the adminq and thus the other interrupt.
	 */
	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
	reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);

	i40e_intr_adminq_enable(i40e);
}
 667 




 668 uint_t
 669 i40e_intr_msix(void *arg1, void *arg2)
 670 {
 671         i40e_t *i40e = (i40e_t *)arg1;
 672         int vector_idx = (int)(uintptr_t)arg2;
 673 


 674         /*
 675          * When using MSI-X interrupts, vector 0 is always reserved for the
 676          * adminq at this time. Though longer term, we'll want to also bridge
 677          * some I/O to them.
 678          */
 679         if (vector_idx == 0) {
 680                 i40e_intr_other_work(i40e);
 681                 return (DDI_INTR_CLAIMED);
 682         }
 683 
 684         i40e_intr_rx_work(i40e, vector_idx - 1);
 685         i40e_intr_tx_work(i40e, vector_idx - 1);
 686         i40e_intr_io_enable(i40e, vector_idx);
 687 





















 688         return (DDI_INTR_CLAIMED);
 689 }
 690 
 691 static uint_t
 692 i40e_intr_notx(i40e_t *i40e, boolean_t shared)
 693 {
 694         i40e_hw_t *hw = &i40e->i40e_hw_space;
 695         uint32_t reg;

 696         int ret = DDI_INTR_CLAIMED;
 697 
 698         if (shared == B_TRUE) {
 699                 mutex_enter(&i40e->i40e_general_lock);
 700                 if (i40e->i40e_state & I40E_SUSPENDED) {
 701                         mutex_exit(&i40e->i40e_general_lock);
 702                         return (DDI_INTR_UNCLAIMED);
 703                 }
 704                 mutex_exit(&i40e->i40e_general_lock);
 705         }
 706 
 707         reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
 708         if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
 709             DDI_FM_OK) {
 710                 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
 711                 atomic_or_32(&i40e->i40e_state, I40E_ERROR);
 712                 return (DDI_INTR_CLAIMED);
 713         }
 714 
 715         if (reg == 0) {
 716                 if (shared == B_TRUE)
 717                         ret = DDI_INTR_UNCLAIMED;
 718                 goto done;
 719         }
 720 
 721         if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
 722                 i40e_intr_adminq_work(i40e);
 723 
 724         if (reg & I40E_INTR_NOTX_RX_MASK)
 725                 i40e_intr_rx_work(i40e, 0);
 726 
 727         if (reg & I40E_INTR_NOTX_TX_MASK)
 728                 i40e_intr_tx_work(i40e, 0);
 729 
 730 done:
 731         i40e_intr_adminq_enable(i40e);
 732         return (ret);
 733 
 734 }
 735 
 736 /* ARGSUSED */
 737 uint_t
 738 i40e_intr_msi(void *arg1, void *arg2)
 739 {
 740         i40e_t *i40e = (i40e_t *)arg1;
 741 
 742         return (i40e_intr_notx(i40e, B_FALSE));
 743 }
 744 
 745 /* ARGSUSED */
 746 uint_t
 747 i40e_intr_legacy(void *arg1, void *arg2)
 748 {
   1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
  15  */
  16 
  17 /*
  18  * -------------------------
  19  * Interrupt Handling Theory
  20  * -------------------------
  21  *
  22  * There are a couple different sets of interrupts that we need to worry about:
  23  *
  24  *   - Interrupts from receive queues
  25  *   - Interrupts from transmit queues
  26  *   - 'Other Interrupts', such as the administrative queue
  27  *
  28  * 'Other Interrupts' are asynchronous events such as a link status change event
  29  * being posted to the administrative queue, unrecoverable ECC errors, and more.
  30  * If we have something being posted to the administrative queue, then we go
  31  * through and process it, because it's generally enabled as a separate logical
  32  * interrupt. Note, we may need to do more here eventually. To re-enable the
  33  * interrupts from the 'Other Interrupts' section, we need to clear the PBA and


 212         i40e_hw_t *hw = &i40e->i40e_hw_space;
 213         uint32_t reg;
 214 
 215         reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
 216             I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
 217             (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
 218         I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
 219         i40e_flush(hw);
 220 }
 221 
 222 static void
 223 i40e_intr_adminq_disable(i40e_t *i40e)
 224 {
 225         i40e_hw_t *hw = &i40e->i40e_hw_space;
 226         uint32_t reg;
 227 
 228         reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
 229         I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
 230 }
 231 
 232 /*
 233  * The next two functions enable/disable the reception of interrupts
 234  * on the given vector. Only vectors 1..N are programmed by these
 235  * functions; vector 0 is special and handled by a different register.
 236  * We must subtract one from the vector because i40e implicitly adds
 237  * one to the vector value. See section 10.2.2.10.13 for more details.
 238  */
 239 static void
 240 i40e_intr_io_enable(i40e_t *i40e, int vector)
 241 {
 242         uint32_t reg;
 243         i40e_hw_t *hw = &i40e->i40e_hw_space;
 244 
 245         ASSERT3S(vector, >, 0);
 246         reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
 247             I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
 248             (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
 249         I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
 250 }
 251 
 252 static void
 253 i40e_intr_io_disable(i40e_t *i40e, int vector)
 254 {
 255         uint32_t reg;
 256         i40e_hw_t *hw = &i40e->i40e_hw_space;
 257 
 258         ASSERT3S(vector, >, 0);
 259         reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
 260         I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
 261 }
 262 
 263 /*
 264  * When MSI-X interrupts are being used, then we can enable the actual
 265  * interrupts themselves. However, when they are not, we instead have to turn
 266  * towards the queue's CAUSE_ENA bit and enable that.
 267  */
 268 void
 269 i40e_intr_io_enable_all(i40e_t *i40e)
 270 {
 271         if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
 272                 int i;
 273 
 274                 for (i = 1; i < i40e->i40e_intr_count; i++) {
 275                         i40e_intr_io_enable(i40e, i);
 276                 }
 277         } else {
 278                 uint32_t reg;


 367 
 368         /*
 369          * Take a look and verify that all other interrupts have been disabled
 370          * and the interrupt linked lists have been zeroed.
 371          */
 372         if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
 373                 for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
 374                         reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
 375                         VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
 376 
 377                         reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i));
 378                         VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
 379                 }
 380         }
 381 #endif
 382 
 383         i40e_intr_adminq_disable(i40e);
 384 }
 385 
 386 /*
 387  * Set the head of the interrupt linked list. The PFINT_LNKLSTN[N]
 388  * register actually refers to the 'N + 1' interrupt vector. E.g.,
 389  * PFINT_LNKLSTN[0] refers to interrupt vector 1.
 390  */
 391 static void
 392 i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue)
 393 {

 394         uint32_t        reg;
 395         i40e_hw_t       *hw = &i40e->i40e_hw_space;
 396 
 397         reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
 398             (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
 399 
 400         I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg);
 401         DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg);
 402 }
 403 
 404 /*
 405  * Set the QINT_RQCTL[queue] register. The next queue is always the Tx
 406  * queue associated with this Rx queue. Unlike PFINT_LNKLSTN, the
 407  * vector should be the actual vector this queue is on -- i.e., it
 408  * should be equal to itrq_rx_intrvec.
 409  */
 410 static void
 411 i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue)
 412 {
 413         uint32_t        reg;
 414         i40e_hw_t       *hw = &i40e->i40e_hw_space;
 415 
 416         ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec);



 417 
 418         reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |

 419             (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
 420             (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
 421             (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
 422             I40E_QINT_RQCTL_CAUSE_ENA_MASK;
 423 
 424         I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
 425         DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg);
 426 }
 427 
 428 /*
 429  * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is
 430  * either the Rx queue of another TRQP, or EOL.
 431  */
 432 static void
 433 i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue)
 434 {
 435         uint32_t        reg;
 436         i40e_hw_t       *hw = &i40e->i40e_hw_space;
 437 
 438         ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec);
 439 
 440         reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
 441             (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
 442             (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
 443             (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) |
 444             I40E_QINT_TQCTL_CAUSE_ENA_MASK;
 445 
 446         I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg);
 447         DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg);
 448 }
 449 
 450 /*
 451  * Program the interrupt linked list. Each vector has a linked list of
 452  * queues which act as event sources for that vector. When one of
 453  * those sources has an event the associated interrupt vector is
 454  * fired. This mapping must match the mapping found in
 455  * i40e_map_intrs_to_vectors().
 456  *
 457  * See section 7.5.3 for more information about the configuration of
 458  * the interrupt linked list.
 459  */
 460 static void
 461 i40e_intr_init_queue_msix(i40e_t *i40e)
 462 {
 463         uint_t intr_count;
 464 
 465         /*
 466          * The 0th vector is for 'Other Interrupts' only (subject to
 467          * change in the future).
 468          */
 469         intr_count = i40e->i40e_intr_count - 1;
 470 
 471         for (uint_t vec = 0; vec < intr_count; vec++) {
 472                 boolean_t head = B_TRUE;
 473 
 474                 for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs;
 475                      qidx += intr_count) {
 476                         uint_t next_qidx = qidx + intr_count;
 477 
 478                         next_qidx = (next_qidx > i40e->i40e_num_trqpairs) ?
 479                             I40E_QUEUE_TYPE_EOL : next_qidx;
 480 
 481                         if (head) {
 482                                 i40e_set_lnklstn(i40e, vec, qidx);
 483                                 head = B_FALSE;
 484                         }
 485 
 486                         i40e_set_rqctl(i40e, vec + 1, qidx);
 487                         i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx);
 488                 }
 489         }
 490 }
 491 
 492 /*
 493  * Set up a single queue to share the admin queue interrupt in the non-MSI-X
 494  * world. Note we do not enable the queue as an interrupt cause at this time. We
 495  * don't have any other vector of control here, unlike with the MSI-X interrupt
 496  * case.
 497  */
 498 static void
 499 i40e_intr_init_queue_shared(i40e_t *i40e)
 500 {
 501         i40e_hw_t *hw = &i40e->i40e_hw_space;
 502         uint32_t reg;
 503 
 504         VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
 505             i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);
 506 
 507         reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) |
 508             (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
 509         I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);


 640 
 641         bzero(&evt, sizeof (struct i40e_arq_event_info));
 642         evt.buf_len = I40E_ADMINQ_BUFSZ;
 643         evt.msg_buf = i40e->i40e_aqbuf;
 644 
 645         while (remain != 0) {
 646                 enum i40e_status_code ret;
 647                 uint16_t opcode;
 648 
 649                 /*
 650                  * At the moment, the only error code that seems to be returned
 651                  * is one saying that there's no work. In such a case we leave
 652                  * this be.
 653                  */
 654                 ret = i40e_clean_arq_element(hw, &evt, &remain);
 655                 if (ret != I40E_SUCCESS)
 656                         break;
 657 
 658                 opcode = LE_16(evt.desc.opcode);
 659                 switch (opcode) {
 660                 /*
 661                  * Disable link checks for NEX-6977. With the fibers unplugged
 662                  * we can end up receiving too many link check interrupts,
 663                  * saturating one CPU for each link. This can cause system hangs
 664                  * at boot or shutdown when the system is running single-threaded.
 665                  *
 666                  * case i40e_aqc_opc_get_link_status:
 667                  *      mutex_enter(&i40e->i40e_general_lock);
 668                  *      i40e_link_check(i40e);
 669                  *      mutex_exit(&i40e->i40e_general_lock);
 670                  *      break;
 671                  */
 672                 default:
 673                         /*
 674                          * Longer term we'll want to enable other causes here
 675                          * and get these cleaned up and doing something.
 676                          */
 677                         break;
 678                 }
 679         }
 680 }
 681 
 682 static void
 683 i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
 684 {
 685         mblk_t *mp = NULL;

 686 



 687         mutex_enter(&itrq->itrq_rx_lock);
 688         if (!itrq->itrq_intr_poll)
 689                 mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
 690         mutex_exit(&itrq->itrq_rx_lock);
 691 
 692         if (mp == NULL)
 693                 return;
 694 
 695         mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
 696             itrq->itrq_rxgen);

 697 }
 698 
/*
 * Service the Tx side of an interrupt for the given trqpair by
 * recycling descriptors the hardware has finished transmitting. The
 * i40e handle is unused (hence ARGSUSED) but kept for symmetry with
 * i40e_intr_rx_work().
 */
/* ARGSUSED */
static void
i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
{
	i40e_tx_recycle_ring(itrq);
}
 705 
/*
 * At the moment, the only 'other' interrupt on ICR0 that we handle is the
 * adminq. We should go through and support the other notifications at some
 * point.
 */
static void
i40e_intr_other_work(i40e_t *i40e)
{
	struct i40e_hw *hw = &i40e->i40e_hw_space;
	uint32_t reg;

	/* Fetch the pending 'other interrupt' causes from ICR0. */
	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
	/*
	 * If the register access itself failed, report the fault via FMA,
	 * latch the error state, and bail without touching more registers.
	 */
	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
		return;
	}

	if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
		i40e_intr_adminq_work(i40e);

	/*
	 * Make sure that the adminq interrupt is not masked and then explicitly
	 * enable the adminq and thus the other interrupt.
	 */
	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
	reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);

	i40e_intr_adminq_enable(i40e);
}
 738 
/*
 * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of
 * the MSI-X interrupt sequence.
 */
uint_t
i40e_intr_msix(void *arg1, void *arg2)
{
	i40e_t *i40e = (i40e_t *)arg1;
	uint_t vector_idx = (uint_t)(uintptr_t)arg2;

	/* The vector cookie must be one of the vectors we registered. */
	ASSERT3U(vector_idx, <, i40e->i40e_intr_count);

	/*
	 * When using MSI-X interrupts, vector 0 is always reserved for the
	 * adminq at this time. Though longer term, we'll want to also bridge
	 * some I/O to them.
	 */
	if (vector_idx == 0) {
		i40e_intr_other_work(i40e);
		return (DDI_INTR_CLAIMED);
	}

	ASSERT3U(vector_idx, >, 0);

	/*
	 * We determine the queue indexes via simple arithmetic (as
	 * opposed to keeping explicit state like a bitmap). While
	 * convenient, it does mean that i40e_map_intrs_to_vectors(),
	 * i40e_intr_init_queue_msix(), and this function must be
	 * modified as a unit.
	 *
	 * We subtract 1 from the vector to offset the addition we
	 * performed during i40e_map_intrs_to_vectors().
	 */
	for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs;
	     i += (i40e->i40e_intr_count - 1)) {
		i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];

		ASSERT3U(i, <, i40e->i40e_num_trqpairs);
		ASSERT3P(itrq, !=, NULL);
		/* Service both halves of the trqpair assigned to us. */
		i40e_intr_rx_work(i40e, itrq);
		i40e_intr_tx_work(i40e, itrq);
	}

	/* Re-arm this vector so the hardware can fire it again. */
	i40e_intr_io_enable(i40e, vector_idx);
	return (DDI_INTR_CLAIMED);
}
 786 
 787 static uint_t
 788 i40e_intr_notx(i40e_t *i40e, boolean_t shared)
 789 {
 790         i40e_hw_t *hw = &i40e->i40e_hw_space;
 791         uint32_t reg;
 792         i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0];
 793         int ret = DDI_INTR_CLAIMED;
 794 
 795         if (shared == B_TRUE) {
 796                 mutex_enter(&i40e->i40e_general_lock);
 797                 if (i40e->i40e_state & I40E_SUSPENDED) {
 798                         mutex_exit(&i40e->i40e_general_lock);
 799                         return (DDI_INTR_UNCLAIMED);
 800                 }
 801                 mutex_exit(&i40e->i40e_general_lock);
 802         }
 803 
 804         reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
 805         if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
 806             DDI_FM_OK) {
 807                 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
 808                 atomic_or_32(&i40e->i40e_state, I40E_ERROR);
 809                 return (DDI_INTR_CLAIMED);
 810         }
 811 
 812         if (reg == 0) {
 813                 if (shared == B_TRUE)
 814                         ret = DDI_INTR_UNCLAIMED;
 815                 goto done;
 816         }
 817 
 818         if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
 819                 i40e_intr_adminq_work(i40e);
 820 
 821         if (reg & I40E_INTR_NOTX_RX_MASK)
 822                 i40e_intr_rx_work(i40e, itrq);
 823 
 824         if (reg & I40E_INTR_NOTX_TX_MASK)
 825                 i40e_intr_tx_work(i40e, itrq);
 826 
 827 done:
 828         i40e_intr_adminq_enable(i40e);
 829         return (ret);
 830 
 831 }
 832 
 833 /* ARGSUSED */
 834 uint_t
 835 i40e_intr_msi(void *arg1, void *arg2)
 836 {
 837         i40e_t *i40e = (i40e_t *)arg1;
 838 
 839         return (i40e_intr_notx(i40e, B_FALSE));
 840 }
 841 
 842 /* ARGSUSED */
 843 uint_t
 844 i40e_intr_legacy(void *arg1, void *arg2)
 845 {