Print this page
NEX-20178 Heavy read load using 10G i40e causes network disconnect
MFV illumos-joyent@83a8d0d616db36010b59cc850d1926c0f6a30de1
OS-7457 i40e Tx freezes on zero descriptors
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Rob Johnston <rob.johnston@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
MFV illumos-joyent@0d3f2b61dcfb18edace4fd257054f6fdbe07c99c
OS-7492 i40e Tx freeze when b_cont chain exceeds 8 descriptors
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Rob Johnston <rob.johnston@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
MFV illumos-joyent@b4bede175d4c50ac1b36078a677b69388f6fb59f
OS-7577 initialize FC for i40e
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Rob Johnston <rob.johnston@joyent.com>
MFV illumos-joyent@83a8d0d616db36010b59cc850d1926c0f6a30de1
OS-7457 i40e Tx freezes on zero descriptors
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Rob Johnston <rob.johnston@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
MFV: illumos-joyent@61dc3dec4f82a3e13e94609a0a83d5f66c64e760
OS-6846 want i40e multi-group support
OS-7372 i40e_alloc_ring_mem() unwinds when it shouldn't
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Author: Ryan Zezeski <rpz@joyent.com>
MFV: illumos-joyent@757454db6669c1186f60bc625510c1b67217aae6
OS-7082 i40e: blown assert in i40e_tx_cleanup_ring()
OS-7086 i40e: add mdb dcmd to dump info on tx descriptor rings
OS-7101 i40e: add kstat to track TX DMA bind failures
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Patrick Mooney <patrick.mooney@joyent.com>
Author: Rob Johnston <rob.johnston@joyent.com>
MFV: illumos-joyent@9e30beee2f0c127bf41868db46257124206e28d6
OS-5225 Want Fortville TSO support
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Patrick Mooney <patrick.mooney@joyent.com>
Author: Rob Johnston <rob.johnston@joyent.com>
NEX-13226 xvv710 25Gb NIC panics system under load
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-7822 40Gb Intel XL710 NIC performance data
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
        
*** 9,19 ****
   * http://www.illumos.org/license/CDDL.
   */
  
  /*
   * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
!  * Copyright (c) 2017, Joyent, Inc.
   * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
   */
  
  /*
   * Please see i40e_main.c for an introduction to the device driver, its layout,
--- 9,19 ----
   * http://www.illumos.org/license/CDDL.
   */
  
  /*
   * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
!  * Copyright 2019 Joyent, Inc.
   * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
   */
  
  /*
   * Please see i40e_main.c for an introduction to the device driver, its layout,
*** 150,162 ****
          I40E_ITR_INDEX_OTHER    = 0x2,
          I40E_ITR_INDEX_NONE     = 0x3
  } i40e_itr_index_t;
  
  /*
!  * Table 1-5 of the PRM notes that LSO supports up to 256 KB.
   */
! #define I40E_LSO_MAXLEN (256 * 1024)
  
  #define I40E_CYCLIC_PERIOD NANOSEC      /* 1 second */
  #define I40E_DRAIN_RX_WAIT      (500 * MILLISEC)        /* In us */
  
  /*
--- 150,163 ----
          I40E_ITR_INDEX_OTHER    = 0x2,
          I40E_ITR_INDEX_NONE     = 0x3
  } i40e_itr_index_t;
  
  /*
!  * The hardware claims to support LSO up to 256 KB, but due to the limitations
!  * imposed by the IP header for non-jumbo frames, we cap it at 64 KB.
   */
! #define I40E_LSO_MAXLEN (64 * 1024)
  
  #define I40E_CYCLIC_PERIOD NANOSEC      /* 1 second */
  #define I40E_DRAIN_RX_WAIT      (500 * MILLISEC)        /* In us */
  
  /*
*** 171,187 ****
   * received by the OS.
   */
  #define I40E_BUF_IPHDR_ALIGNMENT        2
  
  /*
!  * The XL710 controller has a limit of eight buffers being allowed to be used
!  * for the transmission of a single frame. This is defined in 8.4.1 - Transmit
   * Packet in System Memory.
   */
  #define I40E_TX_MAX_COOKIE      8
  
  /*
   * Sizing to determine the amount of available descriptors at which we'll
   * consider ourselves blocked. Also, when we have these available, we'll then
   * consider ourselves available to transmit to MAC again. Strictly speaking, the
   * MAX is based on the ring size. The default sizing is based on ixgbe.
   */
--- 172,197 ----
   * received by the OS.
   */
  #define I40E_BUF_IPHDR_ALIGNMENT        2
  
  /*
!  * The XL710 controller has a total of eight buffers available for the
!  * transmission of any single frame. This is defined in 8.4.1 - Transmit
   * Packet in System Memory.
   */
  #define I40E_TX_MAX_COOKIE      8
  
  /*
+  * An LSO frame can be as large as 64KB, so we allow a DMA bind to span more
+  * cookies than a non-LSO frame.  The key here is to select a value such
+  * that, once the HW has chunked up the LSO frame into MSS-sized segments, no
+  * single segment spans more than 8 cookies (see comments for
+  * I40E_TX_MAX_COOKIE).
+  */
+ #define I40E_TX_LSO_MAX_COOKIE  32
+ 
+ /*
   * Sizing to determine the amount of available descriptors at which we'll
   * consider ourselves blocked. Also, when we have these available, we'll then
   * consider ourselves available to transmit to MAC again. Strictly speaking, the
   * MAX is based on the ring size. The default sizing is based on ixgbe.
   */
*** 201,210 ****
--- 211,226 ----
  #define I40E_MIN_TX_DMA_THRESH          0
  #define I40E_DEF_TX_DMA_THRESH          256
  #define I40E_MAX_TX_DMA_THRESH          INT32_MAX
  
  /*
+  * The max size of each individual tx buffer is 16KB - 1.
+  * See table 8-17
+  */
+ #define I40E_MAX_TX_BUFSZ               0x0000000000003FFFull
+ 
+ /*
   * Resource sizing counts. There are various aspects of hardware where we may
   * have some variable number of elements that we need to handle. Such as the
   * hardware capabilities and switch capacities. We cannot know a priori how many
   * elements to do, so instead we take a starting guess and then will grow it up
   * to an upper bound on a number of elements, to limit memory consumption in
*** 238,262 ****
  #define I40E_HMC_TX_ALT_VLAN_DISABLE    0
  #define I40E_HMC_TX_WB_ENABLE           1
  #define I40E_HMC_TX_TPH_DISABLE         0
  
  /*
-  * Whenever we establish and create a VSI, we need to assign some number of
-  * queues that it's allowed to access from the PF. Because we only have a single
-  * VSI per PF at this time, we assign it all the queues.
-  *
-  * Many of the devices support what's called Data-center Bridging. Which is a
-  * feature that we don't have much use of at this time. However, we still need
-  * to fill in this information. We follow the guidance of the note in Table 7-80
-  * which talks about bytes 62-77. It says that if we don't want to assign
-  * anything to traffic classes, we should set the field to zero. Effectively
-  * this means that everything in the system is assigned to traffic class zero.
-  */
- #define I40E_ASSIGN_ALL_QUEUES          0
- #define I40E_TRAFFIC_CLASS_NO_QUEUES    0
- 
- /*
   * This defines the error mask that we care about from rx descriptors. Currently
   * we're only concerned with the general errors and oversize errors.
   */
  #define I40E_RX_ERR_BITS        ((1 << I40E_RX_DESC_ERROR_RXE_SHIFT) | \
          (1 << I40E_RX_DESC_ERROR_OVERSIZE_SHIFT))
--- 254,263 ----
*** 266,281 ****
   * enough to hold 32-bit quantities transformed to strings as %d.%d or %x.
   */
  #define I40E_DDI_PROP_LEN       64
  
  /*
!  * We currently consolidate some overrides that we use in the code here. These
!  * will be gone in the fullness of time, but as we're bringing up the device,
!  * this is what we use.
   */
! #define I40E_GROUP_MAX          1
! #define I40E_TRQPAIR_MAX        1
  
  #define I40E_GROUP_NOMSIX       1
  #define I40E_TRQPAIR_NOMSIX     1
  
  /*
--- 267,282 ----
   * enough to hold 32-bit quantities transformed to strings as %d.%d or %x.
   */
  #define I40E_DDI_PROP_LEN       64
  
  /*
!  * Place an artificial limit on the max number of groups. The X710
!  * series supports up to 384 VSIs to be partitioned across PFs as the
!  * driver sees fit. But until we support more interrupts this seems
!  * like a good place to start.
   */
! #define I40E_GROUP_MAX          32
  
  #define I40E_GROUP_NOMSIX       1
  #define I40E_TRQPAIR_NOMSIX     1
  
  /*
*** 403,424 ****
  } i40e_rx_control_block_t;
  
  typedef enum {
          I40E_TX_NONE,
          I40E_TX_COPY,
!         I40E_TX_DMA
  } i40e_tx_type_t;
  
  typedef struct i40e_tx_desc i40e_tx_desc_t;
  typedef union i40e_32byte_rx_desc i40e_rx_desc_t;
  
  typedef struct i40e_tx_control_block {
          struct i40e_tx_control_block    *tcb_next;
          mblk_t                          *tcb_mp;
          i40e_tx_type_t                  tcb_type;
          ddi_dma_handle_t                tcb_dma_handle;
          i40e_dma_buffer_t               tcb_dma;
  } i40e_tx_control_block_t;
  
  /*
   * Receive ring data (used below).
   */
--- 404,436 ----
  } i40e_rx_control_block_t;
  
  typedef enum {
          I40E_TX_NONE,
          I40E_TX_COPY,
!         I40E_TX_DMA,
!         I40E_TX_DESC,
  } i40e_tx_type_t;
  
  typedef struct i40e_tx_desc i40e_tx_desc_t;
+ typedef struct i40e_tx_context_desc i40e_tx_context_desc_t;
  typedef union i40e_32byte_rx_desc i40e_rx_desc_t;
  
+ struct i40e_dma_bind_info {   /* one address/length pair; referenced via tcb_bind_info in the tx control block */
+         caddr_t dbi_paddr;    /* NOTE(review): presumably the address of one bound DMA cookie -- confirm */
+         size_t dbi_len;       /* NOTE(review): presumably the length in bytes of that cookie -- confirm */
+ };
+ 
  typedef struct i40e_tx_control_block {
          struct i40e_tx_control_block    *tcb_next;      /* link to another tcb of the same type */
          mblk_t                          *tcb_mp;        /* NOTE(review): presumably the mblk being transmitted -- confirm */
          i40e_tx_type_t                  tcb_type;       /* one of I40E_TX_NONE/COPY/DMA/DESC */
          ddi_dma_handle_t                tcb_dma_handle;
+         ddi_dma_handle_t                tcb_lso_dma_handle;     /* NOTE(review): presumably the bind handle used for LSO frames -- confirm */
          i40e_dma_buffer_t               tcb_dma;
+         struct i40e_dma_bind_info       *tcb_bind_info; /* NOTE(review): presumably an array of per-cookie bind records -- confirm */
+         uint_t                          tcb_bind_ncookies;      /* NOTE(review): presumably the count of valid tcb_bind_info entries -- confirm */
+         boolean_t                       tcb_used_lso;   /* NOTE(review): presumably set when the LSO handle was used for this tcb -- confirm */
  } i40e_tx_control_block_t;
  
  /*
   * Receive ring data (used below).
   */
*** 515,533 ****
--- 527,549 ----
  typedef struct i40e_txq_stat {
          kstat_named_t   itxs_bytes;             /* Bytes out on queue */
          kstat_named_t   itxs_packets;           /* Packets out on queue */
          kstat_named_t   itxs_descriptors;       /* Descriptors issued */
          kstat_named_t   itxs_recycled;          /* Descriptors reclaimed */
+         kstat_named_t   itxs_force_copy;        /* non-TSO force copy */
+         kstat_named_t   itxs_tso_force_copy;    /* TSO force copy */
          /*
           * Various failure conditions.
           */
          kstat_named_t   itxs_hck_meoifail;      /* ether offload failures */
          kstat_named_t   itxs_hck_nol2info;      /* Missing l2 info */
          kstat_named_t   itxs_hck_nol3info;      /* Missing l3 info */
          kstat_named_t   itxs_hck_nol4info;      /* Missing l4 info */
          kstat_named_t   itxs_hck_badl3;         /* Not IPv4/IPv6 */
          kstat_named_t   itxs_hck_badl4;         /* Bad L4 Payload */
+         kstat_named_t   itxs_lso_nohck;         /* Missing offloads for LSO */
+         kstat_named_t   itxs_bind_fails;        /* DMA bind failures */
  
          kstat_named_t   itxs_err_notcb;         /* No tcb's available */
          kstat_named_t   itxs_err_nodescs;       /* No tcb's available */
          kstat_named_t   itxs_err_context;       /* Total context failures */
  
*** 759,769 ****
--- 775,804 ----
          uint_t  ifr_nmacfilt_used;
          uint_t  ifr_nmcastfilt;
          uint_t  ifr_nmcastfilt_used;
  } i40e_func_rsrc_t;
  
+ typedef struct i40e_vsi {     /* per-VSI state; i40e_t holds an array of I40E_GROUP_MAX of these */
+         uint16_t                iv_seid;        /* SEID of this VSI (read by I40E_DEF_VSI_SEID) */
+         uint16_t                iv_number;      /* NOTE(review): presumably the hardware VSI number -- confirm */
+         kstat_t                 *iv_kstats;     /* kstat for this VSI's statistics */
+         i40e_vsi_stats_t        iv_stats;       /* statistics data for this VSI */
+         uint16_t                iv_stats_id;    /* NOTE(review): presumably the hardware stat counter index -- confirm */
+ } i40e_vsi_t;
+ 
  /*
+  * While irg_index and irg_grp_hdl aren't used anywhere, they are
+  * still useful for debugging.
+  */
+ typedef struct i40e_rx_group {
+         uint32_t                irg_index;    /* index in i40e_rx_groups[] */
+         uint16_t                irg_vsi_seid; /* SEID of VSI for this group */
+         mac_group_handle_t      irg_grp_hdl;  /* handle to mac_group_t */
+         struct i40e             *irg_i40e;    /* ref to i40e_t */
+ } i40e_rx_group_t;
+ 
+ /*
   * Main i40e per-instance state.
   */
  typedef struct i40e {
          list_node_t     i40e_glink;             /* Global list link */
          list_node_t     i40e_dlink;             /* Device list link */
*** 787,801 ****
          struct i40e_hw                          i40e_hw_space;
          struct i40e_osdep                       i40e_osdep_space;
          struct i40e_aq_get_phy_abilities_resp   i40e_phy;
          void                                    *i40e_aqbuf;
  
          /*
           * Device state, switch information, and resources.
           */
!         int                     i40e_vsi_id;
!         uint16_t                i40e_vsi_num;
          struct i40e_device      *i40e_device;
          i40e_func_rsrc_t        i40e_resources;
          uint16_t                i40e_switch_rsrc_alloc;
          uint16_t                i40e_switch_rsrc_actual;
          i40e_switch_rsrc_t      *i40e_switch_rsrcs;
--- 822,843 ----
          struct i40e_hw                          i40e_hw_space;
          struct i40e_osdep                       i40e_osdep_space;
          struct i40e_aq_get_phy_abilities_resp   i40e_phy;
          void                                    *i40e_aqbuf;
  
+ #define I40E_DEF_VSI_IDX        0
+ #define I40E_DEF_VSI(i40e)      ((i40e)->i40e_vsis[I40E_DEF_VSI_IDX])
+ #define I40E_DEF_VSI_SEID(i40e) (I40E_DEF_VSI(i40e).iv_seid)
+ 
          /*
           * Device state, switch information, and resources.
           */
!         i40e_vsi_t              i40e_vsis[I40E_GROUP_MAX];
!         uint16_t                i40e_mac_seid;   /* SEID of physical MAC */
!         uint16_t                i40e_veb_seid;   /* switch atop MAC (SEID) */
!         uint16_t                i40e_vsi_avail;  /* VSIs avail to this PF */
!         uint16_t                i40e_vsi_used;   /* VSIs used by this PF */
          struct i40e_device      *i40e_device;
          i40e_func_rsrc_t        i40e_resources;
          uint16_t                i40e_switch_rsrc_alloc;
          uint16_t                i40e_switch_rsrc_actual;
          i40e_switch_rsrc_t      *i40e_switch_rsrcs;
*** 812,827 ****
          /*
           * Transmit and receive information, tunables, and MAC info.
           */
          i40e_trqpair_t  *i40e_trqpairs;
          boolean_t       i40e_mr_enable;
!         int             i40e_num_trqpairs;
          uint_t          i40e_other_itr;
  
!         int             i40e_num_rx_groups;
          int             i40e_num_rx_descs;
-         mac_group_handle_t i40e_rx_group_handle;
          uint32_t        i40e_rx_ring_size;
          uint32_t        i40e_rx_buf_size;
          boolean_t       i40e_rx_hcksum_enable;
          uint32_t        i40e_rx_dma_min;
          uint32_t        i40e_rx_limit_per_intr;
--- 854,870 ----
          /*
           * Transmit and receive information, tunables, and MAC info.
           */
          i40e_trqpair_t  *i40e_trqpairs;
          boolean_t       i40e_mr_enable;
!         uint_t          i40e_num_trqpairs; /* total TRQPs (per PF) */
!         uint_t          i40e_num_trqpairs_per_vsi; /* TRQPs per VSI */
          uint_t          i40e_other_itr;
  
!         i40e_rx_group_t *i40e_rx_groups;
!         uint_t          i40e_num_rx_groups;
          int             i40e_num_rx_descs;
          uint32_t        i40e_rx_ring_size;
          uint32_t        i40e_rx_buf_size;
          boolean_t       i40e_rx_hcksum_enable;
          uint32_t        i40e_rx_dma_min;
          uint32_t        i40e_rx_limit_per_intr;
*** 830,839 ****
--- 873,883 ----
          int             i40e_num_tx_descs;
          uint32_t        i40e_tx_ring_size;
          uint32_t        i40e_tx_buf_size;
          uint32_t        i40e_tx_block_thresh;
          boolean_t       i40e_tx_hcksum_enable;
+         boolean_t       i40e_tx_lso_enable;
          uint32_t        i40e_tx_dma_min;
          uint_t          i40e_tx_itr;
  
          /*
           * Interrupt state
*** 853,862 ****
--- 897,907 ----
           * DMA attributes. See i40e_transceiver.c for why we have copies of them
           * in the i40e_t.
           */
          ddi_dma_attr_t          i40e_static_dma_attr;
          ddi_dma_attr_t          i40e_txbind_dma_attr;
+         ddi_dma_attr_t          i40e_txbind_lso_dma_attr;
          ddi_device_acc_attr_t   i40e_desc_acc_attr;
          ddi_device_acc_attr_t   i40e_buf_acc_attr;
  
          /*
           * The following two fields are used to protect and keep track of
*** 870,883 ****
          /*
           * PF statistics and VSI statistics.
           */
          kmutex_t                i40e_stat_lock;
          kstat_t                 *i40e_pf_kstat;
-         kstat_t                 *i40e_vsi_kstat;
          i40e_pf_stats_t         i40e_pf_stat;
-         i40e_vsi_stats_t        i40e_vsi_stat;
-         uint16_t                i40e_vsi_stat_id;
  
          /*
           * Misc. stats and counters that should maybe one day be kstats.
           */
          uint64_t        i40e_s_link_status_errs;
--- 915,925 ----
*** 973,984 ****
  /*
   * Statistics functions.
   */
  extern boolean_t i40e_stats_init(i40e_t *);
  extern void i40e_stats_fini(i40e_t *);
! extern boolean_t i40e_stat_vsi_init(i40e_t *);
! extern void i40e_stat_vsi_fini(i40e_t *);
  extern boolean_t i40e_stats_trqpair_init(i40e_trqpair_t *);
  extern void i40e_stats_trqpair_fini(i40e_trqpair_t *);
  extern int i40e_m_stat(void *, uint_t, uint64_t *);
  extern int i40e_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *);
  extern int i40e_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *);
--- 1015,1026 ----
  /*
   * Statistics functions.
   */
  extern boolean_t i40e_stats_init(i40e_t *);
  extern void i40e_stats_fini(i40e_t *);
! extern boolean_t i40e_stat_vsi_init(i40e_t *, uint_t);
! extern void i40e_stat_vsi_fini(i40e_t *, uint_t);
  extern boolean_t i40e_stats_trqpair_init(i40e_trqpair_t *);
  extern void i40e_stats_trqpair_fini(i40e_trqpair_t *);
  extern int i40e_m_stat(void *, uint_t, uint64_t *);
  extern int i40e_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *);
  extern int i40e_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *);