Print this page
MFV: illumos-joyent@61dc3dec4f82a3e13e94609a0a83d5f66c64e760
OS-6846 want i40e multi-group support
OS-7372 i40e_alloc_ring_mem() unwinds when it shouldn't
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Author: Ryan Zezeski <rpz@joyent.com>
MFV: illumos-joyent@9e30beee2f0c127bf41868db46257124206e28d6
OS-5225 Want Fortville TSO support
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Patrick Mooney <patrick.mooney@joyent.com>
Author: Rob Johnston <rob.johnston@joyent.com>
MFV: illumos-gate@286d309c80aad9eac1fdbcb0388ed194d995d837
9805 i40e should read SFP data when firmware supports it
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Rob Johnston <rob.johnston@joyent.com>
Reviewed by: Dale Ghent <dale.ghent@joyent.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Dan McDonald <danmcd@joyent.com>
Author: Robert Mustacchi <rm@joyent.com>
NEX-13226 xvv710 25Gb NIC panics system under load
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-7822 40Gb Intel XL710 NIC performance data
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>

@@ -9,11 +9,11 @@
  * http://www.illumos.org/license/CDDL.
  */
 
 /*
  * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
- * Copyright (c) 2017, Joyent, Inc.
+ * Copyright (c) 2018, Joyent, Inc.
  * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
  */
 
 /*
  * For more information, please see the big theory statement in i40e_main.c.

@@ -37,11 +37,12 @@
 };
 
 static int
 i40e_group_remove_mac(void *arg, const uint8_t *mac_addr)
 {
-        i40e_t *i40e = arg;
+        i40e_rx_group_t *rxg = arg;
+        i40e_t *i40e = rxg->irg_i40e;
         struct i40e_aqc_remove_macvlan_element_data filt;
         struct i40e_hw *hw = &i40e->i40e_hw_space;
         int ret, i, last;
         i40e_uaddr_t *iua;
 

@@ -105,11 +106,12 @@
 }
 
 static int
 i40e_group_add_mac(void *arg, const uint8_t *mac_addr)
 {
-        i40e_t *i40e = arg;
+        i40e_rx_group_t *rxg = arg;
+        i40e_t          *i40e = rxg->irg_i40e;
         struct i40e_hw *hw = &i40e->i40e_hw_space;
         int i, ret;
         i40e_uaddr_t *iua;
         struct i40e_aqc_add_macvlan_element_data filt;
 

@@ -134,20 +136,16 @@
                         ret = EEXIST;
                         goto done;
                 }
         }
 
-        /*
-         * Note, the general use of the i40e_vsi_id will have to be refactored
-         * when we have proper group support.
-         */
         bzero(&filt, sizeof (filt));
         bcopy(mac_addr, filt.mac_addr, ETHERADDRL);
         filt.flags = I40E_AQC_MACVLAN_ADD_PERFECT_MATCH |
             I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
 
-        if ((ret = i40e_aq_add_macvlan(hw, i40e->i40e_vsi_id, &filt, 1,
+        if ((ret = i40e_aq_add_macvlan(hw, rxg->irg_vsi_seid, &filt, 1,
             NULL)) != I40E_SUCCESS) {
                 i40e_error(i40e, "failed to add mac address "
                     "%2x:%2x:%2x:%2x:%2x:%2x to unicast filter: %d",
                     mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3],
                     mac_addr[4], mac_addr[5], ret);

@@ -155,11 +153,11 @@
                 goto done;
         }
 
         iua = &i40e->i40e_uaddrs[i40e->i40e_resources.ifr_nmacfilt_used];
         bcopy(mac_addr, iua->iua_mac, ETHERADDRL);
-        iua->iua_vsi = i40e->i40e_vsi_id;
+        iua->iua_vsi = rxg->irg_vsi_seid;
         i40e->i40e_resources.ifr_nmacfilt_used++;
         ASSERT(i40e->i40e_resources.ifr_nmacfilt_used <=
             i40e->i40e_resources.ifr_nmacfilt);
         ret = 0;
 done:

@@ -225,11 +223,11 @@
                 ret = ECANCELED;
                 goto done;
         }
 
 
-        ret = i40e_aq_set_vsi_unicast_promiscuous(hw, i40e->i40e_vsi_id,
+        ret = i40e_aq_set_vsi_unicast_promiscuous(hw, I40E_DEF_VSI_SEID(i40e),
             on, NULL, B_FALSE);
         if (ret != I40E_SUCCESS) {
                 i40e_error(i40e, "failed to %s unicast promiscuity on "
                     "the default VSI: %d", on == B_TRUE ? "enable" : "disable",
                     ret);

@@ -244,11 +242,11 @@
         if (i40e->i40e_mcast_promisc_count > 0) {
                 i40e->i40e_promisc_on = on;
                 goto done;
         }
 
-        ret = i40e_aq_set_vsi_multicast_promiscuous(hw, i40e->i40e_vsi_id,
+        ret = i40e_aq_set_vsi_multicast_promiscuous(hw, I40E_DEF_VSI_SEID(i40e),
             on, NULL);
         if (ret != I40E_SUCCESS) {
                 i40e_error(i40e, "failed to %s multicast promiscuity on "
                     "the default VSI: %d", on == B_TRUE ? "enable" : "disable",
                     ret);

@@ -255,12 +253,12 @@
 
                 /*
                  * Try our best to put us back into a state that MAC expects us
                  * to be in.
                  */
-                ret = i40e_aq_set_vsi_unicast_promiscuous(hw, i40e->i40e_vsi_id,
-                    !on, NULL, B_FALSE);
+                ret = i40e_aq_set_vsi_unicast_promiscuous(hw,
+                    I40E_DEF_VSI_SEID(i40e), !on, NULL, B_FALSE);
                 if (ret != I40E_SUCCESS) {
                         i40e_error(i40e, "failed to %s unicast promiscuity on "
                             "the default VSI after toggling multicast failed: "
                             "%d", on == B_TRUE ? "disable" : "enable", ret);
                 }

@@ -292,15 +290,15 @@
         if (i40e->i40e_resources.ifr_nmcastfilt_used ==
             i40e->i40e_resources.ifr_nmcastfilt) {
                 if (i40e->i40e_mcast_promisc_count == 0 &&
                     i40e->i40e_promisc_on == B_FALSE) {
                         ret = i40e_aq_set_vsi_multicast_promiscuous(hw,
-                            i40e->i40e_vsi_id, B_TRUE, NULL);
+                            I40E_DEF_VSI_SEID(i40e), B_TRUE, NULL);
                         if (ret != I40E_SUCCESS) {
                                 i40e_error(i40e, "failed to enable multicast "
                                     "promiscuous mode on VSI %d: %d",
-                                    i40e->i40e_vsi_id, ret);
+                                    I40E_DEF_VSI_SEID(i40e), ret);
                                 return (EIO);
                         }
                 }
                 i40e->i40e_mcast_promisc_count++;
                 return (0);

@@ -310,11 +308,11 @@
         bzero(&filt, sizeof (filt));
         bcopy(multicast_address, filt.mac_addr, ETHERADDRL);
         filt.flags = I40E_AQC_MACVLAN_ADD_HASH_MATCH |
             I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
 
-        if ((ret = i40e_aq_add_macvlan(hw, i40e->i40e_vsi_id, &filt, 1,
+        if ((ret = i40e_aq_add_macvlan(hw, I40E_DEF_VSI_SEID(i40e), &filt, 1,
             NULL)) != I40E_SUCCESS) {
                 i40e_error(i40e, "failed to add mac address "
                     "%2x:%2x:%2x:%2x:%2x:%2x to multicast filter: %d",
                     multicast_address[0], multicast_address[1],
                     multicast_address[2], multicast_address[3],

@@ -351,12 +349,12 @@
                 bzero(&filt, sizeof (filt));
                 bcopy(multicast_address, filt.mac_addr, ETHERADDRL);
                 filt.flags = I40E_AQC_MACVLAN_DEL_HASH_MATCH |
                     I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
 
-                if (i40e_aq_remove_macvlan(hw, i40e->i40e_vsi_id,
-                    &filt, 1, NULL) != I40E_SUCCESS) {
+                if (i40e_aq_remove_macvlan(hw, I40E_DEF_VSI_SEID(i40e), &filt,
+                    1, NULL) != I40E_SUCCESS) {
                         i40e_error(i40e, "failed to remove mac address "
                             "%2x:%2x:%2x:%2x:%2x:%2x from multicast "
                             "filter: %d",
                             multicast_address[0], multicast_address[1],
                             multicast_address[2], multicast_address[3],

@@ -379,15 +377,15 @@
 
         if (i40e->i40e_mcast_promisc_count > 0) {
                 if (i40e->i40e_mcast_promisc_count == 1 &&
                     i40e->i40e_promisc_on == B_FALSE) {
                         ret = i40e_aq_set_vsi_multicast_promiscuous(hw,
-                            i40e->i40e_vsi_id, B_FALSE, NULL);
+                            I40E_DEF_VSI_SEID(i40e), B_FALSE, NULL);
                         if (ret != I40E_SUCCESS) {
                                 i40e_error(i40e, "failed to disable "
                                     "multicast promiscuous mode on VSI %d: %d",
-                                    i40e->i40e_vsi_id, ret);
+                                    I40E_DEF_VSI_SEID(i40e), ret);
                                 return (EIO);
                         }
                 }
                 i40e->i40e_mcast_promisc_count--;
 

@@ -488,11 +486,11 @@
         /*
          * Note the group index here is expected to be -1 due to the fact that
          * we're not actually grouping things tx-wise at this time.
          */
         ASSERT(group_index == -1);
-        ASSERT(ring_index < i40e->i40e_num_trqpairs);
+        ASSERT(ring_index < i40e->i40e_num_trqpairs_per_vsi);
 
         itrq->itrq_mactxring = rh;
         infop->mri_driver = (mac_ring_driver_t)itrq;
         infop->mri_start = NULL;
         infop->mri_stop = NULL;

@@ -514,19 +512,20 @@
 i40e_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
     const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
 {
         i40e_t *i40e = arg;
         mac_intr_t *mintr = &infop->mri_intr;
-        i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[ring_index];
+        uint_t trqpair_index;
+        i40e_trqpair_t *itrq;
 
-        /*
-         * We assert the group number and ring index to help sanity check
-         * ourselves and mark that we'll need to rework this when we have
-         * multiple groups.
-         */
-        ASSERT3S(group_index, ==, 0);
-        ASSERT3S(ring_index, <, i40e->i40e_num_trqpairs);
+        /* This assumes static groups. */
+        ASSERT3S(group_index, >=, 0);
+        ASSERT3S(ring_index, >=, 0);
+        trqpair_index = (group_index * i40e->i40e_num_trqpairs_per_vsi) +
+            ring_index;
+        ASSERT3U(trqpair_index, <, i40e->i40e_num_trqpairs);
+        itrq = &i40e->i40e_trqpairs[trqpair_index];
 
         itrq->itrq_macrxring = rh;
         infop->mri_driver = (mac_ring_driver_t)itrq;
         infop->mri_start = i40e_ring_start;
         infop->mri_stop = NULL;

@@ -550,28 +549,26 @@
+/*
+ * RX group-fill callback; i40e's MAC_CAPAB_RINGS handling installs it as
+ * mr_gget for MAC_RING_TYPE_RX. It describes RX group `index` to MAC by
+ * populating *infop. Requests for any ring type other than MAC_RING_TYPE_RX
+ * are ignored and leave *infop untouched.
+ *
+ * The MAC group handle gh is saved in the group's i40e_rx_group_t, and a
+ * pointer to that same structure is published as mgi_driver. The add/remove
+ * MAC address callbacks registered here interpret their first argument as an
+ * i40e_rx_group_t, so it is presumably this mgi_driver value that MAC hands
+ * back to them.
+ *
+ * NOTE(review): index is used to subscript i40e_rx_groups without a range
+ * check here; this appears to rely on MAC only asking for groups below the
+ * advertised mr_gnum -- confirm.
+ */
 static void
 i40e_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index,
     mac_group_info_t *infop, mac_group_handle_t gh)
 {
         i40e_t *i40e = arg;
+        i40e_rx_group_t *rxg;
 
         if (rtype != MAC_RING_TYPE_RX)
                 return;
 
-        /*
-         * Note, this is a simplified view of a group, given that we only have a
-         * single group and a single ring at the moment. We'll want to expand
-         * upon this as we leverage more hardware functionality.
-         */
-        i40e->i40e_rx_group_handle = gh;
-        infop->mgi_driver = (mac_group_driver_t)i40e;
+        rxg = &i40e->i40e_rx_groups[index];
+        rxg->irg_grp_hdl = gh;
+
+        infop->mgi_driver = (mac_group_driver_t)rxg;
         infop->mgi_start = NULL;
         infop->mgi_stop = NULL;
         infop->mgi_addmac = i40e_group_add_mac;
         infop->mgi_remmac = i40e_group_remove_mac;
 
-        ASSERT(i40e->i40e_num_rx_groups == I40E_GROUP_MAX);
-        infop->mgi_count = i40e->i40e_num_trqpairs;
+        ASSERT(i40e->i40e_num_rx_groups <= I40E_GROUP_MAX);
+        infop->mgi_count = i40e->i40e_num_trqpairs_per_vsi;
 }
 
 static int
 i40e_transceiver_info(void *arg, uint_t id, mac_transceiver_info_t *infop)
 {

@@ -580,10 +577,19 @@
 
         if (id != 0 || infop == NULL)
                 return (EINVAL);
 
         mutex_enter(&i40e->i40e_general_lock);
+        switch (i40e->i40e_hw_space.phy.link_info.module_type[0]) {
+        case I40E_MODULE_TYPE_SFP:
+        case I40E_MODULE_TYPE_QSFP:
+                break;
+        default:
+                mutex_exit(&i40e->i40e_general_lock);
+                return (ENOTSUP);
+        }
+
         present = !!(i40e->i40e_hw_space.phy.link_info.link_info &
             I40E_AQ_MEDIA_AVAILABLE);
         if (present) {
                 usable = !!(i40e->i40e_hw_space.phy.link_info.an_info &
                     I40E_AQ_QUALIFIED_MODULE);

@@ -597,10 +603,73 @@
 
         return (0);
 }
 
+/*
+ * Transceiver read entry point (installed as mct_read). Copies nbytes bytes,
+ * starting at byte `offset` of transceiver page `page`, into buf and stores
+ * the number of bytes read in *nread.
+ *
+ * Returns:
+ *   0        on success; *nread is set to nbytes.
+ *   EINVAL   if id is not 0, buf or nread is NULL, nbytes is 0, page is
+ *            neither 0xa0 nor 0xa2, offset is negative, or the requested
+ *            range does not fit within a 256-byte page.
+ *   ENOTSUP  if the module type is not SFP or QSFP, the MAC is not an
+ *            XL710, or the firmware API version check below fails.
+ *   EIO      if any single register read fails. Bytes copied into buf
+ *            before the failure are left in place and *nread is not
+ *            written.
+ *
+ * i40e_general_lock is held from the module type check until all bytes have
+ * been read.
+ */
 static int
+i40e_transceiver_read(void *arg, uint_t id, uint_t page, void *buf,
+    size_t nbytes, off_t offset, size_t *nread)
+{
+        i40e_t *i40e = arg;
+        struct i40e_hw *hw = &i40e->i40e_hw_space;
+        uint8_t *buf8 = buf;
+        size_t i;
+
+        if (id != 0 || buf == NULL || nbytes == 0 || nread == NULL ||
+            (page != 0xa0 && page != 0xa2) || offset < 0)
+                return (EINVAL);
+
+        /*
+         * Both supported pages have a length of 256 bytes; ensure nothing asks
+         * us to go beyond that. A negative offset and a zero nbytes were
+         * already rejected above, so only the upper bounds are checked here.
+         */
+        if (nbytes > 256 || offset >= 256 || (offset + nbytes > 256)) {
+                return (EINVAL);
+        }
+
+        mutex_enter(&i40e->i40e_general_lock);
+        switch (i40e->i40e_hw_space.phy.link_info.module_type[0]) {
+        case I40E_MODULE_TYPE_SFP:
+        case I40E_MODULE_TYPE_QSFP:
+                break;
+        default:
+                mutex_exit(&i40e->i40e_general_lock);
+                return (ENOTSUP);
+        }
+
+        /*
+         * Make sure we have a sufficiently new firmware version to run this
+         * command. This was introduced in firmware API 1.7. This is apparently
+         * only supported on the XL710 MAC, not the X722.
+         *
+         * NOTE(review): as written, any API major version other than 1 is
+         * rejected, including major versions newer than 1 -- confirm that
+         * this is intended.
+         */
+        if (hw->mac.type != I40E_MAC_XL710 || hw->aq.api_maj_ver != 1 ||
+            hw->aq.api_min_ver < 7) {
+                mutex_exit(&i40e->i40e_general_lock);
+                return (ENOTSUP);
+        }
+
+        for (i = 0; i < nbytes; i++, offset++) {
+                enum i40e_status_code status;
+                uint32_t val;
+
+                status = i40e_aq_get_phy_register(hw,
+                    I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, page, offset,
+                    &val, NULL);
+                if (status != I40E_SUCCESS) {
+                        mutex_exit(&i40e->i40e_general_lock);
+                        return (EIO);
+                }
+
+                buf8[i] = (uint8_t)val; /* keep only the low 8 bits of val */
+        }
+
+        mutex_exit(&i40e->i40e_general_lock);
+        *nread = nbytes;
+
+        return (0);
+}
+
+static int
 i40e_gld_led_set(void *arg, mac_led_mode_t mode, uint_t flags)
 {
         i40e_t *i40e = arg;
         struct i40e_hw *hw = &i40e->i40e_hw_space;
 

@@ -658,33 +727,45 @@
                 if (i40e->i40e_tx_hcksum_enable == B_TRUE)
                         *txflags = HCKSUM_INET_PARTIAL | HCKSUM_IPHDRCKSUM;
                 break;
         }
 
+        case MAC_CAPAB_LSO: {
+                mac_capab_lso_t *cap_lso = cap_data;
+
+                if (i40e->i40e_tx_lso_enable == B_TRUE) {
+                        cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
+                        cap_lso->lso_basic_tcp_ipv4.lso_max = I40E_LSO_MAXLEN;
+                } else {
+                        return (B_FALSE);
+                }
+                break;
+        }
+
         case MAC_CAPAB_RINGS:
                 cap_rings = cap_data;
                 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
                 switch (cap_rings->mr_type) {
                 case MAC_RING_TYPE_TX:
                         /*
-                         * Note, saying we have no rings, but some number of
-                         * groups indicates to MAC that it should create
-                         * psuedo-groups with one for each TX ring. This may not
-                         * be the long term behavior we want, but it'll work for
-                         * now.
+                         * Note, saying we have no groups, but some
+                         * number of rings indicates to MAC that it
+                         * should create pseudo-groups with one for
+                         * each TX ring. This may not be the long term
+                         * behavior we want, but it'll work for now.
                          */
                         cap_rings->mr_gnum = 0;
-                        cap_rings->mr_rnum = i40e->i40e_num_trqpairs;
+                        cap_rings->mr_rnum = i40e->i40e_num_trqpairs_per_vsi;
                         cap_rings->mr_rget = i40e_fill_tx_ring;
                         cap_rings->mr_gget = NULL;
                         cap_rings->mr_gaddring = NULL;
                         cap_rings->mr_gremring = NULL;
                         break;
                 case MAC_RING_TYPE_RX:
                         cap_rings->mr_rnum = i40e->i40e_num_trqpairs;
                         cap_rings->mr_rget = i40e_fill_rx_ring;
-                        cap_rings->mr_gnum = I40E_GROUP_MAX;
+                        cap_rings->mr_gnum = i40e->i40e_num_rx_groups;
                         cap_rings->mr_gget = i40e_fill_rx_group;
                         cap_rings->mr_gaddring = NULL;
                         cap_rings->mr_gremring = NULL;
                         break;
                 default:

@@ -700,11 +781,11 @@
                  * advertise the support for this capability.
                  */
                 mct->mct_flags = 0;
                 mct->mct_ntransceivers = 1;
                 mct->mct_info = i40e_transceiver_info;
-                mct->mct_read = NULL;
+                mct->mct_read = i40e_transceiver_read;
 
                 return (B_TRUE);
         case MAC_CAPAB_LED:
                 mcl = cap_data;