Print this page
16884 viona TSO should better handle csum offloads


  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26 /*
  27  * This file and its contents are supplied under the terms of the
  28  * Common Development and Distribution License ("CDDL"), version 1.0.
  29  * You may only use this file in accordance with the terms of version
  30  * 1.0 of the CDDL.
  31  *
  32  * A full copy of the text of the CDDL should have accompanied this
  33  * source.  A copy of the CDDL is also available via the Internet at
  34  * http://www.illumos.org/license/CDDL.
  35  *
  36  * Copyright 2015 Pluribus Networks Inc.
  37  * Copyright 2019 Joyent, Inc.
  38  * Copyright 2024 Oxide Computer Company

  39  */
  40 
  41 
  42 #include <sys/types.h>
  43 #include <sys/smt.h>
  44 #include <sys/strsubr.h>
  45 
  46 #include <sys/pattr.h>
  47 #include <sys/dlpi.h>
  48 #include <inet/ip.h>
  49 #include <inet/ip_impl.h>
  50 
  51 #include "viona_impl.h"
  52 
  53 #define BNXE_NIC_DRIVER         "bnxe"
  54 
  55 /*
  56  * Tunable controlling whether tx copy is on or off by default
  57  */
  58 boolean_t viona_default_tx_copy = B_TRUE;


 367                 eth_len = sizeof (struct ether_vlan_header);
 368                 veth = (const struct ether_vlan_header *)eth;
 369                 ftype = ntohs(veth->ether_type);
 370         }
 371 
 372         if (ftype == ETHERTYPE_IP) {
 373                 ipha = (ipha_t *)(mp->b_rptr + eth_len);
 374 
 375                 ipproto = ipha->ipha_protocol;
 376         } else if (ftype == ETHERTYPE_IPV6) {
 377                 ip6_t *ip6h = (ip6_t *)(mp->b_rptr + eth_len);
 378 
 379                 ipproto = ip6h->ip6_nxt;
 380         }
 381 
 382         /*
 383          * We ignore hdr_len because the spec says it can't be
 384          * trusted. Besides, our own stack will determine the header
 385          * boundary.
 386          */
 387         if ((link->l_cap_csum & HCKSUM_INET_PARTIAL) != 0 &&
 388             (hdr->vrh_gso_type & VIRTIO_NET_HDR_GSO_TCPV4) != 0 &&
 389             ftype == ETHERTYPE_IP) {

 390                 uint16_t        *cksump;
 391                 uint32_t        cksum;
 392                 ipaddr_t        src = ipha->ipha_src;
 393                 ipaddr_t        dst = ipha->ipha_dst;
 394 
 395                 /*
 396                  * Our native IP stack doesn't set the L4 length field
 397                  * of the pseudo header when LSO is in play. Other IP
 398                  * stacks, e.g. Linux, do include the length field.
 399                  * This is a problem because the hardware expects that
 400                  * the length field is not set. When it is set it will
 401                  * cause an incorrect TCP checksum to be generated.
 402                  * The reason this works in Linux is because Linux
 403                  * corrects the pseudo-header checksum in the driver
 404                  * code. In order to get the correct HW checksum we
 405                  * need to assume the guest's IP stack gave us a bogus
 406                  * TCP partial checksum and calculate it ourselves.
 407                  */
 408                 cksump = IPH_TCPH_CHECKSUMP(ipha, IPH_HDR_LENGTH(ipha));
 409                 cksum = IP_TCP_CSUM_COMP;
 410                 cksum += (dst >> 16) + (dst & 0xFFFF) +
 411                     (src >> 16) + (src & 0xFFFF);
 412                 cksum = (cksum & 0xFFFF) + (cksum >> 16);
 413                 *(cksump) = (cksum & 0xFFFF) + (cksum >> 16);

 414 
 415                 /*
 416                  * Since viona is a "legacy device", the data stored
 417                  * by the driver will be in the guest's native endian
 418                  * format (see sections 2.4.3 and 5.1.6.1 of the
 419                  * VIRTIO 1.0 spec for more info). At this time the
 420                  * only guests using viona are x86 and we can assume
 421                  * little-endian.
 422                  */
 423                 lso_info_set(mp, LE_16(hdr->vrh_gso_size), HW_LSO);
 424 
 425                 /*
 426                  * Hardware, like ixgbe, expects the client to request
 427                  * IP header checksum offload if it's sending LSO (see
 428                  * ixgbe_get_context()). Unfortunately, virtio makes
 429                  * no allowances for negotiating IP header checksum
 430                  * and HW offload, only TCP checksum. We add the flag
 431                  * and zero-out the checksum field. This mirrors the
 432                  * behavior of our native IP stack (which does this in
 433                  * the interest of HW that expects the field to be




  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26 /*
  27  * This file and its contents are supplied under the terms of the
  28  * Common Development and Distribution License ("CDDL"), version 1.0.
  29  * You may only use this file in accordance with the terms of version
  30  * 1.0 of the CDDL.
  31  *
  32  * A full copy of the text of the CDDL should have accompanied this
  33  * source.  A copy of the CDDL is also available via the Internet at
  34  * http://www.illumos.org/license/CDDL.
  35  *
  36  * Copyright 2015 Pluribus Networks Inc.
  37  * Copyright 2019 Joyent, Inc.
  38  * Copyright 2024 Oxide Computer Company
  39  * Copyright 2024 MNX Cloud, Inc.
  40  */
  41 
  42 
  43 #include <sys/types.h>
  44 #include <sys/smt.h>
  45 #include <sys/strsubr.h>
  46 
  47 #include <sys/pattr.h>
  48 #include <sys/dlpi.h>
  49 #include <inet/ip.h>
  50 #include <inet/ip_impl.h>
  51 
  52 #include "viona_impl.h"
  53 
  54 #define BNXE_NIC_DRIVER         "bnxe"
  55 
  56 /*
  57  * Tunable controlling whether tx copy is on or off by default
  58  */
  59 boolean_t viona_default_tx_copy = B_TRUE;


 368                 eth_len = sizeof (struct ether_vlan_header);
 369                 veth = (const struct ether_vlan_header *)eth;
 370                 ftype = ntohs(veth->ether_type);
 371         }
 372 
 373         if (ftype == ETHERTYPE_IP) {
 374                 ipha = (ipha_t *)(mp->b_rptr + eth_len);
 375 
 376                 ipproto = ipha->ipha_protocol;
 377         } else if (ftype == ETHERTYPE_IPV6) {
 378                 ip6_t *ip6h = (ip6_t *)(mp->b_rptr + eth_len);
 379 
 380                 ipproto = ip6h->ip6_nxt;
 381         }
 382 
 383         /*
 384          * We ignore hdr_len because the spec says it can't be
 385          * trusted. Besides, our own stack will determine the header
 386          * boundary.
 387          */
 388         if ((hdr->vrh_gso_type & VIRTIO_NET_HDR_GSO_TCPV4) != 0 &&

 389             ftype == ETHERTYPE_IP) {
 390                 if ((link->l_cap_csum & HCKSUM_INET_PARTIAL) != 0) {
 391                         uint16_t        *cksump;
 392                         uint32_t        cksum;
 393                         ipaddr_t        src = ipha->ipha_src;
 394                         ipaddr_t        dst = ipha->ipha_dst;
 395 
 396                         /*
 397                          * Our native IP stack doesn't set the L4 length field
 398                          * of the pseudo header when LSO is in play. Other IP
 399                          * stacks, e.g. Linux, do include the length field.
 400                          * This is a problem because the hardware expects that
 401                          * the length field is not set. When it is set it will
 402                          * cause an incorrect TCP checksum to be generated.
 403                          * The reason this works in Linux is because Linux
 404                          * corrects the pseudo-header checksum in the driver
 405                          * code. In order to get the correct HW checksum we
 406                          * need to assume the guest's IP stack gave us a bogus
 407                          * TCP partial checksum and calculate it ourselves.
 408                          */
 409                         cksump = IPH_TCPH_CHECKSUMP(ipha, IPH_HDR_LENGTH(ipha));
 410                         cksum = IP_TCP_CSUM_COMP;
 411                         cksum += (dst >> 16) + (dst & 0xFFFF) +
 412                             (src >> 16) + (src & 0xFFFF);
 413                         cksum = (cksum & 0xFFFF) + (cksum >> 16);
 414                         *(cksump) = (cksum & 0xFFFF) + (cksum >> 16);
 415                 }
 416 
 417                 /*
 418                  * Since viona is a "legacy device", the data stored
 419                  * by the driver will be in the guest's native endian
 420                  * format (see sections 2.4.3 and 5.1.6.1 of the
 421                  * VIRTIO 1.0 spec for more info). At this time the
 422                  * only guests using viona are x86 and we can assume
 423                  * little-endian.
 424                  */
 425                 lso_info_set(mp, LE_16(hdr->vrh_gso_size), HW_LSO);
 426 
 427                 /*
 428                  * Hardware, like ixgbe, expects the client to request
 429                  * IP header checksum offload if it's sending LSO (see
 430                  * ixgbe_get_context()). Unfortunately, virtio makes
 431                  * no allowances for negotiating IP header checksum
 432                  * and HW offload, only TCP checksum. We add the flag
 433                  * and zero-out the checksum field. This mirrors the
 434                  * behavior of our native IP stack (which does this in
 435                  * the interest of HW that expects the field to be