4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2011 Joyent, Inc.  All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/callb.h>
  31 #include <sys/sdt.h>
  32 #include <sys/strsubr.h>
  33 #include <sys/strsun.h>
  34 #include <sys/vlan.h>
  35 #include <sys/stack.h>
  36 #include <sys/archsystm.h>
  37 #include <inet/ipsec_impl.h>
  38 #include <inet/ip_impl.h>
  39 #include <inet/sadb.h>
  40 #include <inet/ipsecesp.h>
  41 #include <inet/ipsecah.h>
  42 #include <inet/ip6.h>
  43 
  44 #include <sys/mac_impl.h>
  45 #include <sys/mac_client_impl.h>
  46 #include <sys/mac_client_priv.h>
 
 
 513         (cnt)++;                                                        \
 514         if ((bw_ctl))                                                   \
 515                 (sz) += (sz0);                                          \
 516 }
 517 
 518 #define MAC_FANOUT_DEFAULT      0       /* index from HASH_ADDR() hash */
 519 #define MAC_FANOUT_RND_ROBIN    1       /* index from srs_ind counter */
 520 int mac_fanout_type = MAC_FANOUT_DEFAULT;       /* UDP/SCTP/ESP policy */
 521 
 522 #define MAX_SR_TYPES    3
 523 /* fanout types for port based hashing */
 524 enum pkt_type {
 525         V4_TCP = 0,     /* proto fanout uses srs_tcp_soft_rings */
 526         V4_UDP,         /* proto fanout uses srs_udp_soft_rings */
 527         OTH,            /* proto fanout uses srs_oth_soft_rings */
 528         UNDEF
 529 };
 530 
 531 /*
 532  * In general we do port based hashing to spread traffic over different
 533  * softrings. The tunable below allows overriding that behavior. Setting it
 534  * to B_TRUE enables a fanout based on the src ipv6 address. This behavior
 535  * is also applicable to ipv6 packets carrying multiple optional headers
 536  * and other uncommon packet types.
 537  */
 538 boolean_t mac_src_ipv6_fanout = B_FALSE;
 539 
 540 /*
 541  * Pair of local and remote ports in the transport header
 542  */
 543 #define PORTS_SIZE 4
 544 
 545 /*
 546  * mac_rx_srs_proto_fanout
 547  *
 548  * This routine delivers packets destined to an SRS into one of the
 549  * protocol soft rings.
 550  *
 551  * Given a chain of packets we need to split it up into multiple sub chains
 552  * destined into TCP, UDP or OTH soft ring. Instead of entering
 553  * the soft ring one packet at a time, we want to enter it in the form of a
 554  * chain otherwise we get this start/stop behaviour where the worker thread
 555  * goes to sleep and then next packets comes in forcing it to wake up etc.
 556  */
 557 static void
 558 mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
 
 743                 if (headmp[type] != NULL) {
 744                         mac_soft_ring_t                 *softring;
 745 
 746                         ASSERT(tailmp[type]->b_next == NULL);
 747                         switch (type) {
 748                         case V4_TCP:
 749                                 softring = mac_srs->srs_tcp_soft_rings[0];
 750                                 break;
 751                         case V4_UDP:
 752                                 softring = mac_srs->srs_udp_soft_rings[0];
 753                                 break;
 754                         case OTH:
 755                                 softring = mac_srs->srs_oth_soft_rings[0];
 756                         }
 757                         mac_rx_soft_ring_process(mcip, softring,
 758                             headmp[type], tailmp[type], cnt[type], sz[type]);
 759                 }
 760         }
 761 }
 762 
 763 int     fanout_unalligned = 0;  /* long-fanout fallbacks to ring 0 */
 764 
 765 /*
 766  * mac_rx_srs_long_fanout
 767  *
 768  * The fanout routine for IPv6
      *
      * Chooses the soft ring index (*indx) and soft ring type (*type) for a
      * single packet.  'hdrsize' is the offset from mp->b_rptr to the IP
      * header.  Only sap == ETHERTYPE_IPV6 is parsed; every other sap is sent
      * to index 0.  *type is set to OTH on every successful path.
      *
      * Returns 0 once *indx and *type are written.  Returns -1, without
      * writing them, when the mblk following a mac-header-only mblk is
      * missing, when pullupmsg() fails, or when ip6_plen is below
      * MIN_EHDR_LEN.
 769  */
 770 static int
 771 mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp,
 772     uint32_t sap, size_t hdrsize, enum pkt_type *type, uint_t *indx)
 773 {
 774         ip6_t           *ip6h;
 775         uint8_t         *whereptr;
 776         uint_t          hash;
 777         uint16_t        remlen;
 778         uint8_t         nexthdr;
 779         uint16_t        hdr_len;
 780 
 781         if (sap == ETHERTYPE_IPV6) {
 782                 boolean_t       modifiable = B_TRUE;
 783 
 784                 ASSERT(MBLKL(mp) >= hdrsize);
 785 
 786                 ip6h = (ip6_t *)(mp->b_rptr + hdrsize);
 787                 if ((unsigned char *)ip6h == mp->b_wptr) {
 788                         /*
 789                          * The first mblk_t only includes the mac header.
 790                          * Note that it is safe to change the mp pointer here,
 791                          * as the subsequent operation does not assume mp
 792                          * points to the start of the mac header.
 793                          */
 794                         mp = mp->b_cont;
 795 
 796                         /*
 797                          * Make sure ip6h holds the full ip6_t structure.
 798                          */
 799                         if (mp == NULL)
 800                                 return (-1);
 801 
 802                         if (MBLKL(mp) < IPV6_HDR_LEN) {
                                     /*
                                      * pullupmsg() is attempted only when
                                      * DB_REF(mp) is 1; otherwise the packet
                                      * takes the default-ring path below.
                                      */
 803                                 modifiable = (DB_REF(mp) == 1);
 804 
 805                                 if (modifiable &&
 806                                     !pullupmsg(mp, IPV6_HDR_LEN)) {
 807                                         return (-1);
 808                                 }
 809                         }
 810 
 811                         ip6h = (ip6_t *)mp->b_rptr;
 812                 }
 813 
 814                 if (!modifiable || !(OK_32PTR((char *)ip6h)) ||
 815                     ((unsigned char *)ip6h + IPV6_HDR_LEN > mp->b_wptr)) {
 816                         /*
 817                          * If either ip6h is not aligned, or ip6h does not
 818                          * hold the complete ip6_t structure (a pullupmsg()
 819                          * is not an option since it would result in an
 820                          * unaligned ip6h), fanout to the default ring. Note
 821                          * that this may cause packet reordering.
 822                          */
 823                         *indx = 0;
 824                         *type = OTH;
 825                         fanout_unalligned++;
 826                         return (0);
 827                 }
 828 
 829                 remlen = ntohs(ip6h->ip6_plen);
 830                 nexthdr = ip6h->ip6_nxt;
 831 
                     /* A payload length below MIN_EHDR_LEN is rejected. */
 832                 if (remlen < MIN_EHDR_LEN)
 833                         return (-1);
 834                 /*
 835                  * Do src based fanout if below tunable is set to B_TRUE or
 836                  * when mac_ip_hdr_length_v6() fails because of malformed
 837                  * packets or because mblk's need to be concatenated using
 838                  * pullupmsg().
 839                  */
 840                 if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(ip6h,
 841                     mp->b_wptr, &hdr_len, &nexthdr, NULL)) {
 842                         goto src_based_fanout;
 843                 }
                     /* whereptr addresses the first byte after the IPv6 headers. */
 844                 whereptr = (uint8_t *)ip6h + hdr_len;
 845 
 846                 /* If the transport is one of below, we do port based fanout */
 847                 switch (nexthdr) {
 848                 case IPPROTO_TCP:
 849                 case IPPROTO_UDP:
 850                 case IPPROTO_SCTP:
 851                 case IPPROTO_ESP:
 852                         /*
 853                          * If the ports in the transport header is not part of
 854                          * the mblk, do src_based_fanout, instead of calling
 855                          * pullupmsg().
                              *
                              * NOTE(review): when mp->b_cont is NULL this test
                              * never diverts, so the 4 bytes at whereptr are
                              * read below even if they lie at or past
                              * mp->b_wptr -- confirm that is acceptable.
 856                          */
 857                         if (mp->b_cont != NULL &&
 858                             whereptr + PORTS_SIZE > mp->b_wptr) {
 859                                 goto src_based_fanout;
 860                         }
 861                         break;
 862                 default:
 863                         break;
 864                 }
 865 
                     /*
                      * Hash the source address together with the first
                      * PORTS_SIZE bytes of the transport header.  *type is OTH
                      * in every case; only the ring count used to derive the
                      * index differs per protocol.
                      */
 866                 switch (nexthdr) {
 867                 case IPPROTO_TCP:
 868                         hash = HASH_ADDR(V4_PART_OF_V6(ip6h->ip6_src),
 869                             *(uint32_t *)whereptr);
 870                         *indx = COMPUTE_INDEX(hash,
 871                             mac_srs->srs_tcp_ring_count);
 872                         *type = OTH;
 873                         break;
 874 
 875                 case IPPROTO_UDP:
 876                 case IPPROTO_SCTP:
 877                 case IPPROTO_ESP:
 878                         if (mac_fanout_type == MAC_FANOUT_DEFAULT) {
 879                                 hash = HASH_ADDR(V4_PART_OF_V6(ip6h->ip6_src),
 880                                     *(uint32_t *)whereptr);
 881                                 *indx = COMPUTE_INDEX(hash,
 882                                     mac_srs->srs_udp_ring_count);
 883                         } else {
                                     /* Any non-default policy rotates via srs_ind. */
 884                                 *indx = mac_srs->srs_ind %
 885                                     mac_srs->srs_udp_ring_count;
 886                                 mac_srs->srs_ind++;
 887                         }
 888                         *type = OTH;
 889                         break;
 890 
 891                         /* For all other protocol, do source based fanout */
 892                 default:
 893                         goto src_based_fanout;
 894                 }
 895         } else {
                     /* Non-IPv6 sap: no parsing, always index 0. */
 896                 *indx = 0;
 897                 *type = OTH;
 898         }
 899         return (0);
 900 
 901 src_based_fanout:
             /* Hash on the source address alone, spread over the OTH ring count. */
 902         hash = HASH_ADDR(V4_PART_OF_V6(ip6h->ip6_src), (uint32_t)0);
 903         *indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count);
 904         *type = OTH;
 905         return (0);
 906 }
 907 
 908 /*
 909  * mac_rx_srs_fanout
 910  *
 911  * This routine delivers packets destined to an SRS into a soft ring member
 912  * of the set.
 913  *
 914  * Given a chain of packets we need to split it up into multiple sub chains
 915  * destined for one of the TCP, UDP or OTH soft rings. Instead of entering
 916  * the soft ring one packet at a time, we want to enter it in the form of a
 917  * chain otherwise we get this start/stop behaviour where the worker thread
 918  * goes to sleep and then next packets comes in forcing it to wake up etc.
 919  *
 920  * Note:
 921  * Since we know what is the maximum fanout possible, we create a 2D array
 922  * of 'softring types * MAX_SR_FANOUT' for the head, tail, cnt and sz
 
 | 
 
 
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2011 Joyent, Inc.  All rights reserved.
  25  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/callb.h>
  30 #include <sys/sdt.h>
  31 #include <sys/strsubr.h>
  32 #include <sys/strsun.h>
  33 #include <sys/vlan.h>
  34 #include <sys/stack.h>
  35 #include <sys/archsystm.h>
  36 #include <inet/ipsec_impl.h>
  37 #include <inet/ip_impl.h>
  38 #include <inet/sadb.h>
  39 #include <inet/ipsecesp.h>
  40 #include <inet/ipsecah.h>
  41 #include <inet/ip6.h>
  42 
  43 #include <sys/mac_impl.h>
  44 #include <sys/mac_client_impl.h>
  45 #include <sys/mac_client_priv.h>
 
 
 512         (cnt)++;                                                        \
 513         if ((bw_ctl))                                                   \
 514                 (sz) += (sz0);                                          \
 515 }
 516 
 517 #define MAC_FANOUT_DEFAULT      0       /* index from HASH_ADDR() hash */
 518 #define MAC_FANOUT_RND_ROBIN    1       /* index from srs_ind counter */
 519 int mac_fanout_type = MAC_FANOUT_DEFAULT;       /* UDP/SCTP/ESP policy */
 520 
 521 #define MAX_SR_TYPES    3
 522 /* fanout types for port based hashing */
 523 enum pkt_type {
 524         V4_TCP = 0,     /* proto fanout uses srs_tcp_soft_rings */
 525         V4_UDP,         /* proto fanout uses srs_udp_soft_rings */
 526         OTH,            /* proto fanout uses srs_oth_soft_rings */
 527         UNDEF
 528 };
 529 
 530 /*
 531  * In general we do port based hashing to spread traffic over different
 532  * softrings. The below tunables allow to override that behavior. Setting one
 533  * (depending on IPv6 or IPv4) to B_TRUE allows a fanout based on src
 534  * IPv6 or IPv4 address. This behavior is also applicable to IPv6 packets
 535  * carrying multiple optional headers and other uncommon packet types.
 536  */
 537 boolean_t mac_src_ipv6_fanout = B_FALSE;
 538 boolean_t mac_src_ipv4_fanout = B_FALSE;
 539 
 540 /*
 541  * Pair of local and remote ports in the transport header
 542  */
 543 #define PORTS_SIZE 4
 544 
 545 /*
 546  * mac_rx_srs_proto_fanout
 547  *
 548  * This routine delivers packets destined to an SRS into one of the
 549  * protocol soft rings.
 550  *
 551  * Given a chain of packets we need to split it up into multiple sub chains
 552  * destined into TCP, UDP or OTH soft ring. Instead of entering
 553  * the soft ring one packet at a time, we want to enter it in the form of a
 554  * chain otherwise we get this start/stop behaviour where the worker thread
 555  * goes to sleep and then next packets comes in forcing it to wake up etc.
 556  */
 557 static void
 558 mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
 
 743                 if (headmp[type] != NULL) {
 744                         mac_soft_ring_t                 *softring;
 745 
 746                         ASSERT(tailmp[type]->b_next == NULL);
 747                         switch (type) {
 748                         case V4_TCP:
 749                                 softring = mac_srs->srs_tcp_soft_rings[0];
 750                                 break;
 751                         case V4_UDP:
 752                                 softring = mac_srs->srs_udp_soft_rings[0];
 753                                 break;
 754                         case OTH:
 755                                 softring = mac_srs->srs_oth_soft_rings[0];
 756                         }
 757                         mac_rx_soft_ring_process(mcip, softring,
 758                             headmp[type], tailmp[type], cnt[type], sz[type]);
 759                 }
 760         }
 761 }
 762 
 763 int     fanout_unaligned = 0;   /* long-fanout fallbacks to ring 0 */
 764 
 765 /*
 766  * mac_rx_srs_long_fanout
 767  *
 768  * The fanout routine for VLANs, and for anything else that isn't performing
 769  * explicit dls bypass.  Returns -1 on an error (drop the packet due to a
 770  * malformed packet), 0 on success, with values written in *indx and *type.
      *
      * 'hdrsize' is the offset from mp->b_rptr to the IP header.  Only
      * ETHERTYPE_IPV6 and ETHERTYPE_IP are parsed; any other sap is assigned
      * index 0.  *type is set to OTH on every successful path, and neither
      * output is written when -1 is returned.
 771  */
 772 static int
 773 mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp,
 774     uint32_t sap, size_t hdrsize, enum pkt_type *type, uint_t *indx)
 775 {
 776         ip6_t           *ip6h;
 777         ipha_t          *ipha;
 778         uint8_t         *whereptr;
 779         uint_t          hash;
 780         uint16_t        remlen;
 781         uint8_t         nexthdr;
 782         uint16_t        hdr_len;
 783         uint32_t        src_val;
 784         boolean_t       modifiable = B_TRUE;
 785         boolean_t       v6;
 786 
 787         ASSERT(MBLKL(mp) >= hdrsize);
 788 
             /*
              * hdr_len starts out as the fixed minimum header size for the
              * address family; it is replaced by the real header length once
              * the header is known to be readable.
              */
 789         if (sap == ETHERTYPE_IPV6) {
 790                 v6 = B_TRUE;
 791                 hdr_len = IPV6_HDR_LEN;
 792         } else if (sap == ETHERTYPE_IP) {
 793                 v6 = B_FALSE;
 794                 hdr_len = IP_SIMPLE_HDR_LENGTH;
 795         } else {
 796                 *indx = 0;
 797                 *type = OTH;
 798                 return (0);
 799         }
 800 
             /* ip6h and ipha alias the same header; 'v6' picks the view used. */
 801         ip6h = (ip6_t *)(mp->b_rptr + hdrsize);
 802         ipha = (ipha_t *)ip6h;
 803 
 804         if ((uint8_t *)ip6h == mp->b_wptr) {
 805                 /*
 806                  * The first mblk_t only includes the mac header.
 807                  * Note that it is safe to change the mp pointer here,
 808                  * as the subsequent operation does not assume mp
 809                  * points to the start of the mac header.
 810                  */
 811                 mp = mp->b_cont;
 812 
 813                 /*
 814                  * Make sure the IP header points to an entire one.
 815                  */
 816                 if (mp == NULL)
 817                         return (-1);
 818 
 819                 if (MBLKL(mp) < hdr_len) {
                             /*
                              * pullupmsg() is attempted only when DB_REF(mp)
                              * is 1; otherwise the packet takes the
                              * default-ring path below.
                              */
 820                         modifiable = (DB_REF(mp) == 1);
 821 
 822                         if (modifiable && !pullupmsg(mp, hdr_len))
 823                                 return (-1);
 824                 }
 825 
 826                 ip6h = (ip6_t *)mp->b_rptr;
 827                 ipha = (ipha_t *)ip6h;
 828         }
 829 
 830         if (!modifiable || !(OK_32PTR((char *)ip6h)) ||
 831             ((uint8_t *)ip6h + hdr_len > mp->b_wptr)) {
 832                 /*
 833                  * If either the IP header is not aligned, or it does not hold
 834                  * the complete simple structure (a pullupmsg() is not an
 835                  * option since it would result in an unaligned IP header),
 836                  * fanout to the default ring.
 837                  *
 838                  * Note that this may cause packet reordering.
 839                  */
 840                 *indx = 0;
 841                 *type = OTH;
 842                 fanout_unaligned++;
 843                 return (0);
 844         }
 845 
 846         /*
 847          * Extract next-header, full header length, and source-hash value
 848          * using v4/v6 specific fields.
 849          */
 850         if (v6) {
 851                 remlen = ntohs(ip6h->ip6_plen);
 852                 nexthdr = ip6h->ip6_nxt;
 853                 src_val = V4_PART_OF_V6(ip6h->ip6_src);
 854                 /*
 855                  * Do src based fanout if below tunable is set to B_TRUE or
 856                  * when mac_ip_hdr_length_v6() fails because of malformed
 857                  * packets or because mblks need to be concatenated using
 858                  * pullupmsg().
 859                  */
 860                 if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(ip6h,
 861                     mp->b_wptr, &hdr_len, &nexthdr, NULL)) {
 862                         goto src_based_fanout;
 863                 }
 864         } else {
 865                 hdr_len = IPH_HDR_LENGTH(ipha);
 866                 remlen = ntohs(ipha->ipha_length) - hdr_len;
 867                 nexthdr = ipha->ipha_protocol;
 868                 src_val = (uint32_t)ipha->ipha_src;
 869                 /*
 870                  * Catch IPv4 fragment case here.  IPv6 has nexthdr == FRAG
 871                  * for its equivalent case.
 872                  */
 873                 if (mac_src_ipv4_fanout ||
 874                     (ntohs(ipha->ipha_fragment_offset_and_flags) &
 875                     (IPH_MF | IPH_OFFSET)) != 0) {
 876                         goto src_based_fanout;
 877                 }
 878         }
             /*
              * Packets diverted to src_based_fanout above never reach this
              * length check; only port/SPI candidates are rejected here.
              */
 879         if (remlen < MIN_EHDR_LEN)
 880                 return (-1);
             /* whereptr addresses the first byte after the IP header(s). */
 881         whereptr = (uint8_t *)ip6h + hdr_len;
 882 
 883         /* If the transport is one of below, we do port/SPI based fanout */
 884         switch (nexthdr) {
 885         case IPPROTO_TCP:
 886         case IPPROTO_UDP:
 887         case IPPROTO_SCTP:
 888         case IPPROTO_ESP:
 889                 /*
 890                  * If the ports or SPI in the transport header is not part of
 891                  * the mblk, do src_based_fanout, instead of calling
 892                  * pullupmsg().
                      *
                      * NOTE(review): when mp->b_cont is NULL the bounds test is
                      * skipped, so the 4 bytes at whereptr are read below even
                      * if they lie at or past mp->b_wptr -- confirm that is
                      * acceptable.
 893                  */
 894                 if (mp->b_cont == NULL || whereptr + PORTS_SIZE <= mp->b_wptr)
 895                         break;  /* out of switch... */
 896                 /* FALLTHRU */
 897         default:
 898                 goto src_based_fanout;
 899         }
 900 
             /*
              * Only TCP, UDP, SCTP and ESP get here; everything else jumped to
              * src_based_fanout.  *type is OTH in every case; only the ring
              * count used to derive the index differs per protocol.
              */
 901         switch (nexthdr) {
 902         case IPPROTO_TCP:
 903                 hash = HASH_ADDR(src_val, *(uint32_t *)whereptr);
 904                 *indx = COMPUTE_INDEX(hash, mac_srs->srs_tcp_ring_count);
 905                 *type = OTH;
 906                 break;
 907         case IPPROTO_UDP:
 908         case IPPROTO_SCTP:
 909         case IPPROTO_ESP:
 910                 if (mac_fanout_type == MAC_FANOUT_DEFAULT) {
 911                         hash = HASH_ADDR(src_val, *(uint32_t *)whereptr);
 912                         *indx = COMPUTE_INDEX(hash,
 913                             mac_srs->srs_udp_ring_count);
 914                 } else {
                             /* Any non-default policy rotates via srs_ind. */
 915                         *indx = mac_srs->srs_ind % mac_srs->srs_udp_ring_count;
 916                         mac_srs->srs_ind++;
 917                 }
 918                 *type = OTH;
 919                 break;
 920         }
 921         return (0);
 922 
 923 src_based_fanout:
             /* Hash on the source value alone, spread over the OTH ring count. */
 924         hash = HASH_ADDR(src_val, (uint32_t)0);
 925         *indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count);
 926         *type = OTH;
 927         return (0);
 928 }
 929 
 930 /*
 931  * mac_rx_srs_fanout
 932  *
 933  * This routine delivers packets destined to an SRS into a soft ring member
 934  * of the set.
 935  *
 936  * Given a chain of packets we need to split it up into multiple sub chains
 937  * destined for one of the TCP, UDP or OTH soft rings. Instead of entering
 938  * the soft ring one packet at a time, we want to enter it in the form of a
 939  * chain otherwise we get this start/stop behaviour where the worker thread
 940  * goes to sleep and then next packets comes in forcing it to wake up etc.
 941  *
 942  * Note:
 943  * Since we know what is the maximum fanout possible, we create a 2D array
 944  * of 'softring types * MAX_SR_FANOUT' for the head, tail, cnt and sz
 
 |