4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2011 Joyent, Inc. All rights reserved.
27 */
28
29 #include <sys/types.h>
30 #include <sys/callb.h>
31 #include <sys/sdt.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/vlan.h>
35 #include <sys/stack.h>
36 #include <sys/archsystm.h>
37 #include <inet/ipsec_impl.h>
38 #include <inet/ip_impl.h>
39 #include <inet/sadb.h>
40 #include <inet/ipsecesp.h>
41 #include <inet/ipsecah.h>
42 #include <inet/ip6.h>
43
44 #include <sys/mac_impl.h>
45 #include <sys/mac_client_impl.h>
46 #include <sys/mac_client_priv.h>
513 (cnt)++; \
514 if ((bw_ctl)) \
515 (sz) += (sz0); \
516 }
517
/*
 * Soft ring selection policy consulted by mac_rx_srs_long_fanout() for
 * UDP, SCTP and ESP packets: with MAC_FANOUT_DEFAULT the ring is picked by
 * hashing the source address together with the ports/SPI; with any other
 * value the rings are used in round-robin order.
 */
#define	MAC_FANOUT_DEFAULT	0
#define	MAC_FANOUT_RND_ROBIN	1
int mac_fanout_type = MAC_FANOUT_DEFAULT;
521
#define	MAX_SR_TYPES	3

/*
 * Fanout types for port based hashing. The enumerator values are spelled
 * out explicitly; they are the same values the compiler would assign.
 */
enum pkt_type {
	V4_TCP = 0,
	V4_UDP = 1,
	OTH = 2,
	UNDEF = 3
};
530
531 /*
532 * In general we do port based hashing to spread traffic over different
533 * softrings. The below tunable allows to override that behavior. Setting it
534 * to B_TRUE allows to do a fanout based on src ipv6 address. This behavior
535 * is also the applicable to ipv6 packets carrying multiple optional headers
536 * and other uncommon packet types.
537 */
538 boolean_t mac_src_ipv6_fanout = B_FALSE;
539
/*
 * Size, in bytes, of the pair of local and remote ports at the start of
 * the transport header.
 */
#define	PORTS_SIZE	4
544
/*
 * mac_rx_srs_proto_fanout
 *
 * This routine delivers packets destined to an SRS into one of the
 * protocol soft rings.
 *
 * Given a chain of packets, we need to split it up into multiple sub-chains
 * destined for the TCP, UDP or OTH soft ring. Instead of entering the soft
 * ring one packet at a time, we want to enter it with a whole chain;
 * otherwise we get start/stop behaviour where the worker thread goes to
 * sleep and the next packet to arrive forces it to wake up again.
 */
557 static void
558 mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
743 if (headmp[type] != NULL) {
744 mac_soft_ring_t *softring;
745
746 ASSERT(tailmp[type]->b_next == NULL);
747 switch (type) {
748 case V4_TCP:
749 softring = mac_srs->srs_tcp_soft_rings[0];
750 break;
751 case V4_UDP:
752 softring = mac_srs->srs_udp_soft_rings[0];
753 break;
754 case OTH:
755 softring = mac_srs->srs_oth_soft_rings[0];
756 }
757 mac_rx_soft_ring_process(mcip, softring,
758 headmp[type], tailmp[type], cnt[type], sz[type]);
759 }
760 }
761 }
762
/*
 * Number of packets that mac_rx_srs_long_fanout() sent to the default ring
 * because the IPv6 header was in a shared mblk, was not 32-bit aligned, or
 * was not completely contained in the mblk.
 */
int fanout_unalligned = 0;
764
765 /*
766 * mac_rx_srs_long_fanout
767 *
768 * The fanout routine for IPv6
769 */
770 static int
771 mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp,
772 uint32_t sap, size_t hdrsize, enum pkt_type *type, uint_t *indx)
773 {
774 ip6_t *ip6h;
775 uint8_t *whereptr;
776 uint_t hash;
777 uint16_t remlen;
778 uint8_t nexthdr;
779 uint16_t hdr_len;
780
781 if (sap == ETHERTYPE_IPV6) {
782 boolean_t modifiable = B_TRUE;
783
784 ASSERT(MBLKL(mp) >= hdrsize);
785
786 ip6h = (ip6_t *)(mp->b_rptr + hdrsize);
787 if ((unsigned char *)ip6h == mp->b_wptr) {
788 /*
789 * The first mblk_t only includes the mac header.
790 * Note that it is safe to change the mp pointer here,
791 * as the subsequent operation does not assume mp
792 * points to the start of the mac header.
793 */
794 mp = mp->b_cont;
795
796 /*
797 * Make sure ip6h holds the full ip6_t structure.
798 */
799 if (mp == NULL)
800 return (-1);
801
802 if (MBLKL(mp) < IPV6_HDR_LEN) {
803 modifiable = (DB_REF(mp) == 1);
804
805 if (modifiable &&
806 !pullupmsg(mp, IPV6_HDR_LEN)) {
807 return (-1);
808 }
809 }
810
811 ip6h = (ip6_t *)mp->b_rptr;
812 }
813
814 if (!modifiable || !(OK_32PTR((char *)ip6h)) ||
815 ((unsigned char *)ip6h + IPV6_HDR_LEN > mp->b_wptr)) {
816 /*
817 * If either ip6h is not alligned, or ip6h does not
818 * hold the complete ip6_t structure (a pullupmsg()
819 * is not an option since it would result in an
820 * unalligned ip6h), fanout to the default ring. Note
821 * that this may cause packets reordering.
822 */
823 *indx = 0;
824 *type = OTH;
825 fanout_unalligned++;
826 return (0);
827 }
828
829 remlen = ntohs(ip6h->ip6_plen);
830 nexthdr = ip6h->ip6_nxt;
831
832 if (remlen < MIN_EHDR_LEN)
833 return (-1);
834 /*
835 * Do src based fanout if below tunable is set to B_TRUE or
836 * when mac_ip_hdr_length_v6() fails because of malformed
837 * packets or because mblk's need to be concatenated using
838 * pullupmsg().
839 */
840 if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(ip6h,
841 mp->b_wptr, &hdr_len, &nexthdr, NULL)) {
842 goto src_based_fanout;
843 }
844 whereptr = (uint8_t *)ip6h + hdr_len;
845
846 /* If the transport is one of below, we do port based fanout */
847 switch (nexthdr) {
848 case IPPROTO_TCP:
849 case IPPROTO_UDP:
850 case IPPROTO_SCTP:
851 case IPPROTO_ESP:
852 /*
853 * If the ports in the transport header is not part of
854 * the mblk, do src_based_fanout, instead of calling
855 * pullupmsg().
856 */
857 if (mp->b_cont != NULL &&
858 whereptr + PORTS_SIZE > mp->b_wptr) {
859 goto src_based_fanout;
860 }
861 break;
862 default:
863 break;
864 }
865
866 switch (nexthdr) {
867 case IPPROTO_TCP:
868 hash = HASH_ADDR(V4_PART_OF_V6(ip6h->ip6_src),
869 *(uint32_t *)whereptr);
870 *indx = COMPUTE_INDEX(hash,
871 mac_srs->srs_tcp_ring_count);
872 *type = OTH;
873 break;
874
875 case IPPROTO_UDP:
876 case IPPROTO_SCTP:
877 case IPPROTO_ESP:
878 if (mac_fanout_type == MAC_FANOUT_DEFAULT) {
879 hash = HASH_ADDR(V4_PART_OF_V6(ip6h->ip6_src),
880 *(uint32_t *)whereptr);
881 *indx = COMPUTE_INDEX(hash,
882 mac_srs->srs_udp_ring_count);
883 } else {
884 *indx = mac_srs->srs_ind %
885 mac_srs->srs_udp_ring_count;
886 mac_srs->srs_ind++;
887 }
888 *type = OTH;
889 break;
890
891 /* For all other protocol, do source based fanout */
892 default:
893 goto src_based_fanout;
894 }
895 } else {
896 *indx = 0;
897 *type = OTH;
898 }
899 return (0);
900
901 src_based_fanout:
902 hash = HASH_ADDR(V4_PART_OF_V6(ip6h->ip6_src), (uint32_t)0);
903 *indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count);
904 *type = OTH;
905 return (0);
906 }
907
908 /*
909 * mac_rx_srs_fanout
910 *
911 * This routine delivers packets destined to an SRS into a soft ring member
912 * of the set.
913 *
914 * Given a chain of packets we need to split it up into multiple sub chains
915 * destined for one of the TCP, UDP or OTH soft rings. Instead of entering
916 * the soft ring one packet at a time, we want to enter it in the form of a
917 * chain otherwise we get this start/stop behaviour where the worker thread
918 * goes to sleep and then next packets comes in forcing it to wake up etc.
919 *
920 * Note:
921 * Since we know what is the maximum fanout possible, we create a 2D array
922 * of 'softring types * MAX_SR_FANOUT' for the head, tail, cnt and sz
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2011 Joyent, Inc. All rights reserved.
25 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 #include <sys/types.h>
29 #include <sys/callb.h>
30 #include <sys/sdt.h>
31 #include <sys/strsubr.h>
32 #include <sys/strsun.h>
33 #include <sys/vlan.h>
34 #include <sys/stack.h>
35 #include <sys/archsystm.h>
36 #include <inet/ipsec_impl.h>
37 #include <inet/ip_impl.h>
38 #include <inet/sadb.h>
39 #include <inet/ipsecesp.h>
40 #include <inet/ipsecah.h>
41 #include <inet/ip6.h>
42
43 #include <sys/mac_impl.h>
44 #include <sys/mac_client_impl.h>
45 #include <sys/mac_client_priv.h>
512 (cnt)++; \
513 if ((bw_ctl)) \
514 (sz) += (sz0); \
515 }
516
/*
 * Soft ring selection policy consulted by mac_rx_srs_long_fanout() for
 * UDP, SCTP and ESP packets: with MAC_FANOUT_DEFAULT the ring is picked by
 * hashing the source address together with the ports/SPI; with any other
 * value the rings are used in round-robin order.
 */
#define	MAC_FANOUT_DEFAULT	0
#define	MAC_FANOUT_RND_ROBIN	1
int mac_fanout_type = MAC_FANOUT_DEFAULT;
520
#define	MAX_SR_TYPES	3

/*
 * Fanout types for port based hashing. The enumerator values are spelled
 * out explicitly; they are the same values the compiler would assign.
 */
enum pkt_type {
	V4_TCP = 0,
	V4_UDP = 1,
	OTH = 2,
	UNDEF = 3
};
529
530 /*
531 * In general we do port based hashing to spread traffic over different
532 * softrings. The below tunables allow to override that behavior. Setting one
533 * (depending on IPv6 or IPv4) to B_TRUE allows a fanout based on src
534 * IPv6 or IPv4 address. This behavior is also applicable to IPv6 packets
535 * carrying multiple optional headers and other uncommon packet types.
536 */
537 boolean_t mac_src_ipv6_fanout = B_FALSE;
538 boolean_t mac_src_ipv4_fanout = B_FALSE;
539
/*
 * Size, in bytes, of the pair of local and remote ports at the start of
 * the transport header.
 */
#define	PORTS_SIZE	4
544
/*
 * mac_rx_srs_proto_fanout
 *
 * This routine delivers packets destined to an SRS into one of the
 * protocol soft rings.
 *
 * Given a chain of packets, we need to split it up into multiple sub-chains
 * destined for the TCP, UDP or OTH soft ring. Instead of entering the soft
 * ring one packet at a time, we want to enter it with a whole chain;
 * otherwise we get start/stop behaviour where the worker thread goes to
 * sleep and the next packet to arrive forces it to wake up again.
 */
557 static void
558 mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
743 if (headmp[type] != NULL) {
744 mac_soft_ring_t *softring;
745
746 ASSERT(tailmp[type]->b_next == NULL);
747 switch (type) {
748 case V4_TCP:
749 softring = mac_srs->srs_tcp_soft_rings[0];
750 break;
751 case V4_UDP:
752 softring = mac_srs->srs_udp_soft_rings[0];
753 break;
754 case OTH:
755 softring = mac_srs->srs_oth_soft_rings[0];
756 }
757 mac_rx_soft_ring_process(mcip, softring,
758 headmp[type], tailmp[type], cnt[type], sz[type]);
759 }
760 }
761 }
762
/*
 * Number of packets that mac_rx_srs_long_fanout() sent to the default ring
 * because the IP header was in a shared mblk, was not 32-bit aligned, or
 * was not completely contained in the mblk.
 */
int fanout_unaligned = 0;
764
765 /*
766 * mac_rx_srs_long_fanout
767 *
768 * The fanout routine for VLANs, and for anything else that isn't performing
769 * explicit dls bypass. Returns -1 on an error (drop the packet due to a
770 * malformed packet), 0 on success, with values written in *indx and *type.
771 */
772 static int
773 mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp,
774 uint32_t sap, size_t hdrsize, enum pkt_type *type, uint_t *indx)
775 {
776 ip6_t *ip6h;
777 ipha_t *ipha;
778 uint8_t *whereptr;
779 uint_t hash;
780 uint16_t remlen;
781 uint8_t nexthdr;
782 uint16_t hdr_len;
783 uint32_t src_val;
784 boolean_t modifiable = B_TRUE;
785 boolean_t v6;
786
787 ASSERT(MBLKL(mp) >= hdrsize);
788
789 if (sap == ETHERTYPE_IPV6) {
790 v6 = B_TRUE;
791 hdr_len = IPV6_HDR_LEN;
792 } else if (sap == ETHERTYPE_IP) {
793 v6 = B_FALSE;
794 hdr_len = IP_SIMPLE_HDR_LENGTH;
795 } else {
796 *indx = 0;
797 *type = OTH;
798 return (0);
799 }
800
801 ip6h = (ip6_t *)(mp->b_rptr + hdrsize);
802 ipha = (ipha_t *)ip6h;
803
804 if ((uint8_t *)ip6h == mp->b_wptr) {
805 /*
806 * The first mblk_t only includes the mac header.
807 * Note that it is safe to change the mp pointer here,
808 * as the subsequent operation does not assume mp
809 * points to the start of the mac header.
810 */
811 mp = mp->b_cont;
812
813 /*
814 * Make sure the IP header points to an entire one.
815 */
816 if (mp == NULL)
817 return (-1);
818
819 if (MBLKL(mp) < hdr_len) {
820 modifiable = (DB_REF(mp) == 1);
821
822 if (modifiable && !pullupmsg(mp, hdr_len))
823 return (-1);
824 }
825
826 ip6h = (ip6_t *)mp->b_rptr;
827 ipha = (ipha_t *)ip6h;
828 }
829
830 if (!modifiable || !(OK_32PTR((char *)ip6h)) ||
831 ((uint8_t *)ip6h + hdr_len > mp->b_wptr)) {
832 /*
833 * If either the IP header is not aligned, or it does not hold
834 * the complete simple structure (a pullupmsg() is not an
835 * option since it would result in an unaligned IP header),
836 * fanout to the default ring.
837 *
838 * Note that this may cause packet reordering.
839 */
840 *indx = 0;
841 *type = OTH;
842 fanout_unaligned++;
843 return (0);
844 }
845
846 /*
847 * Extract next-header, full header length, and source-hash value
848 * using v4/v6 specific fields.
849 */
850 if (v6) {
851 remlen = ntohs(ip6h->ip6_plen);
852 nexthdr = ip6h->ip6_nxt;
853 src_val = V4_PART_OF_V6(ip6h->ip6_src);
854 /*
855 * Do src based fanout if below tunable is set to B_TRUE or
856 * when mac_ip_hdr_length_v6() fails because of malformed
857 * packets or because mblks need to be concatenated using
858 * pullupmsg().
859 */
860 if (mac_src_ipv6_fanout || !mac_ip_hdr_length_v6(ip6h,
861 mp->b_wptr, &hdr_len, &nexthdr, NULL)) {
862 goto src_based_fanout;
863 }
864 } else {
865 hdr_len = IPH_HDR_LENGTH(ipha);
866 remlen = ntohs(ipha->ipha_length) - hdr_len;
867 nexthdr = ipha->ipha_protocol;
868 src_val = (uint32_t)ipha->ipha_src;
869 /*
870 * Catch IPv4 fragment case here. IPv6 has nexthdr == FRAG
871 * for its equivalent case.
872 */
873 if (mac_src_ipv4_fanout ||
874 (ntohs(ipha->ipha_fragment_offset_and_flags) &
875 (IPH_MF | IPH_OFFSET)) != 0) {
876 goto src_based_fanout;
877 }
878 }
879 if (remlen < MIN_EHDR_LEN)
880 return (-1);
881 whereptr = (uint8_t *)ip6h + hdr_len;
882
883 /* If the transport is one of below, we do port/SPI based fanout */
884 switch (nexthdr) {
885 case IPPROTO_TCP:
886 case IPPROTO_UDP:
887 case IPPROTO_SCTP:
888 case IPPROTO_ESP:
889 /*
890 * If the ports or SPI in the transport header is not part of
891 * the mblk, do src_based_fanout, instead of calling
892 * pullupmsg().
893 */
894 if (mp->b_cont == NULL || whereptr + PORTS_SIZE <= mp->b_wptr)
895 break; /* out of switch... */
896 /* FALLTHRU */
897 default:
898 goto src_based_fanout;
899 }
900
901 switch (nexthdr) {
902 case IPPROTO_TCP:
903 hash = HASH_ADDR(src_val, *(uint32_t *)whereptr);
904 *indx = COMPUTE_INDEX(hash, mac_srs->srs_tcp_ring_count);
905 *type = OTH;
906 break;
907 case IPPROTO_UDP:
908 case IPPROTO_SCTP:
909 case IPPROTO_ESP:
910 if (mac_fanout_type == MAC_FANOUT_DEFAULT) {
911 hash = HASH_ADDR(src_val, *(uint32_t *)whereptr);
912 *indx = COMPUTE_INDEX(hash,
913 mac_srs->srs_udp_ring_count);
914 } else {
915 *indx = mac_srs->srs_ind % mac_srs->srs_udp_ring_count;
916 mac_srs->srs_ind++;
917 }
918 *type = OTH;
919 break;
920 }
921 return (0);
922
923 src_based_fanout:
924 hash = HASH_ADDR(src_val, (uint32_t)0);
925 *indx = COMPUTE_INDEX(hash, mac_srs->srs_oth_ring_count);
926 *type = OTH;
927 return (0);
928 }
929
930 /*
931 * mac_rx_srs_fanout
932 *
933 * This routine delivers packets destined to an SRS into a soft ring member
934 * of the set.
935 *
936 * Given a chain of packets we need to split it up into multiple sub chains
937 * destined for one of the TCP, UDP or OTH soft rings. Instead of entering
938 * the soft ring one packet at a time, we want to enter it in the form of a
939 * chain otherwise we get this start/stop behaviour where the worker thread
940 * goes to sleep and then next packets comes in forcing it to wake up etc.
941 *
942 * Note:
943 * Since we know what is the maximum fanout possible, we create a 2D array
944 * of 'softring types * MAX_SR_FANOUT' for the head, tail, cnt and sz
|