Print this page
9832 Original bug discovered as 9560 has friends IPv4 packets coming in as IPv6 creating chaos
Reviewed by: Robert Mustacchi <rm@joyent.com>


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.

  23  */
  24 
  25 /*
  26  * MAC Services Module - misc utilities
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/mac.h>
  31 #include <sys/mac_impl.h>
  32 #include <sys/mac_client_priv.h>
  33 #include <sys/mac_client_impl.h>
  34 #include <sys/mac_soft_ring.h>
  35 #include <sys/strsubr.h>
  36 #include <sys/strsun.h>
  37 #include <sys/vlan.h>
  38 #include <sys/pattr.h>
  39 #include <sys/pci_tools.h>
  40 #include <inet/ip.h>
  41 #include <inet/ip_impl.h>
  42 #include <inet/ip6.h>


 460     boolean_t loopback)
 461 {
 462         mblk_t  *mp1 = mp;
 463 
 464         while (mp1 != NULL) {
 465                 mp1->b_prev = NULL;
 466                 mp1->b_queue = NULL;
 467                 mp1 = mp1->b_next;
 468         }
 469         freemsgchain(mp);
 470 }
 471 
 472 /*
 473  * Determines the IPv6 header length accounting for all the optional IPv6
 474  * headers (hop-by-hop, destination, routing and fragment). The header length
 475  * and next header value (a transport header) is captured.
 476  *
 477  * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
 478  * returns B_TRUE.
 479  */
 480 boolean_t
 481 mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length,
 482     uint8_t *next_hdr, ip6_frag_t **fragp)
 483 {
 484         uint16_t length;
 485         uint_t  ehdrlen;
 486         uint8_t *whereptr;
 487         uint8_t *nexthdrp;
 488         ip6_dest_t *desthdr;
 489         ip6_rthdr_t *rthdr;
 490         ip6_frag_t *fraghdr;
 491 
 492         if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
 493                 return (B_FALSE);
 494         ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);






 495         length = IPV6_HDR_LEN;
 496         whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
 497 
 498         if (fragp != NULL)
 499                 *fragp = NULL;
 500 
 501         nexthdrp = &ip6h->ip6_nxt;
 502         while (whereptr < endptr) {
 503                 /* Is there enough left for len + nexthdr? */
 504                 if (whereptr + MIN_EHDR_LEN > endptr)
 505                         break;
 506 
 507                 switch (*nexthdrp) {
 508                 case IPPROTO_HOPOPTS:
 509                 case IPPROTO_DSTOPTS:
 510                         /* Assumes the headers are identical for hbh and dst */
 511                         desthdr = (ip6_dest_t *)whereptr;
 512                         ehdrlen = 8 * (desthdr->ip6d_len + 1);
 513                         if ((uchar_t *)desthdr +  ehdrlen > endptr)
 514                                 return (B_FALSE);
 515                         nexthdrp = &desthdr->ip6d_nxt;
 516                         break;
 517                 case IPPROTO_ROUTING:
 518                         rthdr = (ip6_rthdr_t *)whereptr;
 519                         ehdrlen =  8 * (rthdr->ip6r_len + 1);
 520                         if ((uchar_t *)rthdr +  ehdrlen > endptr)
 521                                 return (B_FALSE);
 522                         nexthdrp = &rthdr->ip6r_nxt;
 523                         break;
 524                 case IPPROTO_FRAGMENT:
 525                         fraghdr = (ip6_frag_t *)whereptr;
 526                         ehdrlen = sizeof (ip6_frag_t);
 527                         if ((uchar_t *)&fraghdr[1] > endptr)
 528                                 return (B_FALSE);
 529                         nexthdrp = &fraghdr->ip6f_nxt;
 530                         if (fragp != NULL)
 531                                 *fragp = fraghdr;
 532                         break;
 533                 case IPPROTO_NONE:
 534                         /* No next header means we're finished */
 535                 default:
 536                         *hdr_length = length;
 537                         *next_hdr = *nexthdrp;
 538                         return (B_TRUE);
 539                 }
 540                 length += ehdrlen;
 541                 whereptr += ehdrlen;
 542                 *hdr_length = length;
 543                 *next_hdr = *nexthdrp;
 544         }
 545         switch (*nexthdrp) {
 546         case IPPROTO_HOPOPTS:
 547         case IPPROTO_DSTOPTS:
 548         case IPPROTO_ROUTING:
 549         case IPPROTO_FRAGMENT:
 550                 /*
 551                  * If any know extension headers are still to be processed,
 552                  * the packet's malformed (or at least all the IP header(s) are
 553                  * not in the same mblk - and that should never happen.



 554                  */
 555                 return (B_FALSE);
 556 
 557         default:
 558                 /*
 559                  * If we get here, we know that all of the IP headers were in
 560                  * the same mblk, even if the ULP header is in the next mblk.
 561                  */
 562                 *hdr_length = length;
 563                 *next_hdr = *nexthdrp;
 564                 return (B_TRUE);
 565         }
 566 }
 567 
 568 /*
 569  * The following set of routines are there to take care of interrupt
 570  * re-targeting for legacy (fixed) interrupts. Some older versions
 571  * of the popular NICs like e1000g do not support MSI-X interrupts
 572  * and they reserve fixed interrupts for RX/TX rings. To re-target
 573  * these interrupts, PCITOOL ioctls need to be used.
 574  */
 575 typedef struct mac_dladm_intr {      /* presumably one interrupt to re-target, per the comment above - TODO confirm */
 576         int     ino;                 /* presumably the interrupt number (cf. the ino argument of mac_set_intr) - TODO confirm */
 577         int     cpu_id;              /* presumably the CPU associated with the interrupt - TODO confirm */
 578         char    driver_path[MAXPATHLEN];     /* path buffer; presumably the driver's device path - TODO confirm */
 579         char    nexus_path[MAXPATHLEN];      /* path buffer; presumably the parent nexus path - TODO confirm */
 580 } mac_dladm_intr_t;
 581 
 582 /* Bind the interrupt to cpu_num */
 583 static int
 584 mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int oldcpuid, int ino)


 951                 skip_len = sizeof (struct ether_header);
 952         }
 953 
 954         /* if ethernet header is in its own mblk, skip it */
 955         if (MBLKL(mp) <= skip_len) {
 956                 skip_len -= MBLKL(mp);
 957                 mp = mp->b_cont;
 958                 if (mp == NULL)
 959                         goto done;
 960         }
 961 
 962         sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
 963 
 964         /* compute IP src/dst addresses hash and skip IPv{4,6} header */
 965 
 966         switch (sap) {
 967         case ETHERTYPE_IP: {
 968                 ipha_t *iphp;
 969 
 970                 /*
 971                  * If the header is not aligned or the header doesn't fit
 972                  * in the mblk, bail now. Note that this may cause packets
 973                  * reordering.
 974                  */
 975                 iphp = (ipha_t *)(mp->b_rptr + skip_len);
 976                 if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) ||
 977                     !OK_32PTR((char *)iphp))

 978                         goto done;
 979 
 980                 proto = iphp->ipha_protocol;
 981                 skip_len += IPH_HDR_LENGTH(iphp);
 982 
 983                 /* Check if the packet is fragmented. */
 984                 ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) &
 985                     IPH_OFFSET;
 986 
 987                 /*
 988                  * For fragmented packets, use addresses in addition to
 989                  * the frag_id to generate the hash in order to get
 990                  * better distribution.
 991                  */
 992                 if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) {
 993                         uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src);
 994                         uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst);
 995 
 996                         hash ^= (PKT_HASH_4BYTES(ip_src) ^
 997                             PKT_HASH_4BYTES(ip_dst));


1004                         goto done;
1005                 }
1006                 break;
1007         }
1008         case ETHERTYPE_IPV6: {
1009                 ip6_t *ip6hp;
1010                 ip6_frag_t *frag = NULL;
1011                 uint16_t hdr_length;
1012 
1013                 /*
1014                  * If the header is not aligned or the header doesn't fit
1015                  * in the mblk, bail now. Note that this may cause packets
1016                  * reordering.
1017                  */
1018 
1019                 ip6hp = (ip6_t *)(mp->b_rptr + skip_len);
1020                 if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) ||
1021                     !OK_32PTR((char *)ip6hp))
1022                         goto done;
1023 
1024                 if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length,
1025                     &proto, &frag))

1026                         goto done;
1027                 skip_len += hdr_length;
1028 
1029                 /*
1030                  * For fragmented packets, use addresses in addition to
1031                  * the frag_id to generate the hash in order to get
1032                  * better distribution.
1033                  */
1034                 if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) {
1035                         uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]);
1036                         uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]);
1037 
1038                         hash ^= (PKT_HASH_4BYTES(ip_src) ^
1039                             PKT_HASH_4BYTES(ip_dst));
1040                         policy &= ~MAC_PKT_HASH_L3;
1041                 }
1042 
1043                 if (frag != NULL) {
1044                         uint8_t *identp = (uint8_t *)&frag->ip6f_ident;
1045                         hash ^= PKT_HASH_4BYTES(identp);




   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2019 Joyent, Inc.
  24  */
  25 
  26 /*
  27  * MAC Services Module - misc utilities
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/mac.h>
  32 #include <sys/mac_impl.h>
  33 #include <sys/mac_client_priv.h>
  34 #include <sys/mac_client_impl.h>
  35 #include <sys/mac_soft_ring.h>
  36 #include <sys/strsubr.h>
  37 #include <sys/strsun.h>
  38 #include <sys/vlan.h>
  39 #include <sys/pattr.h>
  40 #include <sys/pci_tools.h>
  41 #include <inet/ip.h>
  42 #include <inet/ip_impl.h>
  43 #include <inet/ip6.h>


 461     boolean_t loopback)
 462 {
 463         mblk_t  *mp1 = mp;
 464 
 465         while (mp1 != NULL) {
 466                 mp1->b_prev = NULL;
 467                 mp1->b_queue = NULL;
 468                 mp1 = mp1->b_next;
 469         }
 470         freemsgchain(mp);
 471 }
 472 
 473 /*
 474  * Determines the IPv6 header length accounting for all the optional IPv6
 475  * headers (hop-by-hop, destination, routing and fragment). The header length
 476  * and next header value (a transport header) is captured.
 477  *
 478  * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
 479  * returns B_TRUE.
 480  */
 481 int
 482 mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length,
 483     uint8_t *next_hdr, ip6_frag_t **fragp)
 484 {
 485         uint16_t length;
 486         uint_t  ehdrlen;
 487         uint8_t *whereptr;
 488         uint8_t *nexthdrp;
 489         ip6_dest_t *desthdr;
 490         ip6_rthdr_t *rthdr;
 491         ip6_frag_t *fraghdr;
 492 
 493         if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
 494                 return (ENOSPC);
 495         /*
 496          * Return EINVAL, which mac_protect callers treat explicitly as "let
 497          * pass", flow callers treat as "not in a flow", and the rest treat
 498          * as "don't do special processing".
 499          */
 500         if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION)
 501                 return (EINVAL);
 502         length = IPV6_HDR_LEN;
 503         whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
 504 
 505         if (fragp != NULL)
 506                 *fragp = NULL;
 507 
 508         nexthdrp = &ip6h->ip6_nxt;
 509         while (whereptr < endptr) {
 510                 /* Is there enough left for len + nexthdr? */
 511                 if (whereptr + MIN_EHDR_LEN > endptr)
 512                         break;
 513 
 514                 switch (*nexthdrp) {
 515                 case IPPROTO_HOPOPTS:
 516                 case IPPROTO_DSTOPTS:
 517                         /* Assumes the headers are identical for hbh and dst */
 518                         desthdr = (ip6_dest_t *)whereptr;
 519                         ehdrlen = 8 * (desthdr->ip6d_len + 1);
 520                         if ((uchar_t *)desthdr +  ehdrlen > endptr)
 521                                 return (ENOSPC);
 522                         nexthdrp = &desthdr->ip6d_nxt;
 523                         break;
 524                 case IPPROTO_ROUTING:
 525                         rthdr = (ip6_rthdr_t *)whereptr;
 526                         ehdrlen =  8 * (rthdr->ip6r_len + 1);
 527                         if ((uchar_t *)rthdr +  ehdrlen > endptr)
 528                                 return (ENOSPC);
 529                         nexthdrp = &rthdr->ip6r_nxt;
 530                         break;
 531                 case IPPROTO_FRAGMENT:
 532                         fraghdr = (ip6_frag_t *)whereptr;
 533                         ehdrlen = sizeof (ip6_frag_t);
 534                         if ((uchar_t *)&fraghdr[1] > endptr)
 535                                 return (ENOSPC);
 536                         nexthdrp = &fraghdr->ip6f_nxt;
 537                         if (fragp != NULL)
 538                                 *fragp = fraghdr;
 539                         break;
 540                 case IPPROTO_NONE:
 541                         /* No next header means we're finished */
 542                 default:
 543                         *hdr_length = length;
 544                         *next_hdr = *nexthdrp;
 545                         return (0);
 546                 }
 547                 length += ehdrlen;
 548                 whereptr += ehdrlen;
 549                 *hdr_length = length;
 550                 *next_hdr = *nexthdrp;
 551         }
 552         switch (*nexthdrp) {
 553         case IPPROTO_HOPOPTS:
 554         case IPPROTO_DSTOPTS:
 555         case IPPROTO_ROUTING:
 556         case IPPROTO_FRAGMENT:
 557                 /*
 558                  * If any know extension headers are still to be processed,
 559                  * the packet's malformed (or at least all the IP header(s) are
 560                  * not in the same mblk - and that should never happen.
 561                  *
 562                  * Return ENOSPC because it MAY be spread across mblks, and
 563                  * and the rest of MAC or IPv6 itself can cope.
 564                  */
 565                 return (ENOSPC);
 566 
 567         default:
 568                 /*
 569                  * If we get here, we know that all of the IP headers were in
 570                  * the same mblk, even if the ULP header is in the next mblk.
 571                  */
 572                 *hdr_length = length;
 573                 *next_hdr = *nexthdrp;
 574                 return (0);
 575         }
 576 }
 577 
 578 /*
 579  * The following set of routines are there to take care of interrupt
 580  * re-targeting for legacy (fixed) interrupts. Some older versions
 581  * of the popular NICs like e1000g do not support MSI-X interrupts
 582  * and they reserve fixed interrupts for RX/TX rings. To re-target
 583  * these interrupts, PCITOOL ioctls need to be used.
 584  */
 585 typedef struct mac_dladm_intr {      /* presumably one interrupt to re-target, per the comment above - TODO confirm */
 586         int     ino;                 /* presumably the interrupt number (cf. the ino argument of mac_set_intr) - TODO confirm */
 587         int     cpu_id;              /* presumably the CPU associated with the interrupt - TODO confirm */
 588         char    driver_path[MAXPATHLEN];     /* path buffer; presumably the driver's device path - TODO confirm */
 589         char    nexus_path[MAXPATHLEN];      /* path buffer; presumably the parent nexus path - TODO confirm */
 590 } mac_dladm_intr_t;
 591 
 592 /* Bind the interrupt to cpu_num */
 593 static int
 594 mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int oldcpuid, int ino)


 961                 skip_len = sizeof (struct ether_header);
 962         }
 963 
 964         /* if ethernet header is in its own mblk, skip it */
 965         if (MBLKL(mp) <= skip_len) {
 966                 skip_len -= MBLKL(mp);
 967                 mp = mp->b_cont;
 968                 if (mp == NULL)
 969                         goto done;
 970         }
 971 
 972         sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
 973 
 974         /* compute IP src/dst addresses hash and skip IPv{4,6} header */
 975 
 976         switch (sap) {
 977         case ETHERTYPE_IP: {
 978                 ipha_t *iphp;
 979 
 980                 /*
 981                  * If the header is not aligned, the header doesn't fit in the
 982                  * mblk, OR we have a bad IP version, bail now. Note that this
 983                  * may cause packets reordering.
 984                  */
 985                 iphp = (ipha_t *)(mp->b_rptr + skip_len);
 986                 if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) ||
 987                     !OK_32PTR((char *)iphp) ||
 988                     IPH_HDR_VERSION(iphp) != IPV4_VERSION)
 989                         goto done;
 990 
 991                 proto = iphp->ipha_protocol;
 992                 skip_len += IPH_HDR_LENGTH(iphp);
 993 
 994                 /* Check if the packet is fragmented. */
 995                 ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) &
 996                     IPH_OFFSET;
 997 
 998                 /*
 999                  * For fragmented packets, use addresses in addition to
1000                  * the frag_id to generate the hash in order to get
1001                  * better distribution.
1002                  */
1003                 if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) {
1004                         uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src);
1005                         uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst);
1006 
1007                         hash ^= (PKT_HASH_4BYTES(ip_src) ^
1008                             PKT_HASH_4BYTES(ip_dst));


1015                         goto done;
1016                 }
1017                 break;
1018         }
1019         case ETHERTYPE_IPV6: {
1020                 ip6_t *ip6hp;
1021                 ip6_frag_t *frag = NULL;
1022                 uint16_t hdr_length;
1023 
1024                 /*
1025                  * If the header is not aligned or the header doesn't fit
1026                  * in the mblk, bail now. Note that this may cause packets
1027                  * reordering.
1028                  */
1029 
1030                 ip6hp = (ip6_t *)(mp->b_rptr + skip_len);
1031                 if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) ||
1032                     !OK_32PTR((char *)ip6hp))
1033                         goto done;
1034 
1035                 /* Also bail, regardless of why, if the function below fails. */
1036                 if (mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length,
1037                     &proto, &frag) != 0)
1038                         goto done;
1039                 skip_len += hdr_length;
1040 
1041                 /*
1042                  * For fragmented packets, use addresses in addition to
1043                  * the frag_id to generate the hash in order to get
1044                  * better distribution.
1045                  */
1046                 if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) {
1047                         uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]);
1048                         uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]);
1049 
1050                         hash ^= (PKT_HASH_4BYTES(ip_src) ^
1051                             PKT_HASH_4BYTES(ip_dst));
1052                         policy &= ~MAC_PKT_HASH_L3;
1053                 }
1054 
1055                 if (frag != NULL) {
1056                         uint8_t *identp = (uint8_t *)&frag->ip6f_ident;
1057                         hash ^= PKT_HASH_4BYTES(identp);