Print this page

        

*** 20,30 **** */ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. - * Copyright 2015, Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ #include <sys/sysmacros.h> #include <sys/types.h> --- 20,29 ----
*** 75,86 **** #include <inet/snmpcom.h> #include <inet/kstatcom.h> #include <inet/ipclassifier.h> #include <sys/squeue_impl.h> #include <inet/ipnet.h> ! #include <sys/vxlan.h> ! #include <inet/inet_hash.h> #include <sys/tsol/label.h> #include <sys/tsol/tnet.h> #include <rpc/pmap_prot.h> --- 74,84 ---- #include <inet/snmpcom.h> #include <inet/kstatcom.h> #include <inet/ipclassifier.h> #include <sys/squeue_impl.h> #include <inet/ipnet.h> ! #include <sys/ethernet.h> #include <sys/tsol/label.h> #include <sys/tsol/tnet.h> #include <rpc/pmap_prot.h>
*** 346,438 **** void *args) = NULL; typedef union T_primitives *t_primp_t; /* - * Various protocols that encapsulate UDP have no real use for the source port. - * Instead, they want to vary the source port to provide better equal-cost - * multipathing and other systems that use fanout. Consider something like - * VXLAN. If you're actually sending multiple different streams to a single - * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP, - * SRC Port, DST Port) will always be the same. - * - * Here, we return a port to hash this to, if we know how to hash it. If for - * some reason we can't perform an L4 hash, then we just return the default - * value, usually the default port. After we determine the hash we transform it - * so that it's in the range of [ min, max ]. - * - * We'd like to avoid a pull up for the sake of performing the hash. If the - * first mblk_t doesn't have the full protocol header, then we just send it to - * the default. If for some reason we have an encapsulated packet that has its - * protocol header in different parts of an mblk_t, then we'll go with the - * default port. This means that that if a driver isn't consistent about how it - * generates the frames for a given flow, it will not always be consistently - * hashed. That should be an uncommon event. - */ - uint16_t - udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max, - uint16_t def) - { - size_t szused = 0; - struct ether_header *ether; - struct ether_vlan_header *vether; - ip6_t *ip6h; - ipha_t *ipha; - uint16_t sap; - uint64_t hash; - uint32_t mod; - - ASSERT(min <= max); - - if (type != UDP_HASH_VXLAN) - return (def); - - if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))) - return (def); - - /* - * The following logic is VXLAN specific to get at the header, if we - * have formats, eg. GENEVE, then we should ignore this. - * - * The kernel overlay device often puts a first mblk_t for the data - * which is just the encap. If so, then we're going to use that and try - * to avoid a pull up. - */ - if (MBLKL(mp) == VXLAN_HDR_LEN) { - if (mp->b_cont == NULL) - return (def); - mp = mp->b_cont; - ether = (struct ether_header *)mp->b_rptr; - } else if (MBLKL(mp) < VXLAN_HDR_LEN) { - return (def); - } else { - szused = VXLAN_HDR_LEN; - ether = (struct ether_header *)((uintptr_t)mp->b_rptr + szused); - } - - /* Can we hold a MAC header? */ - if (MBLKL(mp) + szused < sizeof (struct ether_header)) - return (def); - - /* - * We need to lie about the starting offset into the message block for - * convenience. Undo it at the end. We know that inet_pkt_hash() won't - * modify the mblk_t. - */ - mp->b_rptr += szused; - hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 | - INET_PKT_HASH_L3 | INET_PKT_HASH_L4); - mp->b_rptr -= szused; - - if (hash == 0) - return (def); - - mod = max - min + 1; - return ((hash % mod) + min); - } - - /* * Return the next anonymous port in the privileged port range for * bind checking. * * Trusted Extension (TX) notes: TX allows administrator to mark or * reserve ports as Multilevel ports (MLP). MLP has special function --- 344,353 ----
*** 1666,1680 **** case UDP_RCVHDR: mutex_enter(&connp->conn_lock); *i1 = udp->udp_rcvhdr ? 1 : 0; mutex_exit(&connp->conn_lock); return (sizeof (int)); - case UDP_SRCPORT_HASH: - mutex_enter(&connp->conn_lock); - *i1 = udp->udp_vxlanhash; - mutex_exit(&connp->conn_lock); - return (sizeof (int)); case UDP_SND_TO_CONNECTED: mutex_enter(&connp->conn_lock); *i1 = udp->udp_snd_to_conn ? 1 : 0; mutex_exit(&connp->conn_lock); return (sizeof (int)); --- 1581,1590 ----
*** 1811,1840 **** case UDP_RCVHDR: mutex_enter(&connp->conn_lock); udp->udp_rcvhdr = onoff; mutex_exit(&connp->conn_lock); return (0); - case UDP_SRCPORT_HASH: - /* - * This should have already been verified, but double - * check. - */ - if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { - return (error); - } - - /* First see if the val is something we understand */ - if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN) - return (EINVAL); - - if (!checkonly) { - mutex_enter(&connp->conn_lock); - udp->udp_vxlanhash = *i1; - mutex_exit(&connp->conn_lock); - } - /* Fully handled this option. */ - return (0); case UDP_SND_TO_CONNECTED: mutex_enter(&connp->conn_lock); udp->udp_snd_to_conn = onoff; mutex_exit(&connp->conn_lock); return (0); --- 1721,1730 ----
*** 2119,2147 **** udp_stack_t *us = connp->conn_netstack->netstack_udp; uint_t data_len; uint32_t cksum; udp_t *udp = connp->conn_udp; boolean_t insert_spi = udp->udp_nat_t_endpoint; - boolean_t hash_srcport = udp->udp_vxlanhash; uint_t ulp_hdr_len; - uint16_t srcport; data_len = msgdsize(data_mp); ulp_hdr_len = UDPH_SIZE; if (insert_spi) ulp_hdr_len += sizeof (uint32_t); - /* - * If we have source port hashing going on, determine the hash before - * we modify the mblk_t. - */ - if (hash_srcport == B_TRUE) { - srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN, - IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX, - ntohs(connp->conn_lport)); - } - mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); if (mp == NULL) { ASSERT(*errorp != 0); return (NULL); --- 2009,2025 ----
*** 2149,2163 **** data_len += ulp_hdr_len; ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); - if (hash_srcport == B_TRUE) { - udpha->uha_src_port = htons(srcport); - } else { udpha->uha_src_port = connp->conn_lport; - } udpha->uha_dst_port = dstport; udpha->uha_checksum = 0; udpha->uha_length = htons(data_len); /* --- 2027,2037 ----
*** 3328,3361 **** const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) { udp_t *udp = connp->conn_udp; udp_stack_t *us = udp->udp_us; boolean_t insert_spi = udp->udp_nat_t_endpoint; - boolean_t hash_srcport = udp->udp_vxlanhash; uint_t pktlen; uint_t alloclen; uint_t copylen; uint8_t *iph; uint_t ip_hdr_length; udpha_t *udpha; uint32_t cksum; ip_pkt_t *ipp; - uint16_t srcport; ASSERT(MUTEX_HELD(&connp->conn_lock)); /* - * If we have source port hashing going on, determine the hash before - * we modify the mblk_t. - */ - if (hash_srcport == B_TRUE) { - srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN, - IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX, - ntohs(connp->conn_lport)); - } - - /* * Copy the header template and leave space for an SPI */ copylen = connp->conn_ht_iphc_len; alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0); pktlen = alloclen + msgdsize(mp); --- 3202,3223 ----
*** 3449,3461 **** /* Insert all-0s SPI now. */ if (insert_spi) *((uint32_t *)(udpha + 1)) = 0; udpha->uha_dst_port = dstport; - if (hash_srcport == B_TRUE) - udpha->uha_src_port = htons(srcport); - return (mp); } /* * Send a T_UDERR_IND in response to an M_DATA --- 3311,3320 ----
*** 6059,6068 **** --- 5918,5928 ---- int error = 0; udp_stack_t *us = udp->udp_us; ushort_t ipversion; pid_t pid = curproc->p_pid; ip_xmit_attr_t *ixa; + boolean_t snd_to_conn; ASSERT(DB_TYPE(mp) == M_DATA); /* All Solaris components should pass a cred for this operation. */ ASSERT(cr != NULL);
*** 6103,6113 **** * Check if we're allowed to send to a connection on which we've * already called 'connect'. The posix spec. allows both behaviors but * historically we've returned an error if already connected. The * client can allow this via a sockopt. */ ! if (udp->udp_state == TS_DATA_XFER && !udp->udp_snd_to_conn) { UDPS_BUMP_MIB(us, udpOutErrors); return (EISCONN); } error = proto_verify_ip_addr(connp->conn_family, --- 5963,5976 ---- * Check if we're allowed to send to a connection on which we've * already called 'connect'. The posix spec. allows both behaviors but * historically we've returned an error if already connected. The * client can allow this via a sockopt. */ ! mutex_enter(&connp->conn_lock); ! snd_to_conn = (udp->udp_snd_to_conn != 0); ! mutex_exit(&connp->conn_lock); ! if (udp->udp_state == TS_DATA_XFER && !snd_to_conn) { UDPS_BUMP_MIB(us, udpOutErrors); return (EISCONN); } error = proto_verify_ip_addr(connp->conn_family,