Print this page
*** 20,30 ****
*/
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
- * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1990 Mentat Inc. */
#include <sys/sysmacros.h>
#include <sys/types.h>
--- 20,29 ----
*** 75,86 ****
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/ipclassifier.h>
#include <sys/squeue_impl.h>
#include <inet/ipnet.h>
! #include <sys/vxlan.h>
! #include <inet/inet_hash.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>
--- 74,84 ----
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/ipclassifier.h>
#include <sys/squeue_impl.h>
#include <inet/ipnet.h>
! #include <sys/ethernet.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>
*** 346,438 ****
void *args) = NULL;
typedef union T_primitives *t_primp_t;
/*
- * Various protocols that encapsulate UDP have no real use for the source port.
- * Instead, they want to vary the source port to provide better equal-cost
- * multipathing and other systems that use fanout. Consider something like
- * VXLAN. If you're actually sending multiple different streams to a single
- * host and you don't vary the source port, then the tuple of (SRC IP, DST IP,
- * SRC Port, DST Port) will always be the same.
- *
- * Here, we return a port to hash this to, if we know how to hash it. If for
- * some reason we can't perform an L4 hash, then we just return the default
- * value, usually the default port. After we determine the hash we transform it
- * so that it's in the range of [ min, max ].
- *
- * We'd like to avoid a pull up for the sake of performing the hash. If the
- * first mblk_t doesn't have the full protocol header, then we just send it to
- * the default. If for some reason we have an encapsulated packet that has its
- * protocol header in different parts of an mblk_t, then we'll go with the
- * default port. This means that if a driver isn't consistent about how it
- * generates the frames for a given flow, it will not always be consistently
- * hashed. That should be an uncommon event.
- */
- uint16_t
- udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
- uint16_t def)
- {
- size_t szused = 0;
- struct ether_header *ether;
- struct ether_vlan_header *vether;
- ip6_t *ip6h;
- ipha_t *ipha;
- uint16_t sap;
- uint64_t hash;
- uint32_t mod;
-
- ASSERT(min <= max);
-
- if (type != UDP_HASH_VXLAN)
- return (def);
-
- if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
- return (def);
-
- /*
- * The following logic is VXLAN specific to get at the header; if we
- * have other formats, e.g. GENEVE, then we should ignore this.
- *
- * The kernel overlay device often puts a first mblk_t for the data
- * which is just the encap. If so, then we're going to use that and try
- * to avoid a pull up.
- */
- if (MBLKL(mp) == VXLAN_HDR_LEN) {
- if (mp->b_cont == NULL)
- return (def);
- mp = mp->b_cont;
- ether = (struct ether_header *)mp->b_rptr;
- } else if (MBLKL(mp) < VXLAN_HDR_LEN) {
- return (def);
- } else {
- szused = VXLAN_HDR_LEN;
- ether = (struct ether_header *)((uintptr_t)mp->b_rptr + szused);
- }
-
- /* Can we hold a MAC header? */
- if (MBLKL(mp) + szused < sizeof (struct ether_header))
- return (def);
-
- /*
- * We need to lie about the starting offset into the message block for
- * convenience. Undo it at the end. We know that inet_pkt_hash() won't
- * modify the mblk_t.
- */
- mp->b_rptr += szused;
- hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
- INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
- mp->b_rptr -= szused;
-
- if (hash == 0)
- return (def);
-
- mod = max - min + 1;
- return ((hash % mod) + min);
- }
-
- /*
* Return the next anonymous port in the privileged port range for
* bind checking.
*
* Trusted Extension (TX) notes: TX allows administrator to mark or
* reserve ports as Multilevel ports (MLP). MLP has special function
--- 344,353 ----
*** 1666,1680 ****
case UDP_RCVHDR:
mutex_enter(&connp->conn_lock);
*i1 = udp->udp_rcvhdr ? 1 : 0;
mutex_exit(&connp->conn_lock);
return (sizeof (int));
- case UDP_SRCPORT_HASH:
- mutex_enter(&connp->conn_lock);
- *i1 = udp->udp_vxlanhash;
- mutex_exit(&connp->conn_lock);
- return (sizeof (int));
case UDP_SND_TO_CONNECTED:
mutex_enter(&connp->conn_lock);
*i1 = udp->udp_snd_to_conn ? 1 : 0;
mutex_exit(&connp->conn_lock);
return (sizeof (int));
--- 1581,1590 ----
*** 1811,1840 ****
case UDP_RCVHDR:
mutex_enter(&connp->conn_lock);
udp->udp_rcvhdr = onoff;
mutex_exit(&connp->conn_lock);
return (0);
- case UDP_SRCPORT_HASH:
- /*
- * This should have already been verified, but double
- * check.
- */
- if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
- return (error);
- }
-
- /* First see if the val is something we understand */
- if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
- return (EINVAL);
-
- if (!checkonly) {
- mutex_enter(&connp->conn_lock);
- udp->udp_vxlanhash = *i1;
- mutex_exit(&connp->conn_lock);
- }
- /* Fully handled this option. */
- return (0);
case UDP_SND_TO_CONNECTED:
mutex_enter(&connp->conn_lock);
udp->udp_snd_to_conn = onoff;
mutex_exit(&connp->conn_lock);
return (0);
--- 1721,1730 ----
*** 2119,2147 ****
udp_stack_t *us = connp->conn_netstack->netstack_udp;
uint_t data_len;
uint32_t cksum;
udp_t *udp = connp->conn_udp;
boolean_t insert_spi = udp->udp_nat_t_endpoint;
- boolean_t hash_srcport = udp->udp_vxlanhash;
uint_t ulp_hdr_len;
- uint16_t srcport;
data_len = msgdsize(data_mp);
ulp_hdr_len = UDPH_SIZE;
if (insert_spi)
ulp_hdr_len += sizeof (uint32_t);
- /*
- * If we have source port hashing going on, determine the hash before
- * we modify the mblk_t.
- */
- if (hash_srcport == B_TRUE) {
- srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
- IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
- ntohs(connp->conn_lport));
- }
-
mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
if (mp == NULL) {
ASSERT(*errorp != 0);
return (NULL);
--- 2009,2025 ----
*** 2149,2163 ****
data_len += ulp_hdr_len;
ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
- if (hash_srcport == B_TRUE) {
- udpha->uha_src_port = htons(srcport);
- } else {
udpha->uha_src_port = connp->conn_lport;
- }
udpha->uha_dst_port = dstport;
udpha->uha_checksum = 0;
udpha->uha_length = htons(data_len);
/*
--- 2027,2037 ----
*** 3328,3361 ****
const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
{
udp_t *udp = connp->conn_udp;
udp_stack_t *us = udp->udp_us;
boolean_t insert_spi = udp->udp_nat_t_endpoint;
- boolean_t hash_srcport = udp->udp_vxlanhash;
uint_t pktlen;
uint_t alloclen;
uint_t copylen;
uint8_t *iph;
uint_t ip_hdr_length;
udpha_t *udpha;
uint32_t cksum;
ip_pkt_t *ipp;
- uint16_t srcport;
ASSERT(MUTEX_HELD(&connp->conn_lock));
/*
- * If we have source port hashing going on, determine the hash before
- * we modify the mblk_t.
- */
- if (hash_srcport == B_TRUE) {
- srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
- IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
- ntohs(connp->conn_lport));
- }
-
- /*
* Copy the header template and leave space for an SPI
*/
copylen = connp->conn_ht_iphc_len;
alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
pktlen = alloclen + msgdsize(mp);
--- 3202,3223 ----
*** 3449,3461 ****
/* Insert all-0s SPI now. */
if (insert_spi)
*((uint32_t *)(udpha + 1)) = 0;
udpha->uha_dst_port = dstport;
- if (hash_srcport == B_TRUE)
- udpha->uha_src_port = htons(srcport);
-
return (mp);
}
/*
* Send a T_UDERR_IND in response to an M_DATA
--- 3311,3320 ----
*** 6059,6068 ****
--- 5918,5928 ----
int error = 0;
udp_stack_t *us = udp->udp_us;
ushort_t ipversion;
pid_t pid = curproc->p_pid;
ip_xmit_attr_t *ixa;
+ boolean_t snd_to_conn;
ASSERT(DB_TYPE(mp) == M_DATA);
/* All Solaris components should pass a cred for this operation. */
ASSERT(cr != NULL);
*** 6103,6113 ****
* Check if we're allowed to send to a connection on which we've
* already called 'connect'. The posix spec. allows both behaviors but
* historically we've returned an error if already connected. The
* client can allow this via a sockopt.
*/
! if (udp->udp_state == TS_DATA_XFER && !udp->udp_snd_to_conn) {
UDPS_BUMP_MIB(us, udpOutErrors);
return (EISCONN);
}
error = proto_verify_ip_addr(connp->conn_family,
--- 5963,5976 ----
* Check if we're allowed to send to a connection on which we've
* already called 'connect'. The posix spec. allows both behaviors but
* historically we've returned an error if already connected. The
* client can allow this via a sockopt.
*/
! mutex_enter(&connp->conn_lock);
! snd_to_conn = (udp->udp_snd_to_conn != 0);
! mutex_exit(&connp->conn_lock);
! if (udp->udp_state == TS_DATA_XFER && !snd_to_conn) {
UDPS_BUMP_MIB(us, udpOutErrors);
return (EISCONN);
}
error = proto_verify_ip_addr(connp->conn_family,