Print this page

        

@@ -20,11 +20,10 @@
  */
 /*
  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
- * Copyright 2015, Joyent, Inc.
  */
 /* Copyright (c) 1990 Mentat Inc. */
 
 #include <sys/sysmacros.h>
 #include <sys/types.h>

@@ -75,12 +74,11 @@
 #include <inet/snmpcom.h>
 #include <inet/kstatcom.h>
 #include <inet/ipclassifier.h>
 #include <sys/squeue_impl.h>
 #include <inet/ipnet.h>
-#include <sys/vxlan.h>
-#include <inet/inet_hash.h>
+#include <sys/ethernet.h>
 
 #include <sys/tsol/label.h>
 #include <sys/tsol/tnet.h>
 #include <rpc/pmap_prot.h>
 

@@ -346,93 +344,10 @@
     void *args) = NULL;
 
 typedef union T_primitives *t_primp_t;
 
 /*
- * Various protocols that encapsulate UDP have no real use for the source port.
- * Instead, they want to vary the source port to provide better equal-cost
- * multipathing and other systems that use fanout. Consider something like
- * VXLAN. If you're actually sending multiple different streams to a single
- * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP,
- * SRC Port, DST Port) will always be the same.
- *
- * Here, we return a port to hash this to, if we know how to hash it. If for
- * some reason we can't perform an L4 hash, then we just return the default
- * value, usually the default port. After we determine the hash we transform it
- * so that it's in the range of [ min, max ].
- *
- * We'd like to avoid a pull up for the sake of performing the hash. If the
- * first mblk_t doesn't have the full protocol header, then we just send it to
- * the default. If for some reason we have an encapsulated packet that has its
- * protocol header in different parts of an mblk_t, then we'll go with the
- * default port. This means that that if a driver isn't consistent about how it
- * generates the frames for a given flow, it will not always be consistently
- * hashed. That should be an uncommon event.
- */
-uint16_t
-udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
-    uint16_t def)
-{
-        size_t szused = 0;
-        struct ether_header *ether;
-        struct ether_vlan_header *vether;
-        ip6_t *ip6h;
-        ipha_t *ipha;
-        uint16_t sap;
-        uint64_t hash;
-        uint32_t mod;
-
-        ASSERT(min <= max);
-
-        if (type != UDP_HASH_VXLAN)
-                return (def);
-
-        if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
-                return (def);
-
-        /*
-         * The following logic is VXLAN specific to get at the header, if we
-         * have formats, eg. GENEVE, then we should ignore this.
-         *
-         * The kernel overlay device often puts a first mblk_t for the data
-         * which is just the encap. If so, then we're going to use that and try
-         * to avoid a pull up.
-         */
-        if (MBLKL(mp) == VXLAN_HDR_LEN) {
-                if (mp->b_cont == NULL)
-                        return (def);
-                mp = mp->b_cont;
-                ether = (struct ether_header *)mp->b_rptr;
-        } else if (MBLKL(mp) < VXLAN_HDR_LEN) {
-                return (def);
-        } else {
-                szused = VXLAN_HDR_LEN;
-                ether = (struct ether_header *)((uintptr_t)mp->b_rptr + szused);
-        }
-
-        /* Can we hold a MAC header? */
-        if (MBLKL(mp) + szused < sizeof (struct ether_header))
-                return (def);
-
-        /*
-         * We need to lie about the starting offset into the message block for
-         * convenience. Undo it at the end. We know that inet_pkt_hash() won't
-         * modify the mblk_t.
-         */
-        mp->b_rptr += szused;
-        hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
-            INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
-        mp->b_rptr -= szused;
-
-        if (hash == 0)
-                return (def);
-
-        mod = max - min + 1;
-        return ((hash % mod) + min);
-}
-
-/*
  * Return the next anonymous port in the privileged port range for
  * bind checking.
  *
  * Trusted Extension (TX) notes: TX allows administrator to mark or
  * reserve ports as Multilevel ports (MLP). MLP has special function

@@ -1666,15 +1581,10 @@
                 case UDP_RCVHDR:
                         mutex_enter(&connp->conn_lock);
                         *i1 = udp->udp_rcvhdr ? 1 : 0;
                         mutex_exit(&connp->conn_lock);
                         return (sizeof (int));
-                case UDP_SRCPORT_HASH:
-                        mutex_enter(&connp->conn_lock);
-                        *i1 = udp->udp_vxlanhash;
-                        mutex_exit(&connp->conn_lock);
-                        return (sizeof (int));
                 case UDP_SND_TO_CONNECTED:
                         mutex_enter(&connp->conn_lock);
                         *i1 = udp->udp_snd_to_conn ? 1 : 0;
                         mutex_exit(&connp->conn_lock);
                         return (sizeof (int));

@@ -1811,30 +1721,10 @@
                 case UDP_RCVHDR:
                         mutex_enter(&connp->conn_lock);
                         udp->udp_rcvhdr = onoff;
                         mutex_exit(&connp->conn_lock);
                         return (0);
-                case UDP_SRCPORT_HASH:
-                        /*
-                         * This should have already been verified, but double
-                         * check.
-                         */
-                        if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
-                                return (error);
-                        }
-
-                        /* First see if the val is something we understand */
-                        if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
-                                return (EINVAL);
-
-                        if (!checkonly) {
-                                mutex_enter(&connp->conn_lock);
-                                udp->udp_vxlanhash = *i1;
-                                mutex_exit(&connp->conn_lock);
-                        }
-                        /* Fully handled this option. */
-                        return (0);
                 case UDP_SND_TO_CONNECTED:
                         mutex_enter(&connp->conn_lock);
                         udp->udp_snd_to_conn = onoff;
                         mutex_exit(&connp->conn_lock);
                         return (0);

@@ -2119,29 +2009,17 @@
         udp_stack_t     *us = connp->conn_netstack->netstack_udp;
         uint_t          data_len;
         uint32_t        cksum;
         udp_t           *udp = connp->conn_udp;
         boolean_t       insert_spi = udp->udp_nat_t_endpoint;
-        boolean_t       hash_srcport = udp->udp_vxlanhash;
         uint_t          ulp_hdr_len;
-        uint16_t        srcport;
 
         data_len = msgdsize(data_mp);
         ulp_hdr_len = UDPH_SIZE;
         if (insert_spi)
                 ulp_hdr_len += sizeof (uint32_t);
 
-        /*
-         * If we have source port hashing going on, determine the hash before
-         * we modify the mblk_t.
-         */
-        if (hash_srcport == B_TRUE) {
-                srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
-                    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
-                    ntohs(connp->conn_lport));
-        }
-
         mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
             ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
         if (mp == NULL) {
                 ASSERT(*errorp != 0);
                 return (NULL);

@@ -2149,15 +2027,11 @@
 
         data_len += ulp_hdr_len;
         ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
 
         udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
-        if (hash_srcport == B_TRUE) {
-                udpha->uha_src_port = htons(srcport);
-        } else {
                 udpha->uha_src_port = connp->conn_lport;
-        }
         udpha->uha_dst_port = dstport;
         udpha->uha_checksum = 0;
         udpha->uha_length = htons(data_len);
 
         /*

@@ -3328,34 +3202,22 @@
     const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
 {
         udp_t           *udp = connp->conn_udp;
         udp_stack_t     *us = udp->udp_us;
         boolean_t       insert_spi = udp->udp_nat_t_endpoint;
-        boolean_t       hash_srcport = udp->udp_vxlanhash;
         uint_t          pktlen;
         uint_t          alloclen;
         uint_t          copylen;
         uint8_t         *iph;
         uint_t          ip_hdr_length;
         udpha_t         *udpha;
         uint32_t        cksum;
         ip_pkt_t        *ipp;
-        uint16_t        srcport;
 
         ASSERT(MUTEX_HELD(&connp->conn_lock));
 
         /*
-         * If we have source port hashing going on, determine the hash before
-         * we modify the mblk_t.
-         */
-        if (hash_srcport == B_TRUE) {
-                srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
-                    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
-                    ntohs(connp->conn_lport));
-        }
-
-        /*
          * Copy the header template and leave space for an SPI
          */
         copylen = connp->conn_ht_iphc_len;
         alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
         pktlen = alloclen + msgdsize(mp);

@@ -3449,13 +3311,10 @@
         /* Insert all-0s SPI now. */
         if (insert_spi)
                 *((uint32_t *)(udpha + 1)) = 0;
 
         udpha->uha_dst_port = dstport;
-        if (hash_srcport == B_TRUE)
-                udpha->uha_src_port = htons(srcport);
-
         return (mp);
 }
 
 /*
  * Send a T_UDERR_IND in response to an M_DATA

@@ -6059,10 +5918,11 @@
         int             error = 0;
         udp_stack_t     *us = udp->udp_us;
         ushort_t        ipversion;
         pid_t           pid = curproc->p_pid;
         ip_xmit_attr_t  *ixa;
+        boolean_t       snd_to_conn;
 
         ASSERT(DB_TYPE(mp) == M_DATA);
 
         /* All Solaris components should pass a cred for this operation. */
         ASSERT(cr != NULL);

@@ -6103,11 +5963,14 @@
          * Check if we're allowed to send to a connection on which we've
          * already called 'connect'. The posix spec. allows both behaviors but
          * historically we've returned an error if already connected. The
          * client can allow this via a sockopt.
          */
-        if (udp->udp_state == TS_DATA_XFER && !udp->udp_snd_to_conn) {
+        mutex_enter(&connp->conn_lock);
+        snd_to_conn = (udp->udp_snd_to_conn != 0);
+        mutex_exit(&connp->conn_lock);
+        if (udp->udp_state == TS_DATA_XFER && !snd_to_conn) {
                 UDPS_BUMP_MIB(us, udpOutErrors);
                 return (EISCONN);
         }
 
         error = proto_verify_ip_addr(connp->conn_family,