Print this page
        
*** 20,30 ****
   */
  /*
   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
   * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
   * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
-  * Copyright 2015, Joyent, Inc.
   */
  /* Copyright (c) 1990 Mentat Inc. */
  
  #include <sys/sysmacros.h>
  #include <sys/types.h>
--- 20,29 ----
*** 75,86 ****
  #include <inet/snmpcom.h>
  #include <inet/kstatcom.h>
  #include <inet/ipclassifier.h>
  #include <sys/squeue_impl.h>
  #include <inet/ipnet.h>
! #include <sys/vxlan.h>
! #include <inet/inet_hash.h>
  
  #include <sys/tsol/label.h>
  #include <sys/tsol/tnet.h>
  #include <rpc/pmap_prot.h>
  
--- 74,84 ----
  #include <inet/snmpcom.h>
  #include <inet/kstatcom.h>
  #include <inet/ipclassifier.h>
  #include <sys/squeue_impl.h>
  #include <inet/ipnet.h>
! #include <sys/ethernet.h>
  
  #include <sys/tsol/label.h>
  #include <sys/tsol/tnet.h>
  #include <rpc/pmap_prot.h>
  
*** 346,438 ****
      void *args) = NULL;
  
  typedef union T_primitives *t_primp_t;
  
  /*
-  * Various protocols that encapsulate UDP have no real use for the source port.
-  * Instead, they want to vary the source port to provide better equal-cost
-  * multipathing and other systems that use fanout. Consider something like
-  * VXLAN. If you're actually sending multiple different streams to a single
-  * host and you don't vary the source port, then the tuple of (SRC IP, DST IP,
-  * SRC Port, DST Port) will always be the same.
-  *
-  * Here, we return a port to hash this to, if we know how to hash it. If for
-  * some reason we can't perform an L4 hash, then we just return the default
-  * value, usually the default port. After we determine the hash we transform it
-  * so that it's in the range of [ min, max ].
-  *
-  * We'd like to avoid a pull up for the sake of performing the hash. If the
-  * first mblk_t doesn't have the full protocol header, then we just send it to
-  * the default. If for some reason we have an encapsulated packet that has its
-  * protocol header in different parts of an mblk_t, then we'll go with the
-  * default port. This means that if a driver isn't consistent about how it
-  * generates the frames for a given flow, it will not always be consistently
-  * hashed. That should be an uncommon event.
-  */
- uint16_t
- udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
-     uint16_t def)
- {
-         /* Bytes of encapsulation header sharing the mblk_t we will hash. */
-         size_t szused = 0;
-         uint64_t hash;
-         uint32_t mod;
- 
-         ASSERT(min <= max);
- 
-         /* VXLAN is the only encapsulation we know how to look inside. */
-         if (type != UDP_HASH_VXLAN)
-                 return (def);
- 
-         if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
-                 return (def);
- 
-         /*
-          * The following logic is VXLAN specific to get at the header; it
-          * does not apply to other formats, e.g. GENEVE.
-          *
-          * The kernel overlay device often puts a first mblk_t for the data
-          * which is just the encap. If so, then we're going to use that and try
-          * to avoid a pull up.
-          */
-         if (MBLKL(mp) == VXLAN_HDR_LEN) {
-                 /* Encap-only first mblk_t: the frame must follow it. */
-                 if (mp->b_cont == NULL)
-                         return (def);
-                 mp = mp->b_cont;
-         } else if (MBLKL(mp) < VXLAN_HDR_LEN) {
-                 return (def);
-         } else {
-                 /* The encap and the frame share this mblk_t. */
-                 szused = VXLAN_HDR_LEN;
-         }
- 
-         /*
-          * Can we hold a MAC header? When the encap shares this mblk_t, its
-          * szused bytes are already part of MBLKL(mp) and must not be counted
-          * as room for the MAC header.
-          */
-         if (MBLKL(mp) < szused + sizeof (struct ether_header))
-                 return (def);
- 
-         /*
-          * We need to lie about the starting offset into the message block for
-          * convenience. Undo it at the end. We know that inet_pkt_hash() won't
-          * modify the mblk_t.
-          */
-         mp->b_rptr += szused;
-         hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
-             INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
-         mp->b_rptr -= szused;
- 
-         /* Treat a zero hash as a failure to hash and use the default. */
-         if (hash == 0)
-                 return (def);
- 
-         /* Fold the hash into the inclusive range [ min, max ]. */
-         mod = max - min + 1;
-         return ((hash % mod) + min);
- }
- 
- /*
   * Return the next anonymous port in the privileged port range for
   * bind checking.
   *
   * Trusted Extension (TX) notes: TX allows administrator to mark or
   * reserve ports as Multilevel ports (MLP). MLP has special function
--- 344,353 ----
*** 1666,1680 ****
                  case UDP_RCVHDR:
                          mutex_enter(&connp->conn_lock);
                          *i1 = udp->udp_rcvhdr ? 1 : 0;
                          mutex_exit(&connp->conn_lock);
                          return (sizeof (int));
-                 case UDP_SRCPORT_HASH:
-                         mutex_enter(&connp->conn_lock);
-                         *i1 = udp->udp_vxlanhash;
-                         mutex_exit(&connp->conn_lock);
-                         return (sizeof (int));
                  case UDP_SND_TO_CONNECTED:
                          mutex_enter(&connp->conn_lock);
                          *i1 = udp->udp_snd_to_conn ? 1 : 0;
                          mutex_exit(&connp->conn_lock);
                          return (sizeof (int));
--- 1581,1590 ----
*** 1811,1840 ****
                  case UDP_RCVHDR:
                          mutex_enter(&connp->conn_lock);
                          udp->udp_rcvhdr = onoff;
                          mutex_exit(&connp->conn_lock);
                          return (0);
-                 case UDP_SRCPORT_HASH:
-                         /*
-                          * This should have already been verified, but double
-                          * check.
-                          */
-                         if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
-                                 return (error);
-                         }
- 
-                         /* First see if the val is something we understand */
-                         if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
-                                 return (EINVAL);
- 
-                         if (!checkonly) {
-                                 mutex_enter(&connp->conn_lock);
-                                 udp->udp_vxlanhash = *i1;
-                                 mutex_exit(&connp->conn_lock);
-                         }
-                         /* Fully handled this option. */
-                         return (0);
                  case UDP_SND_TO_CONNECTED:
                          mutex_enter(&connp->conn_lock);
                          udp->udp_snd_to_conn = onoff;
                          mutex_exit(&connp->conn_lock);
                          return (0);
--- 1721,1730 ----
*** 2119,2147 ****
          udp_stack_t     *us = connp->conn_netstack->netstack_udp;
          uint_t          data_len;
          uint32_t        cksum;
          udp_t           *udp = connp->conn_udp;
          boolean_t       insert_spi = udp->udp_nat_t_endpoint;
-         boolean_t       hash_srcport = udp->udp_vxlanhash;
          uint_t          ulp_hdr_len;
-         uint16_t        srcport;
  
          data_len = msgdsize(data_mp);
          ulp_hdr_len = UDPH_SIZE;
          if (insert_spi)
                  ulp_hdr_len += sizeof (uint32_t);
  
-         /*
-          * If we have source port hashing going on, determine the hash before
-          * we modify the mblk_t.
-          */
-         if (hash_srcport == B_TRUE) {
-                 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
-                     IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
-                     ntohs(connp->conn_lport));
-         }
- 
          mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
              ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
          if (mp == NULL) {
                  ASSERT(*errorp != 0);
                  return (NULL);
--- 2009,2025 ----
*** 2149,2163 ****
  
          data_len += ulp_hdr_len;
          ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
  
          udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
-         if (hash_srcport == B_TRUE) {
-                 udpha->uha_src_port = htons(srcport);
-         } else {
                  udpha->uha_src_port = connp->conn_lport;
-         }
          udpha->uha_dst_port = dstport;
          udpha->uha_checksum = 0;
          udpha->uha_length = htons(data_len);
  
          /*
--- 2027,2037 ----
*** 3328,3361 ****
      const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
  {
          udp_t           *udp = connp->conn_udp;
          udp_stack_t     *us = udp->udp_us;
          boolean_t       insert_spi = udp->udp_nat_t_endpoint;
-         boolean_t       hash_srcport = udp->udp_vxlanhash;
          uint_t          pktlen;
          uint_t          alloclen;
          uint_t          copylen;
          uint8_t         *iph;
          uint_t          ip_hdr_length;
          udpha_t         *udpha;
          uint32_t        cksum;
          ip_pkt_t        *ipp;
-         uint16_t        srcport;
  
          ASSERT(MUTEX_HELD(&connp->conn_lock));
  
          /*
-          * If we have source port hashing going on, determine the hash before
-          * we modify the mblk_t.
-          */
-         if (hash_srcport == B_TRUE) {
-                 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
-                     IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
-                     ntohs(connp->conn_lport));
-         }
- 
-         /*
           * Copy the header template and leave space for an SPI
           */
          copylen = connp->conn_ht_iphc_len;
          alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
          pktlen = alloclen + msgdsize(mp);
--- 3202,3223 ----
*** 3449,3461 ****
          /* Insert all-0s SPI now. */
          if (insert_spi)
                  *((uint32_t *)(udpha + 1)) = 0;
  
          udpha->uha_dst_port = dstport;
-         if (hash_srcport == B_TRUE)
-                 udpha->uha_src_port = htons(srcport);
- 
          return (mp);
  }
  
  /*
   * Send a T_UDERR_IND in response to an M_DATA
--- 3311,3320 ----
*** 6059,6068 ****
--- 5918,5928 ----
          int             error = 0;
          udp_stack_t     *us = udp->udp_us;
          ushort_t        ipversion;
          pid_t           pid = curproc->p_pid;
          ip_xmit_attr_t  *ixa;
+         boolean_t       snd_to_conn;
  
          ASSERT(DB_TYPE(mp) == M_DATA);
  
          /* All Solaris components should pass a cred for this operation. */
          ASSERT(cr != NULL);
*** 6103,6113 ****
           * Check if we're allowed to send to a connection on which we've
           * already called 'connect'. The posix spec. allows both behaviors but
           * historically we've returned an error if already connected. The
           * client can allow this via a sockopt.
           */
!         if (udp->udp_state == TS_DATA_XFER && !udp->udp_snd_to_conn) {
                  UDPS_BUMP_MIB(us, udpOutErrors);
                  return (EISCONN);
          }
  
          error = proto_verify_ip_addr(connp->conn_family,
--- 5963,5976 ----
           * Check if we're allowed to send to a connection on which we've
           * already called 'connect'. The posix spec. allows both behaviors but
           * historically we've returned an error if already connected. The
           * client can allow this via a sockopt.
           */
!         mutex_enter(&connp->conn_lock);
!         snd_to_conn = (udp->udp_snd_to_conn != 0);
!         mutex_exit(&connp->conn_lock);
!         if (udp->udp_state == TS_DATA_XFER && !snd_to_conn) {
                  UDPS_BUMP_MIB(us, udpOutErrors);
                  return (EISCONN);
          }
  
          error = proto_verify_ip_addr(connp->conn_family,