Print this page


Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/udp/udp.c
          +++ new/usr/src/uts/common/inet/udp/udp.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24   24   * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
  25      - * Copyright 2015, Joyent, Inc.
  26   25   */
  27   26  /* Copyright (c) 1990 Mentat Inc. */
  28   27  
  29   28  #include <sys/sysmacros.h>
  30   29  #include <sys/types.h>
  31   30  #include <sys/stream.h>
  32   31  #include <sys/stropts.h>
  33   32  #include <sys/strlog.h>
  34   33  #include <sys/strsun.h>
  35   34  #define _SUN_TPI_VERSION 2
↓ open down ↓ 34 lines elided ↑ open up ↑
  70   69  #include <inet/ip_multi.h>
  71   70  #include <inet/ip_ndp.h>
  72   71  #include <inet/proto_set.h>
  73   72  #include <inet/mib2.h>
  74   73  #include <inet/optcom.h>
  75   74  #include <inet/snmpcom.h>
  76   75  #include <inet/kstatcom.h>
  77   76  #include <inet/ipclassifier.h>
  78   77  #include <sys/squeue_impl.h>
  79   78  #include <inet/ipnet.h>
  80      -#include <sys/vxlan.h>
  81      -#include <inet/inet_hash.h>
       79 +#include <sys/ethernet.h>
  82   80  
  83   81  #include <sys/tsol/label.h>
  84   82  #include <sys/tsol/tnet.h>
  85   83  #include <rpc/pmap_prot.h>
  86   84  
  87   85  #include <inet/udp_impl.h>
  88   86  
  89   87  /*
  90   88   * Synchronization notes:
  91   89   *
↓ open down ↓ 249 lines elided ↑ open up ↑
 341  339  void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
 342  340      sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
 343  341      void *args) = NULL;
 344  342  void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
 345  343      sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
 346  344      void *args) = NULL;
 347  345  
 348  346  typedef union T_primitives *t_primp_t;
 349  347  
 350  348  /*
 351      - * Various protocols that encapsulate UDP have no real use for the source port.
 352      - * Instead, they want to vary the source port to provide better equal-cost
 353      - * multipathing and other systems that use fanout. Consider something like
 354      - * VXLAN. If you're actually sending multiple different streams to a single
 355      - * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP,
 356      - * SRC Port, DST Port) will always be the same.
 357      - *
 358      - * Here, we return a port to hash this to, if we know how to hash it. If for
 359      - * some reason we can't perform an L4 hash, then we just return the default
 360      - * value, usually the default port. After we determine the hash we transform it
 361      - * so that it's in the range of [ min, max ].
 362      - *
 363      - * We'd like to avoid a pull up for the sake of performing the hash. If the
 364      - * first mblk_t doesn't have the full protocol header, then we just send it to
 365      - * the default. If for some reason we have an encapsulated packet that has its
 366      - * protocol header in different parts of an mblk_t, then we'll go with the
 367      - * default port. This means that if a driver isn't consistent about how it
 368      - * generates the frames for a given flow, it will not always be consistently
 369      - * hashed. That should be an uncommon event.
 370      - */
 371      -uint16_t
 372      -udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
 373      -    uint16_t def)
 374      -{
 375      -        size_t szused = 0;
 376      -        struct ether_header *ether;
 377      -        struct ether_vlan_header *vether;
 378      -        ip6_t *ip6h;
 379      -        ipha_t *ipha;
 380      -        uint16_t sap;
 381      -        uint64_t hash;
 382      -        uint32_t mod;
 383      -
 384      -        ASSERT(min <= max);
 385      -
 386      -        if (type != UDP_HASH_VXLAN)
 387      -                return (def);
 388      -
 389      -        if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
 390      -                return (def);
 391      -
 392      -        /*
 393      -         * The following logic is VXLAN specific to get at the header, if we
 394      -         * have other formats, e.g. GENEVE, then we should ignore this.
 395      -         *
 396      -         * The kernel overlay device often puts a first mblk_t for the data
 397      -         * which is just the encap. If so, then we're going to use that and try
 398      -         * to avoid a pull up.
 399      -         */
 400      -        if (MBLKL(mp) == VXLAN_HDR_LEN) {
 401      -                if (mp->b_cont == NULL)
 402      -                        return (def);
 403      -                mp = mp->b_cont;
 404      -                ether = (struct ether_header *)mp->b_rptr;
 405      -        } else if (MBLKL(mp) < VXLAN_HDR_LEN) {
 406      -                return (def);
 407      -        } else {
 408      -                szused = VXLAN_HDR_LEN;
 409      -                ether = (struct ether_header *)((uintptr_t)mp->b_rptr + szused);
 410      -        }
 411      -
 412      -        /* Can we hold a MAC header? */
 413      -        if (MBLKL(mp) + szused < sizeof (struct ether_header))
 414      -                return (def);
 415      -
 416      -        /*
 417      -         * We need to lie about the starting offset into the message block for
 418      -         * convenience. Undo it at the end. We know that inet_pkt_hash() won't
 419      -         * modify the mblk_t.
 420      -         */
 421      -        mp->b_rptr += szused;
 422      -        hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
 423      -            INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
 424      -        mp->b_rptr -= szused;
 425      -
 426      -        if (hash == 0)
 427      -                return (def);
 428      -
 429      -        mod = max - min + 1;
 430      -        return ((hash % mod) + min);
 431      -}
 432      -
 433      -/*
 434  349   * Return the next anonymous port in the privileged port range for
 435  350   * bind checking.
 436  351   *
 437  352   * Trusted Extension (TX) notes: TX allows administrator to mark or
 438  353   * reserve ports as Multilevel ports (MLP). MLP has special function
 439  354   * on TX systems. Once a port is made MLP, it's not available as
 440  355   * ordinary port. This creates "holes" in the port name space. It
 441  356   * may be necessary to skip the "holes" to find a suitable anon port.
 442  357   */
 443  358  static in_port_t
↓ open down ↓ 1217 lines elided ↑ open up ↑
1661 1576                  case UDP_NAT_T_ENDPOINT:
1662 1577                          mutex_enter(&connp->conn_lock);
1663 1578                          *i1 = udp->udp_nat_t_endpoint;
1664 1579                          mutex_exit(&connp->conn_lock);
1665 1580                          return (sizeof (int));
1666 1581                  case UDP_RCVHDR:
1667 1582                          mutex_enter(&connp->conn_lock);
1668 1583                          *i1 = udp->udp_rcvhdr ? 1 : 0;
1669 1584                          mutex_exit(&connp->conn_lock);
1670 1585                          return (sizeof (int));
1671      -                case UDP_SRCPORT_HASH:
1672      -                        mutex_enter(&connp->conn_lock);
1673      -                        *i1 = udp->udp_vxlanhash;
1674      -                        mutex_exit(&connp->conn_lock);
1675      -                        return (sizeof (int));
1676 1586                  case UDP_SND_TO_CONNECTED:
1677 1587                          mutex_enter(&connp->conn_lock);
1678 1588                          *i1 = udp->udp_snd_to_conn ? 1 : 0;
1679 1589                          mutex_exit(&connp->conn_lock);
1680 1590                          return (sizeof (int));
1681 1591                  }
1682 1592          }
1683 1593          mutex_enter(&connp->conn_lock);
1684 1594          retval = conn_opt_get(&coas, level, name, ptr);
1685 1595          mutex_exit(&connp->conn_lock);
↓ open down ↓ 120 lines elided ↑ open up ↑
1806 1716                                  coa->coa_changed |= COA_HEADER_CHANGED;
1807 1717                                  coa->coa_changed |= COA_WROFF_CHANGED;
1808 1718                          }
1809 1719                          /* Fully handled this option. */
1810 1720                          return (0);
1811 1721                  case UDP_RCVHDR:
1812 1722                          mutex_enter(&connp->conn_lock);
1813 1723                          udp->udp_rcvhdr = onoff;
1814 1724                          mutex_exit(&connp->conn_lock);
1815 1725                          return (0);
1816      -                case UDP_SRCPORT_HASH:
1817      -                        /*
1818      -                         * This should have already been verified, but double
1819      -                         * check.
1820      -                         */
1821      -                        if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1822      -                                return (error);
1823      -                        }
1824      -
1825      -                        /* First see if the val is something we understand */
1826      -                        if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
1827      -                                return (EINVAL);
1828      -
1829      -                        if (!checkonly) {
1830      -                                mutex_enter(&connp->conn_lock);
1831      -                                udp->udp_vxlanhash = *i1;
1832      -                                mutex_exit(&connp->conn_lock);
1833      -                        }
1834      -                        /* Fully handled this option. */
1835      -                        return (0);
1836 1726                  case UDP_SND_TO_CONNECTED:
1837 1727                          mutex_enter(&connp->conn_lock);
1838 1728                          udp->udp_snd_to_conn = onoff;
1839 1729                          mutex_exit(&connp->conn_lock);
1840 1730                          return (0);
1841 1731                  }
1842 1732                  break;
1843 1733          }
1844 1734          error = conn_opt_set(coa, level, name, inlen, invalp,
1845 1735              checkonly, cr);
↓ open down ↓ 268 lines elided ↑ open up ↑
2114 2004      const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
2115 2005      uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2116 2006  {
2117 2007          mblk_t          *mp;
2118 2008          udpha_t         *udpha;
2119 2009          udp_stack_t     *us = connp->conn_netstack->netstack_udp;
2120 2010          uint_t          data_len;
2121 2011          uint32_t        cksum;
2122 2012          udp_t           *udp = connp->conn_udp;
2123 2013          boolean_t       insert_spi = udp->udp_nat_t_endpoint;
2124      -        boolean_t       hash_srcport = udp->udp_vxlanhash;
2125 2014          uint_t          ulp_hdr_len;
2126      -        uint16_t        srcport;
2127 2015  
2128 2016          data_len = msgdsize(data_mp);
2129 2017          ulp_hdr_len = UDPH_SIZE;
2130 2018          if (insert_spi)
2131 2019                  ulp_hdr_len += sizeof (uint32_t);
2132 2020  
2133      -        /*
2134      -         * If we have source port hashing going on, determine the hash before
2135      -         * we modify the mblk_t.
2136      -         */
2137      -        if (hash_srcport == B_TRUE) {
2138      -                srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
2139      -                    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
2140      -                    ntohs(connp->conn_lport));
2141      -        }
2142      -
2143 2021          mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2144 2022              ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2145 2023          if (mp == NULL) {
2146 2024                  ASSERT(*errorp != 0);
2147 2025                  return (NULL);
2148 2026          }
2149 2027  
2150 2028          data_len += ulp_hdr_len;
2151 2029          ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2152 2030  
2153 2031          udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2154      -        if (hash_srcport == B_TRUE) {
2155      -                udpha->uha_src_port = htons(srcport);
2156      -        } else {
2157      -                udpha->uha_src_port = connp->conn_lport;
2158      -        }
     2032 +        udpha->uha_src_port = connp->conn_lport;
2159 2033          udpha->uha_dst_port = dstport;
2160 2034          udpha->uha_checksum = 0;
2161 2035          udpha->uha_length = htons(data_len);
2162 2036  
2163 2037          /*
2164 2038           * If there was a routing option/header then conn_prepend_hdr
2165 2039           * has massaged it and placed the pseudo-header checksum difference
2166 2040           * in the cksum argument.
2167 2041           *
2168 2042           * Setup header length and prepare for ULP checksum done in IP.
↓ open down ↓ 1154 lines elided ↑ open up ↑
3323 3197   * Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
3324 3198   * When it returns NULL it sets errorp.
3325 3199   */
3326 3200  static mblk_t *
3327 3201  udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3328 3202      const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3329 3203  {
3330 3204          udp_t           *udp = connp->conn_udp;
3331 3205          udp_stack_t     *us = udp->udp_us;
3332 3206          boolean_t       insert_spi = udp->udp_nat_t_endpoint;
3333      -        boolean_t       hash_srcport = udp->udp_vxlanhash;
3334 3207          uint_t          pktlen;
3335 3208          uint_t          alloclen;
3336 3209          uint_t          copylen;
3337 3210          uint8_t         *iph;
3338 3211          uint_t          ip_hdr_length;
3339 3212          udpha_t         *udpha;
3340 3213          uint32_t        cksum;
3341 3214          ip_pkt_t        *ipp;
3342      -        uint16_t        srcport;
3343 3215  
3344 3216          ASSERT(MUTEX_HELD(&connp->conn_lock));
3345 3217  
3346 3218          /*
3347      -         * If we have source port hashing going on, determine the hash before
3348      -         * we modify the mblk_t.
3349      -         */
3350      -        if (hash_srcport == B_TRUE) {
3351      -                srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
3352      -                    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
3353      -                    ntohs(connp->conn_lport));
3354      -        }
3355      -
3356      -        /*
3357 3219           * Copy the header template and leave space for an SPI
3358 3220           */
3359 3221          copylen = connp->conn_ht_iphc_len;
3360 3222          alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3361 3223          pktlen = alloclen + msgdsize(mp);
3362 3224          if (pktlen > IP_MAXPACKET) {
3363 3225                  freemsg(mp);
3364 3226                  *errorp = EMSGSIZE;
3365 3227                  return (NULL);
3366 3228          }
↓ open down ↓ 77 lines elided ↑ open up ↑
3444 3306                          ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3445 3307                              ipp->ipp_tclass);
3446 3308                  }
3447 3309          }
3448 3310  
3449 3311          /* Insert all-0s SPI now. */
3450 3312          if (insert_spi)
3451 3313                  *((uint32_t *)(udpha + 1)) = 0;
3452 3314  
3453 3315          udpha->uha_dst_port = dstport;
3454      -        if (hash_srcport == B_TRUE)
3455      -                udpha->uha_src_port = htons(srcport);
3456      -
3457 3316          return (mp);
3458 3317  }
3459 3318  
3460 3319  /*
3461 3320   * Send a T_UDERR_IND in response to an M_DATA
3462 3321   */
3463 3322  static void
3464 3323  udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3465 3324  {
3466 3325          struct sockaddr_storage ss;
↓ open down ↓ 2587 lines elided ↑ open up ↑
6054 5913          sin6_t          *sin6;
6055 5914          sin_t           *sin = NULL;
6056 5915          uint_t          srcid;
6057 5916          conn_t          *connp = (conn_t *)proto_handle;
6058 5917          udp_t           *udp = connp->conn_udp;
6059 5918          int             error = 0;
6060 5919          udp_stack_t     *us = udp->udp_us;
6061 5920          ushort_t        ipversion;
6062 5921          pid_t           pid = curproc->p_pid;
6063 5922          ip_xmit_attr_t  *ixa;
     5923 +        boolean_t       snd_to_conn;
6064 5924  
6065 5925          ASSERT(DB_TYPE(mp) == M_DATA);
6066 5926  
6067 5927          /* All Solaris components should pass a cred for this operation. */
6068 5928          ASSERT(cr != NULL);
6069 5929  
6070 5930          /* do an implicit bind if necessary */
6071 5931          if (udp->udp_state == TS_UNBND) {
6072 5932                  error = udp_implicit_bind(connp, cr);
6073 5933                  /*
↓ open down ↓ 24 lines elided ↑ open up ↑
6098 5958                  else
6099 5959                          return (error);
6100 5960          }
6101 5961  
6102 5962          /*
6103 5963           * Check if we're allowed to send to a connection on which we've
6104 5964           * already called 'connect'. The posix spec. allows both behaviors but
6105 5965           * historically we've returned an error if already connected. The
6106 5966           * client can allow this via a sockopt.
6107 5967           */
6108      -        if (udp->udp_state == TS_DATA_XFER && !udp->udp_snd_to_conn) {
     5968 +        mutex_enter(&connp->conn_lock);
     5969 +        snd_to_conn = (udp->udp_snd_to_conn != 0);
     5970 +        mutex_exit(&connp->conn_lock);
     5971 +        if (udp->udp_state == TS_DATA_XFER && !snd_to_conn) {
6109 5972                  UDPS_BUMP_MIB(us, udpOutErrors);
6110 5973                  return (EISCONN);
6111 5974          }
6112 5975  
6113 5976          error = proto_verify_ip_addr(connp->conn_family,
6114 5977              (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6115 5978          if (error != 0) {
6116 5979                  UDPS_BUMP_MIB(us, udpOutErrors);
6117 5980                  return (error);
6118 5981          }
↓ open down ↓ 503 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX