Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/udp/udp.c
          +++ new/usr/src/uts/common/inet/udp/udp.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24   24   * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
  25      - * Copyright 2015, Joyent, Inc.
  26   25   */
  27   26  /* Copyright (c) 1990 Mentat Inc. */
  28   27  
  29   28  #include <sys/sysmacros.h>
  30   29  #include <sys/types.h>
  31   30  #include <sys/stream.h>
  32   31  #include <sys/stropts.h>
  33   32  #include <sys/strlog.h>
  34   33  #include <sys/strsun.h>
  35   34  #define _SUN_TPI_VERSION 2
  36   35  #include <sys/tihdr.h>
  37   36  #include <sys/timod.h>
  38   37  #include <sys/ddi.h>
  39   38  #include <sys/sunddi.h>
  40   39  #include <sys/strsubr.h>
  41   40  #include <sys/suntpi.h>
  42   41  #include <sys/xti_inet.h>
  43   42  #include <sys/kmem.h>
  44   43  #include <sys/cred_impl.h>
  45   44  #include <sys/policy.h>
  46   45  #include <sys/priv.h>
  47   46  #include <sys/ucred.h>
  48   47  #include <sys/zone.h>
  49   48  
  50   49  #include <sys/socket.h>
  51   50  #include <sys/socketvar.h>
  52   51  #include <sys/sockio.h>
  53   52  #include <sys/vtrace.h>
  54   53  #include <sys/sdt.h>
  55   54  #include <sys/debug.h>
  56   55  #include <sys/isa_defs.h>
  57   56  #include <sys/random.h>
  58   57  #include <netinet/in.h>
  59   58  #include <netinet/ip6.h>
  60   59  #include <netinet/icmp6.h>
  61   60  #include <netinet/udp.h>
  62   61  
  63   62  #include <inet/common.h>
  64   63  #include <inet/ip.h>
  65   64  #include <inet/ip_impl.h>
  66   65  #include <inet/ipsec_impl.h>
  67   66  #include <inet/ip6.h>
  68   67  #include <inet/ip_ire.h>
  69   68  #include <inet/ip_if.h>
  
    | 
      ↓ open down ↓ | 
    34 lines elided | 
    
      ↑ open up ↑ | 
  
  70   69  #include <inet/ip_multi.h>
  71   70  #include <inet/ip_ndp.h>
  72   71  #include <inet/proto_set.h>
  73   72  #include <inet/mib2.h>
  74   73  #include <inet/optcom.h>
  75   74  #include <inet/snmpcom.h>
  76   75  #include <inet/kstatcom.h>
  77   76  #include <inet/ipclassifier.h>
  78   77  #include <sys/squeue_impl.h>
  79   78  #include <inet/ipnet.h>
  80      -#include <sys/vxlan.h>
  81      -#include <inet/inet_hash.h>
       79 +#include <sys/ethernet.h>
  82   80  
  83   81  #include <sys/tsol/label.h>
  84   82  #include <sys/tsol/tnet.h>
  85   83  #include <rpc/pmap_prot.h>
  86   84  
  87   85  #include <inet/udp_impl.h>
  88   86  
  89   87  /*
  90   88   * Synchronization notes:
  91   89   *
  92   90   * UDP is MT and uses the usual kernel synchronization primitives. There are 2
  93   91   * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
  94   92   * protects the contents of the udp_t. uf_lock protects the address and the
  95   93   * fanout information.
  96   94   * The lock order is conn_lock -> uf_lock.
  97   95   *
  98   96   * The fanout lock uf_lock:
  99   97   * When a UDP endpoint is bound to a local port, it is inserted into
 100   98   * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
 101   99   * The size of the array is controlled by the udp_bind_fanout_size variable.
 102  100   * This variable can be changed in /etc/system if the default value is
 103  101   * not large enough.  Each bind hash bucket is protected by a per bucket
 104  102   * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
 105  103   * structure and a few other fields in the udp_t. A UDP endpoint is removed
 106  104   * from the bind hash list only when it is being unbound or being closed.
 107  105   * The per bucket lock also protects a UDP endpoint's state changes.
 108  106   *
 109  107   * Plumbing notes:
 110  108   * UDP is always a device driver. For compatibility with mibopen() code
 111  109   * it is possible to I_PUSH "udp", but that results in pushing a passthrough
 112  110   * dummy module.
 113  111   *
 114  112   * The above implies that we don't support any intermediate module to
 115  113   * reside in between /dev/ip and udp -- in fact, we never supported such
 116  114   * scenario in the past as the inter-layer communication semantics have
 117  115   * always been private.
 118  116   */
 119  117  
 120  118  /* For /etc/system control */
 121  119  uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
 122  120  
 123  121  static void     udp_addr_req(queue_t *q, mblk_t *mp);
 124  122  static void     udp_tpi_bind(queue_t *q, mblk_t *mp);
 125  123  static void     udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
 126  124  static void     udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
 127  125  static int      udp_build_hdr_template(conn_t *, const in6_addr_t *,
 128  126      const in6_addr_t *, in_port_t, uint32_t);
 129  127  static void     udp_capability_req(queue_t *q, mblk_t *mp);
 130  128  static int      udp_tpi_close(queue_t *q, int flags);
 131  129  static void     udp_close_free(conn_t *);
 132  130  static void     udp_tpi_connect(queue_t *q, mblk_t *mp);
 133  131  static void     udp_tpi_disconnect(queue_t *q, mblk_t *mp);
 134  132  static void     udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
 135  133      int sys_error);
 136  134  static void     udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
 137  135      t_scalar_t tlierr, int sys_error);
 138  136  static int      udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
 139  137                      cred_t *cr);
 140  138  static int      udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
 141  139                      char *value, caddr_t cp, cred_t *cr);
 142  140  static int      udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
 143  141                      char *value, caddr_t cp, cred_t *cr);
 144  142  static void     udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
 145  143  static void     udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
 146  144      ip_recv_attr_t *ira);
 147  145  static void     udp_info_req(queue_t *q, mblk_t *mp);
 148  146  static void     udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
 149  147  static void     udp_lrput(queue_t *, mblk_t *);
 150  148  static void     udp_lwput(queue_t *, mblk_t *);
 151  149  static int      udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
 152  150                      cred_t *credp, boolean_t isv6);
 153  151  static int      udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
 154  152                      cred_t *credp);
 155  153  static int      udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
 156  154                      cred_t *credp);
 157  155  static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
 158  156  int             udp_opt_set(conn_t *connp, uint_t optset_context,
 159  157                      int level, int name, uint_t inlen,
 160  158                      uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
 161  159                      void *thisdg_attrs, cred_t *cr);
 162  160  int             udp_opt_get(conn_t *connp, int level, int name,
 163  161                      uchar_t *ptr);
 164  162  static int      udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
 165  163                      pid_t pid);
 166  164  static int      udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
 167  165      pid_t pid, ip_xmit_attr_t *ixa);
 168  166  static int      udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
 169  167                      sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
 170  168                      ip_xmit_attr_t *ixa);
 171  169  static mblk_t   *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
 172  170      const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
 173  171      int *);
 174  172  static mblk_t   *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
 175  173      mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
 176  174  static void     udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
 177  175  static void     udp_ud_err_connected(conn_t *, t_scalar_t);
 178  176  static void     udp_tpi_unbind(queue_t *q, mblk_t *mp);
 179  177  static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
 180  178      boolean_t random);
 181  179  static void     udp_wput_other(queue_t *q, mblk_t *mp);
 182  180  static void     udp_wput_iocdata(queue_t *q, mblk_t *mp);
 183  181  static void     udp_wput_fallback(queue_t *q, mblk_t *mp);
 184  182  static size_t   udp_set_rcv_hiwat(udp_t *udp, size_t size);
 185  183  
 186  184  static void     *udp_stack_init(netstackid_t stackid, netstack_t *ns);
 187  185  static void     udp_stack_fini(netstackid_t stackid, void *arg);
 188  186  
 189  187  /* Common routines for TPI and socket module */
 190  188  static void     udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
 191  189  
 192  190  /* Common routine for TPI and socket module */
 193  191  static conn_t   *udp_do_open(cred_t *, boolean_t, int, int *);
 194  192  static void     udp_do_close(conn_t *);
 195  193  static int      udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
 196  194      boolean_t);
 197  195  static int      udp_do_unbind(conn_t *);
 198  196  
 199  197  int             udp_getsockname(sock_lower_handle_t,
 200  198      struct sockaddr *, socklen_t *, cred_t *);
 201  199  int             udp_getpeername(sock_lower_handle_t,
 202  200      struct sockaddr *, socklen_t *, cred_t *);
 203  201  static int      udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
 204  202      cred_t *, pid_t);
 205  203  
 206  204  #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
 207  205  
 208  206  /*
 209  207   * Checks if the given destination addr/port is allowed out.
 210  208   * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 211  209   * Called for each connect() and for sendto()/sendmsg() to a different
 212  210   * destination.
 213  211   * For connect(), called in udp_connect().
 214  212   * For sendto()/sendmsg(), called in udp_output_newdst().
 215  213   *
 216  214   * This macro assumes that the cl_inet_connect2 hook is not NULL.
 217  215   * Please check this before calling this macro.
 218  216   *
 219  217   * void
 220  218   * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 221  219   *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
            *
            * cp is dereferenced as a conn_t pointer.  err must be an lvalue: the
            * macro first sets it to 0 and then to the value returned by the
            * cl_inet_connect2 hook.  The address family passed to the hook is
            * chosen from cp->conn_ipversion.
            *
            * NOTE(review): in the IPv4 arm faddrp is expanded as "*faddrp" with no
            * parentheses around the argument, so pass a plain pointer expression.
 222  220   */
 223  221  #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {      \
 224  222          (err) = 0;                                                      \
 225  223          /*                                                              \
 226  224           * Running in cluster mode - check and register active          \
 227  225           * "connection" information                                     \
 228  226           */                                                             \
 229  227          if ((cp)->conn_ipversion == IPV4_VERSION)                       \
 230  228                  (err) = (*cl_inet_connect2)(                            \
 231  229                      (cp)->conn_netstack->netstack_stackid,              \
 232  230                      IPPROTO_UDP, is_outgoing, AF_INET,                  \
 233  231                      (uint8_t *)&((cp)->conn_laddr_v4),                  \
 234  232                      (cp)->conn_lport,                                   \
 235  233                      (uint8_t *)&(V4_PART_OF_V6(*faddrp)),               \
 236  234                      (in_port_t)(fport), NULL);                          \
 237  235          else                                                            \
 238  236                  (err) = (*cl_inet_connect2)(                            \
 239  237                      (cp)->conn_netstack->netstack_stackid,              \
 240  238                      IPPROTO_UDP, is_outgoing, AF_INET6,                 \
 241  239                      (uint8_t *)&((cp)->conn_laddr_v6),                  \
 242  240                      (cp)->conn_lport,                                   \
 243  241                      (uint8_t *)(faddrp), (in_port_t)(fport), NULL);     \
 244  242  }
 245  243  
 246  244  static struct module_info udp_mod_info =  {
 247  245          UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
 248  246  };
 249  247  
 250  248  /*
 251  249   * Entry points for UDP as a device.
 252  250   * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 253  251   */
 254  252  static struct qinit udp_rinitv4 = {
 255  253          NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
 256  254  };
 257  255  
 258  256  static struct qinit udp_rinitv6 = {
 259  257          NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
 260  258  };
 261  259  
 262  260  static struct qinit udp_winit = {
 263  261          (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
 264  262  };
 265  263  
 266  264  /* UDP entry point during fallback */
 267  265  struct qinit udp_fallback_sock_winit = {
 268  266          (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
 269  267  };
 270  268  
 271  269  /*
 272  270   * UDP needs to handle I_LINK and I_PLINK since ifconfig
 273  271   * likes to use it as a place to hang the various streams.
 274  272   */
 275  273  static struct qinit udp_lrinit = {
 276  274          (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
 277  275  };
 278  276  
 279  277  static struct qinit udp_lwinit = {
 280  278          (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
 281  279  };
 282  280  
 283  281  /* For AF_INET aka /dev/udp */
 284  282  struct streamtab udpinfov4 = {
 285  283          &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
 286  284  };
 287  285  
 288  286  /* For AF_INET6 aka /dev/udp6 */
 289  287  struct streamtab udpinfov6 = {
 290  288          &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
 291  289  };
 292  290  
 293  291  #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
 294  292  
 295  293  /* Default structure copied into T_INFO_ACK messages */
 296  294  static struct T_info_ack udp_g_t_info_ack_ipv4 = {
 297  295          T_INFO_ACK,
 298  296          UDP_MAXPACKET_IPV4,     /* TSDU_size. Excl. headers */
 299  297          T_INVALID,      /* ETSDU_size. udp does not support expedited data. */
 300  298          T_INVALID,      /* CDATA_size. udp does not support connect data. */
 301  299          T_INVALID,      /* DDATA_size. udp does not support disconnect data. */
 302  300          sizeof (sin_t), /* ADDR_size. */
 303  301          0,              /* OPT_size - not initialized here */
 304  302          UDP_MAXPACKET_IPV4,     /* TIDU_size.  Excl. headers */
 305  303          T_CLTS,         /* SERV_type.  udp supports connection-less. */
 306  304          TS_UNBND,       /* CURRENT_state.  This is set from udp_state. */
 307  305          (XPG4_1|SENDZERO) /* PROVIDER_flag */
 308  306  };
 309  307  
 310  308  #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
 311  309  
 312  310  static  struct T_info_ack udp_g_t_info_ack_ipv6 = {
 313  311          T_INFO_ACK,
 314  312          UDP_MAXPACKET_IPV6,     /* TSDU_size.  Excl. headers */
 315  313          T_INVALID,      /* ETSDU_size. udp does not support expedited data. */
 316  314          T_INVALID,      /* CDATA_size. udp does not support connect data. */
 317  315          T_INVALID,      /* DDATA_size. udp does not support disconnect data. */
 318  316          sizeof (sin6_t), /* ADDR_size. */
 319  317          0,              /* OPT_size - not initialized here */
 320  318          UDP_MAXPACKET_IPV6,     /* TIDU_size. Excl. headers */
 321  319          T_CLTS,         /* SERV_type.  udp supports connection-less. */
 322  320          TS_UNBND,       /* CURRENT_state.  This is set from udp_state. */
 323  321          (XPG4_1|SENDZERO) /* PROVIDER_flag */
 324  322  };
 325  323  
 326  324  /*
 327  325   * UDP tunables related declarations. Definitions are in udp_tunables.c
 328  326   */
 329  327  extern mod_prop_info_t udp_propinfo_tbl[];
 330  328  extern int udp_propinfo_count;
 331  329  
 332  330  /* Setable in /etc/system */
 333  331  /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
 334  332  uint32_t udp_random_anon_port = 1;
 335  333  
 336  334  /*
 337  335   * Hook functions to enable cluster networking.
 338  336   * On non-clustered systems these vectors must always be NULL
 339  337   */
 340  338  
  
    | 
      ↓ open down ↓ | 
    249 lines elided | 
    
      ↑ open up ↑ | 
  
 341  339  void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
 342  340      sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
 343  341      void *args) = NULL;
 344  342  void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
 345  343      sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
 346  344      void *args) = NULL;
 347  345  
 348  346  typedef union T_primitives *t_primp_t;
 349  347  
 350  348  /*
 351      - * Various protocols that encapsulate UDP have no real use for the source port.
 352      - * Instead, they want to vary the source port to provide better equal-cost
 353      - * multipathing and other systems that use fanout. Consider something like
 354      - * VXLAN. If you're actually sending multiple different streams to a single
 355      - * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP,
 356      - * SRC Port, DST Port) will always be the same.
 357      - *
 358      - * Here, we return a port to hash this to, if we know how to hash it. If for
 359      - * some reason we can't perform an L4 hash, then we just return the default
 360      - * value, usually the default port. After we determine the hash we transform it
 361      - * so that it's in the range of [ min, max ].
 362      - *
 363      - * We'd like to avoid a pull up for the sake of performing the hash. If the
 364      - * first mblk_t doesn't have the full protocol header, then we just send it to
 365      - * the default. If for some reason we have an encapsulated packet that has its
 366      - * protocol header in different parts of an mblk_t, then we'll go with the
 367      - * default port. This means that that if a driver isn't consistent about how it
 368      - * generates the frames for a given flow, it will not always be consistently
 369      - * hashed. That should be an uncommon event.
 370      - */
 371      -uint16_t
 372      -udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
 373      -    uint16_t def)
 374      -{
 375      -        size_t szused = 0;
 376      -        struct ether_header *ether;
 377      -        struct ether_vlan_header *vether;
 378      -        ip6_t *ip6h;
 379      -        ipha_t *ipha;
 380      -        uint16_t sap;
 381      -        uint64_t hash;
 382      -        uint32_t mod;
 383      -
 384      -        ASSERT(min <= max);
 385      -
 386      -        if (type != UDP_HASH_VXLAN)
 387      -                return (def);
 388      -
 389      -        if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
 390      -                return (def);
 391      -
 392      -        /*
 393      -         * The following logic is VXLAN specific to get at the header, if we
 394      -         * have formats, eg. GENEVE, then we should ignore this.
 395      -         *
 396      -         * The kernel overlay device often puts a first mblk_t for the data
 397      -         * which is just the encap. If so, then we're going to use that and try
 398      -         * to avoid a pull up.
 399      -         */
 400      -        if (MBLKL(mp) == VXLAN_HDR_LEN) {
 401      -                if (mp->b_cont == NULL)
 402      -                        return (def);
 403      -                mp = mp->b_cont;
 404      -                ether = (struct ether_header *)mp->b_rptr;
 405      -        } else if (MBLKL(mp) < VXLAN_HDR_LEN) {
 406      -                return (def);
 407      -        } else {
 408      -                szused = VXLAN_HDR_LEN;
 409      -                ether = (struct ether_header *)((uintptr_t)mp->b_rptr + szused);
 410      -        }
 411      -
 412      -        /* Can we hold a MAC header? */
 413      -        if (MBLKL(mp) + szused < sizeof (struct ether_header))
 414      -                return (def);
 415      -
 416      -        /*
 417      -         * We need to lie about the starting offset into the message block for
 418      -         * convenience. Undo it at the end. We know that inet_pkt_hash() won't
 419      -         * modify the mblk_t.
 420      -         */
 421      -        mp->b_rptr += szused;
 422      -        hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
 423      -            INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
 424      -        mp->b_rptr -= szused;
 425      -
 426      -        if (hash == 0)
 427      -                return (def);
 428      -
 429      -        mod = max - min + 1;
 430      -        return ((hash % mod) + min);
 431      -}
 432      -
 433      -/*
 434  349   * Return the next anonymous port in the privileged port range for
 435  350   * bind checking.
 436  351   *
 437  352   * Trusted Extension (TX) notes: TX allows administrator to mark or
 438  353   * reserve ports as Multilevel ports (MLP). MLP has special function
 439  354   * on TX systems. Once a port is made MLP, it's not available as
 440  355   * ordinary port. This creates "holes" in the port name space. It
 441  356   * may be necessary to skip the "holes" to find a suitable anon port.
 442  357   */
 443  358  static in_port_t
 444  359  udp_get_next_priv_port(udp_t *udp)
 445  360  {
                   /*
                    * Cursor into the privileged anonymous port range.  Being
                    * function-static, it is shared by every caller instead of being
                    * kept per udp_t.
                    * NOTE(review): it is read and written with no lock taken in this
                    * function -- confirm callers serialize or that races are benign.
                    */
 446  361          static in_port_t next_priv_port = IPPORT_RESERVED - 1;
 447  362          in_port_t nextport;
 448  363          boolean_t restart = B_FALSE;
 449  364          udp_stack_t *us = udp->udp_us;
 450  365  
 451  366  retry:
                   /*
                    * The cursor has left [us_min_anonpriv_port, IPPORT_RESERVED):
                    * reset it to the top of the range.  If that already happened
                    * once during this call, give up and return 0.
                    */
 452  367          if (next_priv_port < us->us_min_anonpriv_port ||
 453  368              next_priv_port >= IPPORT_RESERVED) {
 454  369                  next_priv_port = IPPORT_RESERVED - 1;
 455  370                  if (restart)
 456  371                          return (0);
 457  372                  restart = B_TRUE;
 458  373          }
 459  374  
                   /*
                    * On a labeled system, a non-zero result from tsol_next_port()
                    * replaces the candidate and the range check above is redone.
                    */
 460  375          if (is_system_labeled() &&
 461  376              (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
 462  377              next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
 463  378                  next_priv_port = nextport;
 464  379                  goto retry;
 465  380          }
 466  381  
                   /* Return the candidate and step the cursor down for the next call. */
 467  382          return (next_priv_port--);
 468  383  }
 469  384  
 470  385  /*
 471  386   * Hash list removal routine for udp_t structures.
            *
            * Unlinks udp from the bind hash chain it is on.  Does nothing when
            * udp_ptpbhn is NULL, i.e. when udp is not on a chain.
            *
            * caller_holds_lock: if B_FALSE, the uf_lock of the bucket selected by
            * UDP_BIND_HASH(conn_lport, us_bind_fanout_size) is entered around the
            * unlink and exited before returning; if B_TRUE no lock is taken here.
 472  387   */
 473  388  static void
 474  389  udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
 475  390  {
 476  391          udp_t           *udpnext;
 477  392          kmutex_t        *lockp;
 478  393          udp_stack_t     *us = udp->udp_us;
 479  394          conn_t          *connp = udp->udp_connp;
 480  395  
                   /* First check, made before any lock is taken here. */
 481  396          if (udp->udp_ptpbhn == NULL)
 482  397                  return;
 483  398  
 484  399          /*
 485  400           * Extract the lock pointer in case there are concurrent
 486  401           * hash_remove's for this instance.
 487  402           */
 488  403          ASSERT(connp->conn_lport != 0);
 489  404          if (!caller_holds_lock) {
 490  405                  lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
 491  406                      us->us_bind_fanout_size)].uf_lock;
 492  407                  ASSERT(lockp != NULL);
 493  408                  mutex_enter(lockp);
 494  409          }
                   /*
                    * Test again after the locking step: a concurrent remover may
                    * have unlinked udp since the first check.
                    */
 495  410          if (udp->udp_ptpbhn != NULL) {
 496  411                  udpnext = udp->udp_bind_hash;
 497  412                  if (udpnext != NULL) {
                                   /* The successor inherits our back-pointer. */
 498  413                          udpnext->udp_ptpbhn = udp->udp_ptpbhn;
 499  414                          udp->udp_bind_hash = NULL;
 500  415                  }
                           /* Aim the link that pointed at udp at the successor. */
 501  416                  *udp->udp_ptpbhn = udpnext;
 502  417                  udp->udp_ptpbhn = NULL;
 503  418          }
 504  419          if (!caller_holds_lock) {
 505  420                  mutex_exit(lockp);
 506  421          }
 507  422  }
 508  423  
           /*
            * Insert udp into the bind hash bucket uf.  The caller must hold
            * uf->uf_lock and udp must not already be on a bind hash chain; both
            * conditions are ASSERTed.
            *
            * If udp is bound to INADDR_ANY and the head of the chain is bound to a
            * specific address, udp is linked in ahead of the first INADDR_ANY entry,
            * or at the tail when the chain has none.  In every other case udp
            * becomes the new head of the chain.
            */
 509  424  static void
 510  425  udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
 511  426  {
 512  427          conn_t  *connp = udp->udp_connp;
 513  428          udp_t   **udpp;
 514  429          udp_t   *udpnext;
 515  430          conn_t  *connext;
 516  431  
 517  432          ASSERT(MUTEX_HELD(&uf->uf_lock));
 518  433          ASSERT(udp->udp_ptpbhn == NULL);
 519  434          udpp = &uf->uf_udp;
 520  435          udpnext = udpp[0];
 521  436          if (udpnext != NULL) {
 522  437                  /*
 523  438                   * If the new udp bound to the INADDR_ANY address
 524  439                   * and the first one in the list is not bound to
 525  440                   * INADDR_ANY we skip all entries until we find the
 526  441                   * first one bound to INADDR_ANY.
 527  442                   * This makes sure that applications binding to a
 528  443                   * specific address get preference over those binding to
 529  444                   * INADDR_ANY.
                            *
                            * The walk must test the bound address of each entry
                            * it visits; testing only the head entry's address
                            * would never stop before the end of the chain.
 530  445                   */
 531  446                  connext = udpnext->udp_connp;
 532  447                  if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
 533  448                      !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
 534  449                          while ((udpnext = udpp[0]) != NULL &&
 535  450                              !V6_OR_V4_INADDR_ANY(
                                       udpnext->udp_connp->conn_bound_addr_v6)) {
 536  451                                  udpp = &(udpnext->udp_bind_hash);
 537  452                          }
 538  453                          if (udpnext != NULL)
 539  454                                  udpnext->udp_ptpbhn = &udp->udp_bind_hash;
 540  455                  } else {
 541  456                          udpnext->udp_ptpbhn = &udp->udp_bind_hash;
 542  457                  }
 543  458          }
                   /*
                    * udpp is the link (bucket head or a predecessor's udp_bind_hash)
                    * that will now point at udp; it is saved in udp_ptpbhn as udp's
                    * back-pointer.
                    */
 544  459          udp->udp_bind_hash = udpnext;
 545  460          udp->udp_ptpbhn = udpp;
 546  461          udpp[0] = udp;
 547  462  }
 548  463  
 549  464  /*
 550  465   * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 551  466   * passed to udp_wput.
 552  467   * It associates a port number and local address with the stream.
 553  468   * It calls IP to verify the local IP address, and calls IP to insert
 554  469   * the conn_t in the fanout table.
 555  470   * If everything is ok it then sends the T_BIND_ACK back up.
 556  471   *
 557  472   * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 558  473   * without setting SO_REUSEADDR. This is needed so that they
 559  474   * can be viewed as two independent transport protocols.
 560  475   * However, anonymous ports are allocated from the same range to avoid
 561  476   * duplicating the us->us_next_port_to_try.
 562  477   */
 563  478  static void
 564  479  udp_tpi_bind(queue_t *q, mblk_t *mp)
 565  480  {
 566  481          sin_t           *sin;
 567  482          sin6_t          *sin6;
 568  483          mblk_t          *mp1;
 569  484          struct T_bind_req *tbr;
 570  485          conn_t          *connp;
 571  486          udp_t           *udp;
 572  487          int             error;
 573  488          struct sockaddr *sa;
 574  489          cred_t          *cr;
 575  490  
 576  491          /*
 577  492           * All Solaris components should pass a db_credp
 578  493           * for this TPI message, hence we ASSERT.
 579  494           * But in case there is some other M_PROTO that looks
 580  495           * like a TPI message sent by some other kernel
 581  496           * component, we check and return an error.
 582  497           */
 583  498          cr = msg_getcred(mp, NULL);
 584  499          ASSERT(cr != NULL);
 585  500          if (cr == NULL) {
 586  501                  udp_err_ack(q, mp, TSYSERR, EINVAL);
 587  502                  return;
 588  503          }
 589  504  
 590  505          connp = Q_TO_CONN(q);
 591  506          udp = connp->conn_udp;
 592  507          if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
 593  508                  (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
 594  509                      "udp_bind: bad req, len %u",
 595  510                      (uint_t)(mp->b_wptr - mp->b_rptr));
 596  511                  udp_err_ack(q, mp, TPROTO, 0);
 597  512                  return;
 598  513          }
 599  514          if (udp->udp_state != TS_UNBND) {
 600  515                  (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
 601  516                      "udp_bind: bad state, %u", udp->udp_state);
 602  517                  udp_err_ack(q, mp, TOUTSTATE, 0);
 603  518                  return;
 604  519          }
 605  520          /*
 606  521           * Reallocate the message to make sure we have enough room for an
 607  522           * address.
 608  523           */
 609  524          mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
 610  525          if (mp1 == NULL) {
 611  526                  udp_err_ack(q, mp, TSYSERR, ENOMEM);
 612  527                  return;
 613  528          }
 614  529  
 615  530          mp = mp1;
 616  531  
 617  532          /* Reset the message type in preparation for shipping it back. */
 618  533          DB_TYPE(mp) = M_PCPROTO;
 619  534  
 620  535          tbr = (struct T_bind_req *)mp->b_rptr;
 621  536          switch (tbr->ADDR_length) {
 622  537          case 0:                 /* Request for a generic port */
 623  538                  tbr->ADDR_offset = sizeof (struct T_bind_req);
 624  539                  if (connp->conn_family == AF_INET) {
 625  540                          tbr->ADDR_length = sizeof (sin_t);
 626  541                          sin = (sin_t *)&tbr[1];
 627  542                          *sin = sin_null;
 628  543                          sin->sin_family = AF_INET;
 629  544                          mp->b_wptr = (uchar_t *)&sin[1];
 630  545                          sa = (struct sockaddr *)sin;
 631  546                  } else {
 632  547                          ASSERT(connp->conn_family == AF_INET6);
 633  548                          tbr->ADDR_length = sizeof (sin6_t);
 634  549                          sin6 = (sin6_t *)&tbr[1];
 635  550                          *sin6 = sin6_null;
 636  551                          sin6->sin6_family = AF_INET6;
 637  552                          mp->b_wptr = (uchar_t *)&sin6[1];
 638  553                          sa = (struct sockaddr *)sin6;
 639  554                  }
 640  555                  break;
 641  556  
 642  557          case sizeof (sin_t):    /* Complete IPv4 address */
 643  558                  sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
 644  559                      sizeof (sin_t));
 645  560                  if (sa == NULL || !OK_32PTR((char *)sa)) {
 646  561                          udp_err_ack(q, mp, TSYSERR, EINVAL);
 647  562                          return;
 648  563                  }
 649  564                  if (connp->conn_family != AF_INET ||
 650  565                      sa->sa_family != AF_INET) {
 651  566                          udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
 652  567                          return;
 653  568                  }
 654  569                  break;
 655  570  
 656  571          case sizeof (sin6_t):   /* complete IPv6 address */
 657  572                  sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
 658  573                      sizeof (sin6_t));
 659  574                  if (sa == NULL || !OK_32PTR((char *)sa)) {
 660  575                          udp_err_ack(q, mp, TSYSERR, EINVAL);
 661  576                          return;
 662  577                  }
 663  578                  if (connp->conn_family != AF_INET6 ||
 664  579                      sa->sa_family != AF_INET6) {
 665  580                          udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
 666  581                          return;
 667  582                  }
 668  583                  break;
 669  584  
 670  585          default:                /* Invalid request */
 671  586                  (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
 672  587                      "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
 673  588                  udp_err_ack(q, mp, TBADADDR, 0);
 674  589                  return;
 675  590          }
 676  591  
 677  592          error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
 678  593              tbr->PRIM_type != O_T_BIND_REQ);
 679  594  
 680  595          if (error != 0) {
 681  596                  if (error > 0) {
 682  597                          udp_err_ack(q, mp, TSYSERR, error);
 683  598                  } else {
 684  599                          udp_err_ack(q, mp, -error, 0);
 685  600                  }
 686  601          } else {
 687  602                  tbr->PRIM_type = T_BIND_ACK;
 688  603                  qreply(q, mp);
 689  604          }
 690  605  }
 691  606  
 692  607  /*
 693  608   * This routine handles each T_CONN_REQ message passed to udp.  It
 694  609   * associates a default destination address with the stream.
 695  610   *
 696  611   * After various error checks are completed, udp_connect() lays
 697  612   * the target address and port into the composite header template.
 698  613   * Then we ask IP for information, including a source address if we didn't
 699  614   * already have one. Finally we send up the T_OK_ACK reply message.
 700  615   */
 701  616  static void
 702  617  udp_tpi_connect(queue_t *q, mblk_t *mp)
 703  618  {
 704  619          conn_t  *connp = Q_TO_CONN(q);
 705  620          int     error;
 706  621          socklen_t       len;
 707  622          struct sockaddr         *sa;
 708  623          struct T_conn_req       *tcr;
 709  624          cred_t          *cr;
 710  625          pid_t           pid;
 711  626          /*
 712  627           * All Solaris components should pass a db_credp
 713  628           * for this TPI message, hence we ASSERT.
 714  629           * But in case there is some other M_PROTO that looks
 715  630           * like a TPI message sent by some other kernel
 716  631           * component, we check and return an error.
 717  632           */
 718  633          cr = msg_getcred(mp, &pid);
 719  634          ASSERT(cr != NULL);
 720  635          if (cr == NULL) {
 721  636                  udp_err_ack(q, mp, TSYSERR, EINVAL);
 722  637                  return;
 723  638          }
 724  639  
 725  640          tcr = (struct T_conn_req *)mp->b_rptr;
 726  641  
 727  642          /* A bit of sanity checking */
 728  643          if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
 729  644                  udp_err_ack(q, mp, TPROTO, 0);
 730  645                  return;
 731  646          }
 732  647  
 733  648          if (tcr->OPT_length != 0) {
 734  649                  udp_err_ack(q, mp, TBADOPT, 0);
 735  650                  return;
 736  651          }
 737  652  
 738  653          /*
 739  654           * Determine packet type based on type of address passed in
 740  655           * the request should contain an IPv4 or IPv6 address.
 741  656           * Make sure that address family matches the type of
 742  657           * family of the address passed down.
 743  658           */
 744  659          len = tcr->DEST_length;
 745  660          switch (tcr->DEST_length) {
 746  661          default:
 747  662                  udp_err_ack(q, mp, TBADADDR, 0);
 748  663                  return;
 749  664  
 750  665          case sizeof (sin_t):
 751  666                  sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
 752  667                      sizeof (sin_t));
 753  668                  break;
 754  669  
 755  670          case sizeof (sin6_t):
 756  671                  sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
 757  672                      sizeof (sin6_t));
 758  673                  break;
 759  674          }
 760  675  
 761  676          error = proto_verify_ip_addr(connp->conn_family, sa, len);
 762  677          if (error != 0) {
 763  678                  udp_err_ack(q, mp, TSYSERR, error);
 764  679                  return;
 765  680          }
 766  681  
 767  682          error = udp_do_connect(connp, sa, len, cr, pid);
 768  683          if (error != 0) {
 769  684                  if (error < 0)
 770  685                          udp_err_ack(q, mp, -error, 0);
 771  686                  else
 772  687                          udp_err_ack(q, mp, TSYSERR, error);
 773  688          } else {
 774  689                  mblk_t  *mp1;
 775  690                  /*
 776  691                   * We have to send a connection confirmation to
 777  692                   * keep TLI happy.
 778  693                   */
 779  694                  if (connp->conn_family == AF_INET) {
 780  695                          mp1 = mi_tpi_conn_con(NULL, (char *)sa,
 781  696                              sizeof (sin_t), NULL, 0);
 782  697                  } else {
 783  698                          mp1 = mi_tpi_conn_con(NULL, (char *)sa,
 784  699                              sizeof (sin6_t), NULL, 0);
 785  700                  }
 786  701                  if (mp1 == NULL) {
 787  702                          udp_err_ack(q, mp, TSYSERR, ENOMEM);
 788  703                          return;
 789  704                  }
 790  705  
 791  706                  /*
 792  707                   * Send ok_ack for T_CONN_REQ
 793  708                   */
 794  709                  mp = mi_tpi_ok_ack_alloc(mp);
 795  710                  if (mp == NULL) {
 796  711                          /* Unable to reuse the T_CONN_REQ for the ack. */
 797  712                          udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
 798  713                          return;
 799  714                  }
 800  715  
 801  716                  putnext(connp->conn_rq, mp);
 802  717                  putnext(connp->conn_rq, mp1);
 803  718          }
 804  719  }
 805  720  
 806  721  static int
 807  722  udp_tpi_close(queue_t *q, int flags)
 808  723  {
 809  724          conn_t  *connp;
 810  725  
 811  726          if (flags & SO_FALLBACK) {
 812  727                  /*
 813  728                   * stream is being closed while in fallback
 814  729                   * simply free the resources that were allocated
 815  730                   */
 816  731                  inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
 817  732                  qprocsoff(q);
 818  733                  goto done;
 819  734          }
 820  735  
 821  736          connp = Q_TO_CONN(q);
 822  737          udp_do_close(connp);
 823  738  done:
 824  739          q->q_ptr = WR(q)->q_ptr = NULL;
 825  740          return (0);
 826  741  }
 827  742  
 828  743  static void
 829  744  udp_close_free(conn_t *connp)
 830  745  {
 831  746          udp_t *udp = connp->conn_udp;
 832  747  
 833  748          /* If there are any options associated with the stream, free them. */
 834  749          if (udp->udp_recv_ipp.ipp_fields != 0)
 835  750                  ip_pkt_free(&udp->udp_recv_ipp);
 836  751  
 837  752          /*
 838  753           * Clear any fields which the kmem_cache constructor clears.
 839  754           * Only udp_connp needs to be preserved.
 840  755           * TBD: We should make this more efficient to avoid clearing
 841  756           * everything.
 842  757           */
 843  758          ASSERT(udp->udp_connp == connp);
 844  759          bzero(udp, sizeof (udp_t));
 845  760          udp->udp_connp = connp;
 846  761  }
 847  762  
 848  763  static int
 849  764  udp_do_disconnect(conn_t *connp)
 850  765  {
 851  766          udp_t   *udp;
 852  767          udp_fanout_t *udpf;
 853  768          udp_stack_t *us;
 854  769          int     error;
 855  770  
 856  771          udp = connp->conn_udp;
 857  772          us = udp->udp_us;
 858  773          mutex_enter(&connp->conn_lock);
 859  774          if (udp->udp_state != TS_DATA_XFER) {
 860  775                  mutex_exit(&connp->conn_lock);
 861  776                  return (-TOUTSTATE);
 862  777          }
 863  778          udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
 864  779              us->us_bind_fanout_size)];
 865  780          mutex_enter(&udpf->uf_lock);
 866  781          if (connp->conn_mcbc_bind)
 867  782                  connp->conn_saddr_v6 = ipv6_all_zeros;
 868  783          else
 869  784                  connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
 870  785          connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
 871  786          connp->conn_faddr_v6 = ipv6_all_zeros;
 872  787          connp->conn_fport = 0;
 873  788          udp->udp_state = TS_IDLE;
 874  789          mutex_exit(&udpf->uf_lock);
 875  790  
 876  791          /* Remove any remnants of mapped address binding */
 877  792          if (connp->conn_family == AF_INET6)
 878  793                  connp->conn_ipversion = IPV6_VERSION;
 879  794  
 880  795          connp->conn_v6lastdst = ipv6_all_zeros;
 881  796          error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
 882  797              &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
 883  798          mutex_exit(&connp->conn_lock);
 884  799          if (error != 0)
 885  800                  return (error);
 886  801  
 887  802          /*
 888  803           * Tell IP to remove the full binding and revert
 889  804           * to the local address binding.
 890  805           */
 891  806          return (ip_laddr_fanout_insert(connp));
 892  807  }
 893  808  
 894  809  static void
 895  810  udp_tpi_disconnect(queue_t *q, mblk_t *mp)
 896  811  {
 897  812          conn_t  *connp = Q_TO_CONN(q);
 898  813          int     error;
 899  814  
 900  815          /*
 901  816           * Allocate the largest primitive we need to send back
 902  817           * T_error_ack is > than T_ok_ack
 903  818           */
 904  819          mp = reallocb(mp, sizeof (struct T_error_ack), 1);
 905  820          if (mp == NULL) {
 906  821                  /* Unable to reuse the T_DISCON_REQ for the ack. */
 907  822                  udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
 908  823                  return;
 909  824          }
 910  825  
 911  826          error = udp_do_disconnect(connp);
 912  827  
 913  828          if (error != 0) {
 914  829                  if (error < 0) {
 915  830                          udp_err_ack(q, mp, -error, 0);
 916  831                  } else {
 917  832                          udp_err_ack(q, mp, TSYSERR, error);
 918  833                  }
 919  834          } else {
 920  835                  mp = mi_tpi_ok_ack_alloc(mp);
 921  836                  ASSERT(mp != NULL);
 922  837                  qreply(q, mp);
 923  838          }
 924  839  }
 925  840  
 926  841  int
 927  842  udp_disconnect(conn_t *connp)
 928  843  {
 929  844          int error;
 930  845  
 931  846          connp->conn_dgram_errind = B_FALSE;
 932  847          error = udp_do_disconnect(connp);
 933  848          if (error < 0)
 934  849                  error = proto_tlitosyserr(-error);
 935  850  
 936  851          return (error);
 937  852  }
 938  853  
 939  854  /* This routine creates a T_ERROR_ACK message and passes it upstream. */
 940  855  static void
 941  856  udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
 942  857  {
 943  858          if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
 944  859                  qreply(q, mp);
 945  860  }
 946  861  
 947  862  /* Shorthand to generate and send TPI error acks to our client */
 948  863  static void
 949  864  udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
 950  865      t_scalar_t t_error, int sys_error)
 951  866  {
 952  867          struct T_error_ack      *teackp;
 953  868  
 954  869          if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
 955  870              M_PCPROTO, T_ERROR_ACK)) != NULL) {
 956  871                  teackp = (struct T_error_ack *)mp->b_rptr;
 957  872                  teackp->ERROR_prim = primitive;
 958  873                  teackp->TLI_error = t_error;
 959  874                  teackp->UNIX_error = sys_error;
 960  875                  qreply(q, mp);
 961  876          }
 962  877  }
 963  878  
 964  879  /* At minimum we need 4 bytes of UDP header */
 965  880  #define ICMP_MIN_UDP_HDR        4
 966  881  
 967  882  /*
 968  883   * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
 969  884   * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 970  885   * Assumes that IP has pulled up everything up to and including the ICMP header.
 971  886   */
 972  887  /* ARGSUSED2 */
 973  888  static void
 974  889  udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
 975  890  {
 976  891          conn_t          *connp = (conn_t *)arg1;
 977  892          icmph_t         *icmph;
 978  893          ipha_t          *ipha;
 979  894          int             iph_hdr_length;
 980  895          udpha_t         *udpha;
 981  896          sin_t           sin;
 982  897          sin6_t          sin6;
 983  898          mblk_t          *mp1;
 984  899          int             error = 0;
 985  900          udp_t           *udp = connp->conn_udp;
 986  901  
 987  902          ipha = (ipha_t *)mp->b_rptr;
 988  903  
 989  904          ASSERT(OK_32PTR(mp->b_rptr));
 990  905  
 991  906          if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
 992  907                  ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
 993  908                  udp_icmp_error_ipv6(connp, mp, ira);
 994  909                  return;
 995  910          }
 996  911          ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
 997  912  
 998  913          /* Skip past the outer IP and ICMP headers */
 999  914          ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
1000  915          iph_hdr_length = ira->ira_ip_hdr_length;
1001  916          icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1002  917          ipha = (ipha_t *)&icmph[1];     /* Inner IP header */
1003  918  
1004  919          /* Skip past the inner IP and find the ULP header */
1005  920          iph_hdr_length = IPH_HDR_LENGTH(ipha);
1006  921          udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
1007  922  
1008  923          switch (icmph->icmph_type) {
1009  924          case ICMP_DEST_UNREACHABLE:
1010  925                  switch (icmph->icmph_code) {
1011  926                  case ICMP_FRAGMENTATION_NEEDED: {
1012  927                          ipha_t          *ipha;
1013  928                          ip_xmit_attr_t  *ixa;
1014  929                          /*
1015  930                           * IP has already adjusted the path MTU.
1016  931                           * But we need to adjust DF for IPv4.
1017  932                           */
1018  933                          if (connp->conn_ipversion != IPV4_VERSION)
1019  934                                  break;
1020  935  
1021  936                          ixa = conn_get_ixa(connp, B_FALSE);
1022  937                          if (ixa == NULL || ixa->ixa_ire == NULL) {
1023  938                                  /*
1024  939                                   * Some other thread holds conn_ixa. We will
1025  940                                   * redo this on the next ICMP too big.
1026  941                                   */
1027  942                                  if (ixa != NULL)
1028  943                                          ixa_refrele(ixa);
1029  944                                  break;
1030  945                          }
1031  946                          (void) ip_get_pmtu(ixa);
1032  947  
1033  948                          mutex_enter(&connp->conn_lock);
1034  949                          ipha = (ipha_t *)connp->conn_ht_iphc;
1035  950                          if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
1036  951                                  ipha->ipha_fragment_offset_and_flags |=
1037  952                                      IPH_DF_HTONS;
1038  953                          } else {
1039  954                                  ipha->ipha_fragment_offset_and_flags &=
1040  955                                      ~IPH_DF_HTONS;
1041  956                          }
1042  957                          mutex_exit(&connp->conn_lock);
1043  958                          ixa_refrele(ixa);
1044  959                          break;
1045  960                  }
1046  961                  case ICMP_PORT_UNREACHABLE:
1047  962                  case ICMP_PROTOCOL_UNREACHABLE:
1048  963                          error = ECONNREFUSED;
1049  964                          break;
1050  965                  default:
1051  966                          /* Transient errors */
1052  967                          break;
1053  968                  }
1054  969                  break;
1055  970          default:
1056  971                  /* Transient errors */
1057  972                  break;
1058  973          }
1059  974          if (error == 0) {
1060  975                  freemsg(mp);
1061  976                  return;
1062  977          }
1063  978  
1064  979          /*
1065  980           * Deliver T_UDERROR_IND when the application has asked for it.
1066  981           * The socket layer enables this automatically when connected.
1067  982           */
1068  983          if (!connp->conn_dgram_errind) {
1069  984                  freemsg(mp);
1070  985                  return;
1071  986          }
1072  987  
1073  988          switch (connp->conn_family) {
1074  989          case AF_INET:
1075  990                  sin = sin_null;
1076  991                  sin.sin_family = AF_INET;
1077  992                  sin.sin_addr.s_addr = ipha->ipha_dst;
1078  993                  sin.sin_port = udpha->uha_dst_port;
1079  994                  if (IPCL_IS_NONSTR(connp)) {
1080  995                          mutex_enter(&connp->conn_lock);
1081  996                          if (udp->udp_state == TS_DATA_XFER) {
1082  997                                  if (sin.sin_port == connp->conn_fport &&
1083  998                                      sin.sin_addr.s_addr ==
1084  999                                      connp->conn_faddr_v4) {
1085 1000                                          mutex_exit(&connp->conn_lock);
1086 1001                                          (*connp->conn_upcalls->su_set_error)
1087 1002                                              (connp->conn_upper_handle, error);
1088 1003                                          goto done;
1089 1004                                  }
1090 1005                          } else {
1091 1006                                  udp->udp_delayed_error = error;
1092 1007                                  *((sin_t *)&udp->udp_delayed_addr) = sin;
1093 1008                          }
1094 1009                          mutex_exit(&connp->conn_lock);
1095 1010                  } else {
1096 1011                          mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1097 1012                              NULL, 0, error);
1098 1013                          if (mp1 != NULL)
1099 1014                                  putnext(connp->conn_rq, mp1);
1100 1015                  }
1101 1016                  break;
1102 1017          case AF_INET6:
1103 1018                  sin6 = sin6_null;
1104 1019                  sin6.sin6_family = AF_INET6;
1105 1020                  IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1106 1021                  sin6.sin6_port = udpha->uha_dst_port;
1107 1022                  if (IPCL_IS_NONSTR(connp)) {
1108 1023                          mutex_enter(&connp->conn_lock);
1109 1024                          if (udp->udp_state == TS_DATA_XFER) {
1110 1025                                  if (sin6.sin6_port == connp->conn_fport &&
1111 1026                                      IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1112 1027                                      &connp->conn_faddr_v6)) {
1113 1028                                          mutex_exit(&connp->conn_lock);
1114 1029                                          (*connp->conn_upcalls->su_set_error)
1115 1030                                              (connp->conn_upper_handle, error);
1116 1031                                          goto done;
1117 1032                                  }
1118 1033                          } else {
1119 1034                                  udp->udp_delayed_error = error;
1120 1035                                  *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1121 1036                          }
1122 1037                          mutex_exit(&connp->conn_lock);
1123 1038                  } else {
1124 1039                          mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1125 1040                              NULL, 0, error);
1126 1041                          if (mp1 != NULL)
1127 1042                                  putnext(connp->conn_rq, mp1);
1128 1043                  }
1129 1044                  break;
1130 1045          }
1131 1046  done:
1132 1047          freemsg(mp);
1133 1048  }
1134 1049  
1135 1050  /*
1136 1051   * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1137 1052   * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1138 1053   * Assumes that IP has pulled up all the extension headers as well as the
1139 1054   * ICMPv6 header.
1140 1055   */
1141 1056  static void
1142 1057  udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1143 1058  {
1144 1059          icmp6_t         *icmp6;
1145 1060          ip6_t           *ip6h, *outer_ip6h;
1146 1061          uint16_t        iph_hdr_length;
1147 1062          uint8_t         *nexthdrp;
1148 1063          udpha_t         *udpha;
1149 1064          sin6_t          sin6;
1150 1065          mblk_t          *mp1;
1151 1066          int             error = 0;
1152 1067          udp_t           *udp = connp->conn_udp;
1153 1068          udp_stack_t     *us = udp->udp_us;
1154 1069  
1155 1070          outer_ip6h = (ip6_t *)mp->b_rptr;
1156 1071  #ifdef DEBUG
1157 1072          if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1158 1073                  iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1159 1074          else
1160 1075                  iph_hdr_length = IPV6_HDR_LEN;
1161 1076          ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1162 1077  #endif
1163 1078          /* Skip past the outer IP and ICMP headers */
1164 1079          iph_hdr_length = ira->ira_ip_hdr_length;
1165 1080          icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1166 1081  
1167 1082          /* Skip past the inner IP and find the ULP header */
1168 1083          ip6h = (ip6_t *)&icmp6[1];      /* Inner IP header */
1169 1084          if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1170 1085                  freemsg(mp);
1171 1086                  return;
1172 1087          }
1173 1088          udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1174 1089  
1175 1090          switch (icmp6->icmp6_type) {
1176 1091          case ICMP6_DST_UNREACH:
1177 1092                  switch (icmp6->icmp6_code) {
1178 1093                  case ICMP6_DST_UNREACH_NOPORT:
1179 1094                          error = ECONNREFUSED;
1180 1095                          break;
1181 1096                  case ICMP6_DST_UNREACH_ADMIN:
1182 1097                  case ICMP6_DST_UNREACH_NOROUTE:
1183 1098                  case ICMP6_DST_UNREACH_BEYONDSCOPE:
1184 1099                  case ICMP6_DST_UNREACH_ADDR:
1185 1100                          /* Transient errors */
1186 1101                          break;
1187 1102                  default:
1188 1103                          break;
1189 1104                  }
1190 1105                  break;
1191 1106          case ICMP6_PACKET_TOO_BIG: {
1192 1107                  struct T_unitdata_ind   *tudi;
1193 1108                  struct T_opthdr         *toh;
1194 1109                  size_t                  udi_size;
1195 1110                  mblk_t                  *newmp;
1196 1111                  t_scalar_t              opt_length = sizeof (struct T_opthdr) +
1197 1112                      sizeof (struct ip6_mtuinfo);
1198 1113                  sin6_t                  *sin6;
1199 1114                  struct ip6_mtuinfo      *mtuinfo;
1200 1115  
1201 1116                  /*
1202 1117                   * If the application has requested to receive path mtu
1203 1118                   * information, send up an empty message containing an
1204 1119                   * IPV6_PATHMTU ancillary data item.
1205 1120                   */
1206 1121                  if (!connp->conn_ipv6_recvpathmtu)
1207 1122                          break;
1208 1123  
1209 1124                  udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1210 1125                      opt_length;
1211 1126                  if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1212 1127                          UDPS_BUMP_MIB(us, udpInErrors);
1213 1128                          break;
1214 1129                  }
1215 1130  
1216 1131                  /*
1217 1132                   * newmp->b_cont is left to NULL on purpose.  This is an
1218 1133                   * empty message containing only ancillary data.
1219 1134                   */
1220 1135                  newmp->b_datap->db_type = M_PROTO;
1221 1136                  tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1222 1137                  newmp->b_wptr = (uchar_t *)tudi + udi_size;
1223 1138                  tudi->PRIM_type = T_UNITDATA_IND;
1224 1139                  tudi->SRC_length = sizeof (sin6_t);
1225 1140                  tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1226 1141                  tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1227 1142                  tudi->OPT_length = opt_length;
1228 1143  
1229 1144                  sin6 = (sin6_t *)&tudi[1];
1230 1145                  bzero(sin6, sizeof (sin6_t));
1231 1146                  sin6->sin6_family = AF_INET6;
1232 1147                  sin6->sin6_addr = connp->conn_faddr_v6;
1233 1148  
1234 1149                  toh = (struct T_opthdr *)&sin6[1];
1235 1150                  toh->level = IPPROTO_IPV6;
1236 1151                  toh->name = IPV6_PATHMTU;
1237 1152                  toh->len = opt_length;
1238 1153                  toh->status = 0;
1239 1154  
1240 1155                  mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1241 1156                  bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1242 1157                  mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1243 1158                  mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1244 1159                  mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1245 1160                  /*
1246 1161                   * We've consumed everything we need from the original
1247 1162                   * message.  Free it, then send our empty message.
1248 1163                   */
1249 1164                  freemsg(mp);
1250 1165                  udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1251 1166                  return;
1252 1167          }
1253 1168          case ICMP6_TIME_EXCEEDED:
1254 1169                  /* Transient errors */
1255 1170                  break;
1256 1171          case ICMP6_PARAM_PROB:
1257 1172                  /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1258 1173                  if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1259 1174                      (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1260 1175                      (uchar_t *)nexthdrp) {
1261 1176                          error = ECONNREFUSED;
1262 1177                          break;
1263 1178                  }
1264 1179                  break;
1265 1180          }
1266 1181          if (error == 0) {
1267 1182                  freemsg(mp);
1268 1183                  return;
1269 1184          }
1270 1185  
1271 1186          /*
1272 1187           * Deliver T_UDERROR_IND when the application has asked for it.
1273 1188           * The socket layer enables this automatically when connected.
1274 1189           */
1275 1190          if (!connp->conn_dgram_errind) {
1276 1191                  freemsg(mp);
1277 1192                  return;
1278 1193          }
1279 1194  
1280 1195          sin6 = sin6_null;
1281 1196          sin6.sin6_family = AF_INET6;
1282 1197          sin6.sin6_addr = ip6h->ip6_dst;
1283 1198          sin6.sin6_port = udpha->uha_dst_port;
1284 1199          sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1285 1200  
1286 1201          if (IPCL_IS_NONSTR(connp)) {
1287 1202                  mutex_enter(&connp->conn_lock);
1288 1203                  if (udp->udp_state == TS_DATA_XFER) {
1289 1204                          if (sin6.sin6_port == connp->conn_fport &&
1290 1205                              IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1291 1206                              &connp->conn_faddr_v6)) {
1292 1207                                  mutex_exit(&connp->conn_lock);
1293 1208                                  (*connp->conn_upcalls->su_set_error)
1294 1209                                      (connp->conn_upper_handle, error);
1295 1210                                  goto done;
1296 1211                          }
1297 1212                  } else {
1298 1213                          udp->udp_delayed_error = error;
1299 1214                          *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1300 1215                  }
1301 1216                  mutex_exit(&connp->conn_lock);
1302 1217          } else {
1303 1218                  mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1304 1219                      NULL, 0, error);
1305 1220                  if (mp1 != NULL)
1306 1221                          putnext(connp->conn_rq, mp1);
1307 1222          }
1308 1223  done:
1309 1224          freemsg(mp);
1310 1225  }
1311 1226  
1312 1227  /*
1313 1228   * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1314 1229   * The local address is filled in if endpoint is bound. The remote address
1315 1230   * is filled in if remote address has been precified ("connected endpoint")
1316 1231   * (The concept of connected CLTS sockets is alien to published TPI
1317 1232   *  but we support it anyway).
1318 1233   */
1319 1234  static void
1320 1235  udp_addr_req(queue_t *q, mblk_t *mp)
1321 1236  {
1322 1237          struct sockaddr *sa;
1323 1238          mblk_t  *ackmp;
1324 1239          struct T_addr_ack *taa;
1325 1240          udp_t   *udp = Q_TO_UDP(q);
1326 1241          conn_t  *connp = udp->udp_connp;
1327 1242          uint_t  addrlen;
1328 1243  
1329 1244          /* Make it large enough for worst case */
1330 1245          ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1331 1246              2 * sizeof (sin6_t), 1);
1332 1247          if (ackmp == NULL) {
1333 1248                  udp_err_ack(q, mp, TSYSERR, ENOMEM);
1334 1249                  return;
1335 1250          }
1336 1251          taa = (struct T_addr_ack *)ackmp->b_rptr;
1337 1252  
1338 1253          bzero(taa, sizeof (struct T_addr_ack));
1339 1254          ackmp->b_wptr = (uchar_t *)&taa[1];
1340 1255  
1341 1256          taa->PRIM_type = T_ADDR_ACK;
1342 1257          ackmp->b_datap->db_type = M_PCPROTO;
1343 1258  
1344 1259          if (connp->conn_family == AF_INET)
1345 1260                  addrlen = sizeof (sin_t);
1346 1261          else
1347 1262                  addrlen = sizeof (sin6_t);
1348 1263  
1349 1264          mutex_enter(&connp->conn_lock);
1350 1265          /*
1351 1266           * Note: Following code assumes 32 bit alignment of basic
1352 1267           * data structures like sin_t and struct T_addr_ack.
1353 1268           */
1354 1269          if (udp->udp_state != TS_UNBND) {
1355 1270                  /*
1356 1271                   * Fill in local address first
1357 1272                   */
1358 1273                  taa->LOCADDR_offset = sizeof (*taa);
1359 1274                  taa->LOCADDR_length = addrlen;
1360 1275                  sa = (struct sockaddr *)&taa[1];
1361 1276                  (void) conn_getsockname(connp, sa, &addrlen);
1362 1277                  ackmp->b_wptr += addrlen;
1363 1278          }
1364 1279          if (udp->udp_state == TS_DATA_XFER) {
1365 1280                  /*
1366 1281                   * connected, fill remote address too
1367 1282                   */
1368 1283                  taa->REMADDR_length = addrlen;
1369 1284                  /* assumed 32-bit alignment */
1370 1285                  taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1371 1286                  sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1372 1287                  (void) conn_getpeername(connp, sa, &addrlen);
1373 1288                  ackmp->b_wptr += addrlen;
1374 1289          }
1375 1290          mutex_exit(&connp->conn_lock);
1376 1291          ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1377 1292          qreply(q, ackmp);
1378 1293  }
1379 1294  
1380 1295  static void
1381 1296  udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1382 1297  {
1383 1298          conn_t          *connp = udp->udp_connp;
1384 1299  
1385 1300          if (connp->conn_family == AF_INET) {
1386 1301                  *tap = udp_g_t_info_ack_ipv4;
1387 1302          } else {
1388 1303                  *tap = udp_g_t_info_ack_ipv6;
1389 1304          }
1390 1305          tap->CURRENT_state = udp->udp_state;
1391 1306          tap->OPT_size = udp_max_optsize;
1392 1307  }
1393 1308  
1394 1309  static void
1395 1310  udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1396 1311      t_uscalar_t cap_bits1)
1397 1312  {
1398 1313          tcap->CAP_bits1 = 0;
1399 1314  
1400 1315          if (cap_bits1 & TC1_INFO) {
1401 1316                  udp_copy_info(&tcap->INFO_ack, udp);
1402 1317                  tcap->CAP_bits1 |= TC1_INFO;
1403 1318          }
1404 1319  }
1405 1320  
1406 1321  /*
1407 1322   * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1408 1323   * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1409 1324   * udp_g_t_info_ack.  The current state of the stream is copied from
1410 1325   * udp_state.
1411 1326   */
1412 1327  static void
1413 1328  udp_capability_req(queue_t *q, mblk_t *mp)
1414 1329  {
1415 1330          t_uscalar_t             cap_bits1;
1416 1331          struct T_capability_ack *tcap;
1417 1332          udp_t   *udp = Q_TO_UDP(q);
1418 1333  
1419 1334          cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1420 1335  
1421 1336          mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1422 1337              mp->b_datap->db_type, T_CAPABILITY_ACK);
1423 1338          if (!mp)
1424 1339                  return;
1425 1340  
1426 1341          tcap = (struct T_capability_ack *)mp->b_rptr;
1427 1342          udp_do_capability_ack(udp, tcap, cap_bits1);
1428 1343  
1429 1344          qreply(q, mp);
1430 1345  }
1431 1346  
1432 1347  /*
1433 1348   * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1434 1349   * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1435 1350   * The current state of the stream is copied from udp_state.
1436 1351   */
1437 1352  static void
1438 1353  udp_info_req(queue_t *q, mblk_t *mp)
1439 1354  {
1440 1355          udp_t *udp = Q_TO_UDP(q);
1441 1356  
1442 1357          /* Create a T_INFO_ACK message. */
1443 1358          mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1444 1359              T_INFO_ACK);
1445 1360          if (!mp)
1446 1361                  return;
1447 1362          udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1448 1363          qreply(q, mp);
1449 1364  }
1450 1365  
1451 1366  /* For /dev/udp aka AF_INET open */
1452 1367  static int
1453 1368  udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1454 1369  {
1455 1370          return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1456 1371  }
1457 1372  
1458 1373  /* For /dev/udp6 aka AF_INET6 open */
1459 1374  static int
1460 1375  udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1461 1376  {
1462 1377          return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1463 1378  }
1464 1379  
1465 1380  /*
1466 1381   * This is the open routine for udp.  It allocates a udp_t structure for
1467 1382   * the stream and, on the first open of the module, creates an ND table.
1468 1383   */
1469 1384  static int
1470 1385  udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1471 1386      boolean_t isv6)
1472 1387  {
1473 1388          udp_t           *udp;
1474 1389          conn_t          *connp;
1475 1390          dev_t           conn_dev;
1476 1391          vmem_t          *minor_arena;
1477 1392          int             err;
1478 1393  
1479 1394          /* If the stream is already open, return immediately. */
1480 1395          if (q->q_ptr != NULL)
1481 1396                  return (0);
1482 1397  
1483 1398          if (sflag == MODOPEN)
1484 1399                  return (EINVAL);
1485 1400  
1486 1401          if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1487 1402              ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1488 1403                  minor_arena = ip_minor_arena_la;
1489 1404          } else {
1490 1405                  /*
1491 1406                   * Either minor numbers in the large arena were exhausted
1492 1407                   * or a non socket application is doing the open.
1493 1408                   * Try to allocate from the small arena.
1494 1409                   */
1495 1410                  if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1496 1411                          return (EBUSY);
1497 1412  
1498 1413                  minor_arena = ip_minor_arena_sa;
1499 1414          }
1500 1415  
1501 1416          if (flag & SO_FALLBACK) {
1502 1417                  /*
1503 1418                   * Non streams socket needs a stream to fallback to
1504 1419                   */
1505 1420                  RD(q)->q_ptr = (void *)conn_dev;
1506 1421                  WR(q)->q_qinfo = &udp_fallback_sock_winit;
1507 1422                  WR(q)->q_ptr = (void *)minor_arena;
1508 1423                  qprocson(q);
1509 1424                  return (0);
1510 1425          }
1511 1426  
1512 1427          connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1513 1428          if (connp == NULL) {
1514 1429                  inet_minor_free(minor_arena, conn_dev);
1515 1430                  return (err);
1516 1431          }
1517 1432          udp = connp->conn_udp;
1518 1433  
1519 1434          *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1520 1435          connp->conn_dev = conn_dev;
1521 1436          connp->conn_minor_arena = minor_arena;
1522 1437  
1523 1438          /*
1524 1439           * Initialize the udp_t structure for this stream.
1525 1440           */
1526 1441          q->q_ptr = connp;
1527 1442          WR(q)->q_ptr = connp;
1528 1443          connp->conn_rq = q;
1529 1444          connp->conn_wq = WR(q);
1530 1445  
1531 1446          /*
1532 1447           * Since this conn_t/udp_t is not yet visible to anybody else we don't
1533 1448           * need to lock anything.
1534 1449           */
1535 1450          ASSERT(connp->conn_proto == IPPROTO_UDP);
1536 1451          ASSERT(connp->conn_udp == udp);
1537 1452          ASSERT(udp->udp_connp == connp);
1538 1453  
1539 1454          if (flag & SO_SOCKSTR) {
1540 1455                  udp->udp_issocket = B_TRUE;
1541 1456          }
1542 1457  
1543 1458          WR(q)->q_hiwat = connp->conn_sndbuf;
1544 1459          WR(q)->q_lowat = connp->conn_sndlowat;
1545 1460  
1546 1461          qprocson(q);
1547 1462  
1548 1463          /* Set the Stream head write offset and high watermark. */
1549 1464          (void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1550 1465          (void) proto_set_rx_hiwat(q, connp,
1551 1466              udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1552 1467  
1553 1468          mutex_enter(&connp->conn_lock);
1554 1469          connp->conn_state_flags &= ~CONN_INCIPIENT;
1555 1470          mutex_exit(&connp->conn_lock);
1556 1471          return (0);
1557 1472  }
1558 1473  
1559 1474  /*
1560 1475   * Which UDP options OK to set through T_UNITDATA_REQ...
1561 1476   */
1562 1477  /* ARGSUSED */
1563 1478  static boolean_t
1564 1479  udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1565 1480  {
1566 1481          return (B_TRUE);
1567 1482  }
1568 1483  
1569 1484  /*
1570 1485   * This routine gets default values of certain options whose default
1571 1486   * values are maintained by protcol specific code
1572 1487   */
1573 1488  int
1574 1489  udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1575 1490  {
1576 1491          udp_t           *udp = Q_TO_UDP(q);
1577 1492          udp_stack_t *us = udp->udp_us;
1578 1493          int *i1 = (int *)ptr;
1579 1494  
1580 1495          switch (level) {
1581 1496          case IPPROTO_IP:
1582 1497                  switch (name) {
1583 1498                  case IP_MULTICAST_TTL:
1584 1499                          *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1585 1500                          return (sizeof (uchar_t));
1586 1501                  case IP_MULTICAST_LOOP:
1587 1502                          *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1588 1503                          return (sizeof (uchar_t));
1589 1504                  }
1590 1505                  break;
1591 1506          case IPPROTO_IPV6:
1592 1507                  switch (name) {
1593 1508                  case IPV6_MULTICAST_HOPS:
1594 1509                          *i1 = IP_DEFAULT_MULTICAST_TTL;
1595 1510                          return (sizeof (int));
1596 1511                  case IPV6_MULTICAST_LOOP:
1597 1512                          *i1 = IP_DEFAULT_MULTICAST_LOOP;
1598 1513                          return (sizeof (int));
1599 1514                  case IPV6_UNICAST_HOPS:
1600 1515                          *i1 = us->us_ipv6_hoplimit;
1601 1516                          return (sizeof (int));
1602 1517                  }
1603 1518                  break;
1604 1519          }
1605 1520          return (-1);
1606 1521  }
1607 1522  
1608 1523  /*
1609 1524   * This routine retrieves the current status of socket options.
1610 1525   * It returns the size of the option retrieved, or -1.
           *
           * The option value is written through ptr.  Options handled directly:
           *   IPPROTO_IP: IP_OPTIONS / T_IP_OPTIONS copy the IPv4 options held in
           *     udp_recv_ipp and return their length, or return 0 when
           *     IPPF_IPV4_OPTIONS is not set there.  Any IPPROTO_IP request on a
           *     conn whose family is not AF_INET returns -1.
           *   IPPROTO_UDP: UDP_NAT_T_ENDPOINT, UDP_RCVHDR, UDP_SRCPORT_HASH and
           *     UDP_SND_TO_CONNECTED are stored as an int; sizeof (int) is
           *     returned.
           * Every other request is handed to conn_opt_get() with conn_lock held
           * and its return value is passed back unchanged.
1611 1526   */
1612 1527  int
1613 1528  udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1614 1529      uchar_t *ptr)
1615 1530  {
1616 1531          int             *i1 = (int *)ptr;
1617 1532          udp_t           *udp = connp->conn_udp;
1618 1533          int             len;
1619 1534          conn_opt_arg_t  coas;
1620 1535          int             retval;
1621 1536  
                   /* Argument block used only by the conn_opt_get() call below. */
1622 1537          coas.coa_connp = connp;
1623 1538          coas.coa_ixa = connp->conn_ixa;
1624 1539          coas.coa_ipp = &connp->conn_xmit_ipp;
1625 1540          coas.coa_ancillary = B_FALSE;
1626 1541          coas.coa_changed = 0;
1627 1542  
1628 1543          /*
1629 1544           * We assume that the optcom framework has checked for the set
1630 1545           * of levels and names that are supported, hence we don't worry
1631 1546           * about rejecting based on that.
1632 1547           * First check for UDP specific handling, then pass to common routine.
1633 1548           */
1634 1549          switch (level) {
1635 1550          case IPPROTO_IP:
1636 1551                  /*
1637 1552                   * Only allow IPv4 option processing on IPv4 sockets.
1638 1553                   */
1639 1554                  if (connp->conn_family != AF_INET)
1640 1555                          return (-1);
1641 1556  
1642 1557                  switch (name) {
1643 1558                  case IP_OPTIONS:
1644 1559                  case T_IP_OPTIONS:
1645 1560                          mutex_enter(&connp->conn_lock);
1646 1561                          if (!(udp->udp_recv_ipp.ipp_fields &
1647 1562                              IPPF_IPV4_OPTIONS)) {
1648 1563                                  mutex_exit(&connp->conn_lock);
1649 1564                                  return (0);
1650 1565                          }
1651 1566  
                                   /*
                                    * NOTE(review): len bytes are copied into ptr with
                                    * no size check here; presumably the caller's buffer
                                    * is large enough -- confirm.
                                    */
1652 1567                          len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1653 1568                          ASSERT(len != 0);
1654 1569                          bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1655 1570                          mutex_exit(&connp->conn_lock);
1656 1571                          return (len);
1657 1572                  }
1658 1573                  break;
1659 1574          case IPPROTO_UDP:
1660 1575                  switch (name) {
  
    | 
      ↓ open down ↓ | 
    1217 lines elided | 
    
      ↑ open up ↑ | 
  
1661 1576                  case UDP_NAT_T_ENDPOINT:
1662 1577                          mutex_enter(&connp->conn_lock);
1663 1578                          *i1 = udp->udp_nat_t_endpoint;
1664 1579                          mutex_exit(&connp->conn_lock);
1665 1580                          return (sizeof (int));
1666 1581                  case UDP_RCVHDR:
1667 1582                          mutex_enter(&connp->conn_lock);
1668 1583                          *i1 = udp->udp_rcvhdr ? 1 : 0;
1669 1584                          mutex_exit(&connp->conn_lock);
1670 1585                          return (sizeof (int));
                           /*
                            * NOTE(review): the UDP_SRCPORT_HASH case below carries only
                            * an old-side line number and a '-' marker in this diff
                            * view, i.e. it appears to be removed by the change shown.
                            */
1671      -                case UDP_SRCPORT_HASH:
1672      -                        mutex_enter(&connp->conn_lock);
1673      -                        *i1 = udp->udp_vxlanhash;
1674      -                        mutex_exit(&connp->conn_lock);
1675      -                        return (sizeof (int));
1676 1586                  case UDP_SND_TO_CONNECTED:
1677 1587                          mutex_enter(&connp->conn_lock);
1678 1588                          *i1 = udp->udp_snd_to_conn ? 1 : 0;
1679 1589                          mutex_exit(&connp->conn_lock);
1680 1590                          return (sizeof (int));
1681 1591                  }
                   /*
                    * No break is needed after the inner switch: IPPROTO_UDP is the
                    * last case, so unmatched names continue to conn_opt_get() below.
                    */
1682 1592          }
1683 1593          mutex_enter(&connp->conn_lock);
1684 1594          retval = conn_opt_get(&coas, level, name, ptr);
1685 1595          mutex_exit(&connp->conn_lock);
1686 1596          return (retval);
1687 1597  }
1688 1598  
1689 1599  /*
1690 1600   * This routine retrieves the current status of socket options.
1691 1601   * It returns the size of the option retrieved, or -1.
1692 1602   */
1693 1603  int
1694 1604  udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1695 1605  {
1696 1606          conn_t          *connp = Q_TO_CONN(q);
1697 1607          int             err;
1698 1608  
1699 1609          err = udp_opt_get(connp, level, name, ptr);
1700 1610          return (err);
1701 1611  }
1702 1612  
1703 1613  /*
1704 1614   * This routine sets socket options.
           *
           * coa carries the conn (coa_connp), the transmit attributes to modify
           * (coa_ixa) and the coa_changed flags that record what was touched.
           * level/name select the option, invalp/inlen hold the new value, and
           * cr is the caller's credential used for the privilege checks.  When
           * checkonly is set the request is meant to be validated only.
           *
           * UDP specific checks and options are handled here; anything that does
           * not return from the switch is passed on to conn_opt_set().  Returns 0
           * on success or an errno value.  conn_lock must not be held on entry.
1705 1615   */
1706 1616  int
1707 1617  udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1708 1618      uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1709 1619  {
1710 1620          conn_t          *connp = coa->coa_connp;
1711 1621          ip_xmit_attr_t  *ixa = coa->coa_ixa;
1712 1622          udp_t           *udp = connp->conn_udp;
1713 1623          udp_stack_t     *us = udp->udp_us;
1714 1624          int             *i1 = (int *)invalp;
                   /*
                    * NOTE(review): *i1 is read here before any inlen check in this
                    * function; presumably the caller guarantees at least an int's
                    * worth of data -- confirm.
                    */
1715 1625          boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1716 1626          int             error;
1717 1627  
1718 1628          ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1719 1629          /*
1720 1630           * First do UDP specific sanity checks and handle UDP specific
1721 1631           * options. Note that some IPPROTO_UDP options are handled
1722 1632           * by conn_opt_set.
1723 1633           */
1724 1634          switch (level) {
1725 1635          case SOL_SOCKET:
1726 1636                  switch (name) {
                           /*
                            * Buffer sizes are only range checked against us_max_buf
                            * here; the value itself is stored by conn_opt_set() below.
                            */
1727 1637                  case SO_SNDBUF:
1728 1638                          if (*i1 > us->us_max_buf) {
1729 1639                                  return (ENOBUFS);
1730 1640                          }
1731 1641                          break;
1732 1642                  case SO_RCVBUF:
1733 1643                          if (*i1 > us->us_max_buf) {
1734 1644                                  return (ENOBUFS);
1735 1645                          }
1736 1646                          break;
1737 1647  
1738 1648                  case SCM_UCRED: {
1739 1649                          struct ucred_s *ucr;
1740 1650                          cred_t *newcr;
1741 1651                          ts_label_t *tsl;
1742 1652  
1743 1653                          /*
1744 1654                           * Only sockets that have proper privileges and are
1745 1655                           * bound to MLPs will have any other value here, so
1746 1656                           * this implicitly tests for privilege to set label.
1747 1657                           */
1748 1658                          if (connp->conn_mlp_type == mlptSingle)
1749 1659                                  break;
1750 1660  
                                   /*
                                    * The value must be a ucred_s followed by a label
                                    * that lies entirely within the inlen bytes given.
                                    */
1751 1661                          ucr = (struct ucred_s *)invalp;
1752 1662                          if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1753 1663                              ucr->uc_labeloff < sizeof (*ucr) ||
1754 1664                              ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1755 1665                                  return (EINVAL);
1756 1666                          if (!checkonly) {
1757 1667                                  /*
1758 1668                                   * Set ixa_tsl to the new label.
1759 1669                                   * We assume that crgetzoneid doesn't change
1760 1670                                   * as part of the SCM_UCRED.
1761 1671                                   */
1762 1672                                  ASSERT(cr != NULL);
1763 1673                                  if ((tsl = crgetlabel(cr)) == NULL)
1764 1674                                          return (EINVAL);
1765 1675                                  newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1766 1676                                      tsl->tsl_doi, KM_NOSLEEP);
1767 1677                                  if (newcr == NULL)
1768 1678                                          return (ENOSR);
1769 1679                                  ASSERT(newcr->cr_label != NULL);
1770 1680                                  /*
1771 1681                                   * Move the hold on the cr_label to ixa_tsl by
1772 1682                                   * setting cr_label to NULL. Then release newcr.
1773 1683                                   */
1774 1684                                  ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1775 1685                                  ixa->ixa_flags |= IXAF_UCRED_TSL;
1776 1686                                  newcr->cr_label = NULL;
1777 1687                                  crfree(newcr);
1778 1688                                  coa->coa_changed |= COA_HEADER_CHANGED;
1779 1689                                  coa->coa_changed |= COA_WROFF_CHANGED;
1780 1690                          }
1781 1691                          /* Fully handled this option. */
1782 1692                          return (0);
1783 1693                  }
1784 1694                  }
1785 1695                  break;
1786 1696          case IPPROTO_UDP:
1787 1697                  switch (name) {
1788 1698                  case UDP_NAT_T_ENDPOINT:
1789 1699                          if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1790 1700                                  return (error);
1791 1701                          }
1792 1702  
1793 1703                          /*
1794 1704                           * Use conn_family instead so we can avoid ambiguities
1795 1705                           * with AF_INET6 sockets that may switch from IPv4
1796 1706                           * to IPv6.
1797 1707                           */
1798 1708                          if (connp->conn_family != AF_INET) {
1799 1709                                  return (EAFNOSUPPORT);
1800 1710                          }
1801 1711  
1802 1712                          if (!checkonly) {
1803 1713                                  mutex_enter(&connp->conn_lock);
1804 1714                                  udp->udp_nat_t_endpoint = onoff;
1805 1715                                  mutex_exit(&connp->conn_lock);
  
    | 
      ↓ open down ↓ | 
    120 lines elided | 
    
      ↑ open up ↑ | 
  
1806 1716                                  coa->coa_changed |= COA_HEADER_CHANGED;
1807 1717                                  coa->coa_changed |= COA_WROFF_CHANGED;
1808 1718                          }
1809 1719                          /* Fully handled this option. */
1810 1720                          return (0);
                           /*
                            * NOTE(review): unlike UDP_NAT_T_ENDPOINT above, UDP_RCVHDR
                            * and UDP_SND_TO_CONNECTED store the new value even when
                            * checkonly is set -- confirm whether that is intended.
                            */
1811 1721                  case UDP_RCVHDR:
1812 1722                          mutex_enter(&connp->conn_lock);
1813 1723                          udp->udp_rcvhdr = onoff;
1814 1724                          mutex_exit(&connp->conn_lock);
1815 1725                          return (0);
                           /*
                            * NOTE(review): the UDP_SRCPORT_HASH case below carries only
                            * an old-side line number and a '-' marker in this diff
                            * view, i.e. it appears to be removed by the change shown.
                            */
1816      -                case UDP_SRCPORT_HASH:
1817      -                        /*
1818      -                         * This should have already been verified, but double
1819      -                         * check.
1820      -                         */
1821      -                        if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1822      -                                return (error);
1823      -                        }
1824      -
1825      -                        /* First see if the val is something we understand */
1826      -                        if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
1827      -                                return (EINVAL);
1828      -
1829      -                        if (!checkonly) {
1830      -                                mutex_enter(&connp->conn_lock);
1831      -                                udp->udp_vxlanhash = *i1;
1832      -                                mutex_exit(&connp->conn_lock);
1833      -                        }
1834      -                        /* Fully handled this option. */
1835      -                        return (0);
1836 1726                  case UDP_SND_TO_CONNECTED:
1837 1727                          mutex_enter(&connp->conn_lock);
1838 1728                          udp->udp_snd_to_conn = onoff;
1839 1729                          mutex_exit(&connp->conn_lock);
1840 1730                          return (0);
1841 1731                  }
1842 1732                  break;
1843 1733          }
                   /* Not fully handled above: let the common code process it. */
1844 1734          error = conn_opt_set(coa, level, name, inlen, invalp,
1845 1735              checkonly, cr);
1846 1736          return (error);
1847 1737  }
1848 1738  
1849 1739  /*
1850 1740   * This routine sets socket options.
1851 1741   */
1852 1742  int
1853 1743  udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1854 1744      int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1855 1745      uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1856 1746  {
1857 1747          udp_t           *udp = connp->conn_udp;
1858 1748          int             err;
1859 1749          conn_opt_arg_t  coas, *coa;
1860 1750          boolean_t       checkonly;
1861 1751          udp_stack_t     *us = udp->udp_us;
1862 1752  
1863 1753          switch (optset_context) {
1864 1754          case SETFN_OPTCOM_CHECKONLY:
1865 1755                  checkonly = B_TRUE;
1866 1756                  /*
1867 1757                   * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1868 1758                   * inlen != 0 implies value supplied and
1869 1759                   *      we have to "pretend" to set it.
1870 1760                   * inlen == 0 implies that there is no
1871 1761                   *      value part in T_CHECK request and just validation
1872 1762                   * done elsewhere should be enough, we just return here.
1873 1763                   */
1874 1764                  if (inlen == 0) {
1875 1765                          *outlenp = 0;
1876 1766                          return (0);
1877 1767                  }
1878 1768                  break;
1879 1769          case SETFN_OPTCOM_NEGOTIATE:
1880 1770                  checkonly = B_FALSE;
1881 1771                  break;
1882 1772          case SETFN_UD_NEGOTIATE:
1883 1773          case SETFN_CONN_NEGOTIATE:
1884 1774                  checkonly = B_FALSE;
1885 1775                  /*
1886 1776                   * Negotiating local and "association-related" options
1887 1777                   * through T_UNITDATA_REQ.
1888 1778                   *
1889 1779                   * Following routine can filter out ones we do not
1890 1780                   * want to be "set" this way.
1891 1781                   */
1892 1782                  if (!udp_opt_allow_udr_set(level, name)) {
1893 1783                          *outlenp = 0;
1894 1784                          return (EINVAL);
1895 1785                  }
1896 1786                  break;
1897 1787          default:
1898 1788                  /*
1899 1789                   * We should never get here
1900 1790                   */
1901 1791                  *outlenp = 0;
1902 1792                  return (EINVAL);
1903 1793          }
1904 1794  
1905 1795          ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1906 1796              (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1907 1797  
1908 1798          if (thisdg_attrs != NULL) {
1909 1799                  /* Options from T_UNITDATA_REQ */
1910 1800                  coa = (conn_opt_arg_t *)thisdg_attrs;
1911 1801                  ASSERT(coa->coa_connp == connp);
1912 1802                  ASSERT(coa->coa_ixa != NULL);
1913 1803                  ASSERT(coa->coa_ipp != NULL);
1914 1804                  ASSERT(coa->coa_ancillary);
1915 1805          } else {
1916 1806                  coa = &coas;
1917 1807                  coas.coa_connp = connp;
1918 1808                  /* Get a reference on conn_ixa to prevent concurrent mods */
1919 1809                  coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1920 1810                  if (coas.coa_ixa == NULL) {
1921 1811                          *outlenp = 0;
1922 1812                          return (ENOMEM);
1923 1813                  }
1924 1814                  coas.coa_ipp = &connp->conn_xmit_ipp;
1925 1815                  coas.coa_ancillary = B_FALSE;
1926 1816                  coas.coa_changed = 0;
1927 1817          }
1928 1818  
1929 1819          err = udp_do_opt_set(coa, level, name, inlen, invalp,
1930 1820              cr, checkonly);
1931 1821          if (err != 0) {
1932 1822  errout:
1933 1823                  if (!coa->coa_ancillary)
1934 1824                          ixa_refrele(coa->coa_ixa);
1935 1825                  *outlenp = 0;
1936 1826                  return (err);
1937 1827          }
1938 1828          /* Handle DHCPINIT here outside of lock */
1939 1829          if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1940 1830                  uint_t  ifindex;
1941 1831                  ill_t   *ill;
1942 1832  
1943 1833                  ifindex = *(uint_t *)invalp;
1944 1834                  if (ifindex == 0) {
1945 1835                          ill = NULL;
1946 1836                  } else {
1947 1837                          ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1948 1838                              coa->coa_ixa->ixa_ipst);
1949 1839                          if (ill == NULL) {
1950 1840                                  err = ENXIO;
1951 1841                                  goto errout;
1952 1842                          }
1953 1843  
1954 1844                          mutex_enter(&ill->ill_lock);
1955 1845                          if (ill->ill_state_flags & ILL_CONDEMNED) {
1956 1846                                  mutex_exit(&ill->ill_lock);
1957 1847                                  ill_refrele(ill);
1958 1848                                  err = ENXIO;
1959 1849                                  goto errout;
1960 1850                          }
1961 1851                          if (IS_VNI(ill)) {
1962 1852                                  mutex_exit(&ill->ill_lock);
1963 1853                                  ill_refrele(ill);
1964 1854                                  err = EINVAL;
1965 1855                                  goto errout;
1966 1856                          }
1967 1857                  }
1968 1858                  mutex_enter(&connp->conn_lock);
1969 1859  
1970 1860                  if (connp->conn_dhcpinit_ill != NULL) {
1971 1861                          /*
1972 1862                           * We've locked the conn so conn_cleanup_ill()
1973 1863                           * cannot clear conn_dhcpinit_ill -- so it's
1974 1864                           * safe to access the ill.
1975 1865                           */
1976 1866                          ill_t *oill = connp->conn_dhcpinit_ill;
1977 1867  
1978 1868                          ASSERT(oill->ill_dhcpinit != 0);
1979 1869                          atomic_dec_32(&oill->ill_dhcpinit);
1980 1870                          ill_set_inputfn(connp->conn_dhcpinit_ill);
1981 1871                          connp->conn_dhcpinit_ill = NULL;
1982 1872                  }
1983 1873  
1984 1874                  if (ill != NULL) {
1985 1875                          connp->conn_dhcpinit_ill = ill;
1986 1876                          atomic_inc_32(&ill->ill_dhcpinit);
1987 1877                          ill_set_inputfn(ill);
1988 1878                          mutex_exit(&connp->conn_lock);
1989 1879                          mutex_exit(&ill->ill_lock);
1990 1880                          ill_refrele(ill);
1991 1881                  } else {
1992 1882                          mutex_exit(&connp->conn_lock);
1993 1883                  }
1994 1884          }
1995 1885  
1996 1886          /*
1997 1887           * Common case of OK return with outval same as inval.
1998 1888           */
1999 1889          if (invalp != outvalp) {
2000 1890                  /* don't trust bcopy for identical src/dst */
2001 1891                  (void) bcopy(invalp, outvalp, inlen);
2002 1892          }
2003 1893          *outlenp = inlen;
2004 1894  
2005 1895          /*
2006 1896           * If this was not ancillary data, then we rebuild the headers,
2007 1897           * update the IRE/NCE, and IPsec as needed.
2008 1898           * Since the label depends on the destination we go through
2009 1899           * ip_set_destination first.
2010 1900           */
2011 1901          if (coa->coa_ancillary) {
2012 1902                  return (0);
2013 1903          }
2014 1904  
2015 1905          if (coa->coa_changed & COA_ROUTE_CHANGED) {
2016 1906                  in6_addr_t saddr, faddr, nexthop;
2017 1907                  in_port_t fport;
2018 1908  
2019 1909                  /*
2020 1910                   * We clear lastdst to make sure we pick up the change
2021 1911                   * next time sending.
2022 1912                   * If we are connected we re-cache the information.
2023 1913                   * We ignore errors to preserve BSD behavior.
2024 1914                   * Note that we don't redo IPsec policy lookup here
2025 1915                   * since the final destination (or source) didn't change.
2026 1916                   */
2027 1917                  mutex_enter(&connp->conn_lock);
2028 1918                  connp->conn_v6lastdst = ipv6_all_zeros;
2029 1919  
2030 1920                  ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
2031 1921                      &connp->conn_faddr_v6, &nexthop);
2032 1922                  saddr = connp->conn_saddr_v6;
2033 1923                  faddr = connp->conn_faddr_v6;
2034 1924                  fport = connp->conn_fport;
2035 1925                  mutex_exit(&connp->conn_lock);
2036 1926  
2037 1927                  if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
2038 1928                      !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
2039 1929                          (void) ip_attr_connect(connp, coa->coa_ixa,
2040 1930                              &saddr, &faddr, &nexthop, fport, NULL, NULL,
2041 1931                              IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
2042 1932                  }
2043 1933          }
2044 1934  
2045 1935          ixa_refrele(coa->coa_ixa);
2046 1936  
2047 1937          if (coa->coa_changed & COA_HEADER_CHANGED) {
2048 1938                  /*
2049 1939                   * Rebuild the header template if we are connected.
2050 1940                   * Otherwise clear conn_v6lastdst so we rebuild the header
2051 1941                   * in the data path.
2052 1942                   */
2053 1943                  mutex_enter(&connp->conn_lock);
2054 1944                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
2055 1945                      !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
2056 1946                          err = udp_build_hdr_template(connp,
2057 1947                              &connp->conn_saddr_v6, &connp->conn_faddr_v6,
2058 1948                              connp->conn_fport, connp->conn_flowinfo);
2059 1949                          if (err != 0) {
2060 1950                                  mutex_exit(&connp->conn_lock);
2061 1951                                  return (err);
2062 1952                          }
2063 1953                  } else {
2064 1954                          connp->conn_v6lastdst = ipv6_all_zeros;
2065 1955                  }
2066 1956                  mutex_exit(&connp->conn_lock);
2067 1957          }
2068 1958          if (coa->coa_changed & COA_RCVBUF_CHANGED) {
2069 1959                  (void) proto_set_rx_hiwat(connp->conn_rq, connp,
2070 1960                      connp->conn_rcvbuf);
2071 1961          }
2072 1962          if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
2073 1963                  connp->conn_wq->q_hiwat = connp->conn_sndbuf;
2074 1964          }
2075 1965          if (coa->coa_changed & COA_WROFF_CHANGED) {
2076 1966                  /* Increase wroff if needed */
2077 1967                  uint_t wroff;
2078 1968  
2079 1969                  mutex_enter(&connp->conn_lock);
2080 1970                  wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
2081 1971                  if (udp->udp_nat_t_endpoint)
2082 1972                          wroff += sizeof (uint32_t);
2083 1973                  if (wroff > connp->conn_wroff) {
2084 1974                          connp->conn_wroff = wroff;
2085 1975                          mutex_exit(&connp->conn_lock);
2086 1976                          (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
2087 1977                  } else {
2088 1978                          mutex_exit(&connp->conn_lock);
2089 1979                  }
2090 1980          }
2091 1981          return (err);
2092 1982  }
2093 1983  
2094 1984  /*
            * This routine sets socket options.
            *
            * It takes a queue rather than a conn_t: the conn_t is recovered from
            * the queue with Q_TO_CONN() and every other argument is handed to
            * udp_opt_set() unchanged.  The return value is whatever
            * udp_opt_set() returns.
            */
2095 1985  int
2096 1986  udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2097 1987      uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2098 1988      void *thisdg_attrs, cred_t *cr)
2099 1989  {
2100 1990          conn_t  *connp = Q_TO_CONN(q);
2101 1991          int error;
2102 1992  
2103 1993          error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
2104 1994              outlenp, outvalp, thisdg_attrs, cr);
2105 1995          return (error);
2106 1996  }
2107 1997  
2108 1998  /*
2109 1999   * Setup IP and UDP headers.
2110 2000   * Returns NULL on allocation failure, in which case data_mp is freed.
            *
            * The headers are prepended by conn_prepend_hdr(); when that fails this
            * routine returns NULL and *errorp is expected to be non-zero (asserted).
            *
            * The upper-layer header is UDPH_SIZE bytes, plus one extra uint32_t when
            * udp_nat_t_endpoint is set; that extra word is written as zero right
            * after the UDP header.
            *
            * On success ixa->ixa_pktlen is set to the IP header length plus the UDP
            * header and payload length, and the UDP ports and length are filled in.
            * uha_checksum is left holding the folded sum of the value returned by
            * conn_prepend_hdr() and the UDP length (0xffff when that folds to zero)
            * so IP can finish the checksum.  The one exception is IPv4 with
            * us_do_checksum clear, where uha_checksum stays zero.
2111 2001   */
2112 2002  mblk_t *
2113 2003  udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
  
    | 
      ↓ open down ↓ | 
    268 lines elided | 
    
      ↑ open up ↑ | 
  
2114 2004      const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
2115 2005      uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2116 2006  {
2117 2007          mblk_t          *mp;
2118 2008          udpha_t         *udpha;
2119 2009          udp_stack_t     *us = connp->conn_netstack->netstack_udp;
2120 2010          uint_t          data_len;
2121 2011          uint32_t        cksum;
2122 2012          udp_t           *udp = connp->conn_udp;
2123 2013          boolean_t       insert_spi = udp->udp_nat_t_endpoint;
2124      -        boolean_t       hash_srcport = udp->udp_vxlanhash;
2125 2014          uint_t          ulp_hdr_len;
2126      -        uint16_t        srcport;
2127 2015  
2128 2016          data_len = msgdsize(data_mp);
2129 2017          ulp_hdr_len = UDPH_SIZE;
2130 2018          if (insert_spi)
2131 2019                  ulp_hdr_len += sizeof (uint32_t);
2132 2020  
2133      -        /*
2134      -         * If we have source port hashing going on, determine the hash before
2135      -         * we modify the mblk_t.
2136      -         */
2137      -        if (hash_srcport == B_TRUE) {
2138      -                srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
2139      -                    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
2140      -                    ntohs(connp->conn_lport));
2141      -        }
2142      -
2143 2021          mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2144 2022              ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2145 2023          if (mp == NULL) {
2146 2024                  ASSERT(*errorp != 0);
2147 2025                  return (NULL);
2148 2026          }
2149 2027  
                   /* From here on data_len also covers the UDP (and SPI) header. */
2150 2028          data_len += ulp_hdr_len;
2151 2029          ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2152 2030  
2153 2031          udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2154      -        if (hash_srcport == B_TRUE) {
2155      -                udpha->uha_src_port = htons(srcport);
2156      -        } else {
2157      -                udpha->uha_src_port = connp->conn_lport;
2158      -        }
     2032 +        udpha->uha_src_port = connp->conn_lport;
2159 2033          udpha->uha_dst_port = dstport;
2160 2034          udpha->uha_checksum = 0;
2161 2035          udpha->uha_length = htons(data_len);
2162 2036  
2163 2037          /*
2164 2038           * If there was a routing option/header then conn_prepend_hdr
2165 2039           * has massaged it and placed the pseudo-header checksum difference
2166 2040           * in the cksum argument.
2167 2041           *
2168 2042           * Setup header length and prepare for ULP checksum done in IP.
2169 2043           *
2170 2044           * We make it easy for IP to include our pseudo header
2171 2045           * by putting our length in uha_checksum.
2172 2046           * The IP source, destination, and length have already been set by
2173 2047           * conn_prepend_hdr.
2174 2048           */
2175 2049          cksum += data_len;
2176 2050          cksum = (cksum >> 16) + (cksum & 0xFFFF);
2177 2051          ASSERT(cksum < 0x10000);
2178 2052  
2179 2053          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2180 2054                  ipha_t  *ipha = (ipha_t *)mp->b_rptr;
2181 2055  
2182 2056                  ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2183 2057  
2184 2058                  /* IP does the checksum if uha_checksum is non-zero */
2185 2059                  if (us->us_do_checksum) {
2186 2060                          if (cksum == 0)
2187 2061                                  udpha->uha_checksum = 0xffff;
2188 2062                          else
2189 2063                                  udpha->uha_checksum = htons(cksum);
2190 2064                  } else {
2191 2065                          udpha->uha_checksum = 0;
2192 2066                  }
2193 2067          } else {
2194 2068                  ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2195 2069  
2196 2070                  ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
                           /* Unlike IPv4, this branch never leaves uha_checksum zero. */
2197 2071                  if (cksum == 0)
2198 2072                          udpha->uha_checksum = 0xffff;
2199 2073                  else
2200 2074                          udpha->uha_checksum = htons(cksum);
2201 2075          }
2202 2076  
2203 2077          /* Insert all-0s SPI now. */
2204 2078          if (insert_spi)
2205 2079                  *((uint32_t *)(udpha + 1)) = 0;
2206 2080  
2207 2081          return (mp);
2208 2082  }
2209 2083  
           /*
            * Build the header template for connp and fill in its UDP header.
            *
            * The caller must hold conn_lock (asserted).  conn_v6lastdst is cleared
            * before anything else, so it is reset even when the build fails.
            * conn_build_hdr_template() does the actual construction; afterwards the
            * UDP header that conn_ht_ulp points at gets conn_lport as source port,
            * dstport as destination port, a zero checksum and a length of UDPH_SIZE.
            *
            * Returns 0 on success, otherwise the error from
            * conn_build_hdr_template().
            */
2210 2084  static int
2211 2085  udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2212 2086      const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2213 2087  {
2214 2088          udpha_t         *udpha;
2215 2089          int             error;
2216 2090  
2217 2091          ASSERT(MUTEX_HELD(&connp->conn_lock));
2218 2092          /*
2219 2093           * We clear lastdst to make sure we don't use the lastdst path
2220 2094           * next time sending since we might not have set v6dst yet.
2221 2095           */
2222 2096          connp->conn_v6lastdst = ipv6_all_zeros;
2223 2097  
2224 2098          error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2225 2099              flowinfo);
2226 2100          if (error != 0)
2227 2101                  return (error);
2228 2102  
2229 2103          /*
2230 2104           * Any routing header/option has been massaged. The checksum difference
2231 2105           * is stored in conn_sum.
2232 2106           */
2233 2107          udpha = (udpha_t *)connp->conn_ht_ulp;
2234 2108          udpha->uha_src_port = connp->conn_lport;
2235 2109          udpha->uha_dst_port = dstport;
2236 2110          udpha->uha_checksum = 0;
2237 2111          udpha->uha_length = htons(UDPH_SIZE);   /* Filled in later */
2238 2112          return (0);
2239 2113  }
2240 2114  
           /*
            * Decide what to do with a received mblk while a fallback is in
            * progress.
            *
            * The caller must hold udp_recv_lock (asserted).  While the conn is still
            * non-STREAMS (IPCL_IS_NONSTR), mp is appended to the list headed by
            * udp_fallback_queue_head, linked through b_next, and NULL is returned.
            * Otherwise mp is handed back unchanged and the caller is expected to
            * putnext() it.
            */
2241 2115  static mblk_t *
2242 2116  udp_queue_fallback(udp_t *udp, mblk_t *mp)
2243 2117  {
2244 2118          ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2245 2119          if (IPCL_IS_NONSTR(udp->udp_connp)) {
2246 2120                  /*
2247 2121                   * fallback has started but messages have not been moved yet
2248 2122                   */
2249 2123                  if (udp->udp_fallback_queue_head == NULL) {
2250 2124                          ASSERT(udp->udp_fallback_queue_tail == NULL);
2251 2125                          udp->udp_fallback_queue_head = mp;
2252 2126                          udp->udp_fallback_queue_tail = mp;
2253 2127                  } else {
2254 2128                          ASSERT(udp->udp_fallback_queue_tail != NULL);
2255 2129                          udp->udp_fallback_queue_tail->b_next = mp;
2256 2130                          udp->udp_fallback_queue_tail = mp;
2257 2131                  }
2258 2132                  return (NULL);
2259 2133          } else {
2260 2134                  /*
2261 2135                   * Fallback completed, let the caller putnext() the mblk.
2262 2136                   */
2263 2137                  return (mp);
2264 2138          }
2265 2139  }
2266 2140  
2267 2141  /*
2268 2142   * Deliver data to ULP. In case we have a socket, and it's falling back to
2269 2143   * TPI, then we'll queue the mp for later processing.
            *
            * Non-STREAMS conn: mp is passed to the su_recv upcall with len (asserted
            * to equal msgdsize(mp)).  If the upcall returns a negative value:
            *   - ENOSPC: the upcall is repeated under udp_recv_lock with a NULL
            *     mblk and zero length; if that also fails, conn_flow_cntrld is set.
            *   - anything else (asserted to be EOPNOTSUPP): mp goes through
            *     udp_queue_fallback(), and is sent up with putnext() if that
            *     routine hands it back.
            *
            * STREAMS conn: mp is sent up with putnext(); len is not used.  On a
            * labeled system ira->ira_cred is attached to mp first.
2270 2144   */
2271 2145  static void
2272 2146  udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2273 2147  {
2274 2148          if (IPCL_IS_NONSTR(connp)) {
2275 2149                  udp_t *udp = connp->conn_udp;
2276 2150                  int error;
2277 2151  
2278 2152                  ASSERT(len == msgdsize(mp));
2279 2153                  if ((*connp->conn_upcalls->su_recv)
2280 2154                      (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2281 2155                          mutex_enter(&udp->udp_recv_lock);
2282 2156                          if (error == ENOSPC) {
2283 2157                                  /*
2284 2158                                   * let's confirm while holding the lock
2285 2159                                   */
2286 2160                                  if ((*connp->conn_upcalls->su_recv)
2287 2161                                      (connp->conn_upper_handle, NULL, 0, 0,
2288 2162                                      &error, NULL) < 0) {
2289 2163                                          ASSERT(error == ENOSPC);
2290 2164                                          if (error == ENOSPC) {
2291 2165                                                  connp->conn_flow_cntrld =
2292 2166                                                      B_TRUE;
2293 2167                                          }
2294 2168                                  }
2295 2169                                  mutex_exit(&udp->udp_recv_lock);
2296 2170                          } else {
2297 2171                                  ASSERT(error == EOPNOTSUPP);
2298 2172                                  mp = udp_queue_fallback(udp, mp);
2299 2173                                  mutex_exit(&udp->udp_recv_lock);
                                           /* Non-NULL means it was not queued. */
2300 2174                                  if (mp != NULL)
2301 2175                                          putnext(connp->conn_rq, mp);
2302 2176                          }
2303 2177                  }
2304 2178                  ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2305 2179          } else {
2306 2180                  if (is_system_labeled()) {
2307 2181                          ASSERT(ira->ira_cred != NULL);
2308 2182                          /*
2309 2183                           * Provide for protocols above UDP such as RPC
2310 2184                           * NOPID leaves db_cpid unchanged.
2311 2185                           */
2312 2186                          mblk_setcred(mp, ira->ira_cred, NOPID);
2313 2187                  }
2314 2188  
2315 2189                  putnext(connp->conn_rq, mp);
2316 2190          }
2317 2191  }
2318 2192  
2319 2193  /*
2320 2194   * This is the inbound data path.
2321 2195   * IP has already pulled up the IP plus UDP headers and verified alignment
2322 2196   * etc.
            *
            * arg1 is the conn_t of the receiving endpoint; arg2 is unused.  mp is
            * the received packet (asserted to be M_DATA, starting at the IP header)
            * and ira holds its receive attributes.
            *
            * The packet is freed and udpInErrors is bumped when the length in the
            * UDP header disagrees with the packet length from IP, when
            * ip_find_hdr_v4() fails while recording the IPv4 options, or when the
            * T_UNITDATA_IND mblk cannot be allocated.
            *
            * Otherwise a T_UNITDATA_IND (M_PROTO) is built in front of the packet.
            * It carries the source address as a sin_t for AF_INET conns or a sin6_t
            * for all others, followed by whatever ancillary data the conn has
            * asked for.  The result is passed to udp_ulp_recv() after bumping
            * udpHCInDatagrams.
2323 2197   */
2324 2198  /* ARGSUSED2 */
2325 2199  static void
2326 2200  udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2327 2201  {
2328 2202          conn_t                  *connp = (conn_t *)arg1;
2329 2203          struct T_unitdata_ind   *tudi;
2330 2204          uchar_t                 *rptr;          /* Pointer to IP header */
2331 2205          int                     hdr_length;     /* Length of IP+UDP headers */
2332 2206          int                     udi_size;       /* Size of T_unitdata_ind */
2333 2207          int                     pkt_len;
2334 2208          udp_t                   *udp;
2335 2209          udpha_t                 *udpha;
2336 2210          ip_pkt_t                ipps;
2337 2211          ip6_t                   *ip6h;
2338 2212          mblk_t                  *mp1;
2339 2213          uint32_t                udp_ipv4_options_len;
2340 2214          crb_t                   recv_ancillary;
2341 2215          udp_stack_t             *us;
2342 2216  
2343 2217          ASSERT(connp->conn_flags & IPCL_UDPCONN);
2344 2218  
2345 2219          udp = connp->conn_udp;
2346 2220          us = udp->udp_us;
2347 2221          rptr = mp->b_rptr;
2348 2222  
2349 2223          ASSERT(DB_TYPE(mp) == M_DATA);
2350 2224          ASSERT(OK_32PTR(rptr));
2351 2225          ASSERT(ira->ira_pktlen == msgdsize(mp));
2352 2226          pkt_len = ira->ira_pktlen;
2353 2227  
2354 2228          /*
2355 2229           * Get a snapshot of these and allow other threads to change
2356 2230           * them after that. We need the same recv_ancillary when determining
2357 2231           * the size as when adding the ancillary data items.
2358 2232           */
2359 2233          mutex_enter(&connp->conn_lock);
2360 2234          udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2361 2235          recv_ancillary = connp->conn_recv_ancillary;
2362 2236          mutex_exit(&connp->conn_lock);
2363 2237  
2364 2238          hdr_length = ira->ira_ip_hdr_length;
2365 2239  
2366 2240          /*
2367 2241           * IP inspected the UDP header thus all of it must be in the mblk.
2368 2242           * UDP length check is performed for IPv6 packets and IPv4 packets
2369 2243           * to check if the size of the packet as specified
2370 2244           * by the UDP header is the same as the length derived from the IP
2371 2245           * header.
2372 2246           */
2373 2247          udpha = (udpha_t *)(rptr + hdr_length);
2374 2248          if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2375 2249                  goto tossit;
2376 2250  
                   /* hdr_length now spans the IP header plus the UDP header. */
2377 2251          hdr_length += UDPH_SIZE;
2378 2252          ASSERT(MBLKL(mp) >= hdr_length);        /* IP did a pullup */
2379 2253  
2380 2254          /* Initialize regardless of IP version */
2381 2255          ipps.ipp_fields = 0;
2382 2256  
2383 2257          if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2384 2258              udp_ipv4_options_len > 0) &&
2385 2259              connp->conn_family == AF_INET) {
2386 2260                  int     err;
2387 2261  
2388 2262                  /*
2389 2263                   * Record/update udp_recv_ipp with the lock
2390 2264                   * held. Not needed for AF_INET6 sockets
2391 2265                   * since they don't support a getsockopt of IP_OPTIONS.
2392 2266                   */
2393 2267                  mutex_enter(&connp->conn_lock);
2394 2268                  err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2395 2269                      B_TRUE);
2396 2270                  if (err != 0) {
2397 2271                          /* Allocation failed. Drop packet */
2398 2272                          mutex_exit(&connp->conn_lock);
2399 2273                          freemsg(mp);
2400 2274                          UDPS_BUMP_MIB(us, udpInErrors);
2401 2275                          return;
2402 2276                  }
2403 2277                  mutex_exit(&connp->conn_lock);
2404 2278          }
2405 2279  
2406 2280          if (recv_ancillary.crb_all != 0) {
2407 2281                  /*
2408 2282                   * Record packet information in the ip_pkt_t
2409 2283                   */
2410 2284                  if (ira->ira_flags & IRAF_IS_IPV4) {
2411 2285                          ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2412 2286                          ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2413 2287                          ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2414 2288                          ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2415 2289  
2416 2290                          (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2417 2291                  } else {
2418 2292                          uint8_t nexthdrp;
2419 2293  
2420 2294                          ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2421 2295                          /*
2422 2296                           * IPv6 packets can only be received by applications
2423 2297                           * that are prepared to receive IPv6 addresses.
2424 2298                           * The IP fanout must ensure this.
2425 2299                           */
2426 2300                          ASSERT(connp->conn_family == AF_INET6);
2427 2301  
2428 2302                          ip6h = (ip6_t *)rptr;
2429 2303  
2430 2304                          /* We don't care about the length, but need the ipp */
2431 2305                          hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2432 2306                              &nexthdrp);
2433 2307                          ASSERT(hdr_length == ira->ira_ip_hdr_length);
2434 2308                          /* Restore */
2435 2309                          hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2436 2310                          ASSERT(nexthdrp == IPPROTO_UDP);
2437 2311                  }
2438 2312          }
2439 2313  
2440 2314          /*
2441 2315           * This is the inbound data path.  Packets are passed upstream as
2442 2316           * T_UNITDATA_IND messages.
2443 2317           */
2444 2318          if (connp->conn_family == AF_INET) {
2445 2319                  sin_t *sin;
2446 2320  
2447 2321                  ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2448 2322  
2449 2323                  /*
2450 2324                   * Normally only send up the source address.
2451 2325                   * If any ancillary data items are wanted we add those.
2452 2326                   */
2453 2327                  udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2454 2328                  if (recv_ancillary.crb_all != 0) {
2455 2329                          udi_size += conn_recvancillary_size(connp,
2456 2330                              recv_ancillary, ira, mp, &ipps);
2457 2331                  }
2458 2332  
2459 2333                  /* Allocate a message block for the T_UNITDATA_IND structure. */
2460 2334                  mp1 = allocb(udi_size, BPRI_MED);
2461 2335                  if (mp1 == NULL) {
2462 2336                          freemsg(mp);
2463 2337                          UDPS_BUMP_MIB(us, udpInErrors);
2464 2338                          return;
2465 2339                  }
2466 2340                  mp1->b_cont = mp;
2467 2341                  mp1->b_datap->db_type = M_PROTO;
2468 2342                  tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2469 2343                  mp1->b_wptr = (uchar_t *)tudi + udi_size;
2470 2344                  tudi->PRIM_type = T_UNITDATA_IND;
2471 2345                  tudi->SRC_length = sizeof (sin_t);
2472 2346                  tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2473 2347                  tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2474 2348                      sizeof (sin_t);
                           /* What is left in udi_size is the ancillary data length. */
2475 2349                  udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2476 2350                  tudi->OPT_length = udi_size;
2477 2351                  sin = (sin_t *)&tudi[1];
2478 2352                  sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2479 2353                  sin->sin_port = udpha->uha_src_port;
2480 2354                  sin->sin_family = connp->conn_family;
2481 2355                  *(uint32_t *)&sin->sin_zero[0] = 0;
2482 2356                  *(uint32_t *)&sin->sin_zero[4] = 0;
2483 2357  
2484 2358                  /*
2485 2359                   * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
2486 2360                   * IP_RECVTTL has been set.
2487 2361                   */
2488 2362                  if (udi_size != 0) {
2489 2363                          conn_recvancillary_add(connp, recv_ancillary, ira,
2490 2364                              &ipps, (uchar_t *)&sin[1], udi_size);
2491 2365                  }
2492 2366          } else {
2493 2367                  sin6_t *sin6;
2494 2368  
2495 2369                  /*
2496 2370                   * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2497 2371                   *
2498 2372                   * Normally we only send up the address. If receiving of any
2499 2373                   * optional receive side information is enabled, we also send
2500 2374                   * that up as options.
2501 2375                   */
2502 2376                  udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2503 2377  
2504 2378                  if (recv_ancillary.crb_all != 0) {
2505 2379                          udi_size += conn_recvancillary_size(connp,
2506 2380                              recv_ancillary, ira, mp, &ipps);
2507 2381                  }
2508 2382  
2509 2383                  mp1 = allocb(udi_size, BPRI_MED);
2510 2384                  if (mp1 == NULL) {
2511 2385                          freemsg(mp);
2512 2386                          UDPS_BUMP_MIB(us, udpInErrors);
2513 2387                          return;
2514 2388                  }
2515 2389                  mp1->b_cont = mp;
2516 2390                  mp1->b_datap->db_type = M_PROTO;
2517 2391                  tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2518 2392                  mp1->b_wptr = (uchar_t *)tudi + udi_size;
2519 2393                  tudi->PRIM_type = T_UNITDATA_IND;
2520 2394                  tudi->SRC_length = sizeof (sin6_t);
2521 2395                  tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2522 2396                  tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2523 2397                      sizeof (sin6_t);
                           /* What is left in udi_size is the ancillary data length. */
2524 2398                  udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2525 2399                  tudi->OPT_length = udi_size;
2526 2400                  sin6 = (sin6_t *)&tudi[1];
2527 2401                  if (ira->ira_flags & IRAF_IS_IPV4) {
2528 2402                          in6_addr_t v6dst;
2529 2403  
                                   /* IPv4 packet: report addresses in v4-mapped form. */
2530 2404                          IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2531 2405                              &sin6->sin6_addr);
2532 2406                          IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2533 2407                              &v6dst);
2534 2408                          sin6->sin6_flowinfo = 0;
2535 2409                          sin6->sin6_scope_id = 0;
2536 2410                          sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2537 2411                              IPCL_ZONEID(connp), us->us_netstack);
2538 2412                  } else {
2539 2413                          ip6h = (ip6_t *)rptr;
2540 2414  
2541 2415                          sin6->sin6_addr = ip6h->ip6_src;
2542 2416                          /* No sin6_flowinfo per API */
2543 2417                          sin6->sin6_flowinfo = 0;
2544 2418                          /* For link-scope pass up scope id */
2545 2419                          if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2546 2420                                  sin6->sin6_scope_id = ira->ira_ruifindex;
2547 2421                          else
2548 2422                                  sin6->sin6_scope_id = 0;
2549 2423                          sin6->__sin6_src_id = ip_srcid_find_addr(
2550 2424                              &ip6h->ip6_dst, IPCL_ZONEID(connp),
2551 2425                              us->us_netstack);
2552 2426                  }
2553 2427                  sin6->sin6_port = udpha->uha_src_port;
2554 2428                  sin6->sin6_family = connp->conn_family;
2555 2429  
2556 2430                  if (udi_size != 0) {
2557 2431                          conn_recvancillary_add(connp, recv_ancillary, ira,
2558 2432                              &ipps, (uchar_t *)&sin6[1], udi_size);
2559 2433                  }
2560 2434          }
2561 2435  
2562 2436          /*
2563 2437           * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2564 2438           * loopback traffic).
2565 2439           */
2566 2440          DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
2567 2441              void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);
2568 2442  
2569 2443          /* Walk past the headers unless IP_RECVHDR was set. */
2570 2444          if (!udp->udp_rcvhdr) {
2571 2445                  mp->b_rptr = rptr + hdr_length;
2572 2446                  pkt_len -= hdr_length;
2573 2447          }
2574 2448  
2575 2449          UDPS_BUMP_MIB(us, udpHCInDatagrams);
                   /* mp1 is the T_UNITDATA_IND with the packet (mp) as its b_cont. */
2576 2450          udp_ulp_recv(connp, mp1, pkt_len, ira);
2577 2451          return;
2578 2452  
2579 2453  tossit:
                   /* Only reached on a UDP length mismatch; count it as an error. */
2580 2454          freemsg(mp);
2581 2455          UDPS_BUMP_MIB(us, udpInErrors);
2582 2456  }
2583 2457  
2584 2458  /*
2585 2459   * This routine creates a T_UDERROR_IND message and passes it upstream.
2586 2460   * The address and options are copied from the T_UNITDATA_REQ message
2587 2461   * passed in mp.  This message is freed.
            *
            * No indication is sent when mp is shorter than a T_unitdata_req, or
            * when the destination address or option area described by its
            * offset/length fields does not lie within mp.  Nothing is sent either
            * when mi_tpi_uderror_ind() returns NULL.  mp is freed on every path.
            *
            * err is passed through to mi_tpi_uderror_ind(), and the indication is
            * sent with qreply() on q.
2588 2462   */
2589 2463  static void
2590 2464  udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2591 2465  {
2592 2466          struct T_unitdata_req *tudr;
2593 2467          mblk_t  *mp1;
2594 2468          uchar_t *destaddr;
2595 2469          t_scalar_t destlen;
2596 2470          uchar_t *optaddr;
2597 2471          t_scalar_t optlen;
2598 2472  
2599 2473          if ((mp->b_wptr < mp->b_rptr) ||
2600 2474              (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2601 2475                  goto done;
2602 2476          }
2603 2477          tudr = (struct T_unitdata_req *)mp->b_rptr;
                   /* Reject a destination address that is not fully inside mp. */
2604 2478          destaddr = mp->b_rptr + tudr->DEST_offset;
2605 2479          if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2606 2480              destaddr + tudr->DEST_length < mp->b_rptr ||
2607 2481              destaddr + tudr->DEST_length > mp->b_wptr) {
2608 2482                  goto done;
2609 2483          }
                   /* Same containment check for the option area. */
2610 2484          optaddr = mp->b_rptr + tudr->OPT_offset;
2611 2485          if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2612 2486              optaddr + tudr->OPT_length < mp->b_rptr ||
2613 2487              optaddr + tudr->OPT_length > mp->b_wptr) {
2614 2488                  goto done;
2615 2489          }
2616 2490          destlen = tudr->DEST_length;
2617 2491          optlen = tudr->OPT_length;
2618 2492  
2619 2493          mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2620 2494              (char *)optaddr, optlen, err);
2621 2495          if (mp1 != NULL)
2622 2496                  qreply(q, mp1);
2623 2497  
2624 2498  done:
2625 2499          freemsg(mp);
2626 2500  }
2627 2501  
2628 2502  /*
2629 2503   * This routine removes a port number association from a stream.  It
2630 2504   * is called by udp_wput to handle T_UNBIND_REQ messages.
2631 2505   */
2632 2506  static void
2633 2507  udp_tpi_unbind(queue_t *q, mblk_t *mp)
2634 2508  {
2635 2509          conn_t  *connp = Q_TO_CONN(q);
2636 2510          int     error;
2637 2511  
2638 2512          error = udp_do_unbind(connp);
2639 2513          if (error) {
2640 2514                  if (error < 0)
2641 2515                          udp_err_ack(q, mp, -error, 0);
2642 2516                  else
2643 2517                          udp_err_ack(q, mp, TSYSERR, error);
2644 2518                  return;
2645 2519          }
2646 2520  
2647 2521          mp = mi_tpi_ok_ack_alloc(mp);
2648 2522          ASSERT(mp != NULL);
2649 2523          ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2650 2524          qreply(q, mp);
2651 2525  }
2652 2526  
2653 2527  /*
2654 2528   * Don't let port fall into the privileged range.
2655 2529   * Since the extra privileged ports can be arbitrary we also
2656 2530   * ensure that we exclude those from consideration.
2657 2531   * us->us_epriv_ports is not sorted thus we loop over it until
2658 2532   * there are no changes.
2659 2533   */
2660 2534  static in_port_t
2661 2535  udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2662 2536  {
2663 2537          int i, bump;
2664 2538          in_port_t nextport;
2665 2539          boolean_t restart = B_FALSE;
2666 2540          udp_stack_t *us = udp->udp_us;
2667 2541  
2668 2542          if (random && udp_random_anon_port != 0) {
2669 2543                  (void) random_get_pseudo_bytes((uint8_t *)&port,
2670 2544                      sizeof (in_port_t));
2671 2545                  /*
2672 2546                   * Unless changed by a sys admin, the smallest anon port
2673 2547                   * is 32768 and the largest anon port is 65535.  It is
2674 2548                   * very likely (50%) for the random port to be smaller
2675 2549                   * than the smallest anon port.  When that happens,
2676 2550                   * add port % (anon port range) to the smallest anon
2677 2551                   * port to get the random port.  It should fall into the
2678 2552                   * valid anon port range.
2679 2553                   */
2680 2554                  if ((port < us->us_smallest_anon_port) ||
2681 2555                      (port > us->us_largest_anon_port)) {
2682 2556                          if (us->us_smallest_anon_port ==
2683 2557                              us->us_largest_anon_port) {
2684 2558                                  bump = 0;
2685 2559                          } else {
2686 2560                                  bump = port % (us->us_largest_anon_port -
2687 2561                                      us->us_smallest_anon_port);
2688 2562                          }
2689 2563  
2690 2564                          port = us->us_smallest_anon_port + bump;
2691 2565                  }
2692 2566          }
2693 2567  
2694 2568  retry:
2695 2569          if (port < us->us_smallest_anon_port)
2696 2570                  port = us->us_smallest_anon_port;
2697 2571  
2698 2572          if (port > us->us_largest_anon_port) {
2699 2573                  port = us->us_smallest_anon_port;
2700 2574                  if (restart)
2701 2575                          return (0);
2702 2576                  restart = B_TRUE;
2703 2577          }
2704 2578  
2705 2579          if (port < us->us_smallest_nonpriv_port)
2706 2580                  port = us->us_smallest_nonpriv_port;
2707 2581  
2708 2582          for (i = 0; i < us->us_num_epriv_ports; i++) {
2709 2583                  if (port == us->us_epriv_ports[i]) {
2710 2584                          port++;
2711 2585                          /*
2712 2586                           * Make sure that the port is in the
2713 2587                           * valid range.
2714 2588                           */
2715 2589                          goto retry;
2716 2590                  }
2717 2591          }
2718 2592  
2719 2593          if (is_system_labeled() &&
2720 2594              (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2721 2595              port, IPPROTO_UDP, B_TRUE)) != 0) {
2722 2596                  port = nextport;
2723 2597                  goto retry;
2724 2598          }
2725 2599  
2726 2600          return (port);
2727 2601  }
2728 2602  
2729 2603  /*
2730 2604   * Handle T_UNITDATA_REQ with options. Handles both IPv4 and IPv6.
2731 2605   * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2732 2606   * the TPI options, otherwise we take them from msg_control.
2733 2607   * The destination comes from sin if it is set, otherwise from sin6 if that
            * is set. If neither sin nor sin6 is set it is a connected socket and we
            * use conn_faddr.
2734 2608   * Always consumes mp; never consumes tudr_mp.
            * Returns 0 or an errno value.
2735 2609   */
2736 2610  static int
2737 2611  udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2738 2612      mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2739 2613  {
2740 2614          udp_t           *udp = connp->conn_udp;
2741 2615          udp_stack_t     *us = udp->udp_us;
2742 2616          int             error;
2743 2617          ip_xmit_attr_t  *ixa;
2744 2618          ip_pkt_t        *ipp;
2745 2619          in6_addr_t      v6src;
2746 2620          in6_addr_t      v6dst;
2747 2621          in6_addr_t      v6nexthop;
2748 2622          in_port_t       dstport;
2749 2623          uint32_t        flowinfo;
2750 2624          uint_t          srcid;
2751 2625          int             is_absreq_failure = 0;
2752 2626          conn_opt_arg_t  coas, *coa;
2753 2627
2754 2628          ASSERT(tudr_mp != NULL || msg != NULL);
2755 2629
2756 2630          /*
2757 2631           * Get ixa before checking state to handle a disconnect race.
2758 2632           *
2759 2633           * We need an exclusive copy of conn_ixa since the ancillary data
2760 2634           * options might modify it. That copy has no pointers hence we
2761 2635           * need to set them up once we've parsed the ancillary data.
2762 2636           */
2763 2637          ixa = conn_get_ixa_exclusive(connp);
2764 2638          if (ixa == NULL) {
2765 2639                  UDPS_BUMP_MIB(us, udpOutErrors);
2766 2640                  freemsg(mp);
2767 2641                  return (ENOMEM);
2768 2642          }
2769 2643          ASSERT(cr != NULL);
2770 2644          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
                   /* Send with the caller's cred/pid; both are put back on exit. */
2771 2645          ixa->ixa_cred = cr;
2772 2646          ixa->ixa_cpid = pid;
2773 2647          if (is_system_labeled()) {
2774 2648                  /* We need to restart with a label based on the cred */
2775 2649                  ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2776 2650          }
2777 2651
2778 2652          /* In case previous destination was multicast or multirt */
2779 2653          ip_attr_newdst(ixa);
2780 2654
2781 2655          /* Get a copy of conn_xmit_ipp since the options might change it */
2782 2656          ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2783 2657          if (ipp == NULL) {
                           /* No ipp to free yet, so clean up here, not at done. */
2784 2658                  ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2785 2659                  ixa->ixa_cred = connp->conn_cred;       /* Restore */
2786 2660                  ixa->ixa_cpid = connp->conn_cpid;
2787 2661                  ixa_refrele(ixa);
2788 2662                  UDPS_BUMP_MIB(us, udpOutErrors);
2789 2663                  freemsg(mp);
2790 2664                  return (ENOMEM);
2791 2665          }
2792 2666          mutex_enter(&connp->conn_lock);
2793 2667          error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2794 2668          mutex_exit(&connp->conn_lock);
2795 2669          if (error != 0) {
2796 2670                  UDPS_BUMP_MIB(us, udpOutErrors);
2797 2671                  freemsg(mp);
2798 2672                  goto done;
2799 2673          }
2800 2674
2801 2675          /*
2802 2676           * Parse the options and update ixa and ipp as a result.
2803 2677           * Note that ixa_tsl can be updated if SCM_UCRED.
2804 2678           * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2805 2679           */
2806 2680
2807 2681          coa = &coas;
2808 2682          coa->coa_connp = connp;
2809 2683          coa->coa_ixa = ixa;
2810 2684          coa->coa_ipp = ipp;
2811 2685          coa->coa_ancillary = B_TRUE;
2812 2686          coa->coa_changed = 0;
2813 2687
2814 2688          if (msg != NULL) {
2815 2689                  error = process_auxiliary_options(connp, msg->msg_control,
2816 2690                      msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2817 2691          } else {
2818 2692                  struct T_unitdata_req *tudr;
2819 2693
2820 2694                  tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2821 2695                  ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2822 2696                  error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2823 2697                      &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2824 2698                      coa, &is_absreq_failure);
2825 2699          }
2826 2700          if (error != 0) {
2827 2701                  /*
2828 2702                   * Note: No special action needed in this
2829 2703                   * module for "is_absreq_failure"
2830 2704                   */
2831 2705                  freemsg(mp);
2832 2706                  UDPS_BUMP_MIB(us, udpOutErrors);
2833 2707                  goto done;
2834 2708          }
2835 2709          ASSERT(is_absreq_failure == 0);
2836 2710
2837 2711          mutex_enter(&connp->conn_lock);
2838 2712          /*
2839 2713           * If laddr is unspecified then we look at sin6_src_id.
2840 2714           * We will give precedence to a source address set with IPV6_PKTINFO
2841 2715           * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2842 2716           * want ip_attr_connect to select a source (since it can fail) when
2843 2717           * IPV6_PKTINFO is specified.
2844 2718           * If this doesn't result in a source address then we get a source
2845 2719           * from ip_attr_connect() below.
2846 2720           */
2847 2721          v6src = connp->conn_saddr_v6;
2848 2722          if (sin != NULL) {
2849 2723                  IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2850 2724                  dstport = sin->sin_port;
2851 2725                  flowinfo = 0;
2852 2726                  ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2853 2727                  ixa->ixa_flags |= IXAF_IS_IPV4;
2854 2728          } else if (sin6 != NULL) {
2855 2729                  boolean_t v4mapped;
2856 2730
2857 2731                  v6dst = sin6->sin6_addr;
2858 2732                  dstport = sin6->sin6_port;
2859 2733                  flowinfo = sin6->sin6_flowinfo;
2860 2734                  srcid = sin6->__sin6_src_id;
2861 2735                  if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2862 2736                          ixa->ixa_scopeid = sin6->sin6_scope_id;
2863 2737                          ixa->ixa_flags |= IXAF_SCOPEID_SET;
2864 2738                  } else {
2865 2739                          ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2866 2740                  }
2867 2741                  v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
2868 2742                  if (v4mapped)
2869 2743                          ixa->ixa_flags |= IXAF_IS_IPV4;
2870 2744                  else
2871 2745                          ixa->ixa_flags &= ~IXAF_IS_IPV4;
2872 2746                  if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2873 2747                          if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2874 2748                              v4mapped, connp->conn_netstack)) {
2875 2749                                  /* Mismatch - v4mapped/v6 specified by srcid. */
2876 2750                                  mutex_exit(&connp->conn_lock);
2877 2751                                  error = EADDRNOTAVAIL;
2878 2752                                  goto failed;    /* Does freemsg() and mib. */
2879 2753                          }
2880 2754                  }
2881 2755          } else {
2882 2756                  /* Connected case */
2883 2757                  v6dst = connp->conn_faddr_v6;
2884 2758                  dstport = connp->conn_fport;
2885 2759                  flowinfo = connp->conn_flowinfo;
2886 2760          }
2887 2761          mutex_exit(&connp->conn_lock);
2888 2762
2889 2763          /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2890 2764          if (ipp->ipp_fields & IPPF_ADDR) {
                           /* Only take ipp_addr when its family matches the send. */
2891 2765                  if (ixa->ixa_flags & IXAF_IS_IPV4) {
2892 2766                          if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2893 2767                                  v6src = ipp->ipp_addr;
2894 2768                  } else {
2895 2769                          if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2896 2770                                  v6src = ipp->ipp_addr;
2897 2771                  }
2898 2772          }
2899 2773
2900 2774          ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
                   /*
                    * NOTE(review): &v6src is passed twice below, presumably once as
                    * the requested source and once as the place the selected source
                    * is stored -- confirm against ip_attr_connect().
                    */
2901 2775          error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2902 2776              &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2903 2777
2904 2778          switch (error) {
2905 2779          case 0:
2906 2780                  break;
2907 2781          case EADDRNOTAVAIL:
2908 2782                  /*
2909 2783                   * IXAF_VERIFY_SOURCE tells us to pick a better source.
2910 2784                   * Don't have the application see that errno
2911 2785                   */
2912 2786                  error = ENETUNREACH;
2913 2787                  goto failed;
2914 2788          case ENETDOWN:
2915 2789                  /*
2916 2790                   * Have !ipif_addr_ready address; drop packet silently
2917 2791                   * until we can get applications to not send until we
2918 2792                   * are ready.
2919 2793                   */
2920 2794                  error = 0;
2921 2795                  goto failed;
2922 2796          case EHOSTUNREACH:
2923 2797          case ENETUNREACH:
2924 2798                  if (ixa->ixa_ire != NULL) {
2925 2799                          /*
2926 2800                           * Let conn_ip_output/ire_send_noroute return
2927 2801                           * the error and send any local ICMP error.
2928 2802                           */
2929 2803                          error = 0;
2930 2804                          break;
2931 2805                  }
2932 2806                  /* FALLTHRU */
2933 2807          default:
2934 2808          failed:
                           /*
                            * Also entered by goto from the srcid lookup failure
                            * above, which happens before this switch is reached.
                            */
2935 2809                  freemsg(mp);
2936 2810                  UDPS_BUMP_MIB(us, udpOutErrors);
2937 2811                  goto done;
2938 2812          }
2939 2813
2940 2814          /*
2941 2815           * We might be going to a different destination than last time,
2942 2816           * thus check that TX allows the communication and compute any
2943 2817           * needed label.
2944 2818           *
2945 2819           * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2946 2820           * don't have to worry about concurrent threads.
2947 2821           */
2948 2822          if (is_system_labeled()) {
2949 2823                  /* Using UDP MLP requires SCM_UCRED from user */
2950 2824                  if (connp->conn_mlp_type != mlptSingle &&
2951 2825                      !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2952 2826                          UDPS_BUMP_MIB(us, udpOutErrors);
2953 2827                          error = ECONNREFUSED;
2954 2828                          freemsg(mp);
2955 2829                          goto done;
2956 2830                  }
2957 2831                  /*
2958 2832                   * Check whether Trusted Solaris policy allows communication
2959 2833                   * with this host, and pretend that the destination is
2960 2834                   * unreachable if not.
2961 2835                   * Compute any needed label and place it in ipp_label_v4/v6.
2962 2836                   *
2963 2837                   * Later conn_build_hdr_template/conn_prepend_hdr takes
2964 2838                   * ipp_label_v4/v6 to form the packet.
2965 2839                   *
2966 2840                   * Tsol note: We have ipp structure local to this thread so
2967 2841                   * no locking is needed.
2968 2842                   */
2969 2843                  error = conn_update_label(connp, ixa, &v6dst, ipp);
2970 2844                  if (error != 0) {
2971 2845                          freemsg(mp);
2972 2846                          UDPS_BUMP_MIB(us, udpOutErrors);
2973 2847                          goto done;
2974 2848                  }
2975 2849          }
2976 2850          mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2977 2851              flowinfo, mp, &error);
2978 2852          if (mp == NULL) {
2979 2853                  ASSERT(error != 0);
2980 2854                  UDPS_BUMP_MIB(us, udpOutErrors);
2981 2855                  goto done;
2982 2856          }
                   /* Refuse anything larger than IP_MAXPACKET. */
2983 2857          if (ixa->ixa_pktlen > IP_MAXPACKET) {
2984 2858                  error = EMSGSIZE;
2985 2859                  UDPS_BUMP_MIB(us, udpOutErrors);
2986 2860                  freemsg(mp);
2987 2861                  goto done;
2988 2862          }
2989 2863          /* We're done.  Pass the packet to ip. */
2990 2864          UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2991 2865
2992 2866          DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2993 2867              void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2994 2868              &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2995 2869
2996 2870          error = conn_ip_output(mp, ixa);
2997 2871          /* No udpOutErrors if an error since IP increases its error counter */
2998 2872          switch (error) {
2999 2873          case 0:
3000 2874                  break;
3001 2875          case EWOULDBLOCK:
                           /* Not reported to the caller; error is reset to 0. */
3002 2876                  (void) ixa_check_drain_insert(connp, ixa);
3003 2877                  error = 0;
3004 2878                  break;
3005 2879          case EADDRNOTAVAIL:
3006 2880                  /*
3007 2881                   * IXAF_VERIFY_SOURCE tells us to pick a better source.
3008 2882                   * Don't have the application see that errno
3009 2883                   */
3010 2884                  error = ENETUNREACH;
3011 2885                  /* FALLTHRU */
3012 2886          default:
3013 2887                  mutex_enter(&connp->conn_lock);
3014 2888                  /*
3015 2889                   * Clear the source and v6lastdst so we call ip_attr_connect
3016 2890                   * for the next packet and try to pick a better source.
3017 2891                   */
3018 2892                  if (connp->conn_mcbc_bind)
3019 2893                          connp->conn_saddr_v6 = ipv6_all_zeros;
3020 2894                  else
3021 2895                          connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3022 2896                  connp->conn_v6lastdst = ipv6_all_zeros;
3023 2897                  mutex_exit(&connp->conn_lock);
3024 2898                  break;
3025 2899          }
3026 2900  done:
                   /*
                    * Common exit: put the conn's own cred and pid back into ixa,
                    * drop our ixa reference and free the private ipp copy.
                    */
3027 2901          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3028 2902          ixa->ixa_cred = connp->conn_cred;       /* Restore */
3029 2903          ixa->ixa_cpid = connp->conn_cpid;
3030 2904          ixa_refrele(ixa);
3031 2905          ip_pkt_free(ipp);
3032 2906          kmem_free(ipp, sizeof (*ipp));
3033 2907          return (error);
3034 2908  }
3035 2909  
3036 2910  /*
3037 2911   * Handle sending an M_DATA for a connected socket.
3038 2912   * Handles both IPv4 and IPv6.
3039 2913   */
3040 2914  static int
3041 2915  udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
3042 2916  {
3043 2917          udp_t           *udp = connp->conn_udp;
3044 2918          udp_stack_t     *us = udp->udp_us;
3045 2919          int             error;
3046 2920          ip_xmit_attr_t  *ixa;
3047 2921  
3048 2922          /*
3049 2923           * If no other thread is using conn_ixa this just gets a reference to
3050 2924           * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3051 2925           */
3052 2926          ixa = conn_get_ixa(connp, B_FALSE);
3053 2927          if (ixa == NULL) {
3054 2928                  UDPS_BUMP_MIB(us, udpOutErrors);
3055 2929                  freemsg(mp);
3056 2930                  return (ENOMEM);
3057 2931          }
3058 2932  
3059 2933          ASSERT(cr != NULL);
3060 2934          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3061 2935          ixa->ixa_cred = cr;
3062 2936          ixa->ixa_cpid = pid;
3063 2937  
3064 2938          mutex_enter(&connp->conn_lock);
3065 2939          mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
3066 2940              connp->conn_fport, connp->conn_flowinfo, &error);
3067 2941  
3068 2942          if (mp == NULL) {
3069 2943                  ASSERT(error != 0);
3070 2944                  mutex_exit(&connp->conn_lock);
3071 2945                  ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3072 2946                  ixa->ixa_cred = connp->conn_cred;       /* Restore */
3073 2947                  ixa->ixa_cpid = connp->conn_cpid;
3074 2948                  ixa_refrele(ixa);
3075 2949                  UDPS_BUMP_MIB(us, udpOutErrors);
3076 2950                  freemsg(mp);
3077 2951                  return (error);
3078 2952          }
3079 2953  
3080 2954          /*
3081 2955           * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3082 2956           * safe copy, then we need to fill in any pointers in it.
3083 2957           */
3084 2958          if (ixa->ixa_ire == NULL) {
3085 2959                  in6_addr_t      faddr, saddr;
3086 2960                  in6_addr_t      nexthop;
3087 2961                  in_port_t       fport;
3088 2962  
3089 2963                  saddr = connp->conn_saddr_v6;
3090 2964                  faddr = connp->conn_faddr_v6;
3091 2965                  fport = connp->conn_fport;
3092 2966                  ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3093 2967                  mutex_exit(&connp->conn_lock);
3094 2968  
3095 2969                  error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3096 2970                      fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3097 2971                      IPDF_IPSEC);
3098 2972                  switch (error) {
3099 2973                  case 0:
3100 2974                          break;
3101 2975                  case EADDRNOTAVAIL:
3102 2976                          /*
3103 2977                           * IXAF_VERIFY_SOURCE tells us to pick a better source.
3104 2978                           * Don't have the application see that errno
3105 2979                           */
3106 2980                          error = ENETUNREACH;
3107 2981                          goto failed;
3108 2982                  case ENETDOWN:
3109 2983                          /*
3110 2984                           * Have !ipif_addr_ready address; drop packet silently
3111 2985                           * until we can get applications to not send until we
3112 2986                           * are ready.
3113 2987                           */
3114 2988                          error = 0;
3115 2989                          goto failed;
3116 2990                  case EHOSTUNREACH:
3117 2991                  case ENETUNREACH:
3118 2992                          if (ixa->ixa_ire != NULL) {
3119 2993                                  /*
3120 2994                                   * Let conn_ip_output/ire_send_noroute return
3121 2995                                   * the error and send any local ICMP error.
3122 2996                                   */
3123 2997                                  error = 0;
3124 2998                                  break;
3125 2999                          }
3126 3000                          /* FALLTHRU */
3127 3001                  default:
3128 3002                  failed:
3129 3003                          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3130 3004                          ixa->ixa_cred = connp->conn_cred;       /* Restore */
3131 3005                          ixa->ixa_cpid = connp->conn_cpid;
3132 3006                          ixa_refrele(ixa);
3133 3007                          freemsg(mp);
3134 3008                          UDPS_BUMP_MIB(us, udpOutErrors);
3135 3009                          return (error);
3136 3010                  }
3137 3011          } else {
3138 3012                  /* Done with conn_t */
3139 3013                  mutex_exit(&connp->conn_lock);
3140 3014          }
3141 3015          ASSERT(ixa->ixa_ire != NULL);
3142 3016  
3143 3017          /* We're done.  Pass the packet to ip. */
3144 3018          UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3145 3019  
3146 3020          DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3147 3021              void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3148 3022              &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3149 3023  
3150 3024          error = conn_ip_output(mp, ixa);
3151 3025          /* No udpOutErrors if an error since IP increases its error counter */
3152 3026          switch (error) {
3153 3027          case 0:
3154 3028                  break;
3155 3029          case EWOULDBLOCK:
3156 3030                  (void) ixa_check_drain_insert(connp, ixa);
3157 3031                  error = 0;
3158 3032                  break;
3159 3033          case EADDRNOTAVAIL:
3160 3034                  /*
3161 3035                   * IXAF_VERIFY_SOURCE tells us to pick a better source.
3162 3036                   * Don't have the application see that errno
3163 3037                   */
3164 3038                  error = ENETUNREACH;
3165 3039                  break;
3166 3040          }
3167 3041          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3168 3042          ixa->ixa_cred = connp->conn_cred;       /* Restore */
3169 3043          ixa->ixa_cpid = connp->conn_cpid;
3170 3044          ixa_refrele(ixa);
3171 3045          return (error);
3172 3046  }
3173 3047  
3174 3048  /*
3175 3049   * Handle sending an M_DATA to the last destination.
3176 3050   * Handles both IPv4 and IPv6.
3177 3051   *
3178 3052   * NOTE: The caller must hold conn_lock and we drop it here.
3179 3053   */
3180 3054  static int
3181 3055  udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3182 3056      ip_xmit_attr_t *ixa)
3183 3057  {
3184 3058          udp_t           *udp = connp->conn_udp;
3185 3059          udp_stack_t     *us = udp->udp_us;
3186 3060          int             error;
3187 3061  
3188 3062          ASSERT(MUTEX_HELD(&connp->conn_lock));
3189 3063          ASSERT(ixa != NULL);
3190 3064  
3191 3065          ASSERT(cr != NULL);
3192 3066          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3193 3067          ixa->ixa_cred = cr;
3194 3068          ixa->ixa_cpid = pid;
3195 3069  
3196 3070          mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3197 3071              connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3198 3072  
3199 3073          if (mp == NULL) {
3200 3074                  ASSERT(error != 0);
3201 3075                  mutex_exit(&connp->conn_lock);
3202 3076                  ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3203 3077                  ixa->ixa_cred = connp->conn_cred;       /* Restore */
3204 3078                  ixa->ixa_cpid = connp->conn_cpid;
3205 3079                  ixa_refrele(ixa);
3206 3080                  UDPS_BUMP_MIB(us, udpOutErrors);
3207 3081                  freemsg(mp);
3208 3082                  return (error);
3209 3083          }
3210 3084  
3211 3085          /*
3212 3086           * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3213 3087           * safe copy, then we need to fill in any pointers in it.
3214 3088           */
3215 3089          if (ixa->ixa_ire == NULL) {
3216 3090                  in6_addr_t      lastdst, lastsrc;
3217 3091                  in6_addr_t      nexthop;
3218 3092                  in_port_t       lastport;
3219 3093  
3220 3094                  lastsrc = connp->conn_v6lastsrc;
3221 3095                  lastdst = connp->conn_v6lastdst;
3222 3096                  lastport = connp->conn_lastdstport;
3223 3097                  ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3224 3098                  mutex_exit(&connp->conn_lock);
3225 3099  
3226 3100                  error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3227 3101                      &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3228 3102                      IPDF_VERIFY_DST | IPDF_IPSEC);
3229 3103                  switch (error) {
3230 3104                  case 0:
3231 3105                          break;
3232 3106                  case EADDRNOTAVAIL:
3233 3107                          /*
3234 3108                           * IXAF_VERIFY_SOURCE tells us to pick a better source.
3235 3109                           * Don't have the application see that errno
3236 3110                           */
3237 3111                          error = ENETUNREACH;
3238 3112                          goto failed;
3239 3113                  case ENETDOWN:
3240 3114                          /*
3241 3115                           * Have !ipif_addr_ready address; drop packet silently
3242 3116                           * until we can get applications to not send until we
3243 3117                           * are ready.
3244 3118                           */
3245 3119                          error = 0;
3246 3120                          goto failed;
3247 3121                  case EHOSTUNREACH:
3248 3122                  case ENETUNREACH:
3249 3123                          if (ixa->ixa_ire != NULL) {
3250 3124                                  /*
3251 3125                                   * Let conn_ip_output/ire_send_noroute return
3252 3126                                   * the error and send any local ICMP error.
3253 3127                                   */
3254 3128                                  error = 0;
3255 3129                                  break;
3256 3130                          }
3257 3131                          /* FALLTHRU */
3258 3132                  default:
3259 3133                  failed:
3260 3134                          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3261 3135                          ixa->ixa_cred = connp->conn_cred;       /* Restore */
3262 3136                          ixa->ixa_cpid = connp->conn_cpid;
3263 3137                          ixa_refrele(ixa);
3264 3138                          freemsg(mp);
3265 3139                          UDPS_BUMP_MIB(us, udpOutErrors);
3266 3140                          return (error);
3267 3141                  }
3268 3142          } else {
3269 3143                  /* Done with conn_t */
3270 3144                  mutex_exit(&connp->conn_lock);
3271 3145          }
3272 3146  
3273 3147          /* We're done.  Pass the packet to ip. */
3274 3148          UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3275 3149  
3276 3150          DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3277 3151              void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3278 3152              &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3279 3153  
3280 3154          error = conn_ip_output(mp, ixa);
3281 3155          /* No udpOutErrors if an error since IP increases its error counter */
3282 3156          switch (error) {
3283 3157          case 0:
3284 3158                  break;
3285 3159          case EWOULDBLOCK:
3286 3160                  (void) ixa_check_drain_insert(connp, ixa);
3287 3161                  error = 0;
3288 3162                  break;
3289 3163          case EADDRNOTAVAIL:
3290 3164                  /*
3291 3165                   * IXAF_VERIFY_SOURCE tells us to pick a better source.
3292 3166                   * Don't have the application see that errno
3293 3167                   */
3294 3168                  error = ENETUNREACH;
3295 3169                  /* FALLTHRU */
3296 3170          default:
3297 3171                  mutex_enter(&connp->conn_lock);
3298 3172                  /*
3299 3173                   * Clear the source and v6lastdst so we call ip_attr_connect
3300 3174                   * for the next packet and try to pick a better source.
3301 3175                   */
3302 3176                  if (connp->conn_mcbc_bind)
3303 3177                          connp->conn_saddr_v6 = ipv6_all_zeros;
3304 3178                  else
3305 3179                          connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3306 3180                  connp->conn_v6lastdst = ipv6_all_zeros;
3307 3181                  mutex_exit(&connp->conn_lock);
3308 3182                  break;
3309 3183          }
3310 3184          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3311 3185          ixa->ixa_cred = connp->conn_cred;       /* Restore */
3312 3186          ixa->ixa_cpid = connp->conn_cpid;
3313 3187          ixa_refrele(ixa);
3314 3188          return (error);
3315 3189  }
3316 3190  
3317 3191  
3318 3192  /*
3319 3193   * Prepend the header template and then fill in the source and
3320 3194   * flowinfo. The caller needs to handle the destination address since
3321 3195   * its setting is different if rthdr or source route.
3322 3196   *
  
    | 
      ↓ open down ↓ | 
    1154 lines elided | 
    
      ↑ open up ↑ | 
  
3323 3197   * Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
3324 3198   * When it returns NULL it sets errorp.
3325 3199   */
3326 3200  static mblk_t *
3327 3201  udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3328 3202      const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3329 3203  {
3330 3204          udp_t           *udp = connp->conn_udp;
3331 3205          udp_stack_t     *us = udp->udp_us;
3332 3206          boolean_t       insert_spi = udp->udp_nat_t_endpoint;
3333      -        boolean_t       hash_srcport = udp->udp_vxlanhash;
3334 3207          uint_t          pktlen;
3335 3208          uint_t          alloclen;
3336 3209          uint_t          copylen;
3337 3210          uint8_t         *iph;
3338 3211          uint_t          ip_hdr_length;
3339 3212          udpha_t         *udpha;
3340 3213          uint32_t        cksum;
3341 3214          ip_pkt_t        *ipp;
3342      -        uint16_t        srcport;
3343 3215  
3344 3216          ASSERT(MUTEX_HELD(&connp->conn_lock));
3345 3217  
3346 3218          /*
3347      -         * If we have source port hashing going on, determine the hash before
3348      -         * we modify the mblk_t.
3349      -         */
3350      -        if (hash_srcport == B_TRUE) {
3351      -                srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
3352      -                    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
3353      -                    ntohs(connp->conn_lport));
3354      -        }
3355      -
3356      -        /*
3357 3219           * Copy the header template and leave space for an SPI
3358 3220           */
3359 3221          copylen = connp->conn_ht_iphc_len;
3360 3222          alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3361 3223          pktlen = alloclen + msgdsize(mp);
3362 3224          if (pktlen > IP_MAXPACKET) {
3363 3225                  freemsg(mp);
3364 3226                  *errorp = EMSGSIZE;
3365 3227                  return (NULL);
3366 3228          }
3367 3229          ixa->ixa_pktlen = pktlen;
3368 3230  
3369 3231          /* check/fix buffer config, setup pointers into it */
3370 3232          iph = mp->b_rptr - alloclen;
3371 3233          if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3372 3234                  mblk_t *mp1;
3373 3235  
3374 3236                  mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3375 3237                  if (mp1 == NULL) {
3376 3238                          freemsg(mp);
3377 3239                          *errorp = ENOMEM;
3378 3240                          return (NULL);
3379 3241                  }
3380 3242                  mp1->b_wptr = DB_LIM(mp1);
3381 3243                  mp1->b_cont = mp;
3382 3244                  mp = mp1;
3383 3245                  iph = (mp->b_wptr - alloclen);
3384 3246          }
3385 3247          mp->b_rptr = iph;
3386 3248          bcopy(connp->conn_ht_iphc, iph, copylen);
3387 3249          ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3388 3250  
3389 3251          ixa->ixa_ip_hdr_length = ip_hdr_length;
3390 3252          udpha = (udpha_t *)(iph + ip_hdr_length);
3391 3253  
3392 3254          /*
3393 3255           * Setup header length and prepare for ULP checksum done in IP.
3394 3256           * udp_build_hdr_template has already massaged any routing header
3395 3257           * and placed the result in conn_sum.
3396 3258           *
3397 3259           * We make it easy for IP to include our pseudo header
3398 3260           * by putting our length in uha_checksum.
3399 3261           */
3400 3262          cksum = pktlen - ip_hdr_length;
3401 3263          udpha->uha_length = htons(cksum);
3402 3264  
3403 3265          cksum += connp->conn_sum;
3404 3266          cksum = (cksum >> 16) + (cksum & 0xFFFF);
3405 3267          ASSERT(cksum < 0x10000);
3406 3268  
3407 3269          ipp = &connp->conn_xmit_ipp;
3408 3270          if (ixa->ixa_flags & IXAF_IS_IPV4) {
3409 3271                  ipha_t  *ipha = (ipha_t *)iph;
3410 3272  
3411 3273                  ipha->ipha_length = htons((uint16_t)pktlen);
3412 3274  
3413 3275                  /* IP does the checksum if uha_checksum is non-zero */
3414 3276                  if (us->us_do_checksum)
3415 3277                          udpha->uha_checksum = htons(cksum);
3416 3278  
3417 3279                  /* if IP_PKTINFO specified an address it wins over bind() */
3418 3280                  if ((ipp->ipp_fields & IPPF_ADDR) &&
3419 3281                      IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3420 3282                          ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3421 3283                          ipha->ipha_src = ipp->ipp_addr_v4;
3422 3284                  } else {
3423 3285                          IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3424 3286                  }
3425 3287          } else {
3426 3288                  ip6_t *ip6h = (ip6_t *)iph;
3427 3289  
3428 3290                  ip6h->ip6_plen =  htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3429 3291                  udpha->uha_checksum = htons(cksum);
3430 3292  
3431 3293                  /* if IP_PKTINFO specified an address it wins over bind() */
3432 3294                  if ((ipp->ipp_fields & IPPF_ADDR) &&
3433 3295                      !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3434 3296                          ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3435 3297                          ip6h->ip6_src = ipp->ipp_addr;
3436 3298                  } else {
3437 3299                          ip6h->ip6_src = *v6src;
3438 3300                  }
3439 3301                  ip6h->ip6_vcf =
3440 3302                      (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3441 3303                      (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3442 3304                  if (ipp->ipp_fields & IPPF_TCLASS) {
3443 3305                          /* Overrides the class part of flowinfo */
  
    | 
      ↓ open down ↓ | 
    77 lines elided | 
    
      ↑ open up ↑ | 
  
3444 3306                          ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3445 3307                              ipp->ipp_tclass);
3446 3308                  }
3447 3309          }
3448 3310  
3449 3311          /* Insert all-0s SPI now. */
3450 3312          if (insert_spi)
3451 3313                  *((uint32_t *)(udpha + 1)) = 0;
3452 3314  
3453 3315          udpha->uha_dst_port = dstport;
3454      -        if (hash_srcport == B_TRUE)
3455      -                udpha->uha_src_port = htons(srcport);
3456      -
3457 3316          return (mp);
3458 3317  }
3459 3318  
3460 3319  /*
3461 3320   * Send a T_UDERR_IND in response to an M_DATA
3462 3321   */
3463 3322  static void
3464 3323  udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3465 3324  {
3466 3325          struct sockaddr_storage ss;
3467 3326          sin_t           *sin;
3468 3327          sin6_t          *sin6;
3469 3328          struct sockaddr *addr;
3470 3329          socklen_t       addrlen;
3471 3330          mblk_t          *mp1;
3472 3331  
3473 3332          mutex_enter(&connp->conn_lock);
3474 3333          /* Initialize addr and addrlen as if they're passed in */
3475 3334          if (connp->conn_family == AF_INET) {
3476 3335                  sin = (sin_t *)&ss;
3477 3336                  *sin = sin_null;
3478 3337                  sin->sin_family = AF_INET;
3479 3338                  sin->sin_port = connp->conn_fport;
3480 3339                  sin->sin_addr.s_addr = connp->conn_faddr_v4;
3481 3340                  addr = (struct sockaddr *)sin;
3482 3341                  addrlen = sizeof (*sin);
3483 3342          } else {
3484 3343                  sin6 = (sin6_t *)&ss;
3485 3344                  *sin6 = sin6_null;
3486 3345                  sin6->sin6_family = AF_INET6;
3487 3346                  sin6->sin6_port = connp->conn_fport;
3488 3347                  sin6->sin6_flowinfo = connp->conn_flowinfo;
3489 3348                  sin6->sin6_addr = connp->conn_faddr_v6;
3490 3349                  if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3491 3350                      (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3492 3351                          sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3493 3352                  } else {
3494 3353                          sin6->sin6_scope_id = 0;
3495 3354                  }
3496 3355                  sin6->__sin6_src_id = 0;
3497 3356                  addr = (struct sockaddr *)sin6;
3498 3357                  addrlen = sizeof (*sin6);
3499 3358          }
3500 3359          mutex_exit(&connp->conn_lock);
3501 3360  
3502 3361          mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3503 3362          if (mp1 != NULL)
3504 3363                  putnext(connp->conn_rq, mp1);
3505 3364  }
3506 3365  
3507 3366  /*
3508 3367   * This routine handles all messages passed downstream.  It either
3509 3368   * consumes the message or passes it downstream; it never queues a
3510 3369   * message.
3511 3370   *
3512 3371   * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
3513 3372   * is valid when we are directly beneath the stream head, and thus sockfs
3514 3373   * is able to bypass STREAMS and directly call us, passing along the sockaddr
3515 3374   * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3516 3375   * connected endpoints.
3517 3376   */
3518 3377  void
3519 3378  udp_wput(queue_t *q, mblk_t *mp)
3520 3379  {
3521 3380          sin6_t          *sin6;
3522 3381          sin_t           *sin = NULL;
3523 3382          uint_t          srcid;
3524 3383          conn_t          *connp = Q_TO_CONN(q);
3525 3384          udp_t           *udp = connp->conn_udp;
3526 3385          int             error = 0;
3527 3386          struct sockaddr *addr = NULL;
3528 3387          socklen_t       addrlen;
3529 3388          udp_stack_t     *us = udp->udp_us;
3530 3389          struct T_unitdata_req *tudr;
3531 3390          mblk_t          *data_mp;
3532 3391          ushort_t        ipversion;
3533 3392          cred_t          *cr;
3534 3393          pid_t           pid;
3535 3394  
3536 3395          /*
3537 3396           * We directly handle several cases here: T_UNITDATA_REQ message
3538 3397           * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3539 3398           * socket.
3540 3399           */
3541 3400          switch (DB_TYPE(mp)) {
3542 3401          case M_DATA:
3543 3402                  if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3544 3403                          /* Not connected; address is required */
3545 3404                          UDPS_BUMP_MIB(us, udpOutErrors);
3546 3405                          UDP_DBGSTAT(us, udp_data_notconn);
3547 3406                          UDP_STAT(us, udp_out_err_notconn);
3548 3407                          freemsg(mp);
3549 3408                          return;
3550 3409                  }
3551 3410                  /*
3552 3411                   * All Solaris components should pass a db_credp
3553 3412                   * for this message, hence we ASSERT.
3554 3413                   * On production kernels we return an error to be robust against
3555 3414                   * random streams modules sitting on top of us.
3556 3415                   */
3557 3416                  cr = msg_getcred(mp, &pid);
3558 3417                  ASSERT(cr != NULL);
3559 3418                  if (cr == NULL) {
3560 3419                          UDPS_BUMP_MIB(us, udpOutErrors);
3561 3420                          freemsg(mp);
3562 3421                          return;
3563 3422                  }
3564 3423                  ASSERT(udp->udp_issocket);
3565 3424                  UDP_DBGSTAT(us, udp_data_conn);
3566 3425                  error = udp_output_connected(connp, mp, cr, pid);
3567 3426                  if (error != 0) {
3568 3427                          UDP_STAT(us, udp_out_err_output);
3569 3428                          if (connp->conn_rq != NULL)
3570 3429                                  udp_ud_err_connected(connp, (t_scalar_t)error);
3571 3430  #ifdef DEBUG
3572 3431                          printf("udp_output_connected returned %d\n", error);
3573 3432  #endif
3574 3433                  }
3575 3434                  return;
3576 3435  
3577 3436          case M_PROTO:
3578 3437          case M_PCPROTO:
3579 3438                  tudr = (struct T_unitdata_req *)mp->b_rptr;
3580 3439                  if (MBLKL(mp) < sizeof (*tudr) ||
3581 3440                      ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3582 3441                          udp_wput_other(q, mp);
3583 3442                          return;
3584 3443                  }
3585 3444                  break;
3586 3445  
3587 3446          default:
3588 3447                  udp_wput_other(q, mp);
3589 3448                  return;
3590 3449          }
3591 3450  
3592 3451          /* Handle valid T_UNITDATA_REQ here */
3593 3452          data_mp = mp->b_cont;
3594 3453          if (data_mp == NULL) {
3595 3454                  error = EPROTO;
3596 3455                  goto ud_error2;
3597 3456          }
3598 3457          mp->b_cont = NULL;
3599 3458  
3600 3459          if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3601 3460                  error = EADDRNOTAVAIL;
3602 3461                  goto ud_error2;
3603 3462          }
3604 3463  
3605 3464          /*
3606 3465           * All Solaris components should pass a db_credp
3607 3466           * for this TPI message, hence we should ASSERT.
3608 3467           * However, RPC (svc_clts_ksend) does this odd thing where it
3609 3468           * passes the options from a T_UNITDATA_IND unchanged in a
3610 3469           * T_UNITDATA_REQ. While that is the right thing to do for
3611 3470           * some options, SCM_UCRED being the key one, this also makes it
3612 3471           * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3613 3472           */
3614 3473          cr = msg_getcred(mp, &pid);
3615 3474          if (cr == NULL) {
3616 3475                  cr = connp->conn_cred;
3617 3476                  pid = connp->conn_cpid;
3618 3477          }
3619 3478  
3620 3479          /*
3621 3480           * If a port has not been bound to the stream, fail.
3622 3481           * This is not a problem when sockfs is directly
3623 3482           * above us, because it will ensure that the socket
3624 3483           * is first bound before allowing data to be sent.
3625 3484           */
3626 3485          if (udp->udp_state == TS_UNBND) {
3627 3486                  error = EPROTO;
3628 3487                  goto ud_error2;
3629 3488          }
3630 3489          addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3631 3490          addrlen = tudr->DEST_length;
3632 3491  
3633 3492          switch (connp->conn_family) {
3634 3493          case AF_INET6:
3635 3494                  sin6 = (sin6_t *)addr;
3636 3495                  if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3637 3496                      (sin6->sin6_family != AF_INET6)) {
3638 3497                          error = EADDRNOTAVAIL;
3639 3498                          goto ud_error2;
3640 3499                  }
3641 3500  
3642 3501                  srcid = sin6->__sin6_src_id;
3643 3502                  if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3644 3503                          /*
3645 3504                           * Destination is not an IPv4-mapped IPv6 address.
3646 3505                           * Send out an IPv6 format packet.
3647 3506                           */
3648 3507  
3649 3508                          /*
3650 3509                           * If the local address is a mapped address return
3651 3510                           * an error.
3652 3511                           * It would be possible to send an IPv6 packet but the
3653 3512                           * response would never make it back to the application
3654 3513                           * since it is bound to a mapped address.
3655 3514                           */
3656 3515                          if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3657 3516                                  error = EADDRNOTAVAIL;
3658 3517                                  goto ud_error2;
3659 3518                          }
3660 3519  
3661 3520                          UDP_DBGSTAT(us, udp_out_ipv6);
3662 3521  
3663 3522                          if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3664 3523                                  sin6->sin6_addr = ipv6_loopback;
3665 3524                          ipversion = IPV6_VERSION;
3666 3525                  } else {
3667 3526                          if (connp->conn_ipv6_v6only) {
3668 3527                                  error = EADDRNOTAVAIL;
3669 3528                                  goto ud_error2;
3670 3529                          }
3671 3530  
3672 3531                          /*
3673 3532                           * If the local address is not zero or a mapped address
3674 3533                           * return an error.  It would be possible to send an
3675 3534                           * IPv4 packet but the response would never make it
3676 3535                           * back to the application since it is bound to a
3677 3536                           * non-mapped address.
3678 3537                           */
3679 3538                          if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3680 3539                              !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3681 3540                                  error = EADDRNOTAVAIL;
3682 3541                                  goto ud_error2;
3683 3542                          }
3684 3543                          UDP_DBGSTAT(us, udp_out_mapped);
3685 3544  
3686 3545                          if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3687 3546                                  V4_PART_OF_V6(sin6->sin6_addr) =
3688 3547                                      htonl(INADDR_LOOPBACK);
3689 3548                          }
3690 3549                          ipversion = IPV4_VERSION;
3691 3550                  }
3692 3551  
3693 3552                  if (tudr->OPT_length != 0) {
3694 3553                          /*
3695 3554                           * If we are connected then the destination needs to be
3696 3555                           * the same as the connected one.
3697 3556                           */
3698 3557                          if (udp->udp_state == TS_DATA_XFER &&
3699 3558                              !conn_same_as_last_v6(connp, sin6)) {
3700 3559                                  error = EISCONN;
3701 3560                                  goto ud_error2;
3702 3561                          }
3703 3562                          UDP_STAT(us, udp_out_opt);
3704 3563                          error = udp_output_ancillary(connp, NULL, sin6,
3705 3564                              data_mp, mp, NULL, cr, pid);
3706 3565                  } else {
3707 3566                          ip_xmit_attr_t *ixa;
3708 3567  
3709 3568                          /*
3710 3569                           * We have to allocate an ip_xmit_attr_t before we grab
3711 3570                           * conn_lock and we need to hold conn_lock once we've
3712 3571                           * checked conn_same_as_last_v6 to handle concurrent
3713 3572                           * send* calls on a socket.
3714 3573                           */
3715 3574                          ixa = conn_get_ixa(connp, B_FALSE);
3716 3575                          if (ixa == NULL) {
3717 3576                                  error = ENOMEM;
3718 3577                                  goto ud_error2;
3719 3578                          }
3720 3579                          mutex_enter(&connp->conn_lock);
3721 3580  
3722 3581                          if (conn_same_as_last_v6(connp, sin6) &&
3723 3582                              connp->conn_lastsrcid == srcid &&
3724 3583                              ipsec_outbound_policy_current(ixa)) {
3725 3584                                  UDP_DBGSTAT(us, udp_out_lastdst);
3726 3585                                  /* udp_output_lastdst drops conn_lock */
3727 3586                                  error = udp_output_lastdst(connp, data_mp, cr,
3728 3587                                      pid, ixa);
3729 3588                          } else {
3730 3589                                  UDP_DBGSTAT(us, udp_out_diffdst);
3731 3590                                  /* udp_output_newdst drops conn_lock */
3732 3591                                  error = udp_output_newdst(connp, data_mp, NULL,
3733 3592                                      sin6, ipversion, cr, pid, ixa);
3734 3593                          }
3735 3594                          ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3736 3595                  }
3737 3596                  if (error == 0) {
3738 3597                          freeb(mp);
3739 3598                          return;
3740 3599                  }
3741 3600                  break;
3742 3601  
3743 3602          case AF_INET:
3744 3603                  sin = (sin_t *)addr;
3745 3604                  if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3746 3605                      (sin->sin_family != AF_INET)) {
3747 3606                          error = EADDRNOTAVAIL;
3748 3607                          goto ud_error2;
3749 3608                  }
3750 3609                  UDP_DBGSTAT(us, udp_out_ipv4);
3751 3610                  if (sin->sin_addr.s_addr == INADDR_ANY)
3752 3611                          sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3753 3612                  ipversion = IPV4_VERSION;
3754 3613  
3755 3614                  srcid = 0;
3756 3615                  if (tudr->OPT_length != 0) {
3757 3616                          /*
3758 3617                           * If we are connected then the destination needs to be
3759 3618                           * the same as the connected one.
3760 3619                           */
3761 3620                          if (udp->udp_state == TS_DATA_XFER &&
3762 3621                              !conn_same_as_last_v4(connp, sin)) {
3763 3622                                  error = EISCONN;
3764 3623                                  goto ud_error2;
3765 3624                          }
3766 3625                          UDP_STAT(us, udp_out_opt);
3767 3626                          error = udp_output_ancillary(connp, sin, NULL,
3768 3627                              data_mp, mp, NULL, cr, pid);
3769 3628                  } else {
3770 3629                          ip_xmit_attr_t *ixa;
3771 3630  
3772 3631                          /*
3773 3632                           * We have to allocate an ip_xmit_attr_t before we grab
3774 3633                           * conn_lock and we need to hold conn_lock once we've
3775 3634                           * checked conn_same_as_last_v4 to handle concurrent
3776 3635                           * send* calls on a socket.
3777 3636                           */
3778 3637                          ixa = conn_get_ixa(connp, B_FALSE);
3779 3638                          if (ixa == NULL) {
3780 3639                                  error = ENOMEM;
3781 3640                                  goto ud_error2;
3782 3641                          }
3783 3642                          mutex_enter(&connp->conn_lock);
3784 3643  
3785 3644                          if (conn_same_as_last_v4(connp, sin) &&
3786 3645                              ipsec_outbound_policy_current(ixa)) {
3787 3646                                  UDP_DBGSTAT(us, udp_out_lastdst);
3788 3647                                  /* udp_output_lastdst drops conn_lock */
3789 3648                                  error = udp_output_lastdst(connp, data_mp, cr,
3790 3649                                      pid, ixa);
3791 3650                          } else {
3792 3651                                  UDP_DBGSTAT(us, udp_out_diffdst);
3793 3652                                  /* udp_output_newdst drops conn_lock */
3794 3653                                  error = udp_output_newdst(connp, data_mp, sin,
3795 3654                                      NULL, ipversion, cr, pid, ixa);
3796 3655                          }
3797 3656                          ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3798 3657                  }
3799 3658                  if (error == 0) {
3800 3659                          freeb(mp);
3801 3660                          return;
3802 3661                  }
3803 3662                  break;
3804 3663          }
3805 3664          UDP_STAT(us, udp_out_err_output);
3806 3665          ASSERT(mp != NULL);
3807 3666          /* mp is freed by the following routine */
3808 3667          udp_ud_err(q, mp, (t_scalar_t)error);
3809 3668          return;
3810 3669  
3811 3670  ud_error2:
3812 3671          UDPS_BUMP_MIB(us, udpOutErrors);
3813 3672          freemsg(data_mp);
3814 3673          UDP_STAT(us, udp_out_err_output);
3815 3674          ASSERT(mp != NULL);
3816 3675          /* mp is freed by the following routine */
3817 3676          udp_ud_err(q, mp, (t_scalar_t)error);
3818 3677  }
3819 3678  
3820 3679  /*
3821 3680   * Handle the case of the IP address, port, flow label being different
3822 3681   * for both IPv4 and IPv6.
3823 3682   *
3824 3683   * NOTE: The caller must hold conn_lock and we drop it here.
3825 3684   */
3826 3685  static int
3827 3686  udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3828 3687      ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3829 3688  {
3830 3689          uint_t          srcid;
3831 3690          uint32_t        flowinfo;
3832 3691          udp_t           *udp = connp->conn_udp;
3833 3692          int             error = 0;
3834 3693          ip_xmit_attr_t  *oldixa;
3835 3694          udp_stack_t     *us = udp->udp_us;
3836 3695          in6_addr_t      v6src;
3837 3696          in6_addr_t      v6dst;
3838 3697          in6_addr_t      v6nexthop;
3839 3698          in_port_t       dstport;
3840 3699  
3841 3700          ASSERT(MUTEX_HELD(&connp->conn_lock));
3842 3701          ASSERT(ixa != NULL);
3843 3702          /*
3844 3703           * We hold conn_lock across all the use and modifications of
3845 3704           * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3846 3705           * stay consistent.
3847 3706           */
3848 3707  
3849 3708          ASSERT(cr != NULL);
3850 3709          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3851 3710          ixa->ixa_cred = cr;
3852 3711          ixa->ixa_cpid = pid;
3853 3712          if (is_system_labeled()) {
3854 3713                  /* We need to restart with a label based on the cred */
3855 3714                  ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3856 3715          }
3857 3716  
3858 3717          /*
3859 3718           * If we are connected then the destination needs to be the
3860 3719           * same as the connected one, which is not the case here since we
3861 3720           * checked for that above.
3862 3721           */
3863 3722          if (udp->udp_state == TS_DATA_XFER) {
3864 3723                  mutex_exit(&connp->conn_lock);
3865 3724                  error = EISCONN;
3866 3725                  goto ud_error;
3867 3726          }
3868 3727  
3869 3728          /* In case previous destination was multicast or multirt */
3870 3729          ip_attr_newdst(ixa);
3871 3730  
3872 3731          /*
3873 3732           * If laddr is unspecified then we look at sin6_src_id.
3874 3733           * We will give precedence to a source address set with IPV6_PKTINFO
3875 3734           * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3876 3735           * want ip_attr_connect to select a source (since it can fail) when
3877 3736           * IPV6_PKTINFO is specified.
3878 3737           * If this doesn't result in a source address then we get a source
3879 3738           * from ip_attr_connect() below.
3880 3739           */
3881 3740          v6src = connp->conn_saddr_v6;
3882 3741          if (sin != NULL) {
3883 3742                  IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3884 3743                  dstport = sin->sin_port;
3885 3744                  flowinfo = 0;
3886 3745                  /* Don't bother with ip_srcid_find_id(), but indicate anyway. */
3887 3746                  srcid = 0;
3888 3747                  ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3889 3748                  ixa->ixa_flags |= IXAF_IS_IPV4;
3890 3749          } else {
3891 3750                  boolean_t v4mapped;
3892 3751  
3893 3752                  v6dst = sin6->sin6_addr;
3894 3753                  dstport = sin6->sin6_port;
3895 3754                  flowinfo = sin6->sin6_flowinfo;
3896 3755                  srcid = sin6->__sin6_src_id;
3897 3756                  if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3898 3757                          ixa->ixa_scopeid = sin6->sin6_scope_id;
3899 3758                          ixa->ixa_flags |= IXAF_SCOPEID_SET;
3900 3759                  } else {
3901 3760                          ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3902 3761                  }
3903 3762                  v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
3904 3763                  if (v4mapped)
3905 3764                          ixa->ixa_flags |= IXAF_IS_IPV4;
3906 3765                  else
3907 3766                          ixa->ixa_flags &= ~IXAF_IS_IPV4;
3908 3767                  if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3909 3768                          if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3910 3769                              v4mapped, connp->conn_netstack)) {
3911 3770                                  /* Mismatched v4mapped/v6 specified by srcid. */
3912 3771                                  mutex_exit(&connp->conn_lock);
3913 3772                                  error = EADDRNOTAVAIL;
3914 3773                                  goto ud_error;
3915 3774                          }
3916 3775                  }
3917 3776          }
3918 3777          /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3919 3778          if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3920 3779                  ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3921 3780  
3922 3781                  if (ixa->ixa_flags & IXAF_IS_IPV4) {
3923 3782                          if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3924 3783                                  v6src = ipp->ipp_addr;
3925 3784                  } else {
3926 3785                          if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3927 3786                                  v6src = ipp->ipp_addr;
3928 3787                  }
3929 3788          }
3930 3789  
3931 3790          ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3932 3791          mutex_exit(&connp->conn_lock);
3933 3792  
3934 3793          error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3935 3794              &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3936 3795          switch (error) {
3937 3796          case 0:
3938 3797                  break;
3939 3798          case EADDRNOTAVAIL:
3940 3799                  /*
3941 3800                   * IXAF_VERIFY_SOURCE tells us to pick a better source.
3942 3801                   * Don't have the application see that errno
3943 3802                   */
3944 3803                  error = ENETUNREACH;
3945 3804                  goto failed;
3946 3805          case ENETDOWN:
3947 3806                  /*
3948 3807                   * Have !ipif_addr_ready address; drop packet silently
3949 3808                   * until we can get applications to not send until we
3950 3809                   * are ready.
3951 3810                   */
3952 3811                  error = 0;
3953 3812                  goto failed;
3954 3813          case EHOSTUNREACH:
3955 3814          case ENETUNREACH:
3956 3815                  if (ixa->ixa_ire != NULL) {
3957 3816                          /*
3958 3817                           * Let conn_ip_output/ire_send_noroute return
3959 3818                           * the error and send any local ICMP error.
3960 3819                           */
3961 3820                          error = 0;
3962 3821                          break;
3963 3822                  }
3964 3823                  /* FALLTHRU */
3965 3824          failed:
3966 3825          default:
3967 3826                  goto ud_error;
3968 3827          }
3969 3828  
3970 3829  
3971 3830          /*
3972 3831           * Cluster note: we let the cluster hook know that we are sending to a
3973 3832           * new address and/or port.
3974 3833           */
3975 3834          if (cl_inet_connect2 != NULL) {
3976 3835                  CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3977 3836                  if (error != 0) {
3978 3837                          error = EHOSTUNREACH;
3979 3838                          goto ud_error;
3980 3839                  }
3981 3840          }
3982 3841  
3983 3842          mutex_enter(&connp->conn_lock);
3984 3843          /*
3985 3844           * While we dropped the lock some other thread might have connected
3986 3845           * this socket. If so we bail out with EISCONN to ensure that the
3987 3846           * connecting thread is the one that updates conn_ixa, conn_ht_*
3988 3847           * and conn_*last*.
3989 3848           */
3990 3849          if (udp->udp_state == TS_DATA_XFER) {
3991 3850                  mutex_exit(&connp->conn_lock);
3992 3851                  error = EISCONN;
3993 3852                  goto ud_error;
3994 3853          }
3995 3854  
3996 3855          /*
3997 3856           * We need to rebuild the headers if
3998 3857           *  - we are labeling packets (could be different for different
3999 3858           *    destinations)
4000 3859           *  - we have a source route (or routing header) since we need to
4001 3860           *    massage that to get the pseudo-header checksum
4002 3861           *  - the IP version is different than the last time
4003 3862           *  - a socket option with COA_HEADER_CHANGED has been set which
4004 3863           *    set conn_v6lastdst to zero.
4005 3864           *
4006 3865           * Otherwise the prepend function will just update the src, dst,
4007 3866           * dstport, and flow label.
4008 3867           */
4009 3868          if (is_system_labeled()) {
4010 3869                  /* TX MLP requires SCM_UCRED and don't have that here */
4011 3870                  if (connp->conn_mlp_type != mlptSingle) {
4012 3871                          mutex_exit(&connp->conn_lock);
4013 3872                          error = ECONNREFUSED;
4014 3873                          goto ud_error;
4015 3874                  }
4016 3875                  /*
4017 3876                   * Check whether Trusted Solaris policy allows communication
4018 3877                   * with this host, and pretend that the destination is
4019 3878                   * unreachable if not.
4020 3879                   * Compute any needed label and place it in ipp_label_v4/v6.
4021 3880                   *
4022 3881                   * Later conn_build_hdr_template/conn_prepend_hdr takes
4023 3882                   * ipp_label_v4/v6 to form the packet.
4024 3883                   *
4025 3884                   * Tsol note: Since we hold conn_lock we know no other
4026 3885                   * thread manipulates conn_xmit_ipp.
4027 3886                   */
4028 3887                  error = conn_update_label(connp, ixa, &v6dst,
4029 3888                      &connp->conn_xmit_ipp);
4030 3889                  if (error != 0) {
4031 3890                          mutex_exit(&connp->conn_lock);
4032 3891                          goto ud_error;
4033 3892                  }
4034 3893                  /* Rebuild the header template */
4035 3894                  error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4036 3895                      flowinfo);
4037 3896                  if (error != 0) {
4038 3897                          mutex_exit(&connp->conn_lock);
4039 3898                          goto ud_error;
4040 3899                  }
4041 3900          } else if ((connp->conn_xmit_ipp.ipp_fields &
4042 3901              (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
4043 3902              ipversion != connp->conn_lastipversion ||
4044 3903              IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
4045 3904                  /* Rebuild the header template */
4046 3905                  error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4047 3906                      flowinfo);
4048 3907                  if (error != 0) {
4049 3908                          mutex_exit(&connp->conn_lock);
4050 3909                          goto ud_error;
4051 3910                  }
4052 3911          } else {
4053 3912                  /* Simply update the destination address if no source route */
4054 3913                  if (ixa->ixa_flags & IXAF_IS_IPV4) {
4055 3914                          ipha_t  *ipha = (ipha_t *)connp->conn_ht_iphc;
4056 3915  
4057 3916                          IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4058 3917                          if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4059 3918                                  ipha->ipha_fragment_offset_and_flags |=
4060 3919                                      IPH_DF_HTONS;
4061 3920                          } else {
4062 3921                                  ipha->ipha_fragment_offset_and_flags &=
4063 3922                                      ~IPH_DF_HTONS;
4064 3923                          }
4065 3924                  } else {
4066 3925                          ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4067 3926                          ip6h->ip6_dst = v6dst;
4068 3927                  }
4069 3928          }
4070 3929  
4071 3930          /*
4072 3931           * Remember the dst/dstport etc which corresponds to the built header
4073 3932           * template and conn_ixa.
4074 3933           */
4075 3934          oldixa = conn_replace_ixa(connp, ixa);
4076 3935          connp->conn_v6lastdst = v6dst;
4077 3936          connp->conn_lastipversion = ipversion;
4078 3937          connp->conn_lastdstport = dstport;
4079 3938          connp->conn_lastflowinfo = flowinfo;
4080 3939          connp->conn_lastscopeid = ixa->ixa_scopeid;
4081 3940          connp->conn_lastsrcid = srcid;
4082 3941          /* Also remember a source to use together with lastdst */
4083 3942          connp->conn_v6lastsrc = v6src;
4084 3943  
4085 3944          data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
4086 3945              dstport, flowinfo, &error);
4087 3946  
4088 3947          /* Done with conn_t */
4089 3948          mutex_exit(&connp->conn_lock);
4090 3949          ixa_refrele(oldixa);
4091 3950  
4092 3951          if (data_mp == NULL) {
4093 3952                  ASSERT(error != 0);
4094 3953                  goto ud_error;
4095 3954          }
4096 3955  
4097 3956          /* We're done.  Pass the packet to ip. */
4098 3957          UDPS_BUMP_MIB(us, udpHCOutDatagrams);
4099 3958  
4100 3959          DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
4101 3960              void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
4102 3961              &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
4103 3962  
4104 3963          error = conn_ip_output(data_mp, ixa);
4105 3964          /* No udpOutErrors if an error since IP increases its error counter */
4106 3965          switch (error) {
4107 3966          case 0:
4108 3967                  break;
4109 3968          case EWOULDBLOCK:
4110 3969                  (void) ixa_check_drain_insert(connp, ixa);
4111 3970                  error = 0;
4112 3971                  break;
4113 3972          case EADDRNOTAVAIL:
4114 3973                  /*
4115 3974                   * IXAF_VERIFY_SOURCE tells us to pick a better source.
4116 3975                   * Don't have the application see that errno
4117 3976                   */
4118 3977                  error = ENETUNREACH;
4119 3978                  /* FALLTHRU */
4120 3979          default:
4121 3980                  mutex_enter(&connp->conn_lock);
4122 3981                  /*
4123 3982                   * Clear the source and v6lastdst so we call ip_attr_connect
4124 3983                   * for the next packet and try to pick a better source.
4125 3984                   */
4126 3985                  if (connp->conn_mcbc_bind)
4127 3986                          connp->conn_saddr_v6 = ipv6_all_zeros;
4128 3987                  else
4129 3988                          connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4130 3989                  connp->conn_v6lastdst = ipv6_all_zeros;
4131 3990                  mutex_exit(&connp->conn_lock);
4132 3991                  break;
4133 3992          }
4134 3993          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4135 3994          ixa->ixa_cred = connp->conn_cred;       /* Restore */
4136 3995          ixa->ixa_cpid = connp->conn_cpid;
4137 3996          ixa_refrele(ixa);
4138 3997          return (error);
4139 3998  
4140 3999  ud_error:
4141 4000          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4142 4001          ixa->ixa_cred = connp->conn_cred;       /* Restore */
4143 4002          ixa->ixa_cpid = connp->conn_cpid;
4144 4003          ixa_refrele(ixa);
4145 4004  
4146 4005          freemsg(data_mp);
4147 4006          UDPS_BUMP_MIB(us, udpOutErrors);
4148 4007          UDP_STAT(us, udp_out_err_output);
4149 4008          return (error);
4150 4009  }
4151 4010  
4152 4011  /* ARGSUSED */
           /*
            * Discard mp: the message is freed without being examined and wq is
            * not used.  DEBUG kernels additionally log a CE_CONT notice first.
            */
4153 4012  static void
4154 4013  udp_wput_fallback(queue_t *wq, mblk_t *mp)
4155 4014  {
4156 4015  #ifdef DEBUG
4157 4016          cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4158 4017  #endif
4159 4018          freemsg(mp);
4160 4019  }
4161 4020  
4162 4021  
4163 4022  /*
4164 4023   * Handle special out-of-band ioctl requests (see PSARC/2008/265).
            *
            * mp holds a cmdblk_t and mp->b_cont the data buffer the answer is
            * written into.  TI_GETPEERNAME and TI_GETMYNAME are answered while
            * holding conn_lock, using conn_getpeername() and conn_getsockname();
            * every other command is rejected with EINVAL.  The outcome is stored
            * in cb_error and the same message is always returned with qreply().
4165 4024   */
4166 4025  static void
4167 4026  udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4168 4027  {
4169 4028          void    *data;
4170 4029          mblk_t  *datamp = mp->b_cont;
4171 4030          conn_t  *connp = Q_TO_CONN(q);
4172 4031          udp_t   *udp = connp->conn_udp;
4173 4032          cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4174 4033  
                   /*
                    * The data block must exist and hold at least cb_len bytes;
                    * otherwise reply with EPROTO.
                    */
4175 4034          if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4176 4035                  cmdp->cb_error = EPROTO;
4177 4036                  qreply(q, mp);
4178 4037                  return;
4179 4038          }
4180 4039          data = datamp->b_rptr;
4181 4040  
4182 4041          mutex_enter(&connp->conn_lock);
4183 4042          switch (cmdp->cb_cmd) {
4184 4043          case TI_GETPEERNAME:
                           /* A peer name is only reported in TS_DATA_XFER state. */
4185 4044                  if (udp->udp_state != TS_DATA_XFER)
4186 4045                          cmdp->cb_error = ENOTCONN;
4187 4046                  else
4188 4047                          cmdp->cb_error = conn_getpeername(connp, data,
4189 4048                              &cmdp->cb_len);
4190 4049                  break;
4191 4050          case TI_GETMYNAME:
4192 4051                  cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4193 4052                  break;
4194 4053          default:
4195 4054                  cmdp->cb_error = EINVAL;
4196 4055                  break;
4197 4056          }
4198 4057          mutex_exit(&connp->conn_lock);
4199 4058  
4200 4059          qreply(q, mp);
4201 4060  }
4202 4061  
4203 4062  static void
4204 4063  udp_use_pure_tpi(udp_t *udp)
4205 4064  {
                   /*
                    * Clear udp_issocket while holding conn_lock, then count the
                    * event in the udp_sock_fallback statistic of this udp's stack.
                    */
4206 4065          conn_t  *connp = udp->udp_connp;
4207 4066  
4208 4067          mutex_enter(&connp->conn_lock);
4209 4068          udp->udp_issocket = B_FALSE;
4210 4069          mutex_exit(&connp->conn_lock);
4211 4070          UDP_STAT(udp->udp_us, udp_sock_fallback);
4212 4071  }
4213 4072  
4214 4073  static void
4215 4074  udp_wput_other(queue_t *q, mblk_t *mp)
4216 4075  {
                   /*
                    * Dispatch a message by its db_type:
                    *  - M_CMD goes to udp_wput_cmdblk().
                    *  - M_PROTO/M_PCPROTO are dispatched on the TPI primitive type.
                    *  - M_FLUSH flushes this queue's data when FLUSHW is set.
                    *  - M_IOCTL handles TI_GETPEERNAME, TI_GETMYNAME and
                    *    _SIOCSOCKFALLBACK.
                    *  - M_IOCDATA goes to udp_wput_iocdata().
                    * Cases that handle the message completely return directly; every
                    * case that merely breaks out of the switch ends up handing the
                    * message to ip_wput_nondata() at the bottom.
                    */
4217 4076          uchar_t *rptr = mp->b_rptr;
4218 4077          struct iocblk *iocp;
4219 4078          conn_t  *connp = Q_TO_CONN(q);
4220 4079          udp_t   *udp = connp->conn_udp;
4221 4080          cred_t  *cr;
4222 4081  
4223 4082          switch (mp->b_datap->db_type) {
4224 4083          case M_CMD:
4225 4084                  udp_wput_cmdblk(q, mp);
4226 4085                  return;
4227 4086  
4228 4087          case M_PROTO:
4229 4088          case M_PCPROTO:
4230 4089                  if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4231 4090                          /*
4232 4091                           * If the message does not contain a PRIM_type,
4233 4092                           * throw it away.
4234 4093                           */
4235 4094                          freemsg(mp);
4236 4095                          return;
4237 4096                  }
4238 4097                  switch (((t_primp_t)rptr)->type) {
4239 4098                  case T_ADDR_REQ:
4240 4099                          udp_addr_req(q, mp);
4241 4100                          return;
4242 4101                  case O_T_BIND_REQ:
4243 4102                  case T_BIND_REQ:
4244 4103                          udp_tpi_bind(q, mp);
4245 4104                          return;
4246 4105                  case T_CONN_REQ:
4247 4106                          udp_tpi_connect(q, mp);
4248 4107                          return;
4249 4108                  case T_CAPABILITY_REQ:
4250 4109                          udp_capability_req(q, mp);
4251 4110                          return;
4252 4111                  case T_INFO_REQ:
4253 4112                          udp_info_req(q, mp);
4254 4113                          return;
4255 4114                  case T_UNITDATA_REQ:
4256 4115                          /*
4257 4116                           * If a T_UNITDATA_REQ gets here, the address must
4258 4117                           * be bad.  Valid T_UNITDATA_REQs are handled
4259 4118                           * in udp_wput.
4260 4119                           */
4261 4120                          udp_ud_err(q, mp, EADDRNOTAVAIL);
4262 4121                          return;
4263 4122                  case T_UNBIND_REQ:
4264 4123                          udp_tpi_unbind(q, mp);
4265 4124                          return;
4266 4125                  case T_SVR4_OPTMGMT_REQ:
4267 4126                          /*
4268 4127                           * All Solaris components should pass a db_credp
4269 4128                           * for this TPI message, hence we ASSERT.
4270 4129                           * But in case there is some other M_PROTO that looks
4271 4130                           * like a TPI message sent by some other kernel
4272 4131                           * component, we check and return an error.
4273 4132                           */
4274 4133                          cr = msg_getcred(mp, NULL);
4275 4134                          ASSERT(cr != NULL);
4276 4135                          if (cr == NULL) {
4277 4136                                  udp_err_ack(q, mp, TSYSERR, EINVAL);
4278 4137                                  return;
4279 4138                          }
                                   /*
                                    * snmpcom_req() gets the first look at the
                                    * request; svr4_optcom_req() only runs when it
                                    * returns zero.
                                    */
4280 4139                          if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4281 4140                              cr)) {
4282 4141                                  svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4283 4142                          }
4284 4143                          return;
4285 4144  
4286 4145                  case T_OPTMGMT_REQ:
4287 4146                          /*
4288 4147                           * All Solaris components should pass a db_credp
4289 4148                           * for this TPI message, hence we ASSERT.
4290 4149                           * But in case there is some other M_PROTO that looks
4291 4150                           * like a TPI message sent by some other kernel
4292 4151                           * component, we check and return an error.
4293 4152                           */
4294 4153                          cr = msg_getcred(mp, NULL);
4295 4154                          ASSERT(cr != NULL);
4296 4155                          if (cr == NULL) {
4297 4156                                  udp_err_ack(q, mp, TSYSERR, EINVAL);
4298 4157                                  return;
4299 4158                          }
4300 4159                          tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4301 4160                          return;
4302 4161  
4303 4162                  case T_DISCON_REQ:
4304 4163                          udp_tpi_disconnect(q, mp);
4305 4164                          return;
4306 4165  
4307 4166                  /* The following TPI message is not supported by udp. */
4308 4167                  case O_T_CONN_RES:
4309 4168                  case T_CONN_RES:
4310 4169                          udp_err_ack(q, mp, TNOTSUPPORT, 0);
4311 4170                          return;
4312 4171  
4313 4172                  /* The following 3 TPI requests are illegal for udp. */
4314 4173                  case T_DATA_REQ:
4315 4174                  case T_EXDATA_REQ:
4316 4175                  case T_ORDREL_REQ:
4317 4176                          udp_err_ack(q, mp, TNOTSUPPORT, 0);
4318 4177                          return;
4319 4178                  default:
                                   /* Other primitives are left to ip_wput_nondata(). */
4320 4179                          break;
4321 4180                  }
4322 4181                  break;
4323 4182          case M_FLUSH:
                           /* After the optional flush the message is still passed on. */
4324 4183                  if (*rptr & FLUSHW)
4325 4184                          flushq(q, FLUSHDATA);
4326 4185                  break;
4327 4186          case M_IOCTL:
4328 4187                  iocp = (struct iocblk *)mp->b_rptr;
4329 4188                  switch (iocp->ioc_cmd) {
4330 4189                  case TI_GETPEERNAME:
4331 4190                          if (udp->udp_state != TS_DATA_XFER) {
4332 4191                                  /*
4333 4192                                   * If a default destination address has not
4334 4193                                   * been associated with the stream, then we
4335 4194                                   * don't know the peer's name.
4336 4195                                   */
4337 4196                                  iocp->ioc_error = ENOTCONN;
4338 4197                                  iocp->ioc_count = 0;
4339 4198                                  mp->b_datap->db_type = M_IOCACK;
4340 4199                                  qreply(q, mp);
4341 4200                                  return;
4342 4201                          }
4343 4202                          /* FALLTHRU */
4344 4203                  case TI_GETMYNAME:
4345 4204                          /*
4346 4205                           * For TI_GETPEERNAME and TI_GETMYNAME, we first
4347 4206                           * need to copyin the user's strbuf structure.
4348 4207                           * Processing will continue in the M_IOCDATA case
4349 4208                           * below.
4350 4209                           */
4351 4210                          mi_copyin(q, mp, NULL,
4352 4211                              SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4353 4212                          return;
4354 4213                  case _SIOCSOCKFALLBACK:
4355 4214                          /*
4356 4215                           * Either sockmod is about to be popped and the
4357 4216                           * socket would now be treated as a plain stream,
4358 4217                           * or a module is about to be pushed so we have
4359 4218                           * to follow pure TPI semantics.
4360 4219                           */
                                   /*
                                    * The request is NAKed with EINVAL when
                                    * udp_issocket is already clear.
                                    */
4361 4220                          if (!udp->udp_issocket) {
4362 4221                                  DB_TYPE(mp) = M_IOCNAK;
4363 4222                                  iocp->ioc_error = EINVAL;
4364 4223                          } else {
4365 4224                                  udp_use_pure_tpi(udp);
4366 4225  
4367 4226                                  DB_TYPE(mp) = M_IOCACK;
4368 4227                                  iocp->ioc_error = 0;
4369 4228                          }
4370 4229                          iocp->ioc_count = 0;
4371 4230                          iocp->ioc_rval = 0;
4372 4231                          qreply(q, mp);
4373 4232                          return;
4374 4233                  default:
4375 4234                          break;
4376 4235                  }
4377 4236                  break;
4378 4237          case M_IOCDATA:
4379 4238                  udp_wput_iocdata(q, mp);
4380 4239                  return;
4381 4240          default:
4382 4241                  /* Unrecognized messages are passed through without change. */
4383 4242                  break;
4384 4243          }
4385 4244          ip_wput_nondata(q, mp);
4386 4245  }
4387 4246  
4388 4247  /*
4389 4248   * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4390 4249   * messages.
            *
            * Only TI_GETMYNAME and TI_GETPEERNAME are processed here; any other
            * ioc_cmd is handed to ip_wput_nondata().  The work is driven by
            * mi_copy_state(): after the strbuf copyin (MI_COPY_IN, 1) the address
            * is built and copied out, after the first copyout the strbuf is copied
            * out, and after the second copyout the ioctl is completed.
4391 4250   */
4392 4251  static void
4393 4252  udp_wput_iocdata(queue_t *q, mblk_t *mp)
4394 4253  {
4395 4254          mblk_t          *mp1;
4396 4255          struct  iocblk *iocp = (struct iocblk *)mp->b_rptr;
4397 4256          STRUCT_HANDLE(strbuf, sb);
4398 4257          uint_t          addrlen;
4399 4258          conn_t          *connp = Q_TO_CONN(q);
4400 4259          udp_t           *udp = connp->conn_udp;
4401 4260  
4402 4261          /* Make sure it is one of ours. */
4403 4262          switch (iocp->ioc_cmd) {
4404 4263          case TI_GETMYNAME:
4405 4264          case TI_GETPEERNAME:
4406 4265                  break;
4407 4266          default:
4408 4267                  ip_wput_nondata(q, mp);
4409 4268                  return;
4410 4269          }
4411 4270  
4412 4271          switch (mi_copy_state(q, mp, &mp1)) {
4413 4272          case -1:
                           /* Nothing more is done with mp here on a -1 return. */
4414 4273                  return;
4415 4274          case MI_COPY_CASE(MI_COPY_IN, 1):
                           /* The strbuf has arrived in mp1; continue below the switch. */
4416 4275                  break;
4417 4276          case MI_COPY_CASE(MI_COPY_OUT, 1):
4418 4277                  /*
4419 4278                   * The address has been copied out, so now
4420 4279                   * copyout the strbuf.
4421 4280                   */
4422 4281                  mi_copyout(q, mp);
4423 4282                  return;
4424 4283          case MI_COPY_CASE(MI_COPY_OUT, 2):
4425 4284                  /*
4426 4285                   * The address and strbuf have been copied out.
4427 4286                   * We're done, so just acknowledge the original
4428 4287                   * M_IOCTL.
4429 4288                   */
4430 4289                  mi_copy_done(q, mp, 0);
4431 4290                  return;
4432 4291          default:
4433 4292                  /*
4434 4293                   * Something strange has happened, so acknowledge
4435 4294                   * the original M_IOCTL with an EPROTO error.
4436 4295                   */
4437 4296                  mi_copy_done(q, mp, EPROTO);
4438 4297                  return;
4439 4298          }
4440 4299  
4441 4300          /*
4442 4301           * Now we have the strbuf structure for TI_GETMYNAME
4443 4302           * and TI_GETPEERNAME.  Next we copyout the requested
4444 4303           * address and then we'll copyout the strbuf.
4445 4304           */
4446 4305          STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4447 4306  
                   /* The address size depends only on the conn's address family. */
4448 4307          if (connp->conn_family == AF_INET)
4449 4308                  addrlen = sizeof (sin_t);
4450 4309          else
4451 4310                  addrlen = sizeof (sin6_t);
4452 4311  
                   /*
                    * The caller's buffer must be able to hold a full address.
                    * NOTE(review): addrlen is a uint_t; confirm how a negative
                    * maxlen from the user's strbuf behaves in this comparison.
                    */
4453 4312          if (STRUCT_FGET(sb, maxlen) < addrlen) {
4454 4313                  mi_copy_done(q, mp, EINVAL);
4455 4314                  return;
4456 4315          }
4457 4316  
4458 4317          switch (iocp->ioc_cmd) {
4459 4318          case TI_GETMYNAME:
4460 4319                  break;
4461 4320          case TI_GETPEERNAME:
                           /* A peer name is only available in TS_DATA_XFER state. */
4462 4321                  if (udp->udp_state != TS_DATA_XFER) {
4463 4322                          mi_copy_done(q, mp, ENOTCONN);
4464 4323                          return;
4465 4324                  }
4466 4325                  break;
4467 4326          }
4468 4327          mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4469 4328          if (!mp1)
4470 4329                  return;
4471 4330  
4472 4331          STRUCT_FSET(sb, len, addrlen);
                   /*
                    * Write the address at mp1->b_wptr.  The return values of
                    * conn_getsockname()/conn_getpeername() are deliberately ignored.
                    */
4473 4332          switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4474 4333          case TI_GETMYNAME:
4475 4334                  (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4476 4335                      &addrlen);
4477 4336                  break;
4478 4337          case TI_GETPEERNAME:
4479 4338                  (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4480 4339                      &addrlen);
4481 4340                  break;
4482 4341          }
                   /*
                    * addrlen was passed by reference above, so it presumably now
                    * holds the length actually written -- TODO confirm.
                    */
4483 4342          mp1->b_wptr += addrlen;
4484 4343          /* Copy out the address */
4485 4344          mi_copyout(q, mp);
4486 4345  }
4487 4346  
4488 4347  void
4489 4348  udp_ddi_g_init(void)
4490 4349  {
                   /*
                    * Global UDP setup: cache the value optcom_max_optsize() derives
                    * from the UDP option table in udp_max_optsize, then register the
                    * per-netstack create/destroy callbacks.
                    */
4491 4350          udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4492 4351              udp_opt_obj.odb_opt_arr_cnt);
4493 4352  
4494 4353          /*
4495 4354           * We want to be informed each time a stack is created or
4496 4355           * destroyed in the kernel, so we can maintain the
4497 4356           * set of udp_stack_t's.
4498 4357           */
4499 4358          netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4500 4359  }
4501 4360  
4502 4361  void
4503 4362  udp_ddi_g_destroy(void)
4504 4363  {
                   /* Drop the NS_UDP registration made by udp_ddi_g_init(). */
4505 4364          netstack_unregister(NS_UDP);
4506 4365  }
4507 4366  
4508 4367  #define INET_NAME       "ip"
4509 4368  
4510 4369  /*
4511 4370   * Initialize the UDP stack instance.
            *
            * Allocates a zeroed udp_stack_t for netstack ns and sets up, in order:
            * the extra privileged port list and its lock, the bind fanout table
            * (size rounded up to a power of two), a private copy of
            * udp_propinfo_tbl, the per-CPU statistics, the kstats for stackid, and
            * an LDI identifier.  All allocations use KM_SLEEP.  Returns the new
            * udp_stack_t; udp_stack_fini() releases it.
4512 4371   */
4513 4372  static void *
4514 4373  udp_stack_init(netstackid_t stackid, netstack_t *ns)
4515 4374  {
4516 4375          udp_stack_t     *us;
4517 4376          int             i;
4518 4377          int             error = 0;
4519 4378          major_t         major;
4520 4379          size_t          arrsz;
4521 4380  
4522 4381          us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4523 4382          us->us_netstack = ns;
4524 4383  
4525 4384          mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4526 4385          us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4527 4386          us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4528 4387          us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4529 4388  
4530 4389          /*
4531 4390           * The smallest anonymous port in the privileged port range which UDP
4532 4391           * looks for free port.  Use in the option UDP_ANONPRIVBIND.
4533 4392           */
4534 4393          us->us_min_anonpriv_port = 512;
4535 4394  
4536 4395          us->us_bind_fanout_size = udp_bind_fanout_size;
4537 4396  
4538 4397          /* Roundup variable that might have been modified in /etc/system */
4539 4398          if (!ISP2(us->us_bind_fanout_size)) {
4540 4399                  /* Not a power of two. Round up to nearest power of two */
                           /*
                            * NOTE(review): if no i below 31 satisfies the test the
                            * loop ends with i == 31 and the shift below is 1 << 31;
                            * confirm the tunable is bounded well below that.
                            */
4541 4400                  for (i = 0; i < 31; i++) {
4542 4401                          if (us->us_bind_fanout_size < (1 << i))
4543 4402                                  break;
4544 4403                  }
4545 4404                  us->us_bind_fanout_size = 1 << i;
4546 4405          }
4547 4406          us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4548 4407              sizeof (udp_fanout_t), KM_SLEEP);
4549 4408          for (i = 0; i < us->us_bind_fanout_size; i++) {
4550 4409                  mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4551 4410                      NULL);
4552 4411          }
4553 4412  
                   /* Each stack gets its own copy of the property table. */
4554 4413          arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4555 4414          us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4556 4415              KM_SLEEP);
4557 4416          bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4558 4417  
4559 4418          /* Allocate the per netstack stats */
                   /*
                    * The pointer array has room for max_ncpus entries, but only the
                    * first us_sc_cnt (sampled under cpu_lock) are allocated here.
                    */
4560 4419          mutex_enter(&cpu_lock);
4561 4420          us->us_sc_cnt = MAX(ncpus, boot_ncpus);
4562 4421          mutex_exit(&cpu_lock);
4563 4422          us->us_sc = kmem_zalloc(max_ncpus  * sizeof (udp_stats_cpu_t *),
4564 4423              KM_SLEEP);
4565 4424          for (i = 0; i < us->us_sc_cnt; i++) {
4566 4425                  us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4567 4426                      KM_SLEEP);
4568 4427          }
4569 4428  
4570 4429          us->us_kstat = udp_kstat2_init(stackid);
4571 4430          us->us_mibkp = udp_kstat_init(stackid);
4572 4431  
                   /* The LDI identifier is derived from the INET_NAME driver's major. */
4573 4432          major = mod_name_to_major(INET_NAME);
4574 4433          error = ldi_ident_from_major(major, &us->us_ldi_ident);
4575 4434          ASSERT(error == 0);
4576 4435          return (us);
4577 4436  }
4578 4437  
4579 4438  /*
4580 4439   * Free the UDP stack instance.
            *
            * arg is the udp_stack_t returned by udp_stack_init().  Destroys the
            * fanout locks and frees the fanout table, the per-CPU statistics, the
            * property table and the kstats, then destroys the privileged port
            * lock, releases the LDI identifier and frees the udp_stack_t itself.
4581 4440   */
4582 4441  static void
4583 4442  udp_stack_fini(netstackid_t stackid, void *arg)
4584 4443  {
4585 4444          udp_stack_t *us = (udp_stack_t *)arg;
4586 4445          int i;
4587 4446  
4588 4447          for (i = 0; i < us->us_bind_fanout_size; i++) {
4589 4448                  mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4590 4449          }
4591 4450  
4592 4451          kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4593 4452              sizeof (udp_fanout_t));
4594 4453  
4595 4454          us->us_bind_fanout = NULL;
4596 4455  
                   /*
                    * Free the us_sc_cnt per-CPU entries, then the pointer array,
                    * which is sized by max_ncpus.
                    */
4597 4456          for (i = 0; i < us->us_sc_cnt; i++)
4598 4457                  kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4599 4458          kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4600 4459  
4601 4460          kmem_free(us->us_propinfo_tbl,
4602 4461              udp_propinfo_count * sizeof (mod_prop_info_t));
4603 4462          us->us_propinfo_tbl = NULL;
4604 4463  
4605 4464          udp_kstat_fini(stackid, us->us_mibkp);
4606 4465          us->us_mibkp = NULL;
4607 4466  
4608 4467          udp_kstat2_fini(stackid, us->us_kstat);
4609 4468          us->us_kstat = NULL;
4610 4469  
4611 4470          mutex_destroy(&us->us_epriv_port_lock);
4612 4471          ldi_ident_release(us->us_ldi_ident);
4613 4472          kmem_free(us, sizeof (*us));
4614 4473  }
4615 4474  
4616 4475  static size_t
4617 4476  udp_set_rcv_hiwat(udp_t *udp, size_t size)
4618 4477  {
4619 4478          udp_stack_t *us = udp->udp_us;
4620 4479  
4621 4480          /* We add a bit of extra buffering */
4622 4481          size += size >> 1;
4623 4482          if (size > us->us_max_buf)
4624 4483                  size = us->us_max_buf;
4625 4484  
4626 4485          udp->udp_rcv_hiwat = size;
4627 4486          return (size);
4628 4487  }
4629 4488  
4630 4489  /*
4631 4490   * For the lower queue so that UDP can be a dummy mux.
4632 4491   * Nobody should be sending
4633 4492   * packets up this stream
4634 4493   */
4635 4494  static void
4636 4495  udp_lrput(queue_t *q, mblk_t *mp)
4637 4496  {
4638 4497          switch (mp->b_datap->db_type) {
4639 4498          case M_FLUSH:
4640 4499                  /* Turn around */
4641 4500                  if (*mp->b_rptr & FLUSHW) {
4642 4501                          *mp->b_rptr &= ~FLUSHR;
4643 4502                          qreply(q, mp);
4644 4503                          return;
4645 4504                  }
4646 4505                  break;
4647 4506          }
4648 4507          freemsg(mp);
4649 4508  }
4650 4509  
4651 4510  /*
4652 4511   * For the lower queue so that UDP can be a dummy mux.
4653 4512   * Nobody should be sending packets down this stream.
4654 4513   */
4655 4514  /* ARGSUSED */
4656 4515  void
4657 4516  udp_lwput(queue_t *q, mblk_t *mp)
4658 4517  {
4659 4518          freemsg(mp);
4660 4519  }
4661 4520  
4662 4521  /*
4663 4522   * When a CPU is added, we need to allocate the per CPU stats struct.
4664 4523   */
4665 4524  void
4666 4525  udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4667 4526  {
4668 4527          int i;
4669 4528  
4670 4529          if (cpu_seqid < us->us_sc_cnt)
4671 4530                  return;
4672 4531          for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4673 4532                  ASSERT(us->us_sc[i] == NULL);
4674 4533                  us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4675 4534                      KM_SLEEP);
4676 4535          }
4677 4536          membar_producer();
4678 4537          us->us_sc_cnt = cpu_seqid + 1;
4679 4538  }
4680 4539  
4681 4540  /*
4682 4541   * Below routines for UDP socket module.
4683 4542   */
4684 4543  
4685 4544  static conn_t *
4686 4545  udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4687 4546  {
4688 4547          udp_t           *udp;
4689 4548          conn_t          *connp;
4690 4549          zoneid_t        zoneid;
4691 4550          netstack_t      *ns;
4692 4551          udp_stack_t     *us;
4693 4552          int             len;
4694 4553  
4695 4554          ASSERT(errorp != NULL);
4696 4555  
4697 4556          if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4698 4557                  return (NULL);
4699 4558  
4700 4559          ns = netstack_find_by_cred(credp);
4701 4560          ASSERT(ns != NULL);
4702 4561          us = ns->netstack_udp;
4703 4562          ASSERT(us != NULL);
4704 4563  
4705 4564          /*
4706 4565           * For exclusive stacks we set the zoneid to zero
4707 4566           * to make UDP operate as if in the global zone.
4708 4567           */
4709 4568          if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4710 4569                  zoneid = GLOBAL_ZONEID;
4711 4570          else
4712 4571                  zoneid = crgetzoneid(credp);
4713 4572  
4714 4573          ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4715 4574  
4716 4575          connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4717 4576          if (connp == NULL) {
4718 4577                  netstack_rele(ns);
4719 4578                  *errorp = ENOMEM;
4720 4579                  return (NULL);
4721 4580          }
4722 4581          udp = connp->conn_udp;
4723 4582  
4724 4583          /*
4725 4584           * ipcl_conn_create did a netstack_hold. Undo the hold that was
4726 4585           * done by netstack_find_by_cred()
4727 4586           */
4728 4587          netstack_rele(ns);
4729 4588  
4730 4589          /*
4731 4590           * Since this conn_t/udp_t is not yet visible to anybody else we don't
4732 4591           * need to lock anything.
4733 4592           */
4734 4593          ASSERT(connp->conn_proto == IPPROTO_UDP);
4735 4594          ASSERT(connp->conn_udp == udp);
4736 4595          ASSERT(udp->udp_connp == connp);
4737 4596  
4738 4597          /* Set the initial state of the stream and the privilege status. */
4739 4598          udp->udp_state = TS_UNBND;
4740 4599          connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4741 4600          if (isv6) {
4742 4601                  connp->conn_family = AF_INET6;
4743 4602                  connp->conn_ipversion = IPV6_VERSION;
4744 4603                  connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4745 4604                  connp->conn_default_ttl = us->us_ipv6_hoplimit;
4746 4605                  len = sizeof (ip6_t) + UDPH_SIZE;
4747 4606          } else {
4748 4607                  connp->conn_family = AF_INET;
4749 4608                  connp->conn_ipversion = IPV4_VERSION;
4750 4609                  connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4751 4610                  connp->conn_default_ttl = us->us_ipv4_ttl;
4752 4611                  len = sizeof (ipha_t) + UDPH_SIZE;
4753 4612          }
4754 4613  
4755 4614          ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4756 4615          connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4757 4616  
4758 4617          connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4759 4618          connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4760 4619          /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4761 4620          connp->conn_ixa->ixa_zoneid = zoneid;
4762 4621  
4763 4622          connp->conn_zoneid = zoneid;
4764 4623  
4765 4624          /*
4766 4625           * If the caller has the process-wide flag set, then default to MAC
4767 4626           * exempt mode.  This allows read-down to unlabeled hosts.
4768 4627           */
4769 4628          if (getpflags(NET_MAC_AWARE, credp) != 0)
4770 4629                  connp->conn_mac_mode = CONN_MAC_AWARE;
4771 4630  
4772 4631          connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4773 4632  
4774 4633          udp->udp_us = us;
4775 4634  
4776 4635          connp->conn_rcvbuf = us->us_recv_hiwat;
4777 4636          connp->conn_sndbuf = us->us_xmit_hiwat;
4778 4637          connp->conn_sndlowat = us->us_xmit_lowat;
4779 4638          connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4780 4639  
4781 4640          connp->conn_wroff = len + us->us_wroff_extra;
4782 4641          connp->conn_so_type = SOCK_DGRAM;
4783 4642  
4784 4643          connp->conn_recv = udp_input;
4785 4644          connp->conn_recvicmp = udp_icmp_input;
4786 4645          crhold(credp);
4787 4646          connp->conn_cred = credp;
4788 4647          connp->conn_cpid = curproc->p_pid;
4789 4648          connp->conn_open_time = ddi_get_lbolt64();
4790 4649          /* Cache things in ixa without an extra refhold */
4791 4650          ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4792 4651          connp->conn_ixa->ixa_cred = connp->conn_cred;
4793 4652          connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4794 4653          if (is_system_labeled())
4795 4654                  connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4796 4655  
4797 4656          *((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4798 4657  
4799 4658          if (us->us_pmtu_discovery)
4800 4659                  connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4801 4660  
4802 4661          return (connp);
4803 4662  }
4804 4663  
4805 4664  sock_lower_handle_t
4806 4665  udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4807 4666      uint_t *smodep, int *errorp, int flags, cred_t *credp)
4808 4667  {
4809 4668          udp_t           *udp = NULL;
4810 4669          udp_stack_t     *us;
4811 4670          conn_t          *connp;
4812 4671          boolean_t       isv6;
4813 4672  
4814 4673          if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4815 4674              (proto != 0 && proto != IPPROTO_UDP)) {
4816 4675                  *errorp = EPROTONOSUPPORT;
4817 4676                  return (NULL);
4818 4677          }
4819 4678  
4820 4679          if (family == AF_INET6)
4821 4680                  isv6 = B_TRUE;
4822 4681          else
4823 4682                  isv6 = B_FALSE;
4824 4683  
4825 4684          connp = udp_do_open(credp, isv6, flags, errorp);
4826 4685          if (connp == NULL)
4827 4686                  return (NULL);
4828 4687  
4829 4688          udp = connp->conn_udp;
4830 4689          ASSERT(udp != NULL);
4831 4690          us = udp->udp_us;
4832 4691          ASSERT(us != NULL);
4833 4692  
4834 4693          udp->udp_issocket = B_TRUE;
4835 4694          connp->conn_flags |= IPCL_NONSTR;
4836 4695  
4837 4696          /*
4838 4697           * Set flow control
4839 4698           * Since this conn_t/udp_t is not yet visible to anybody else we don't
4840 4699           * need to lock anything.
4841 4700           */
4842 4701          (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4843 4702          udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4844 4703  
4845 4704          connp->conn_flow_cntrld = B_FALSE;
4846 4705  
4847 4706          mutex_enter(&connp->conn_lock);
4848 4707          connp->conn_state_flags &= ~CONN_INCIPIENT;
4849 4708          mutex_exit(&connp->conn_lock);
4850 4709  
4851 4710          *errorp = 0;
4852 4711          *smodep = SM_ATOMIC;
4853 4712          *sock_downcalls = &sock_udp_downcalls;
4854 4713          return ((sock_lower_handle_t)connp);
4855 4714  }
4856 4715  
4857 4716  /* ARGSUSED3 */
4858 4717  void
4859 4718  udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4860 4719      sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4861 4720  {
4862 4721          conn_t          *connp = (conn_t *)proto_handle;
4863 4722          struct sock_proto_props sopp;
4864 4723  
4865 4724          /* All Solaris components should pass a cred for this operation. */
4866 4725          ASSERT(cr != NULL);
4867 4726  
4868 4727          connp->conn_upcalls = sock_upcalls;
4869 4728          connp->conn_upper_handle = sock_handle;
4870 4729  
4871 4730          sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4872 4731              SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4873 4732          sopp.sopp_wroff = connp->conn_wroff;
4874 4733          sopp.sopp_maxblk = INFPSZ;
4875 4734          sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4876 4735          sopp.sopp_rxlowat = connp->conn_rcvlowat;
4877 4736          sopp.sopp_maxaddrlen = sizeof (sin6_t);
4878 4737          sopp.sopp_maxpsz =
4879 4738              (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4880 4739              UDP_MAXPACKET_IPV6;
4881 4740          sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4882 4741              udp_mod_info.mi_minpsz;
4883 4742  
4884 4743          (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4885 4744              &sopp);
4886 4745  }
4887 4746  
4888 4747  static void
4889 4748  udp_do_close(conn_t *connp)
4890 4749  {
4891 4750          udp_t   *udp;
4892 4751  
4893 4752          ASSERT(connp != NULL && IPCL_IS_UDP(connp));
4894 4753          udp = connp->conn_udp;
4895 4754  
4896 4755          if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
4897 4756                  /*
4898 4757                   * Running in cluster mode - register unbind information
4899 4758                   */
4900 4759                  if (connp->conn_ipversion == IPV4_VERSION) {
4901 4760                          (*cl_inet_unbind)(
4902 4761                              connp->conn_netstack->netstack_stackid,
4903 4762                              IPPROTO_UDP, AF_INET,
4904 4763                              (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
4905 4764                              (in_port_t)connp->conn_lport, NULL);
4906 4765                  } else {
4907 4766                          (*cl_inet_unbind)(
4908 4767                              connp->conn_netstack->netstack_stackid,
4909 4768                              IPPROTO_UDP, AF_INET6,
4910 4769                              (uint8_t *)&(connp->conn_laddr_v6),
4911 4770                              (in_port_t)connp->conn_lport, NULL);
4912 4771                  }
4913 4772          }
4914 4773  
4915 4774          udp_bind_hash_remove(udp, B_FALSE);
4916 4775  
4917 4776          ip_quiesce_conn(connp);
4918 4777  
4919 4778          if (!IPCL_IS_NONSTR(connp)) {
4920 4779                  ASSERT(connp->conn_wq != NULL);
4921 4780                  ASSERT(connp->conn_rq != NULL);
4922 4781                  qprocsoff(connp->conn_rq);
4923 4782          }
4924 4783  
4925 4784          udp_close_free(connp);
4926 4785  
4927 4786          /*
4928 4787           * Now we are truly single threaded on this stream, and can
4929 4788           * delete the things hanging off the connp, and finally the connp.
4930 4789           * We removed this connp from the fanout list, it cannot be
4931 4790           * accessed thru the fanouts, and we already waited for the
4932 4791           * conn_ref to drop to 0. We are already in close, so
4933 4792           * there cannot be any other thread from the top. qprocsoff
4934 4793           * has completed, and service has completed or won't run in
4935 4794           * future.
4936 4795           */
4937 4796          ASSERT(connp->conn_ref == 1);
4938 4797  
4939 4798          if (!IPCL_IS_NONSTR(connp)) {
4940 4799                  inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
4941 4800          } else {
4942 4801                  ip_free_helper_stream(connp);
4943 4802          }
4944 4803  
4945 4804          connp->conn_ref--;
4946 4805          ipcl_conn_destroy(connp);
4947 4806  }
4948 4807  
4949 4808  /* ARGSUSED1 */
4950 4809  int
4951 4810  udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4952 4811  {
4953 4812          conn_t  *connp = (conn_t *)proto_handle;
4954 4813  
4955 4814          /* All Solaris components should pass a cred for this operation. */
4956 4815          ASSERT(cr != NULL);
4957 4816  
4958 4817          udp_do_close(connp);
4959 4818          return (0);
4960 4819  }
4961 4820  
4962 4821  static int
4963 4822  udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
4964 4823      boolean_t bind_to_req_port_only)
4965 4824  {
4966 4825          sin_t           *sin;
4967 4826          sin6_t          *sin6;
4968 4827          udp_t           *udp = connp->conn_udp;
4969 4828          int             error = 0;
4970 4829          ip_laddr_t      laddr_type = IPVL_UNICAST_UP;   /* INADDR_ANY */
4971 4830          in_port_t       port;           /* Host byte order */
4972 4831          in_port_t       requested_port; /* Host byte order */
4973 4832          int             count;
4974 4833          ipaddr_t        v4src;          /* Set if AF_INET */
4975 4834          in6_addr_t      v6src;
4976 4835          int             loopmax;
4977 4836          udp_fanout_t    *udpf;
4978 4837          in_port_t       lport;          /* Network byte order */
4979 4838          uint_t          scopeid = 0;
4980 4839          zoneid_t        zoneid = IPCL_ZONEID(connp);
4981 4840          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
4982 4841          boolean_t       is_inaddr_any;
4983 4842          mlp_type_t      addrtype, mlptype;
4984 4843          udp_stack_t     *us = udp->udp_us;
4985 4844  
4986 4845          switch (len) {
4987 4846          case sizeof (sin_t):    /* Complete IPv4 address */
4988 4847                  sin = (sin_t *)sa;
4989 4848  
4990 4849                  if (sin == NULL || !OK_32PTR((char *)sin))
4991 4850                          return (EINVAL);
4992 4851  
4993 4852                  if (connp->conn_family != AF_INET ||
4994 4853                      sin->sin_family != AF_INET) {
4995 4854                          return (EAFNOSUPPORT);
4996 4855                  }
4997 4856                  v4src = sin->sin_addr.s_addr;
4998 4857                  IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
4999 4858                  if (v4src != INADDR_ANY) {
5000 4859                          laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
5001 4860                              B_TRUE);
5002 4861                  }
5003 4862                  port = ntohs(sin->sin_port);
5004 4863                  break;
5005 4864  
5006 4865          case sizeof (sin6_t):   /* complete IPv6 address */
5007 4866                  sin6 = (sin6_t *)sa;
5008 4867  
5009 4868                  if (sin6 == NULL || !OK_32PTR((char *)sin6))
5010 4869                          return (EINVAL);
5011 4870  
5012 4871                  if (connp->conn_family != AF_INET6 ||
5013 4872                      sin6->sin6_family != AF_INET6) {
5014 4873                          return (EAFNOSUPPORT);
5015 4874                  }
5016 4875                  v6src = sin6->sin6_addr;
5017 4876                  if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5018 4877                          if (connp->conn_ipv6_v6only)
5019 4878                                  return (EADDRNOTAVAIL);
5020 4879  
5021 4880                          IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
5022 4881                          if (v4src != INADDR_ANY) {
5023 4882                                  laddr_type = ip_laddr_verify_v4(v4src,
5024 4883                                      zoneid, ipst, B_FALSE);
5025 4884                          }
5026 4885                  } else {
5027 4886                          if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5028 4887                                  if (IN6_IS_ADDR_LINKSCOPE(&v6src))
5029 4888                                          scopeid = sin6->sin6_scope_id;
5030 4889                                  laddr_type = ip_laddr_verify_v6(&v6src,
5031 4890                                      zoneid, ipst, B_TRUE, scopeid);
5032 4891                          }
5033 4892                  }
5034 4893                  port = ntohs(sin6->sin6_port);
5035 4894                  break;
5036 4895  
5037 4896          default:                /* Invalid request */
5038 4897                  (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5039 4898                      "udp_bind: bad ADDR_length length %u", len);
5040 4899                  return (-TBADADDR);
5041 4900          }
5042 4901  
5043 4902          /* Is the local address a valid unicast, multicast, or broadcast? */
5044 4903          if (laddr_type == IPVL_BAD)
5045 4904                  return (EADDRNOTAVAIL);
5046 4905  
5047 4906          requested_port = port;
5048 4907  
5049 4908          if (requested_port == 0 || !bind_to_req_port_only)
5050 4909                  bind_to_req_port_only = B_FALSE;
5051 4910          else            /* T_BIND_REQ and requested_port != 0 */
5052 4911                  bind_to_req_port_only = B_TRUE;
5053 4912  
5054 4913          if (requested_port == 0) {
5055 4914                  /*
5056 4915                   * If the application passed in zero for the port number, it
5057 4916                   * doesn't care which port number we bind to. Get one in the
5058 4917                   * valid range.
5059 4918                   */
5060 4919                  if (connp->conn_anon_priv_bind) {
5061 4920                          port = udp_get_next_priv_port(udp);
5062 4921                  } else {
5063 4922                          port = udp_update_next_port(udp,
5064 4923                              us->us_next_port_to_try, B_TRUE);
5065 4924                  }
5066 4925          } else {
5067 4926                  /*
5068 4927                   * If the port is in the well-known privileged range,
5069 4928                   * make sure the caller was privileged.
5070 4929                   */
5071 4930                  int i;
5072 4931                  boolean_t priv = B_FALSE;
5073 4932  
5074 4933                  if (port < us->us_smallest_nonpriv_port) {
5075 4934                          priv = B_TRUE;
5076 4935                  } else {
5077 4936                          for (i = 0; i < us->us_num_epriv_ports; i++) {
5078 4937                                  if (port == us->us_epriv_ports[i]) {
5079 4938                                          priv = B_TRUE;
5080 4939                                          break;
5081 4940                                  }
5082 4941                          }
5083 4942                  }
5084 4943  
5085 4944                  if (priv) {
5086 4945                          if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
5087 4946                                  return (-TACCES);
5088 4947                  }
5089 4948          }
5090 4949  
5091 4950          if (port == 0)
5092 4951                  return (-TNOADDR);
5093 4952  
5094 4953          /*
5095 4954           * The state must be TS_UNBND. TPI mandates that users must send
5096 4955           * TPI primitives only 1 at a time and wait for the response before
5097 4956           * sending the next primitive.
5098 4957           */
5099 4958          mutex_enter(&connp->conn_lock);
5100 4959          if (udp->udp_state != TS_UNBND) {
5101 4960                  mutex_exit(&connp->conn_lock);
5102 4961                  (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5103 4962                      "udp_bind: bad state, %u", udp->udp_state);
5104 4963                  return (-TOUTSTATE);
5105 4964          }
5106 4965          /*
5107 4966           * Copy the source address into our udp structure. This address
5108 4967           * may still be zero; if so, IP will fill in the correct address
5109 4968           * each time an outbound packet is passed to it. Since the udp is
5110 4969           * not yet in the bind hash list, we don't grab the uf_lock to
5111 4970           * change conn_ipversion
5112 4971           */
5113 4972          if (connp->conn_family == AF_INET) {
5114 4973                  ASSERT(sin != NULL);
5115 4974                  ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
5116 4975          } else {
5117 4976                  if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5118 4977                          /*
5119 4978                           * no need to hold the uf_lock to set the conn_ipversion
5120 4979                           * since we are not yet in the fanout list
5121 4980                           */
5122 4981                          connp->conn_ipversion = IPV4_VERSION;
5123 4982                          connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
5124 4983                  } else {
5125 4984                          connp->conn_ipversion = IPV6_VERSION;
5126 4985                          connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
5127 4986                  }
5128 4987          }
5129 4988  
5130 4989          /*
5131 4990           * If conn_reuseaddr is not set, then we have to make sure that
5132 4991           * the IP address and port number the application requested
5133 4992           * (or we selected for the application) is not being used by
5134 4993           * another stream.  If another stream is already using the
5135 4994           * requested IP address and port, the behavior depends on
5136 4995           * "bind_to_req_port_only". If set the bind fails; otherwise we
5137 4996           * search for any an unused port to bind to the stream.
5138 4997           *
5139 4998           * As per the BSD semantics, as modified by the Deering multicast
5140 4999           * changes, if udp_reuseaddr is set, then we allow multiple binds
5141 5000           * to the same port independent of the local IP address.
5142 5001           *
5143 5002           * This is slightly different than in SunOS 4.X which did not
5144 5003           * support IP multicast. Note that the change implemented by the
5145 5004           * Deering multicast code effects all binds - not only binding
5146 5005           * to IP multicast addresses.
5147 5006           *
5148 5007           * Note that when binding to port zero we ignore SO_REUSEADDR in
5149 5008           * order to guarantee a unique port.
5150 5009           */
5151 5010  
5152 5011          count = 0;
5153 5012          if (connp->conn_anon_priv_bind) {
5154 5013                  /*
5155 5014                   * loopmax = (IPPORT_RESERVED-1) -
5156 5015                   *    us->us_min_anonpriv_port + 1
5157 5016                   */
5158 5017                  loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
5159 5018          } else {
5160 5019                  loopmax = us->us_largest_anon_port -
5161 5020                      us->us_smallest_anon_port + 1;
5162 5021          }
5163 5022  
5164 5023          is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
5165 5024  
5166 5025          for (;;) {
5167 5026                  udp_t           *udp1;
5168 5027                  boolean_t       found_exclbind = B_FALSE;
5169 5028                  conn_t          *connp1;
5170 5029  
5171 5030                  /*
5172 5031                   * Walk through the list of udp streams bound to
5173 5032                   * requested port with the same IP address.
5174 5033                   */
5175 5034                  lport = htons(port);
5176 5035                  udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5177 5036                      us->us_bind_fanout_size)];
5178 5037                  mutex_enter(&udpf->uf_lock);
5179 5038                  for (udp1 = udpf->uf_udp; udp1 != NULL;
5180 5039                      udp1 = udp1->udp_bind_hash) {
5181 5040                          connp1 = udp1->udp_connp;
5182 5041  
5183 5042                          if (lport != connp1->conn_lport)
5184 5043                                  continue;
5185 5044  
5186 5045                          /*
5187 5046                           * On a labeled system, we must treat bindings to ports
5188 5047                           * on shared IP addresses by sockets with MAC exemption
5189 5048                           * privilege as being in all zones, as there's
5190 5049                           * otherwise no way to identify the right receiver.
5191 5050                           */
5192 5051                          if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5193 5052                                  continue;
5194 5053  
5195 5054                          /*
5196 5055                           * If UDP_EXCLBIND is set for either the bound or
5197 5056                           * binding endpoint, the semantics of bind
5198 5057                           * is changed according to the following chart.
5199 5058                           *
5200 5059                           * spec = specified address (v4 or v6)
5201 5060                           * unspec = unspecified address (v4 or v6)
5202 5061                           * A = specified addresses are different for endpoints
5203 5062                           *
5204 5063                           * bound        bind to         allowed?
5205 5064                           * -------------------------------------
5206 5065                           * unspec       unspec          no
5207 5066                           * unspec       spec            no
5208 5067                           * spec         unspec          no
5209 5068                           * spec         spec            yes if A
5210 5069                           *
5211 5070                           * For labeled systems, SO_MAC_EXEMPT behaves the same
5212 5071                           * as UDP_EXCLBIND, except that zoneid is ignored.
5213 5072                           */
5214 5073                          if (connp1->conn_exclbind || connp->conn_exclbind ||
5215 5074                              IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5216 5075                                  if (V6_OR_V4_INADDR_ANY(
5217 5076                                      connp1->conn_bound_addr_v6) ||
5218 5077                                      is_inaddr_any ||
5219 5078                                      IN6_ARE_ADDR_EQUAL(
5220 5079                                      &connp1->conn_bound_addr_v6,
5221 5080                                      &v6src)) {
5222 5081                                          found_exclbind = B_TRUE;
5223 5082                                          break;
5224 5083                                  }
5225 5084                                  continue;
5226 5085                          }
5227 5086  
5228 5087                          /*
5229 5088                           * Check ipversion to allow IPv4 and IPv6 sockets to
5230 5089                           * have disjoint port number spaces.
5231 5090                           */
5232 5091                          if (connp->conn_ipversion != connp1->conn_ipversion) {
5233 5092  
5234 5093                                  /*
5235 5094                                   * On the first time through the loop, if the
5236 5095                                   * the user intentionally specified a
5237 5096                                   * particular port number, then ignore any
5238 5097                                   * bindings of the other protocol that may
5239 5098                                   * conflict. This allows the user to bind IPv6
5240 5099                                   * alone and get both v4 and v6, or bind both
5241 5100                                   * both and get each seperately. On subsequent
5242 5101                                   * times through the loop, we're checking a
5243 5102                                   * port that we chose (not the user) and thus
5244 5103                                   * we do not allow casual duplicate bindings.
5245 5104                                   */
5246 5105                                  if (count == 0 && requested_port != 0)
5247 5106                                          continue;
5248 5107                          }
5249 5108  
5250 5109                          /*
5251 5110                           * No difference depending on SO_REUSEADDR.
5252 5111                           *
5253 5112                           * If existing port is bound to a
5254 5113                           * non-wildcard IP address and
5255 5114                           * the requesting stream is bound to
5256 5115                           * a distinct different IP addresses
5257 5116                           * (non-wildcard, also), keep going.
5258 5117                           */
5259 5118                          if (!is_inaddr_any &&
5260 5119                              !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5261 5120                              !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5262 5121                              &v6src)) {
5263 5122                                  continue;
5264 5123                          }
5265 5124                          break;
5266 5125                  }
5267 5126  
5268 5127                  if (!found_exclbind &&
5269 5128                      (connp->conn_reuseaddr && requested_port != 0)) {
5270 5129                          break;
5271 5130                  }
5272 5131  
5273 5132                  if (udp1 == NULL) {
5274 5133                          /*
5275 5134                           * No other stream has this IP address
5276 5135                           * and port number. We can use it.
5277 5136                           */
5278 5137                          break;
5279 5138                  }
5280 5139                  mutex_exit(&udpf->uf_lock);
5281 5140                  if (bind_to_req_port_only) {
5282 5141                          /*
5283 5142                           * We get here only when requested port
5284 5143                           * is bound (and only first  of the for()
5285 5144                           * loop iteration).
5286 5145                           *
5287 5146                           * The semantics of this bind request
5288 5147                           * require it to fail so we return from
5289 5148                           * the routine (and exit the loop).
5290 5149                           *
5291 5150                           */
5292 5151                          mutex_exit(&connp->conn_lock);
5293 5152                          return (-TADDRBUSY);
5294 5153                  }
5295 5154  
5296 5155                  if (connp->conn_anon_priv_bind) {
5297 5156                          port = udp_get_next_priv_port(udp);
5298 5157                  } else {
5299 5158                          if ((count == 0) && (requested_port != 0)) {
5300 5159                                  /*
5301 5160                                   * If the application wants us to find
5302 5161                                   * a port, get one to start with. Set
5303 5162                                   * requested_port to 0, so that we will
5304 5163                                   * update us->us_next_port_to_try below.
5305 5164                                   */
5306 5165                                  port = udp_update_next_port(udp,
5307 5166                                      us->us_next_port_to_try, B_TRUE);
5308 5167                                  requested_port = 0;
5309 5168                          } else {
5310 5169                                  port = udp_update_next_port(udp, port + 1,
5311 5170                                      B_FALSE);
5312 5171                          }
5313 5172                  }
5314 5173  
5315 5174                  if (port == 0 || ++count >= loopmax) {
5316 5175                          /*
5317 5176                           * We've tried every possible port number and
5318 5177                           * there are none available, so send an error
5319 5178                           * to the user.
5320 5179                           */
5321 5180                          mutex_exit(&connp->conn_lock);
5322 5181                          return (-TNOADDR);
5323 5182                  }
5324 5183          }
5325 5184  
5326 5185          /*
5327 5186           * Copy the source address into our udp structure.  This address
5328 5187           * may still be zero; if so, ip_attr_connect will fill in the correct
5329 5188           * address when a packet is about to be sent.
5330 5189           * If we are binding to a broadcast or multicast address then
5331 5190           * we just set the conn_bound_addr since we don't want to use
5332 5191           * that as the source address when sending.
5333 5192           */
5334 5193          connp->conn_bound_addr_v6 = v6src;
5335 5194          connp->conn_laddr_v6 = v6src;
5336 5195          if (scopeid != 0) {
5337 5196                  connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5338 5197                  connp->conn_ixa->ixa_scopeid = scopeid;
5339 5198                  connp->conn_incoming_ifindex = scopeid;
5340 5199          } else {
5341 5200                  connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5342 5201                  connp->conn_incoming_ifindex = connp->conn_bound_if;
5343 5202          }
5344 5203  
5345 5204          switch (laddr_type) {
5346 5205          case IPVL_UNICAST_UP:
5347 5206          case IPVL_UNICAST_DOWN:
5348 5207                  connp->conn_saddr_v6 = v6src;
5349 5208                  connp->conn_mcbc_bind = B_FALSE;
5350 5209                  break;
5351 5210          case IPVL_MCAST:
5352 5211          case IPVL_BCAST:
5353 5212                  /* ip_set_destination will pick a source address later */
5354 5213                  connp->conn_saddr_v6 = ipv6_all_zeros;
5355 5214                  connp->conn_mcbc_bind = B_TRUE;
5356 5215                  break;
5357 5216          }
5358 5217  
5359 5218          /* Any errors after this point should use late_error */
5360 5219          connp->conn_lport = lport;
5361 5220  
5362 5221          /*
5363 5222           * Now reset the next anonymous port if the application requested
5364 5223           * an anonymous port, or we handed out the next anonymous port.
5365 5224           */
5366 5225          if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5367 5226                  us->us_next_port_to_try = port + 1;
5368 5227          }
5369 5228  
5370 5229          /* Initialize the T_BIND_ACK. */
5371 5230          if (connp->conn_family == AF_INET) {
5372 5231                  sin->sin_port = connp->conn_lport;
5373 5232          } else {
5374 5233                  sin6->sin6_port = connp->conn_lport;
5375 5234          }
5376 5235          udp->udp_state = TS_IDLE;
5377 5236          udp_bind_hash_insert(udpf, udp);
5378 5237          mutex_exit(&udpf->uf_lock);
5379 5238          mutex_exit(&connp->conn_lock);
5380 5239  
5381 5240          if (cl_inet_bind) {
5382 5241                  /*
5383 5242                   * Running in cluster mode - register bind information
5384 5243                   */
5385 5244                  if (connp->conn_ipversion == IPV4_VERSION) {
5386 5245                          (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5387 5246                              IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5388 5247                              (in_port_t)connp->conn_lport, NULL);
5389 5248                  } else {
5390 5249                          (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5391 5250                              IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5392 5251                              (in_port_t)connp->conn_lport, NULL);
5393 5252                  }
5394 5253          }
5395 5254  
5396 5255          mutex_enter(&connp->conn_lock);
5397 5256          connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5398 5257          if (is_system_labeled() && (!connp->conn_anon_port ||
5399 5258              connp->conn_anon_mlp)) {
5400 5259                  uint16_t mlpport;
5401 5260                  zone_t *zone;
5402 5261  
5403 5262                  zone = crgetzone(cr);
5404 5263                  connp->conn_mlp_type =
5405 5264                      connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5406 5265                      mlptSingle;
5407 5266                  addrtype = tsol_mlp_addr_type(
5408 5267                      connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5409 5268                      IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5410 5269                  if (addrtype == mlptSingle) {
5411 5270                          error = -TNOADDR;
5412 5271                          mutex_exit(&connp->conn_lock);
5413 5272                          goto late_error;
5414 5273                  }
5415 5274                  mlpport = connp->conn_anon_port ? PMAPPORT : port;
5416 5275                  mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5417 5276                      addrtype);
5418 5277  
5419 5278                  /*
5420 5279                   * It is a coding error to attempt to bind an MLP port
5421 5280                   * without first setting SOL_SOCKET/SCM_UCRED.
5422 5281                   */
5423 5282                  if (mlptype != mlptSingle &&
5424 5283                      connp->conn_mlp_type == mlptSingle) {
5425 5284                          error = EINVAL;
5426 5285                          mutex_exit(&connp->conn_lock);
5427 5286                          goto late_error;
5428 5287                  }
5429 5288  
5430 5289                  /*
5431 5290                   * It is an access violation to attempt to bind an MLP port
5432 5291                   * without NET_BINDMLP privilege.
5433 5292                   */
5434 5293                  if (mlptype != mlptSingle &&
5435 5294                      secpolicy_net_bindmlp(cr) != 0) {
5436 5295                          if (connp->conn_debug) {
5437 5296                                  (void) strlog(UDP_MOD_ID, 0, 1,
5438 5297                                      SL_ERROR|SL_TRACE,
5439 5298                                      "udp_bind: no priv for multilevel port %d",
5440 5299                                      mlpport);
5441 5300                          }
5442 5301                          error = -TACCES;
5443 5302                          mutex_exit(&connp->conn_lock);
5444 5303                          goto late_error;
5445 5304                  }
5446 5305  
5447 5306                  /*
5448 5307                   * If we're specifically binding a shared IP address and the
5449 5308                   * port is MLP on shared addresses, then check to see if this
5450 5309                   * zone actually owns the MLP.  Reject if not.
5451 5310                   */
5452 5311                  if (mlptype == mlptShared && addrtype == mlptShared) {
5453 5312                          /*
5454 5313                           * No need to handle exclusive-stack zones since
5455 5314                           * ALL_ZONES only applies to the shared stack.
5456 5315                           */
5457 5316                          zoneid_t mlpzone;
5458 5317  
5459 5318                          mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5460 5319                              htons(mlpport));
5461 5320                          if (connp->conn_zoneid != mlpzone) {
5462 5321                                  if (connp->conn_debug) {
5463 5322                                          (void) strlog(UDP_MOD_ID, 0, 1,
5464 5323                                              SL_ERROR|SL_TRACE,
5465 5324                                              "udp_bind: attempt to bind port "
5466 5325                                              "%d on shared addr in zone %d "
5467 5326                                              "(should be %d)",
5468 5327                                              mlpport, connp->conn_zoneid,
5469 5328                                              mlpzone);
5470 5329                                  }
5471 5330                                  error = -TACCES;
5472 5331                                  mutex_exit(&connp->conn_lock);
5473 5332                                  goto late_error;
5474 5333                          }
5475 5334                  }
5476 5335                  if (connp->conn_anon_port) {
5477 5336                          error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5478 5337                              port, B_TRUE);
5479 5338                          if (error != 0) {
5480 5339                                  if (connp->conn_debug) {
5481 5340                                          (void) strlog(UDP_MOD_ID, 0, 1,
5482 5341                                              SL_ERROR|SL_TRACE,
5483 5342                                              "udp_bind: cannot establish anon "
5484 5343                                              "MLP for port %d", port);
5485 5344                                  }
5486 5345                                  error = -TACCES;
5487 5346                                  mutex_exit(&connp->conn_lock);
5488 5347                                  goto late_error;
5489 5348                          }
5490 5349                  }
5491 5350                  connp->conn_mlp_type = mlptype;
5492 5351          }
5493 5352  
5494 5353          /*
5495 5354           * We create an initial header template here to make a subsequent
5496 5355           * sendto have a starting point. Since conn_last_dst is zero the
5497 5356           * first sendto will always follow the 'dst changed' code path.
5498 5357           * Note that we defer massaging options and the related checksum
5499 5358           * adjustment until we have a destination address.
5500 5359           */
5501 5360          error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5502 5361              &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5503 5362          if (error != 0) {
5504 5363                  mutex_exit(&connp->conn_lock);
5505 5364                  goto late_error;
5506 5365          }
5507 5366          /* Just in case */
5508 5367          connp->conn_faddr_v6 = ipv6_all_zeros;
5509 5368          connp->conn_fport = 0;
5510 5369          connp->conn_v6lastdst = ipv6_all_zeros;
5511 5370          mutex_exit(&connp->conn_lock);
5512 5371  
5513 5372          error = ip_laddr_fanout_insert(connp);
5514 5373          if (error != 0)
5515 5374                  goto late_error;
5516 5375  
5517 5376          /* Bind succeeded */
5518 5377          return (0);
5519 5378  
5520 5379  late_error:
5521 5380          /* We had already picked the port number, and then the bind failed */
5522 5381          mutex_enter(&connp->conn_lock);
5523 5382          udpf = &us->us_bind_fanout[
5524 5383              UDP_BIND_HASH(connp->conn_lport,
5525 5384              us->us_bind_fanout_size)];
5526 5385          mutex_enter(&udpf->uf_lock);
5527 5386          connp->conn_saddr_v6 = ipv6_all_zeros;
5528 5387          connp->conn_bound_addr_v6 = ipv6_all_zeros;
5529 5388          connp->conn_laddr_v6 = ipv6_all_zeros;
5530 5389          if (scopeid != 0) {
5531 5390                  connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5532 5391                  connp->conn_incoming_ifindex = connp->conn_bound_if;
5533 5392          }
5534 5393          udp->udp_state = TS_UNBND;
5535 5394          udp_bind_hash_remove(udp, B_TRUE);
5536 5395          connp->conn_lport = 0;
5537 5396          mutex_exit(&udpf->uf_lock);
5538 5397          connp->conn_anon_port = B_FALSE;
5539 5398          connp->conn_mlp_type = mlptSingle;
5540 5399  
5541 5400          connp->conn_v6lastdst = ipv6_all_zeros;
5542 5401  
5543 5402          /* Restore the header that was built above - different source address */
5544 5403          (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5545 5404              &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5546 5405          mutex_exit(&connp->conn_lock);
5547 5406          return (error);
5548 5407  }
5549 5408  
5550 5409  int
5551 5410  udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5552 5411      socklen_t len, cred_t *cr)
5553 5412  {
5554 5413          int             error;
5555 5414          conn_t          *connp;
5556 5415  
5557 5416          /* All Solaris components should pass a cred for this operation. */
5558 5417          ASSERT(cr != NULL);
5559 5418  
5560 5419          connp = (conn_t *)proto_handle;
5561 5420  
5562 5421          if (sa == NULL)
5563 5422                  error = udp_do_unbind(connp);
5564 5423          else
5565 5424                  error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5566 5425  
5567 5426          if (error < 0) {
5568 5427                  if (error == -TOUTSTATE)
5569 5428                          error = EINVAL;
5570 5429                  else
5571 5430                          error = proto_tlitosyserr(-error);
5572 5431          }
5573 5432  
5574 5433          return (error);
5575 5434  }
5576 5435  
5577 5436  static int
5578 5437  udp_implicit_bind(conn_t *connp, cred_t *cr)
5579 5438  {
5580 5439          sin6_t sin6addr;
5581 5440          sin_t *sin;
5582 5441          sin6_t *sin6;
5583 5442          socklen_t len;
5584 5443          int error;
5585 5444  
5586 5445          /* All Solaris components should pass a cred for this operation. */
5587 5446          ASSERT(cr != NULL);
5588 5447  
5589 5448          if (connp->conn_family == AF_INET) {
5590 5449                  len = sizeof (struct sockaddr_in);
5591 5450                  sin = (sin_t *)&sin6addr;
5592 5451                  *sin = sin_null;
5593 5452                  sin->sin_family = AF_INET;
5594 5453                  sin->sin_addr.s_addr = INADDR_ANY;
5595 5454          } else {
5596 5455                  ASSERT(connp->conn_family == AF_INET6);
5597 5456                  len = sizeof (sin6_t);
5598 5457                  sin6 = (sin6_t *)&sin6addr;
5599 5458                  *sin6 = sin6_null;
5600 5459                  sin6->sin6_family = AF_INET6;
5601 5460                  V6_SET_ZERO(sin6->sin6_addr);
5602 5461          }
5603 5462  
5604 5463          error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5605 5464              cr, B_FALSE);
5606 5465          return ((error < 0) ? proto_tlitosyserr(-error) : error);
5607 5466  }
5608 5467  
/*
 * This routine removes a port number association from a stream. It
 * is called by udp_unbind and udp_tpi_unbind, and by udp_bind() when
 * that is handed a NULL address.
 *
 * Returns 0 on success, or -TOUTSTATE (a negated TLI error) if the
 * endpoint is in TS_UNBND, i.e. no bind has been done.
 *
 * Locking: takes connp->conn_lock, then the uf_lock of the bind-fanout
 * bucket selected by the current conn_lport.  Both are dropped before
 * ip_unbind() is called.
 */
static int
udp_do_unbind(conn_t *connp)
{
        udp_t           *udp = connp->conn_udp;
        udp_fanout_t    *udpf;
        udp_stack_t     *us = udp->udp_us;

        /*
         * NOTE(review): the cluster hook is invoked before conn_lock is
         * taken and before the TS_UNBND check below, so it also runs for
         * an endpoint that turns out not to be bound - confirm that this
         * is intended.
         */
        if (cl_inet_unbind != NULL) {
                /*
                 * Running in cluster mode - register unbind information
                 */
                if (connp->conn_ipversion == IPV4_VERSION) {
                        (*cl_inet_unbind)(
                            connp->conn_netstack->netstack_stackid,
                            IPPROTO_UDP, AF_INET,
                            (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
                            (in_port_t)connp->conn_lport, NULL);
                } else {
                        (*cl_inet_unbind)(
                            connp->conn_netstack->netstack_stackid,
                            IPPROTO_UDP, AF_INET6,
                            (uint8_t *)&(connp->conn_laddr_v6),
                            (in_port_t)connp->conn_lport, NULL);
                }
        }

        mutex_enter(&connp->conn_lock);
        /* If a bind has not been done, we can't unbind. */
        if (udp->udp_state == TS_UNBND) {
                mutex_exit(&connp->conn_lock);
                return (-TOUTSTATE);
        }
        /*
         * Locate the bucket while conn_lport still holds the bound port;
         * it is zeroed further down.
         */
        udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
            us->us_bind_fanout_size)];
        mutex_enter(&udpf->uf_lock);
        /* Unhash, then wipe the local address/port, all under uf_lock. */
        udp_bind_hash_remove(udp, B_TRUE);
        connp->conn_saddr_v6 = ipv6_all_zeros;
        connp->conn_bound_addr_v6 = ipv6_all_zeros;
        connp->conn_laddr_v6 = ipv6_all_zeros;
        connp->conn_mcbc_bind = B_FALSE;
        connp->conn_lport = 0;
        /* In case we were also connected */
        connp->conn_faddr_v6 = ipv6_all_zeros;
        connp->conn_fport = 0;
        mutex_exit(&udpf->uf_lock);

        /* Still under conn_lock: forget the cached last destination. */
        connp->conn_v6lastdst = ipv6_all_zeros;
        udp->udp_state = TS_UNBND;

        /*
         * Rebuild the header template from the now-zeroed addresses and
         * ports.  The return value is deliberately discarded.
         */
        (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
            &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
        mutex_exit(&connp->conn_lock);

        /* Called with no locks held. */
        ip_unbind(connp);

        return (0);
}
5670 5529  
5671 5530  /*
5672 5531   * It associates a default destination address with the stream.
5673 5532   */
5674 5533  static int
5675 5534  udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5676 5535      cred_t *cr, pid_t pid)
5677 5536  {
5678 5537          sin6_t          *sin6;
5679 5538          sin_t           *sin;
5680 5539          in6_addr_t      v6dst;
5681 5540          ipaddr_t        v4dst;
5682 5541          uint16_t        dstport;
5683 5542          uint32_t        flowinfo;
5684 5543          udp_fanout_t    *udpf;
5685 5544          udp_t           *udp, *udp1;
5686 5545          ushort_t        ipversion;
5687 5546          udp_stack_t     *us;
5688 5547          int             error;
5689 5548          conn_t          *connp1;
5690 5549          ip_xmit_attr_t  *ixa;
5691 5550          ip_xmit_attr_t  *oldixa;
5692 5551          uint_t          scopeid = 0;
5693 5552          uint_t          srcid = 0;
5694 5553          in6_addr_t      v6src = connp->conn_saddr_v6;
5695 5554          boolean_t       v4mapped;
5696 5555  
5697 5556          udp = connp->conn_udp;
5698 5557          us = udp->udp_us;
5699 5558  
5700 5559          /*
5701 5560           * Address has been verified by the caller
5702 5561           */
5703 5562          switch (len) {
5704 5563          default:
5705 5564                  /*
5706 5565                   * Should never happen
5707 5566                   */
5708 5567                  return (EINVAL);
5709 5568  
5710 5569          case sizeof (sin_t):
5711 5570                  sin = (sin_t *)sa;
5712 5571                  v4dst = sin->sin_addr.s_addr;
5713 5572                  dstport = sin->sin_port;
5714 5573                  IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5715 5574                  ASSERT(connp->conn_ipversion == IPV4_VERSION);
5716 5575                  ipversion = IPV4_VERSION;
5717 5576                  break;
5718 5577  
5719 5578          case sizeof (sin6_t):
5720 5579                  sin6 = (sin6_t *)sa;
5721 5580                  v6dst = sin6->sin6_addr;
5722 5581                  dstport = sin6->sin6_port;
5723 5582                  srcid = sin6->__sin6_src_id;
5724 5583                  v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
5725 5584                  if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5726 5585                          if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5727 5586                              v4mapped, connp->conn_netstack)) {
5728 5587                                  /* Mismatch v4mapped/v6 specified by srcid. */
5729 5588                                  return (EADDRNOTAVAIL);
5730 5589                          }
5731 5590                  }
5732 5591                  if (v4mapped) {
5733 5592                          if (connp->conn_ipv6_v6only)
5734 5593                                  return (EADDRNOTAVAIL);
5735 5594  
5736 5595                          /*
5737 5596                           * Destination address is mapped IPv6 address.
5738 5597                           * Source bound address should be unspecified or
5739 5598                           * IPv6 mapped address as well.
5740 5599                           */
5741 5600                          if (!IN6_IS_ADDR_UNSPECIFIED(
5742 5601                              &connp->conn_bound_addr_v6) &&
5743 5602                              !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5744 5603                                  return (EADDRNOTAVAIL);
5745 5604                          }
5746 5605                          IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5747 5606                          ipversion = IPV4_VERSION;
5748 5607                          flowinfo = 0;
5749 5608                  } else {
5750 5609                          ipversion = IPV6_VERSION;
5751 5610                          flowinfo = sin6->sin6_flowinfo;
5752 5611                          if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5753 5612                                  scopeid = sin6->sin6_scope_id;
5754 5613                  }
5755 5614                  break;
5756 5615          }
5757 5616  
5758 5617          if (dstport == 0)
5759 5618                  return (-TBADADDR);
5760 5619  
5761 5620          /*
5762 5621           * If there is a different thread using conn_ixa then we get a new
5763 5622           * copy and cut the old one loose from conn_ixa. Otherwise we use
5764 5623           * conn_ixa and prevent any other thread from using/changing it.
5765 5624           * Once connect() is done other threads can use conn_ixa since the
5766 5625           * refcnt will be back at one.
5767 5626           * We defer updating conn_ixa until later to handle any concurrent
5768 5627           * conn_ixa_cleanup thread.
5769 5628           */
5770 5629          ixa = conn_get_ixa(connp, B_FALSE);
5771 5630          if (ixa == NULL)
5772 5631                  return (ENOMEM);
5773 5632  
5774 5633          mutex_enter(&connp->conn_lock);
5775 5634          /*
5776 5635           * This udp_t must have bound to a port already before doing a connect.
5777 5636           * Reject if a connect is in progress (we drop conn_lock during
5778 5637           * udp_do_connect).
5779 5638           */
5780 5639          if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5781 5640                  mutex_exit(&connp->conn_lock);
5782 5641                  (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5783 5642                      "udp_connect: bad state, %u", udp->udp_state);
5784 5643                  ixa_refrele(ixa);
5785 5644                  return (-TOUTSTATE);
5786 5645          }
5787 5646          ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5788 5647  
5789 5648          udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5790 5649              us->us_bind_fanout_size)];
5791 5650  
5792 5651          mutex_enter(&udpf->uf_lock);
5793 5652          if (udp->udp_state == TS_DATA_XFER) {
5794 5653                  /* Already connected - clear out state */
5795 5654                  if (connp->conn_mcbc_bind)
5796 5655                          connp->conn_saddr_v6 = ipv6_all_zeros;
5797 5656                  else
5798 5657                          connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5799 5658                  connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5800 5659                  connp->conn_faddr_v6 = ipv6_all_zeros;
5801 5660                  connp->conn_fport = 0;
5802 5661                  udp->udp_state = TS_IDLE;
5803 5662          }
5804 5663  
5805 5664          connp->conn_fport = dstport;
5806 5665          connp->conn_ipversion = ipversion;
5807 5666          if (ipversion == IPV4_VERSION) {
5808 5667                  /*
5809 5668                   * Interpret a zero destination to mean loopback.
5810 5669                   * Update the T_CONN_REQ (sin/sin6) since it is used to
5811 5670                   * generate the T_CONN_CON.
5812 5671                   */
5813 5672                  if (v4dst == INADDR_ANY) {
5814 5673                          v4dst = htonl(INADDR_LOOPBACK);
5815 5674                          IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5816 5675                          if (connp->conn_family == AF_INET) {
5817 5676                                  sin->sin_addr.s_addr = v4dst;
5818 5677                          } else {
5819 5678                                  sin6->sin6_addr = v6dst;
5820 5679                          }
5821 5680                  }
5822 5681                  connp->conn_faddr_v6 = v6dst;
5823 5682                  connp->conn_flowinfo = 0;
5824 5683          } else {
5825 5684                  ASSERT(connp->conn_ipversion == IPV6_VERSION);
5826 5685                  /*
5827 5686                   * Interpret a zero destination to mean loopback.
5828 5687                   * Update the T_CONN_REQ (sin/sin6) since it is used to
5829 5688                   * generate the T_CONN_CON.
5830 5689                   */
5831 5690                  if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
5832 5691                          v6dst = ipv6_loopback;
5833 5692                          sin6->sin6_addr = v6dst;
5834 5693                  }
5835 5694                  connp->conn_faddr_v6 = v6dst;
5836 5695                  connp->conn_flowinfo = flowinfo;
5837 5696          }
5838 5697          mutex_exit(&udpf->uf_lock);
5839 5698  
5840 5699          /*
5841 5700           * We update our cred/cpid based on the caller of connect
5842 5701           */
5843 5702          if (connp->conn_cred != cr) {
5844 5703                  crhold(cr);
5845 5704                  crfree(connp->conn_cred);
5846 5705                  connp->conn_cred = cr;
5847 5706          }
5848 5707          connp->conn_cpid = pid;
5849 5708          ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
5850 5709          ixa->ixa_cred = cr;
5851 5710          ixa->ixa_cpid = pid;
5852 5711          if (is_system_labeled()) {
5853 5712                  /* We need to restart with a label based on the cred */
5854 5713                  ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
5855 5714          }
5856 5715  
5857 5716          if (scopeid != 0) {
5858 5717                  ixa->ixa_flags |= IXAF_SCOPEID_SET;
5859 5718                  ixa->ixa_scopeid = scopeid;
5860 5719                  connp->conn_incoming_ifindex = scopeid;
5861 5720          } else {
5862 5721                  ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5863 5722                  connp->conn_incoming_ifindex = connp->conn_bound_if;
5864 5723          }
5865 5724          /*
5866 5725           * conn_connect will drop conn_lock and reacquire it.
5867 5726           * To prevent a send* from messing with this udp_t while the lock
5868 5727           * is dropped we set udp_state and clear conn_v6lastdst.
5869 5728           * That will make all send* fail with EISCONN.
5870 5729           */
5871 5730          connp->conn_v6lastdst = ipv6_all_zeros;
5872 5731          udp->udp_state = TS_WCON_CREQ;
5873 5732  
5874 5733          error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
5875 5734          mutex_exit(&connp->conn_lock);
5876 5735          if (error != 0)
5877 5736                  goto connect_failed;
5878 5737  
5879 5738          /*
5880 5739           * The addresses have been verified. Time to insert in
5881 5740           * the correct fanout list.
5882 5741           */
5883 5742          error = ipcl_conn_insert(connp);
5884 5743          if (error != 0)
5885 5744                  goto connect_failed;
5886 5745  
5887 5746          mutex_enter(&connp->conn_lock);
5888 5747          error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5889 5748              &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5890 5749          if (error != 0) {
5891 5750                  mutex_exit(&connp->conn_lock);
5892 5751                  goto connect_failed;
5893 5752          }
5894 5753  
5895 5754          udp->udp_state = TS_DATA_XFER;
5896 5755          /* Record this as the "last" send even though we haven't sent any */
5897 5756          connp->conn_v6lastdst = connp->conn_faddr_v6;
5898 5757          connp->conn_lastipversion = connp->conn_ipversion;
5899 5758          connp->conn_lastdstport = connp->conn_fport;
5900 5759          connp->conn_lastflowinfo = connp->conn_flowinfo;
5901 5760          connp->conn_lastscopeid = scopeid;
5902 5761          connp->conn_lastsrcid = srcid;
5903 5762          /* Also remember a source to use together with lastdst */
5904 5763          connp->conn_v6lastsrc = v6src;
5905 5764  
5906 5765          oldixa = conn_replace_ixa(connp, ixa);
5907 5766          mutex_exit(&connp->conn_lock);
5908 5767          ixa_refrele(oldixa);
5909 5768  
5910 5769          /*
5911 5770           * We've picked a source address above. Now we can
5912 5771           * verify that the src/port/dst/port is unique for all
5913 5772           * connections in TS_DATA_XFER, skipping ourselves.
5914 5773           */
5915 5774          mutex_enter(&udpf->uf_lock);
5916 5775          for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
5917 5776                  if (udp1->udp_state != TS_DATA_XFER)
5918 5777                          continue;
5919 5778  
5920 5779                  if (udp1 == udp)
5921 5780                          continue;
5922 5781  
5923 5782                  connp1 = udp1->udp_connp;
5924 5783                  if (connp->conn_lport != connp1->conn_lport ||
5925 5784                      connp->conn_ipversion != connp1->conn_ipversion ||
5926 5785                      dstport != connp1->conn_fport ||
5927 5786                      !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
5928 5787                      &connp1->conn_laddr_v6) ||
5929 5788                      !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
5930 5789                      !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
5931 5790                      IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
5932 5791                          continue;
5933 5792                  mutex_exit(&udpf->uf_lock);
5934 5793                  error = -TBADADDR;
5935 5794                  goto connect_failed;
5936 5795          }
5937 5796          if (cl_inet_connect2 != NULL) {
5938 5797                  CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
5939 5798                  if (error != 0) {
5940 5799                          mutex_exit(&udpf->uf_lock);
5941 5800                          error = -TBADADDR;
5942 5801                          goto connect_failed;
5943 5802                  }
5944 5803          }
5945 5804          mutex_exit(&udpf->uf_lock);
5946 5805  
5947 5806          ixa_refrele(ixa);
5948 5807          return (0);
5949 5808  
5950 5809  connect_failed:
5951 5810          if (ixa != NULL)
5952 5811                  ixa_refrele(ixa);
5953 5812          mutex_enter(&connp->conn_lock);
5954 5813          mutex_enter(&udpf->uf_lock);
5955 5814          udp->udp_state = TS_IDLE;
5956 5815          connp->conn_faddr_v6 = ipv6_all_zeros;
5957 5816          connp->conn_fport = 0;
5958 5817          /* In case the source address was set above */
5959 5818          if (connp->conn_mcbc_bind)
5960 5819                  connp->conn_saddr_v6 = ipv6_all_zeros;
5961 5820          else
5962 5821                  connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5963 5822          connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5964 5823          mutex_exit(&udpf->uf_lock);
5965 5824  
5966 5825          connp->conn_v6lastdst = ipv6_all_zeros;
5967 5826          connp->conn_flowinfo = 0;
5968 5827  
5969 5828          (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5970 5829              &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5971 5830          mutex_exit(&connp->conn_lock);
5972 5831          return (error);
5973 5832  }
5974 5833  
5975 5834  static int
5976 5835  udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5977 5836      socklen_t len, sock_connid_t *id, cred_t *cr)
5978 5837  {
5979 5838          conn_t  *connp = (conn_t *)proto_handle;
5980 5839          udp_t   *udp = connp->conn_udp;
5981 5840          int     error;
5982 5841          boolean_t did_bind = B_FALSE;
5983 5842          pid_t   pid = curproc->p_pid;
5984 5843  
5985 5844          /* All Solaris components should pass a cred for this operation. */
5986 5845          ASSERT(cr != NULL);
5987 5846  
5988 5847          if (sa == NULL) {
5989 5848                  /*
5990 5849                   * Disconnect
5991 5850                   * Make sure we are connected
5992 5851                   */
5993 5852                  if (udp->udp_state != TS_DATA_XFER)
5994 5853                          return (EINVAL);
5995 5854  
5996 5855                  error = udp_disconnect(connp);
5997 5856                  return (error);
5998 5857          }
5999 5858  
6000 5859          error = proto_verify_ip_addr(connp->conn_family, sa, len);
6001 5860          if (error != 0)
6002 5861                  goto done;
6003 5862  
6004 5863          /* do an implicit bind if necessary */
6005 5864          if (udp->udp_state == TS_UNBND) {
6006 5865                  error = udp_implicit_bind(connp, cr);
6007 5866                  /*
6008 5867                   * We could be racing with an actual bind, in which case
6009 5868                   * we would see EPROTO. We cross our fingers and try
6010 5869                   * to connect.
6011 5870                   */
6012 5871                  if (!(error == 0 || error == EPROTO))
6013 5872                          goto done;
6014 5873                  did_bind = B_TRUE;
6015 5874          }
6016 5875          /*
6017 5876           * set SO_DGRAM_ERRIND
6018 5877           */
6019 5878          connp->conn_dgram_errind = B_TRUE;
6020 5879  
6021 5880          error = udp_do_connect(connp, sa, len, cr, pid);
6022 5881  
6023 5882          if (error != 0 && did_bind) {
6024 5883                  int unbind_err;
6025 5884  
6026 5885                  unbind_err = udp_do_unbind(connp);
6027 5886                  ASSERT(unbind_err == 0);
6028 5887          }
6029 5888  
6030 5889          if (error == 0) {
6031 5890                  *id = 0;
6032 5891                  (*connp->conn_upcalls->su_connected)
6033 5892                      (connp->conn_upper_handle, 0, NULL, -1);
6034 5893          } else if (error < 0) {
6035 5894                  error = proto_tlitosyserr(-error);
6036 5895          }
6037 5896  
6038 5897  done:
6039 5898          if (error != 0 && udp->udp_state == TS_DATA_XFER) {
6040 5899                  /*
6041 5900                   * No need to hold locks to set state
6042 5901                   * after connect failure socket state is undefined
6043 5902                   * We set the state only to imitate old sockfs behavior
6044 5903                   */
6045 5904                  udp->udp_state = TS_IDLE;
6046 5905          }
6047 5906          return (error);
6048 5907  }
6049 5908  
6050 5909  int
6051 5910  udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6052 5911      cred_t *cr)
6053 5912  {
  
    | 
      ↓ open down ↓ | 
    2587 lines elided | 
    
      ↑ open up ↑ | 
  
                   /*
                    * Send the M_DATA message mp on the UDP endpoint behind
                    * proto_handle.
                    *
                    * An implicit bind is done first if the endpoint is TS_UNBND.
                    * With no msg_name the endpoint must be connected and the data
                    * goes to the connected peer; with a msg_name the destination is
                    * validated and the message is handed to the ancillary, lastdst
                    * or newdst output routine depending on msg_controllen and on
                    * whether the destination matches the last one used.
                    *
                    * Returns 0 or an errno value; when us_sendto_ignerr is set,
                    * errors from the output routines are suppressed and 0 is
                    * returned instead.
                    */
6054 5913          sin6_t          *sin6;
6055 5914          sin_t           *sin = NULL;
6056 5915          uint_t          srcid;
6057 5916          conn_t          *connp = (conn_t *)proto_handle;
6058 5917          udp_t           *udp = connp->conn_udp;
6059 5918          int             error = 0;
6060 5919          udp_stack_t     *us = udp->udp_us;
6061 5920          ushort_t        ipversion;
6062 5921          pid_t           pid = curproc->p_pid;
6063 5922          ip_xmit_attr_t  *ixa;
     5923 +        boolean_t       snd_to_conn;
6064 5924  
6065 5925          ASSERT(DB_TYPE(mp) == M_DATA);
6066 5926  
6067 5927          /* All Solaris components should pass a cred for this operation. */
6068 5928          ASSERT(cr != NULL);
6069 5929  
6070 5930          /* do an implicit bind if necessary */
6071 5931          if (udp->udp_state == TS_UNBND) {
6072 5932                  error = udp_implicit_bind(connp, cr);
6073 5933                  /*
6074 5934                   * We could be racing with an actual bind, in which case
6075 5935                   * we would see EPROTO. We cross our fingers and try
6076 5936                   * to connect.
6077 5937                   */
6078 5938                  if (!(error == 0 || error == EPROTO)) {
6079 5939                          freemsg(mp);
6080 5940                          return (error);
6081 5941                  }
6082 5942          }
6083 5943  
6084 5944          /* Connected? */
6085 5945          if (msg->msg_name == NULL) {
                           /*
                            * NOTE(review): unlike the implicit-bind failure above,
                            * this and the later early error returns do not call
                            * freemsg(mp) -- confirm who owns mp on these paths.
                            */
6086 5946                  if (udp->udp_state != TS_DATA_XFER) {
6087 5947                          UDPS_BUMP_MIB(us, udpOutErrors);
6088 5948                          return (EDESTADDRREQ);
6089 5949                  }
6090 5950                  if (msg->msg_controllen != 0) {
6091 5951                          error = udp_output_ancillary(connp, NULL, NULL, mp,
6092 5952                              NULL, msg, cr, pid);
6093 5953                  } else {
6094 5954                          error = udp_output_connected(connp, mp, cr, pid);
6095 5955                  }
6096 5956                  if (us->us_sendto_ignerr)
6097 5957                          return (0);
  
    | 
      ↓ open down ↓ | 
    24 lines elided | 
    
      ↑ open up ↑ | 
  
6098 5958                  else
6099 5959                          return (error);
6100 5960          }
6101 5961  
6102 5962          /*
6103 5963           * Check if we're allowed to send to a connection on which we've
6104 5964           * already called 'connect'. The posix spec. allows both behaviors but
6105 5965           * historically we've returned an error if already connected. The
6106 5966           * client can allow this via a sockopt.
6107 5967           */
                   /*
                    * udp_snd_to_conn is sampled under conn_lock; udp_state is still
                    * read without the lock in the test that follows.
                    */
6108      -        if (udp->udp_state == TS_DATA_XFER && !udp->udp_snd_to_conn) {
     5968 +        mutex_enter(&connp->conn_lock);
     5969 +        snd_to_conn = (udp->udp_snd_to_conn != 0);
     5970 +        mutex_exit(&connp->conn_lock);
     5971 +        if (udp->udp_state == TS_DATA_XFER && !snd_to_conn) {
6109 5972                  UDPS_BUMP_MIB(us, udpOutErrors);
6110 5973                  return (EISCONN);
6111 5974          }
6112 5975  
6113 5976          error = proto_verify_ip_addr(connp->conn_family,
6114 5977              (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6115 5978          if (error != 0) {
6116 5979                  UDPS_BUMP_MIB(us, udpOutErrors);
6117 5980                  return (error);
6118 5981          }
6119 5982          switch (connp->conn_family) {
6120 5983          case AF_INET6:
6121 5984                  sin6 = (sin6_t *)msg->msg_name;
6122 5985  
6123 5986                  srcid = sin6->__sin6_src_id;
6124 5987  
6125 5988                  if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6126 5989                          /*
6127 5990                           * Destination is a non-IPv4-compatible IPv6 address.
6128 5991                           * Send out an IPv6 format packet.
6129 5992                           */
6130 5993  
6131 5994                          /*
6132 5995                           * If the local address is a mapped address return
6133 5996                           * an error.
6134 5997                           * It would be possible to send an IPv6 packet but the
6135 5998                           * response would never make it back to the application
6136 5999                           * since it is bound to a mapped address.
6137 6000                           */
6138 6001                          if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
6139 6002                                  UDPS_BUMP_MIB(us, udpOutErrors);
6140 6003                                  return (EADDRNOTAVAIL);
6141 6004                          }
                                   /* An unspecified destination is sent to loopback. */
6142 6005                          if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6143 6006                                  sin6->sin6_addr = ipv6_loopback;
6144 6007                          ipversion = IPV6_VERSION;
6145 6008                  } else {
                                   /* V4-mapped destination: refused on a v6-only socket. */
6146 6009                          if (connp->conn_ipv6_v6only) {
6147 6010                                  UDPS_BUMP_MIB(us, udpOutErrors);
6148 6011                                  return (EADDRNOTAVAIL);
6149 6012                          }
6150 6013  
6151 6014                          /*
6152 6015                           * If the local address is not zero or a mapped address
6153 6016                           * return an error.  It would be possible to send an
6154 6017                           * IPv4 packet but the response would never make it
6155 6018                           * back to the application since it is bound to a
6156 6019                           * non-mapped address.
6157 6020                           */
6158 6021                          if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
6159 6022                              !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
6160 6023                                  UDPS_BUMP_MIB(us, udpOutErrors);
6161 6024                                  return (EADDRNOTAVAIL);
6162 6025                          }
6163 6026  
                                   /* INADDR_ANY as a destination is sent to loopback. */
6164 6027                          if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
6165 6028                                  V4_PART_OF_V6(sin6->sin6_addr) =
6166 6029                                      htonl(INADDR_LOOPBACK);
6167 6030                          }
6168 6031                          ipversion = IPV4_VERSION;
6169 6032                  }
6170 6033  
6171 6034                  /*
6172 6035                   * We have to allocate an ip_xmit_attr_t before we grab
6173 6036                   * conn_lock and we need to hold conn_lock once we've checked
6174 6037                   * conn_same_as_last_v6 to handle concurrent send* calls on a
6175 6038                   * socket.
6176 6039                   */
6177 6040                  if (msg->msg_controllen == 0) {
6178 6041                          ixa = conn_get_ixa(connp, B_FALSE);
6179 6042                          if (ixa == NULL) {
6180 6043                                  UDPS_BUMP_MIB(us, udpOutErrors);
6181 6044                                  return (ENOMEM);
6182 6045                          }
6183 6046                  } else {
6184 6047                          ixa = NULL;
6185 6048                  }
6186 6049                  mutex_enter(&connp->conn_lock);
                           /*
                            * A pending delayed error is consumed (cleared) here, but
                            * it is only reported if the address saved with it
                            * matches this destination.
                            */
6187 6050                  if (udp->udp_delayed_error != 0) {
6188 6051                          sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6189 6052  
6190 6053                          error = udp->udp_delayed_error;
6191 6054                          udp->udp_delayed_error = 0;
6192 6055  
6193 6056                          /* Compare IP address, port, and family */
6194 6057  
6195 6058                          if (sin6->sin6_port == sin2->sin6_port &&
6196 6059                              IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6197 6060                              &sin2->sin6_addr) &&
6198 6061                              sin6->sin6_family == sin2->sin6_family) {
6199 6062                                  mutex_exit(&connp->conn_lock);
6200 6063                                  UDPS_BUMP_MIB(us, udpOutErrors);
6201 6064                                  if (ixa != NULL)
6202 6065                                          ixa_refrele(ixa);
6203 6066                                  return (error);
6204 6067                          }
6205 6068                  }
6206 6069  
6207 6070                  if (msg->msg_controllen != 0) {
6208 6071                          mutex_exit(&connp->conn_lock);
6209 6072                          ASSERT(ixa == NULL);
6210 6073                          error = udp_output_ancillary(connp, NULL, sin6, mp,
6211 6074                              NULL, msg, cr, pid);
6212 6075                  } else if (conn_same_as_last_v6(connp, sin6) &&
6213 6076                      connp->conn_lastsrcid == srcid &&
6214 6077                      ipsec_outbound_policy_current(ixa)) {
6215 6078                          /* udp_output_lastdst drops conn_lock */
6216 6079                          error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6217 6080                  } else {
6218 6081                          /* udp_output_newdst drops conn_lock */
6219 6082                          error = udp_output_newdst(connp, mp, NULL, sin6,
6220 6083                              ipversion, cr, pid, ixa);
6221 6084                  }
6222 6085                  ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6223 6086                  if (us->us_sendto_ignerr)
6224 6087                          return (0);
6225 6088                  else
6226 6089                          return (error);
6227 6090          case AF_INET:
6228 6091                  sin = (sin_t *)msg->msg_name;
6229 6092  
6230 6093                  ipversion = IPV4_VERSION;
6231 6094  
                           /* INADDR_ANY as a destination is sent to loopback. */
6232 6095                  if (sin->sin_addr.s_addr == INADDR_ANY)
6233 6096                          sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6234 6097  
6235 6098                  /*
6236 6099                   * We have to allocate an ip_xmit_attr_t before we grab
6237 6100                   * conn_lock and we need to hold conn_lock once we've checked
6238 6101                   * conn_same_as_last_v4 to handle concurrent send* on a socket.
6239 6102                   */
6240 6103                  if (msg->msg_controllen == 0) {
6241 6104                          ixa = conn_get_ixa(connp, B_FALSE);
6242 6105                          if (ixa == NULL) {
6243 6106                                  UDPS_BUMP_MIB(us, udpOutErrors);
6244 6107                                  return (ENOMEM);
6245 6108                          }
6246 6109                  } else {
6247 6110                          ixa = NULL;
6248 6111                  }
6249 6112                  mutex_enter(&connp->conn_lock);
                           /*
                            * Same delayed-error handling as the AF_INET6 case: the
                            * error is cleared, and returned only on an address match.
                            */
6250 6113                  if (udp->udp_delayed_error != 0) {
6251 6114                          sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;
6252 6115  
6253 6116                          error = udp->udp_delayed_error;
6254 6117                          udp->udp_delayed_error = 0;
6255 6118  
6256 6119                          /* Compare IP address and port */
6257 6120  
6258 6121                          if (sin->sin_port == sin2->sin_port &&
6259 6122                              sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6260 6123                                  mutex_exit(&connp->conn_lock);
6261 6124                                  UDPS_BUMP_MIB(us, udpOutErrors);
6262 6125                                  if (ixa != NULL)
6263 6126                                          ixa_refrele(ixa);
6264 6127                                  return (error);
6265 6128                          }
6266 6129                  }
6267 6130                  if (msg->msg_controllen != 0) {
6268 6131                          mutex_exit(&connp->conn_lock);
6269 6132                          ASSERT(ixa == NULL);
6270 6133                          error = udp_output_ancillary(connp, sin, NULL, mp,
6271 6134                              NULL, msg, cr, pid);
6272 6135                  } else if (conn_same_as_last_v4(connp, sin) &&
6273 6136                      ipsec_outbound_policy_current(ixa)) {
6274 6137                          /* udp_output_lastdst drops conn_lock */
6275 6138                          error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6276 6139                  } else {
6277 6140                          /* udp_output_newdst drops conn_lock */
6278 6141                          error = udp_output_newdst(connp, mp, sin, NULL,
6279 6142                              ipversion, cr, pid, ixa);
6280 6143                  }
6281 6144                  ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6282 6145                  if (us->us_sendto_ignerr)
6283 6146                          return (0);
6284 6147                  else
6285 6148                          return (error);
6286 6149          default:
6287 6150                  return (EINVAL);
6288 6151          }
6289 6152  }
6290 6153  
6291 6154  int
6292 6155  udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6293 6156      boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
6294 6157      sock_quiesce_arg_t *arg)
6295 6158  {
                   /*
                    * Switch the endpoint behind proto_handle over to the STREAMS
                    * queue pair q.
                    *
                    * The queue pair is wired to the conn_t, the stream head is told
                    * the write offset and receive high-water mark, the helper stream
                    * is freed, and the state collected here (capability ack, local
                    * and peer address, option flags) is handed to quiesced_cb.  Any
                    * message returned by the callback, followed by whatever is queued
                    * on udp_fallback_queue_head, is then sent up RD(q) before
                    * IPCL_NONSTR is cleared.  Always returns 0.
                    */
6296 6159          conn_t  *connp = (conn_t *)proto_handle;
6297 6160          udp_t   *udp;
6298 6161          struct T_capability_ack tca;
6299 6162          struct sockaddr_in6 laddr, faddr;
6300 6163          socklen_t laddrlen, faddrlen;
6301 6164          short opts;
6302 6165          struct stroptions *stropt;
6303 6166          mblk_t *mp, *stropt_mp;
6304 6167          int error;
6305 6168  
6306 6169          udp = connp->conn_udp;
6307 6170  
6308 6171          stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6309 6172  
6310 6173          /*
6311 6174           * setup the fallback stream that was allocated
6312 6175           */
                   /* Save the values held in q_ptr before q_ptr is repointed at connp. */
6313 6176          connp->conn_dev = (dev_t)RD(q)->q_ptr;
6314 6177          connp->conn_minor_arena = WR(q)->q_ptr;
6315 6178  
6316 6179          RD(q)->q_ptr = WR(q)->q_ptr = connp;
6317 6180  
6318 6181          WR(q)->q_qinfo = &udp_winit;
6319 6182  
6320 6183          connp->conn_rq = RD(q);
6321 6184          connp->conn_wq = WR(q);
6322 6185  
6323 6186          /* Notify stream head about options before sending up data */
6324 6187          stropt_mp->b_datap->db_type = M_SETOPTS;
6325 6188          stropt_mp->b_wptr += sizeof (*stropt);
6326 6189          stropt = (struct stroptions *)stropt_mp->b_rptr;
6327 6190          stropt->so_flags = SO_WROFF | SO_HIWAT;
6328 6191          stropt->so_wroff = connp->conn_wroff;
6329 6192          stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6330 6193          putnext(RD(q), stropt_mp);
6331 6194  
6332 6195          /*
6333 6196           * Free the helper stream
6334 6197           */
6335 6198          ip_free_helper_stream(connp);
6336 6199  
6337 6200          if (!issocket)
6338 6201                  udp_use_pure_tpi(udp);
6339 6202  
6340 6203          /*
6341 6204           * Collect the information needed to sync with the sonode
6342 6205           */
6343 6206          udp_do_capability_ack(udp, &tca, TC1_INFO);
6344 6207  
6345 6208          laddrlen = faddrlen = sizeof (sin6_t);
6346 6209          (void) udp_getsockname((sock_lower_handle_t)connp,
6347 6210              (struct sockaddr *)&laddr, &laddrlen, CRED());
6348 6211          error = udp_getpeername((sock_lower_handle_t)connp,
6349 6212              (struct sockaddr *)&faddr, &faddrlen, CRED());
                   /* If the peer name cannot be obtained, report a zero-length peer. */
6350 6213          if (error != 0)
6351 6214                  faddrlen = 0;
6352 6215  
6353 6216          opts = 0;
6354 6217          if (connp->conn_dgram_errind)
6355 6218                  opts |= SO_DGRAM_ERRIND;
6356 6219          if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6357 6220                  opts |= SO_DONTROUTE;
6358 6221  
6359 6222          mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
6360 6223              (struct sockaddr *)&laddr, laddrlen,
6361 6224              (struct sockaddr *)&faddr, faddrlen, opts);
6362 6225  
6363 6226          mutex_enter(&udp->udp_recv_lock);
6364 6227          /*
6365 6228           * Attempts to send data up during fallback will result in it being
6366 6229           * queued in udp_t. First push up the datagrams obtained from the
6367 6230           * socket, then any packets queued in udp_t.
6368 6231           */
6369 6232          if (mp != NULL) {
6370 6233                  mp->b_next = udp->udp_fallback_queue_head;
6371 6234                  udp->udp_fallback_queue_head = mp;
6372 6235          }
                   /* udp_recv_lock is dropped around each putnext() call. */
6373 6236          while (udp->udp_fallback_queue_head != NULL) {
6374 6237                  mp = udp->udp_fallback_queue_head;
6375 6238                  udp->udp_fallback_queue_head = mp->b_next;
6376 6239                  mutex_exit(&udp->udp_recv_lock);
6377 6240                  mp->b_next = NULL;
6378 6241                  putnext(RD(q), mp);
6379 6242                  mutex_enter(&udp->udp_recv_lock);
6380 6243          }
                   /* The queue is empty here, so this leaves the tail NULL as well. */
6381 6244          udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6382 6245          /*
6383 6246           * No longer a streams less socket
6384 6247           */
6385 6248          mutex_enter(&connp->conn_lock);
6386 6249          connp->conn_flags &= ~IPCL_NONSTR;
6387 6250          mutex_exit(&connp->conn_lock);
6388 6251  
6389 6252          mutex_exit(&udp->udp_recv_lock);
6390 6253  
6391 6254          ASSERT(connp->conn_ref >= 1);
6392 6255  
6393 6256          return (0);
6394 6257  }
6395 6258  
6396 6259  /* ARGSUSED3 */
6397 6260  int
6398 6261  udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6399 6262      socklen_t *salenp, cred_t *cr)
6400 6263  {
6401 6264          conn_t  *connp = (conn_t *)proto_handle;
6402 6265          udp_t   *udp = connp->conn_udp;
6403 6266          int error;
6404 6267  
6405 6268          /* All Solaris components should pass a cred for this operation. */
6406 6269          ASSERT(cr != NULL);
6407 6270  
6408 6271          mutex_enter(&connp->conn_lock);
6409 6272          if (udp->udp_state != TS_DATA_XFER)
6410 6273                  error = ENOTCONN;
6411 6274          else
6412 6275                  error = conn_getpeername(connp, sa, salenp);
6413 6276          mutex_exit(&connp->conn_lock);
6414 6277          return (error);
6415 6278  }
6416 6279  
6417 6280  /* ARGSUSED3 */
6418 6281  int
6419 6282  udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6420 6283      socklen_t *salenp, cred_t *cr)
6421 6284  {
6422 6285          conn_t  *connp = (conn_t *)proto_handle;
6423 6286          int error;
6424 6287  
6425 6288          /* All Solaris components should pass a cred for this operation. */
6426 6289          ASSERT(cr != NULL);
6427 6290  
6428 6291          mutex_enter(&connp->conn_lock);
6429 6292          error = conn_getsockname(connp, sa, salenp);
6430 6293          mutex_exit(&connp->conn_lock);
6431 6294          return (error);
6432 6295  }
6433 6296  
6434 6297  int
6435 6298  udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6436 6299      void *optvalp, socklen_t *optlen, cred_t *cr)
6437 6300  {
6438 6301          conn_t          *connp = (conn_t *)proto_handle;
6439 6302          int             error;
6440 6303          t_uscalar_t     max_optbuf_len;
6441 6304          void            *optvalp_buf;
6442 6305          int             len;
6443 6306  
6444 6307          /* All Solaris components should pass a cred for this operation. */
6445 6308          ASSERT(cr != NULL);
6446 6309  
6447 6310          error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6448 6311              udp_opt_obj.odb_opt_des_arr,
6449 6312              udp_opt_obj.odb_opt_arr_cnt,
6450 6313              B_FALSE, B_TRUE, cr);
6451 6314          if (error != 0) {
6452 6315                  if (error < 0)
6453 6316                          error = proto_tlitosyserr(-error);
6454 6317                  return (error);
6455 6318          }
6456 6319  
6457 6320          optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6458 6321          len = udp_opt_get(connp, level, option_name, optvalp_buf);
6459 6322          if (len == -1) {
6460 6323                  kmem_free(optvalp_buf, max_optbuf_len);
6461 6324                  return (EINVAL);
6462 6325          }
6463 6326  
6464 6327          /*
6465 6328           * update optlen and copy option value
6466 6329           */
6467 6330          t_uscalar_t size = MIN(len, *optlen);
6468 6331  
6469 6332          bcopy(optvalp_buf, optvalp, size);
6470 6333          bcopy(&size, optlen, sizeof (size));
6471 6334  
6472 6335          kmem_free(optvalp_buf, max_optbuf_len);
6473 6336          return (0);
6474 6337  }
6475 6338  
6476 6339  int
6477 6340  udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6478 6341      const void *optvalp, socklen_t optlen, cred_t *cr)
6479 6342  {
6480 6343          conn_t          *connp = (conn_t *)proto_handle;
6481 6344          int             error;
6482 6345  
6483 6346          /* All Solaris components should pass a cred for this operation. */
6484 6347          ASSERT(cr != NULL);
6485 6348  
6486 6349          error = proto_opt_check(level, option_name, optlen, NULL,
6487 6350              udp_opt_obj.odb_opt_des_arr,
6488 6351              udp_opt_obj.odb_opt_arr_cnt,
6489 6352              B_TRUE, B_FALSE, cr);
6490 6353  
6491 6354          if (error != 0) {
6492 6355                  if (error < 0)
6493 6356                          error = proto_tlitosyserr(-error);
6494 6357                  return (error);
6495 6358          }
6496 6359  
6497 6360          error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6498 6361              optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6499 6362              NULL, cr);
6500 6363  
6501 6364          ASSERT(error >= 0);
6502 6365  
6503 6366          return (error);
6504 6367  }
6505 6368  
6506 6369  void
6507 6370  udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6508 6371  {
6509 6372          conn_t  *connp = (conn_t *)proto_handle;
6510 6373          udp_t   *udp = connp->conn_udp;
6511 6374  
6512 6375          mutex_enter(&udp->udp_recv_lock);
6513 6376          connp->conn_flow_cntrld = B_FALSE;
6514 6377          mutex_exit(&udp->udp_recv_lock);
6515 6378  }
6516 6379  
6517 6380  /* ARGSUSED2 */
6518 6381  int
6519 6382  udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6520 6383  {
6521 6384          conn_t  *connp = (conn_t *)proto_handle;
6522 6385  
6523 6386          /* All Solaris components should pass a cred for this operation. */
6524 6387          ASSERT(cr != NULL);
6525 6388  
6526 6389          /* shut down the send side */
6527 6390          if (how != SHUT_RD)
6528 6391                  (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6529 6392                      SOCK_OPCTL_SHUT_SEND, 0);
6530 6393          /* shut down the recv side */
6531 6394          if (how != SHUT_WR)
6532 6395                  (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6533 6396                      SOCK_OPCTL_SHUT_RECV, 0);
6534 6397          return (0);
6535 6398  }
6536 6399  
6537 6400  int
6538 6401  udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6539 6402      int mode, int32_t *rvalp, cred_t *cr)
6540 6403  {
6541 6404          conn_t          *connp = (conn_t *)proto_handle;
6542 6405          int             error;
6543 6406  
6544 6407          /* All Solaris components should pass a cred for this operation. */
6545 6408          ASSERT(cr != NULL);
6546 6409  
6547 6410          /*
6548 6411           * If we don't have a helper stream then create one.
6549 6412           * ip_create_helper_stream takes care of locking the conn_t,
6550 6413           * so this check for NULL is just a performance optimization.
6551 6414           */
6552 6415          if (connp->conn_helper_info == NULL) {
6553 6416                  udp_stack_t *us = connp->conn_udp->udp_us;
6554 6417  
6555 6418                  ASSERT(us->us_ldi_ident != NULL);
6556 6419  
6557 6420                  /*
6558 6421                   * Create a helper stream for non-STREAMS socket.
6559 6422                   */
6560 6423                  error = ip_create_helper_stream(connp, us->us_ldi_ident);
6561 6424                  if (error != 0) {
6562 6425                          ip0dbg(("tcp_ioctl: create of IP helper stream "
6563 6426                              "failed %d\n", error));
6564 6427                          return (error);
6565 6428                  }
6566 6429          }
6567 6430  
6568 6431          switch (cmd) {
6569 6432                  case _SIOCSOCKFALLBACK:
6570 6433                  case TI_GETPEERNAME:
6571 6434                  case TI_GETMYNAME:
6572 6435                          ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6573 6436                              cmd));
6574 6437                          error = EINVAL;
6575 6438                          break;
6576 6439                  default:
6577 6440                          /*
6578 6441                           * Pass on to IP using helper stream
6579 6442                           */
6580 6443                          error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6581 6444                              cmd, arg, mode, cr, rvalp);
6582 6445                          break;
6583 6446          }
6584 6447          return (error);
6585 6448  }
6586 6449  
6587 6450  /* ARGSUSED */
6588 6451  int
6589 6452  udp_accept(sock_lower_handle_t lproto_handle,
6590 6453      sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6591 6454      cred_t *cr)
6592 6455  {
6593 6456          return (EOPNOTSUPP);
6594 6457  }
6595 6458  
6596 6459  /* ARGSUSED */
6597 6460  int
6598 6461  udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6599 6462  {
6600 6463          return (EOPNOTSUPP);
6601 6464  }
6602 6465  
6603 6466  sock_downcalls_t sock_udp_downcalls = {
6604 6467          udp_activate,           /* sd_activate */
6605 6468          udp_accept,             /* sd_accept */
6606 6469          udp_bind,               /* sd_bind */
6607 6470          udp_listen,             /* sd_listen */
6608 6471          udp_connect,            /* sd_connect */
6609 6472          udp_getpeername,        /* sd_getpeername */
6610 6473          udp_getsockname,        /* sd_getsockname */
6611 6474          udp_getsockopt,         /* sd_getsockopt */
6612 6475          udp_setsockopt,         /* sd_setsockopt */
6613 6476          udp_send,               /* sd_send */
6614 6477          NULL,                   /* sd_send_uio */
6615 6478          NULL,                   /* sd_recv_uio */
6616 6479          NULL,                   /* sd_poll */
6617 6480          udp_shutdown,           /* sd_shutdown */
6618 6481          udp_clr_flowctrl,       /* sd_setflowctrl */
6619 6482          udp_ioctl,              /* sd_ioctl */
6620 6483          udp_close               /* sd_close */
6621 6484  };
  
    | 
      ↓ open down ↓ | 
    503 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX