1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc.
26 */
27 /* Copyright (c) 1990 Mentat Inc. */
28
29 #include <sys/sysmacros.h>
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/stropts.h>
33 #include <sys/strlog.h>
34 #include <sys/strsun.h>
35 #define _SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/timod.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/strsubr.h>
41 #include <sys/suntpi.h>
42 #include <sys/xti_inet.h>
43 #include <sys/kmem.h>
44 #include <sys/cred_impl.h>
45 #include <sys/policy.h>
46 #include <sys/priv.h>
47 #include <sys/ucred.h>
48 #include <sys/zone.h>
49
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sockio.h>
53 #include <sys/vtrace.h>
54 #include <sys/sdt.h>
55 #include <sys/debug.h>
56 #include <sys/isa_defs.h>
57 #include <sys/random.h>
58 #include <netinet/in.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/udp.h>
62
63 #include <inet/common.h>
64 #include <inet/ip.h>
65 #include <inet/ip_impl.h>
66 #include <inet/ipsec_impl.h>
67 #include <inet/ip6.h>
68 #include <inet/ip_ire.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_multi.h>
71 #include <inet/ip_ndp.h>
72 #include <inet/proto_set.h>
73 #include <inet/mib2.h>
74 #include <inet/optcom.h>
75 #include <inet/snmpcom.h>
76 #include <inet/kstatcom.h>
77 #include <inet/ipclassifier.h>
78 #include <sys/squeue_impl.h>
79 #include <inet/ipnet.h>
80 #include <sys/vxlan.h>
81 #include <inet/inet_hash.h>
82
83 #include <sys/tsol/label.h>
84 #include <sys/tsol/tnet.h>
85 #include <rpc/pmap_prot.h>
86
87 #include <inet/udp_impl.h>
88
89 /*
90 * Synchronization notes:
91 *
92 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
93 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
94 * protects the contents of the udp_t. uf_lock protects the address and the
95 * fanout information.
96 * The lock order is conn_lock -> uf_lock.
97 *
98 * The fanout lock uf_lock:
99 * When a UDP endpoint is bound to a local port, it is inserted into
100 * a bind hash list. The list consists of an array of udp_fanout_t buckets.
101 * The size of the array is controlled by the udp_bind_fanout_size variable.
102 * This variable can be changed in /etc/system if the default value is
103 * not large enough. Each bind hash bucket is protected by a per bucket
104 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
105 * structure and a few other fields in the udp_t. A UDP endpoint is removed
106 * from the bind hash list only when it is being unbound or being closed.
107 * The per bucket lock also protects a UDP endpoint's state changes.
108 *
109 * Plumbing notes:
110 * UDP is always a device driver. For compatibility with mibopen() code
111 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
112 * dummy module.
113 *
114 * The above implies that we don't support any intermediate module to
115 * reside in between /dev/ip and udp -- in fact, we never supported such
116 * scenario in the past as the inter-layer communication semantics have
117 * always been private.
118 */
119
/*
 * For /etc/system control.
 * Size of the array of udp_fanout_t bind hash buckets; see the
 * synchronization notes above.
 */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
122
/*
 * Forward declarations.
 */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
    const in6_addr_t *, in_port_t, uint32_t);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_tpi_close(queue_t *q, int flags);
static void	udp_close_free(conn_t *);
static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
    t_scalar_t tlierr, int sys_error);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
static void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
    ip_recv_attr_t *ira);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
static void	udp_lrput(queue_t *, mblk_t *);
static void	udp_lwput(queue_t *, mblk_t *);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
    cred_t *credp, boolean_t isv6);
static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
    cred_t *credp);
static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
    cred_t *credp);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
int		udp_opt_set(conn_t *connp, uint_t optset_context,
    int level, int name, uint_t inlen,
    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr);
int		udp_opt_get(conn_t *connp, int level, int name,
    uchar_t *ptr);
static int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
    pid_t pid);
static int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
    pid_t pid, ip_xmit_attr_t *ixa);
static int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
    ip_xmit_attr_t *ixa);
static mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
    const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
    int *);
static mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
    mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
static void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void	udp_ud_err_connected(conn_t *, t_scalar_t);
static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    boolean_t random);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

/* Per-netstack setup and teardown */
static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	udp_stack_fini(netstackid_t stackid, void *arg);

/* Common routines for TPI and socket module */
static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);

/* Common routine for TPI and socket module */
static conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
static void	udp_do_close(conn_t *);
static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
    boolean_t);
static int	udp_do_unbind(conn_t *);

int		udp_getsockname(sock_lower_handle_t,
    struct sockaddr *, socklen_t *, cred_t *);
int		udp_getpeername(sock_lower_handle_t,
    struct sockaddr *, socklen_t *, cred_t *);
static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
    cred_t *, pid_t);

/* Ask the compiler to inline the three output paths. */
#pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
207
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * The macro expands to a brace-enclosed block, and `cp' is evaluated more
 * than once, so pass expressions without side effects.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 *
 *	cp		connection whose local address/port and IP version
 *			are reported to the hook
 *	is_outgoing	passed through to the hook unchanged
 *	faddrp		pointer to the foreign address; for an IPv4
 *			connection only its V4_PART_OF_V6() portion is used
 *	fport		foreign port
 *	err		lvalue that receives the hook's return value
 */
#define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((cp)->conn_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET,		\
		    (uint8_t *)&((cp)->conn_laddr_v4),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)&(V4_PART_OF_V6(*(faddrp))),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET6,		\
		    (uint8_t *)&((cp)->conn_laddr_v6),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
245
/*
 * STREAMS module information shared by every queue initialization structure
 * below: module id, module name, minimum packet size (1), maximum packet
 * size (INFPSZ), and the high and low water marks.
 */
static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 * The read side has no put or service procedure; it supplies only the
 * open and close routines.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
};

/* Write side, shared by the IPv4 and IPv6 devices. */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

/*
 * The streamtab entries are, in order: read init, write init, lower
 * (multiplexor) read init and lower (multiplexor) write init.
 */

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};
292
/* Largest UDP payload over IPv4: IP_MAXPACKET less the UDP and simple IPv4 headers. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest UDP payload over IPv6: IP_MAXPACKET less the UDP and IPv6 headers. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of the default T_INFO_ACK structure above. */
static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};
325
/*
 * UDP tunables related declarations. Definitions are in udp_tunables.c
 */
extern mod_prop_info_t udp_propinfo_tbl[];
extern int udp_propinfo_count;

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;
335
/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 *
 * Both hooks take the netstack id, the protocol, the address family, a
 * pointer to the local address bytes, the local port and an opaque
 * argument.
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

/* Shorthand for a pointer to the union of TPI primitives. */
typedef union T_primitives *t_primp_t;
349
350 /*
351 * Various protocols that encapsulate UDP have no real use for the source port.
352 * Instead, they want to vary the source port to provide better equal-cost
353 * multipathing and other systems that use fanout. Consider something like
354 * VXLAN. If you're actually sending multiple different streams to a single
355 * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP,
356 * SRC Port, DST Port) will always be the same.
357 *
358 * Here, we return a port to hash this to, if we know how to hash it. If for
359 * some reason we can't perform an L4 hash, then we just return the default
360 * value, usually the default port. After we determine the hash we transform it
361 * so that it's in the range of [ min, max ].
362 *
363 * We'd like to avoid a pull up for the sake of performing the hash. If the
364 * first mblk_t doesn't have the full protocol header, then we just send it to
365 * the default. If for some reason we have an encapsulated packet that has its
366 * protocol header in different parts of an mblk_t, then we'll go with the
367 * default port. This means that that if a driver isn't consistent about how it
368 * generates the frames for a given flow, it will not always be consistently
369 * hashed. That should be an uncommon event.
370 */
371 uint16_t
372 udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
373 uint16_t def)
374 {
375 size_t szused = 0;
376 struct ether_header *ether;
377 struct ether_vlan_header *vether;
378 ip6_t *ip6h;
379 ipha_t *ipha;
380 uint16_t sap;
381 uint64_t hash;
382 uint32_t mod;
383
384 ASSERT(min <= max);
385
386 if (type != UDP_HASH_VXLAN)
387 return (def);
388
389 if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
390 return (def);
391
392 /*
393 * The following logic is VXLAN specific to get at the header, if we
394 * have formats, eg. GENEVE, then we should ignore this.
395 *
396 * The kernel overlay device often puts a first mblk_t for the data
397 * which is just the encap. If so, then we're going to use that and try
398 * to avoid a pull up.
399 */
400 if (MBLKL(mp) == VXLAN_HDR_LEN) {
401 if (mp->b_cont == NULL)
402 return (def);
403 mp = mp->b_cont;
404 ether = (struct ether_header *)mp->b_rptr;
405 } else if (MBLKL(mp) < VXLAN_HDR_LEN) {
406 return (def);
407 } else {
408 szused = VXLAN_HDR_LEN;
409 ether = (struct ether_header *)((uintptr_t)mp->b_rptr + szused);
410 }
411
412 /* Can we hold a MAC header? */
413 if (MBLKL(mp) + szused < sizeof (struct ether_header))
414 return (def);
415
416 /*
417 * We need to lie about the starting offset into the message block for
418 * convenience. Undo it at the end. We know that inet_pkt_hash() won't
419 * modify the mblk_t.
420 */
421 mp->b_rptr += szused;
422 hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
423 INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
424 mp->b_rptr -= szused;
425
426 if (hash == 0)
427 return (def);
428
429 mod = max - min + 1;
430 return ((hash % mod) + min);
431 }
432
433 /*
434 * Return the next anonymous port in the privileged port range for
435 * bind checking.
436 *
437 * Trusted Extension (TX) notes: TX allows administrator to mark or
438 * reserve ports as Multilevel ports (MLP). MLP has special function
439 * on TX systems. Once a port is made MLP, it's not available as
440 * ordinary port. This creates "holes" in the port name space. It
441 * may be necessary to skip the "holes" find a suitable anon port.
442 */
443 static in_port_t
444 udp_get_next_priv_port(udp_t *udp)
445 {
446 static in_port_t next_priv_port = IPPORT_RESERVED - 1;
447 in_port_t nextport;
448 boolean_t restart = B_FALSE;
449 udp_stack_t *us = udp->udp_us;
450
451 retry:
452 if (next_priv_port < us->us_min_anonpriv_port ||
453 next_priv_port >= IPPORT_RESERVED) {
454 next_priv_port = IPPORT_RESERVED - 1;
455 if (restart)
456 return (0);
457 restart = B_TRUE;
458 }
459
460 if (is_system_labeled() &&
461 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
462 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
463 next_priv_port = nextport;
464 goto retry;
465 }
466
467 return (next_priv_port--);
468 }
469
470 /*
471 * Hash list removal routine for udp_t structures.
472 */
473 static void
474 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
475 {
476 udp_t *udpnext;
477 kmutex_t *lockp;
478 udp_stack_t *us = udp->udp_us;
479 conn_t *connp = udp->udp_connp;
480
481 if (udp->udp_ptpbhn == NULL)
482 return;
483
484 /*
485 * Extract the lock pointer in case there are concurrent
486 * hash_remove's for this instance.
487 */
488 ASSERT(connp->conn_lport != 0);
489 if (!caller_holds_lock) {
490 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
491 us->us_bind_fanout_size)].uf_lock;
492 ASSERT(lockp != NULL);
493 mutex_enter(lockp);
494 }
495 if (udp->udp_ptpbhn != NULL) {
496 udpnext = udp->udp_bind_hash;
497 if (udpnext != NULL) {
498 udpnext->udp_ptpbhn = udp->udp_ptpbhn;
499 udp->udp_bind_hash = NULL;
500 }
501 *udp->udp_ptpbhn = udpnext;
502 udp->udp_ptpbhn = NULL;
503 }
504 if (!caller_holds_lock) {
505 mutex_exit(lockp);
506 }
507 }
508
509 static void
510 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
511 {
512 conn_t *connp = udp->udp_connp;
513 udp_t **udpp;
514 udp_t *udpnext;
515 conn_t *connext;
516
517 ASSERT(MUTEX_HELD(&uf->uf_lock));
518 ASSERT(udp->udp_ptpbhn == NULL);
519 udpp = &uf->uf_udp;
520 udpnext = udpp[0];
521 if (udpnext != NULL) {
522 /*
523 * If the new udp bound to the INADDR_ANY address
524 * and the first one in the list is not bound to
525 * INADDR_ANY we skip all entries until we find the
526 * first one bound to INADDR_ANY.
527 * This makes sure that applications binding to a
528 * specific address get preference over those binding to
529 * INADDR_ANY.
530 */
531 connext = udpnext->udp_connp;
532 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
533 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
534 while ((udpnext = udpp[0]) != NULL &&
535 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
536 udpp = &(udpnext->udp_bind_hash);
537 }
538 if (udpnext != NULL)
539 udpnext->udp_ptpbhn = &udp->udp_bind_hash;
540 } else {
541 udpnext->udp_ptpbhn = &udp->udp_bind_hash;
542 }
543 }
544 udp->udp_bind_hash = udpnext;
545 udp->udp_ptpbhn = udpp;
546 udpp[0] = udp;
547 }
548
549 /*
550 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
551 * passed to udp_wput.
552 * It associates a port number and local address with the stream.
553 * It calls IP to verify the local IP address, and calls IP to insert
554 * the conn_t in the fanout table.
555 * If everything is ok it then sends the T_BIND_ACK back up.
556 *
557 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
558 * without setting SO_REUSEADDR. This is needed so that they
559 * can be viewed as two independent transport protocols.
560 * However, anonymouns ports are allocated from the same range to avoid
561 * duplicating the us->us_next_port_to_try.
562 */
563 static void
564 udp_tpi_bind(queue_t *q, mblk_t *mp)
565 {
566 sin_t *sin;
567 sin6_t *sin6;
568 mblk_t *mp1;
569 struct T_bind_req *tbr;
570 conn_t *connp;
571 udp_t *udp;
572 int error;
573 struct sockaddr *sa;
574 cred_t *cr;
575
576 /*
577 * All Solaris components should pass a db_credp
578 * for this TPI message, hence we ASSERT.
579 * But in case there is some other M_PROTO that looks
580 * like a TPI message sent by some other kernel
581 * component, we check and return an error.
582 */
583 cr = msg_getcred(mp, NULL);
584 ASSERT(cr != NULL);
585 if (cr == NULL) {
586 udp_err_ack(q, mp, TSYSERR, EINVAL);
587 return;
588 }
589
590 connp = Q_TO_CONN(q);
591 udp = connp->conn_udp;
592 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
593 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
594 "udp_bind: bad req, len %u",
595 (uint_t)(mp->b_wptr - mp->b_rptr));
596 udp_err_ack(q, mp, TPROTO, 0);
597 return;
598 }
599 if (udp->udp_state != TS_UNBND) {
600 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
601 "udp_bind: bad state, %u", udp->udp_state);
602 udp_err_ack(q, mp, TOUTSTATE, 0);
603 return;
604 }
605 /*
606 * Reallocate the message to make sure we have enough room for an
607 * address.
608 */
609 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
610 if (mp1 == NULL) {
611 udp_err_ack(q, mp, TSYSERR, ENOMEM);
612 return;
613 }
614
615 mp = mp1;
616
617 /* Reset the message type in preparation for shipping it back. */
618 DB_TYPE(mp) = M_PCPROTO;
619
620 tbr = (struct T_bind_req *)mp->b_rptr;
621 switch (tbr->ADDR_length) {
622 case 0: /* Request for a generic port */
623 tbr->ADDR_offset = sizeof (struct T_bind_req);
624 if (connp->conn_family == AF_INET) {
625 tbr->ADDR_length = sizeof (sin_t);
626 sin = (sin_t *)&tbr[1];
627 *sin = sin_null;
628 sin->sin_family = AF_INET;
629 mp->b_wptr = (uchar_t *)&sin[1];
630 sa = (struct sockaddr *)sin;
631 } else {
632 ASSERT(connp->conn_family == AF_INET6);
633 tbr->ADDR_length = sizeof (sin6_t);
634 sin6 = (sin6_t *)&tbr[1];
635 *sin6 = sin6_null;
636 sin6->sin6_family = AF_INET6;
637 mp->b_wptr = (uchar_t *)&sin6[1];
638 sa = (struct sockaddr *)sin6;
639 }
640 break;
641
642 case sizeof (sin_t): /* Complete IPv4 address */
643 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
644 sizeof (sin_t));
645 if (sa == NULL || !OK_32PTR((char *)sa)) {
646 udp_err_ack(q, mp, TSYSERR, EINVAL);
647 return;
648 }
649 if (connp->conn_family != AF_INET ||
650 sa->sa_family != AF_INET) {
651 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
652 return;
653 }
654 break;
655
656 case sizeof (sin6_t): /* complete IPv6 address */
657 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
658 sizeof (sin6_t));
659 if (sa == NULL || !OK_32PTR((char *)sa)) {
660 udp_err_ack(q, mp, TSYSERR, EINVAL);
661 return;
662 }
663 if (connp->conn_family != AF_INET6 ||
664 sa->sa_family != AF_INET6) {
665 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
666 return;
667 }
668 break;
669
670 default: /* Invalid request */
671 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
672 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
673 udp_err_ack(q, mp, TBADADDR, 0);
674 return;
675 }
676
677 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
678 tbr->PRIM_type != O_T_BIND_REQ);
679
680 if (error != 0) {
681 if (error > 0) {
682 udp_err_ack(q, mp, TSYSERR, error);
683 } else {
684 udp_err_ack(q, mp, -error, 0);
685 }
686 } else {
687 tbr->PRIM_type = T_BIND_ACK;
688 qreply(q, mp);
689 }
690 }
691
692 /*
693 * This routine handles each T_CONN_REQ message passed to udp. It
694 * associates a default destination address with the stream.
695 *
696 * After various error checks are completed, udp_connect() lays
697 * the target address and port into the composite header template.
698 * Then we ask IP for information, including a source address if we didn't
699 * already have one. Finally we send up the T_OK_ACK reply message.
700 */
701 static void
702 udp_tpi_connect(queue_t *q, mblk_t *mp)
703 {
704 conn_t *connp = Q_TO_CONN(q);
705 int error;
706 socklen_t len;
707 struct sockaddr *sa;
708 struct T_conn_req *tcr;
709 cred_t *cr;
710 pid_t pid;
711 /*
712 * All Solaris components should pass a db_credp
713 * for this TPI message, hence we ASSERT.
714 * But in case there is some other M_PROTO that looks
715 * like a TPI message sent by some other kernel
716 * component, we check and return an error.
717 */
718 cr = msg_getcred(mp, &pid);
719 ASSERT(cr != NULL);
720 if (cr == NULL) {
721 udp_err_ack(q, mp, TSYSERR, EINVAL);
722 return;
723 }
724
725 tcr = (struct T_conn_req *)mp->b_rptr;
726
727 /* A bit of sanity checking */
728 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
729 udp_err_ack(q, mp, TPROTO, 0);
730 return;
731 }
732
733 if (tcr->OPT_length != 0) {
734 udp_err_ack(q, mp, TBADOPT, 0);
735 return;
736 }
737
738 /*
739 * Determine packet type based on type of address passed in
740 * the request should contain an IPv4 or IPv6 address.
741 * Make sure that address family matches the type of
742 * family of the address passed down.
743 */
744 len = tcr->DEST_length;
745 switch (tcr->DEST_length) {
746 default:
747 udp_err_ack(q, mp, TBADADDR, 0);
748 return;
749
750 case sizeof (sin_t):
751 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
752 sizeof (sin_t));
753 break;
754
755 case sizeof (sin6_t):
756 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
757 sizeof (sin6_t));
758 break;
759 }
760
761 error = proto_verify_ip_addr(connp->conn_family, sa, len);
762 if (error != 0) {
763 udp_err_ack(q, mp, TSYSERR, error);
764 return;
765 }
766
767 error = udp_do_connect(connp, sa, len, cr, pid);
768 if (error != 0) {
769 if (error < 0)
770 udp_err_ack(q, mp, -error, 0);
771 else
772 udp_err_ack(q, mp, TSYSERR, error);
773 } else {
774 mblk_t *mp1;
775 /*
776 * We have to send a connection confirmation to
777 * keep TLI happy.
778 */
779 if (connp->conn_family == AF_INET) {
780 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
781 sizeof (sin_t), NULL, 0);
782 } else {
783 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
784 sizeof (sin6_t), NULL, 0);
785 }
786 if (mp1 == NULL) {
787 udp_err_ack(q, mp, TSYSERR, ENOMEM);
788 return;
789 }
790
791 /*
792 * Send ok_ack for T_CONN_REQ
793 */
794 mp = mi_tpi_ok_ack_alloc(mp);
795 if (mp == NULL) {
796 /* Unable to reuse the T_CONN_REQ for the ack. */
797 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
798 return;
799 }
800
801 putnext(connp->conn_rq, mp);
802 putnext(connp->conn_rq, mp1);
803 }
804 }
805
806 static int
807 udp_tpi_close(queue_t *q, int flags)
808 {
809 conn_t *connp;
810
811 if (flags & SO_FALLBACK) {
812 /*
813 * stream is being closed while in fallback
814 * simply free the resources that were allocated
815 */
816 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
817 qprocsoff(q);
818 goto done;
819 }
820
821 connp = Q_TO_CONN(q);
822 udp_do_close(connp);
823 done:
824 q->q_ptr = WR(q)->q_ptr = NULL;
825 return (0);
826 }
827
828 static void
829 udp_close_free(conn_t *connp)
830 {
831 udp_t *udp = connp->conn_udp;
832
833 /* If there are any options associated with the stream, free them. */
834 if (udp->udp_recv_ipp.ipp_fields != 0)
835 ip_pkt_free(&udp->udp_recv_ipp);
836
837 /*
838 * Clear any fields which the kmem_cache constructor clears.
839 * Only udp_connp needs to be preserved.
840 * TBD: We should make this more efficient to avoid clearing
841 * everything.
842 */
843 ASSERT(udp->udp_connp == connp);
844 bzero(udp, sizeof (udp_t));
845 udp->udp_connp = connp;
846 }
847
848 static int
849 udp_do_disconnect(conn_t *connp)
850 {
851 udp_t *udp;
852 udp_fanout_t *udpf;
853 udp_stack_t *us;
854 int error;
855
856 udp = connp->conn_udp;
857 us = udp->udp_us;
858 mutex_enter(&connp->conn_lock);
859 if (udp->udp_state != TS_DATA_XFER) {
860 mutex_exit(&connp->conn_lock);
861 return (-TOUTSTATE);
862 }
863 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
864 us->us_bind_fanout_size)];
865 mutex_enter(&udpf->uf_lock);
866 if (connp->conn_mcbc_bind)
867 connp->conn_saddr_v6 = ipv6_all_zeros;
868 else
869 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
870 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
871 connp->conn_faddr_v6 = ipv6_all_zeros;
872 connp->conn_fport = 0;
873 udp->udp_state = TS_IDLE;
874 mutex_exit(&udpf->uf_lock);
875
876 /* Remove any remnants of mapped address binding */
877 if (connp->conn_family == AF_INET6)
878 connp->conn_ipversion = IPV6_VERSION;
879
880 connp->conn_v6lastdst = ipv6_all_zeros;
881 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
882 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
883 mutex_exit(&connp->conn_lock);
884 if (error != 0)
885 return (error);
886
887 /*
888 * Tell IP to remove the full binding and revert
889 * to the local address binding.
890 */
891 return (ip_laddr_fanout_insert(connp));
892 }
893
894 static void
895 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
896 {
897 conn_t *connp = Q_TO_CONN(q);
898 int error;
899
900 /*
901 * Allocate the largest primitive we need to send back
902 * T_error_ack is > than T_ok_ack
903 */
904 mp = reallocb(mp, sizeof (struct T_error_ack), 1);
905 if (mp == NULL) {
906 /* Unable to reuse the T_DISCON_REQ for the ack. */
907 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
908 return;
909 }
910
911 error = udp_do_disconnect(connp);
912
913 if (error != 0) {
914 if (error < 0) {
915 udp_err_ack(q, mp, -error, 0);
916 } else {
917 udp_err_ack(q, mp, TSYSERR, error);
918 }
919 } else {
920 mp = mi_tpi_ok_ack_alloc(mp);
921 ASSERT(mp != NULL);
922 qreply(q, mp);
923 }
924 }
925
926 int
927 udp_disconnect(conn_t *connp)
928 {
929 int error;
930
931 connp->conn_dgram_errind = B_FALSE;
932 error = udp_do_disconnect(connp);
933 if (error < 0)
934 error = proto_tlitosyserr(-error);
935
936 return (error);
937 }
938
939 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
940 static void
941 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
942 {
943 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
944 qreply(q, mp);
945 }
946
947 /* Shorthand to generate and send TPI error acks to our client */
948 static void
949 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
950 t_scalar_t t_error, int sys_error)
951 {
952 struct T_error_ack *teackp;
953
954 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
955 M_PCPROTO, T_ERROR_ACK)) != NULL) {
956 teackp = (struct T_error_ack *)mp->b_rptr;
957 teackp->ERROR_prim = primitive;
958 teackp->TLI_error = t_error;
959 teackp->UNIX_error = sys_error;
960 qreply(q, mp);
961 }
962 }
963
/*
 * At minimum we need 4 bytes of UDP header
 * NOTE(review): presumably the source and destination ports, which occupy
 * the first four bytes of a UDP header -- confirm against the ICMP input
 * path that uses this constant.
 */
#define	ICMP_MIN_UDP_HDR	4
966
967 /*
968 * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
969 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
970 * Assumes that IP has pulled up everything up to and including the ICMP header.
971 */
972 /* ARGSUSED2 */
973 static void
974 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
975 {
976 conn_t *connp = (conn_t *)arg1;
977 icmph_t *icmph;
978 ipha_t *ipha;
979 int iph_hdr_length;
980 udpha_t *udpha;
981 sin_t sin;
982 sin6_t sin6;
983 mblk_t *mp1;
984 int error = 0;
985 udp_t *udp = connp->conn_udp;
986
987 ipha = (ipha_t *)mp->b_rptr;
988
989 ASSERT(OK_32PTR(mp->b_rptr));
990
991 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
992 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
993 udp_icmp_error_ipv6(connp, mp, ira);
994 return;
995 }
996 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
997
998 /* Skip past the outer IP and ICMP headers */
999 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
1000 iph_hdr_length = ira->ira_ip_hdr_length;
1001 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1002 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */
1003
1004 /* Skip past the inner IP and find the ULP header */
1005 iph_hdr_length = IPH_HDR_LENGTH(ipha);
1006 udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
1007
1008 switch (icmph->icmph_type) {
1009 case ICMP_DEST_UNREACHABLE:
1010 switch (icmph->icmph_code) {
1011 case ICMP_FRAGMENTATION_NEEDED: {
1012 ipha_t *ipha;
1013 ip_xmit_attr_t *ixa;
1014 /*
1015 * IP has already adjusted the path MTU.
1016 * But we need to adjust DF for IPv4.
1017 */
1018 if (connp->conn_ipversion != IPV4_VERSION)
1019 break;
1020
1021 ixa = conn_get_ixa(connp, B_FALSE);
1022 if (ixa == NULL || ixa->ixa_ire == NULL) {
1023 /*
1024 * Some other thread holds conn_ixa. We will
1025 * redo this on the next ICMP too big.
1026 */
1027 if (ixa != NULL)
1028 ixa_refrele(ixa);
1029 break;
1030 }
1031 (void) ip_get_pmtu(ixa);
1032
1033 mutex_enter(&connp->conn_lock);
1034 ipha = (ipha_t *)connp->conn_ht_iphc;
1035 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
1036 ipha->ipha_fragment_offset_and_flags |=
1037 IPH_DF_HTONS;
1038 } else {
1039 ipha->ipha_fragment_offset_and_flags &=
1040 ~IPH_DF_HTONS;
1041 }
1042 mutex_exit(&connp->conn_lock);
1043 ixa_refrele(ixa);
1044 break;
1045 }
1046 case ICMP_PORT_UNREACHABLE:
1047 case ICMP_PROTOCOL_UNREACHABLE:
1048 error = ECONNREFUSED;
1049 break;
1050 default:
1051 /* Transient errors */
1052 break;
1053 }
1054 break;
1055 default:
1056 /* Transient errors */
1057 break;
1058 }
1059 if (error == 0) {
1060 freemsg(mp);
1061 return;
1062 }
1063
1064 /*
1065 * Deliver T_UDERROR_IND when the application has asked for it.
1066 * The socket layer enables this automatically when connected.
1067 */
1068 if (!connp->conn_dgram_errind) {
1069 freemsg(mp);
1070 return;
1071 }
1072
1073 switch (connp->conn_family) {
1074 case AF_INET:
1075 sin = sin_null;
1076 sin.sin_family = AF_INET;
1077 sin.sin_addr.s_addr = ipha->ipha_dst;
1078 sin.sin_port = udpha->uha_dst_port;
1079 if (IPCL_IS_NONSTR(connp)) {
1080 mutex_enter(&connp->conn_lock);
1081 if (udp->udp_state == TS_DATA_XFER) {
1082 if (sin.sin_port == connp->conn_fport &&
1083 sin.sin_addr.s_addr ==
1084 connp->conn_faddr_v4) {
1085 mutex_exit(&connp->conn_lock);
1086 (*connp->conn_upcalls->su_set_error)
1087 (connp->conn_upper_handle, error);
1088 goto done;
1089 }
1090 } else {
1091 udp->udp_delayed_error = error;
1092 *((sin_t *)&udp->udp_delayed_addr) = sin;
1093 }
1094 mutex_exit(&connp->conn_lock);
1095 } else {
1096 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1097 NULL, 0, error);
1098 if (mp1 != NULL)
1099 putnext(connp->conn_rq, mp1);
1100 }
1101 break;
1102 case AF_INET6:
1103 sin6 = sin6_null;
1104 sin6.sin6_family = AF_INET6;
1105 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1106 sin6.sin6_port = udpha->uha_dst_port;
1107 if (IPCL_IS_NONSTR(connp)) {
1108 mutex_enter(&connp->conn_lock);
1109 if (udp->udp_state == TS_DATA_XFER) {
1110 if (sin6.sin6_port == connp->conn_fport &&
1111 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1112 &connp->conn_faddr_v6)) {
1113 mutex_exit(&connp->conn_lock);
1114 (*connp->conn_upcalls->su_set_error)
1115 (connp->conn_upper_handle, error);
1116 goto done;
1117 }
1118 } else {
1119 udp->udp_delayed_error = error;
1120 *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1121 }
1122 mutex_exit(&connp->conn_lock);
1123 } else {
1124 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1125 NULL, 0, error);
1126 if (mp1 != NULL)
1127 putnext(connp->conn_rq, mp1);
1128 }
1129 break;
1130 }
1131 done:
1132 freemsg(mp);
1133 }
1134
1135 /*
1136 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1137 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1138 * Assumes that IP has pulled up all the extension headers as well as the
1139 * ICMPv6 header.
1140 */
1141 static void
1142 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1143 {
1144 icmp6_t *icmp6;
1145 ip6_t *ip6h, *outer_ip6h;
1146 uint16_t iph_hdr_length;
1147 uint8_t *nexthdrp;
1148 udpha_t *udpha;
1149 sin6_t sin6;
1150 mblk_t *mp1;
1151 int error = 0;
1152 udp_t *udp = connp->conn_udp;
1153 udp_stack_t *us = udp->udp_us;
1154
1155 outer_ip6h = (ip6_t *)mp->b_rptr;
1156 #ifdef DEBUG
1157 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1158 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1159 else
1160 iph_hdr_length = IPV6_HDR_LEN;
1161 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1162 #endif
1163 /* Skip past the outer IP and ICMP headers */
1164 iph_hdr_length = ira->ira_ip_hdr_length;
1165 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1166
1167 /* Skip past the inner IP and find the ULP header */
1168 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */
1169 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1170 freemsg(mp);
1171 return;
1172 }
1173 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1174
1175 switch (icmp6->icmp6_type) {
1176 case ICMP6_DST_UNREACH:
1177 switch (icmp6->icmp6_code) {
1178 case ICMP6_DST_UNREACH_NOPORT:
1179 error = ECONNREFUSED;
1180 break;
1181 case ICMP6_DST_UNREACH_ADMIN:
1182 case ICMP6_DST_UNREACH_NOROUTE:
1183 case ICMP6_DST_UNREACH_BEYONDSCOPE:
1184 case ICMP6_DST_UNREACH_ADDR:
1185 /* Transient errors */
1186 break;
1187 default:
1188 break;
1189 }
1190 break;
1191 case ICMP6_PACKET_TOO_BIG: {
1192 struct T_unitdata_ind *tudi;
1193 struct T_opthdr *toh;
1194 size_t udi_size;
1195 mblk_t *newmp;
1196 t_scalar_t opt_length = sizeof (struct T_opthdr) +
1197 sizeof (struct ip6_mtuinfo);
1198 sin6_t *sin6;
1199 struct ip6_mtuinfo *mtuinfo;
1200
1201 /*
1202 * If the application has requested to receive path mtu
1203 * information, send up an empty message containing an
1204 * IPV6_PATHMTU ancillary data item.
1205 */
1206 if (!connp->conn_ipv6_recvpathmtu)
1207 break;
1208
1209 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1210 opt_length;
1211 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1212 UDPS_BUMP_MIB(us, udpInErrors);
1213 break;
1214 }
1215
1216 /*
1217 * newmp->b_cont is left to NULL on purpose. This is an
1218 * empty message containing only ancillary data.
1219 */
1220 newmp->b_datap->db_type = M_PROTO;
1221 tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1222 newmp->b_wptr = (uchar_t *)tudi + udi_size;
1223 tudi->PRIM_type = T_UNITDATA_IND;
1224 tudi->SRC_length = sizeof (sin6_t);
1225 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1226 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1227 tudi->OPT_length = opt_length;
1228
1229 sin6 = (sin6_t *)&tudi[1];
1230 bzero(sin6, sizeof (sin6_t));
1231 sin6->sin6_family = AF_INET6;
1232 sin6->sin6_addr = connp->conn_faddr_v6;
1233
1234 toh = (struct T_opthdr *)&sin6[1];
1235 toh->level = IPPROTO_IPV6;
1236 toh->name = IPV6_PATHMTU;
1237 toh->len = opt_length;
1238 toh->status = 0;
1239
1240 mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1241 bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1242 mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1243 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1244 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1245 /*
1246 * We've consumed everything we need from the original
1247 * message. Free it, then send our empty message.
1248 */
1249 freemsg(mp);
1250 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1251 return;
1252 }
1253 case ICMP6_TIME_EXCEEDED:
1254 /* Transient errors */
1255 break;
1256 case ICMP6_PARAM_PROB:
1257 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1258 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1259 (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1260 (uchar_t *)nexthdrp) {
1261 error = ECONNREFUSED;
1262 break;
1263 }
1264 break;
1265 }
1266 if (error == 0) {
1267 freemsg(mp);
1268 return;
1269 }
1270
1271 /*
1272 * Deliver T_UDERROR_IND when the application has asked for it.
1273 * The socket layer enables this automatically when connected.
1274 */
1275 if (!connp->conn_dgram_errind) {
1276 freemsg(mp);
1277 return;
1278 }
1279
1280 sin6 = sin6_null;
1281 sin6.sin6_family = AF_INET6;
1282 sin6.sin6_addr = ip6h->ip6_dst;
1283 sin6.sin6_port = udpha->uha_dst_port;
1284 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1285
1286 if (IPCL_IS_NONSTR(connp)) {
1287 mutex_enter(&connp->conn_lock);
1288 if (udp->udp_state == TS_DATA_XFER) {
1289 if (sin6.sin6_port == connp->conn_fport &&
1290 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1291 &connp->conn_faddr_v6)) {
1292 mutex_exit(&connp->conn_lock);
1293 (*connp->conn_upcalls->su_set_error)
1294 (connp->conn_upper_handle, error);
1295 goto done;
1296 }
1297 } else {
1298 udp->udp_delayed_error = error;
1299 *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1300 }
1301 mutex_exit(&connp->conn_lock);
1302 } else {
1303 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1304 NULL, 0, error);
1305 if (mp1 != NULL)
1306 putnext(connp->conn_rq, mp1);
1307 }
1308 done:
1309 freemsg(mp);
1310 }
1311
1312 /*
1313 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput.
1314 * The local address is filled in if endpoint is bound. The remote address
1315 * is filled in if remote address has been precified ("connected endpoint")
1316 * (The concept of connected CLTS sockets is alien to published TPI
1317 * but we support it anyway).
1318 */
1319 static void
1320 udp_addr_req(queue_t *q, mblk_t *mp)
1321 {
1322 struct sockaddr *sa;
1323 mblk_t *ackmp;
1324 struct T_addr_ack *taa;
1325 udp_t *udp = Q_TO_UDP(q);
1326 conn_t *connp = udp->udp_connp;
1327 uint_t addrlen;
1328
1329 /* Make it large enough for worst case */
1330 ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1331 2 * sizeof (sin6_t), 1);
1332 if (ackmp == NULL) {
1333 udp_err_ack(q, mp, TSYSERR, ENOMEM);
1334 return;
1335 }
1336 taa = (struct T_addr_ack *)ackmp->b_rptr;
1337
1338 bzero(taa, sizeof (struct T_addr_ack));
1339 ackmp->b_wptr = (uchar_t *)&taa[1];
1340
1341 taa->PRIM_type = T_ADDR_ACK;
1342 ackmp->b_datap->db_type = M_PCPROTO;
1343
1344 if (connp->conn_family == AF_INET)
1345 addrlen = sizeof (sin_t);
1346 else
1347 addrlen = sizeof (sin6_t);
1348
1349 mutex_enter(&connp->conn_lock);
1350 /*
1351 * Note: Following code assumes 32 bit alignment of basic
1352 * data structures like sin_t and struct T_addr_ack.
1353 */
1354 if (udp->udp_state != TS_UNBND) {
1355 /*
1356 * Fill in local address first
1357 */
1358 taa->LOCADDR_offset = sizeof (*taa);
1359 taa->LOCADDR_length = addrlen;
1360 sa = (struct sockaddr *)&taa[1];
1361 (void) conn_getsockname(connp, sa, &addrlen);
1362 ackmp->b_wptr += addrlen;
1363 }
1364 if (udp->udp_state == TS_DATA_XFER) {
1365 /*
1366 * connected, fill remote address too
1367 */
1368 taa->REMADDR_length = addrlen;
1369 /* assumed 32-bit alignment */
1370 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1371 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1372 (void) conn_getpeername(connp, sa, &addrlen);
1373 ackmp->b_wptr += addrlen;
1374 }
1375 mutex_exit(&connp->conn_lock);
1376 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1377 qreply(q, ackmp);
1378 }
1379
1380 static void
1381 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1382 {
1383 conn_t *connp = udp->udp_connp;
1384
1385 if (connp->conn_family == AF_INET) {
1386 *tap = udp_g_t_info_ack_ipv4;
1387 } else {
1388 *tap = udp_g_t_info_ack_ipv6;
1389 }
1390 tap->CURRENT_state = udp->udp_state;
1391 tap->OPT_size = udp_max_optsize;
1392 }
1393
1394 static void
1395 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1396 t_uscalar_t cap_bits1)
1397 {
1398 tcap->CAP_bits1 = 0;
1399
1400 if (cap_bits1 & TC1_INFO) {
1401 udp_copy_info(&tcap->INFO_ack, udp);
1402 tcap->CAP_bits1 |= TC1_INFO;
1403 }
1404 }
1405
1406 /*
1407 * This routine responds to T_CAPABILITY_REQ messages. It is called by
1408 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from
1409 * udp_g_t_info_ack. The current state of the stream is copied from
1410 * udp_state.
1411 */
1412 static void
1413 udp_capability_req(queue_t *q, mblk_t *mp)
1414 {
1415 t_uscalar_t cap_bits1;
1416 struct T_capability_ack *tcap;
1417 udp_t *udp = Q_TO_UDP(q);
1418
1419 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1420
1421 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1422 mp->b_datap->db_type, T_CAPABILITY_ACK);
1423 if (!mp)
1424 return;
1425
1426 tcap = (struct T_capability_ack *)mp->b_rptr;
1427 udp_do_capability_ack(udp, tcap, cap_bits1);
1428
1429 qreply(q, mp);
1430 }
1431
1432 /*
1433 * This routine responds to T_INFO_REQ messages. It is called by udp_wput.
1434 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1435 * The current state of the stream is copied from udp_state.
1436 */
1437 static void
1438 udp_info_req(queue_t *q, mblk_t *mp)
1439 {
1440 udp_t *udp = Q_TO_UDP(q);
1441
1442 /* Create a T_INFO_ACK message. */
1443 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1444 T_INFO_ACK);
1445 if (!mp)
1446 return;
1447 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1448 qreply(q, mp);
1449 }
1450
1451 /* For /dev/udp aka AF_INET open */
1452 static int
1453 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1454 {
1455 return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1456 }
1457
1458 /* For /dev/udp6 aka AF_INET6 open */
1459 static int
1460 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1461 {
1462 return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1463 }
1464
1465 /*
1466 * This is the open routine for udp. It allocates a udp_t structure for
1467 * the stream and, on the first open of the module, creates an ND table.
1468 */
1469 static int
1470 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1471 boolean_t isv6)
1472 {
1473 udp_t *udp;
1474 conn_t *connp;
1475 dev_t conn_dev;
1476 vmem_t *minor_arena;
1477 int err;
1478
1479 /* If the stream is already open, return immediately. */
1480 if (q->q_ptr != NULL)
1481 return (0);
1482
1483 if (sflag == MODOPEN)
1484 return (EINVAL);
1485
1486 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1487 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1488 minor_arena = ip_minor_arena_la;
1489 } else {
1490 /*
1491 * Either minor numbers in the large arena were exhausted
1492 * or a non socket application is doing the open.
1493 * Try to allocate from the small arena.
1494 */
1495 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1496 return (EBUSY);
1497
1498 minor_arena = ip_minor_arena_sa;
1499 }
1500
1501 if (flag & SO_FALLBACK) {
1502 /*
1503 * Non streams socket needs a stream to fallback to
1504 */
1505 RD(q)->q_ptr = (void *)conn_dev;
1506 WR(q)->q_qinfo = &udp_fallback_sock_winit;
1507 WR(q)->q_ptr = (void *)minor_arena;
1508 qprocson(q);
1509 return (0);
1510 }
1511
1512 connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1513 if (connp == NULL) {
1514 inet_minor_free(minor_arena, conn_dev);
1515 return (err);
1516 }
1517 udp = connp->conn_udp;
1518
1519 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1520 connp->conn_dev = conn_dev;
1521 connp->conn_minor_arena = minor_arena;
1522
1523 /*
1524 * Initialize the udp_t structure for this stream.
1525 */
1526 q->q_ptr = connp;
1527 WR(q)->q_ptr = connp;
1528 connp->conn_rq = q;
1529 connp->conn_wq = WR(q);
1530
1531 /*
1532 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1533 * need to lock anything.
1534 */
1535 ASSERT(connp->conn_proto == IPPROTO_UDP);
1536 ASSERT(connp->conn_udp == udp);
1537 ASSERT(udp->udp_connp == connp);
1538
1539 if (flag & SO_SOCKSTR) {
1540 udp->udp_issocket = B_TRUE;
1541 }
1542
1543 WR(q)->q_hiwat = connp->conn_sndbuf;
1544 WR(q)->q_lowat = connp->conn_sndlowat;
1545
1546 qprocson(q);
1547
1548 /* Set the Stream head write offset and high watermark. */
1549 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1550 (void) proto_set_rx_hiwat(q, connp,
1551 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1552
1553 mutex_enter(&connp->conn_lock);
1554 connp->conn_state_flags &= ~CONN_INCIPIENT;
1555 mutex_exit(&connp->conn_lock);
1556 return (0);
1557 }
1558
1559 /*
1560 * Which UDP options OK to set through T_UNITDATA_REQ...
1561 */
1562 /* ARGSUSED */
1563 static boolean_t
1564 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1565 {
1566 return (B_TRUE);
1567 }
1568
1569 /*
1570 * This routine gets default values of certain options whose default
1571 * values are maintained by protcol specific code
1572 */
1573 int
1574 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1575 {
1576 udp_t *udp = Q_TO_UDP(q);
1577 udp_stack_t *us = udp->udp_us;
1578 int *i1 = (int *)ptr;
1579
1580 switch (level) {
1581 case IPPROTO_IP:
1582 switch (name) {
1583 case IP_MULTICAST_TTL:
1584 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1585 return (sizeof (uchar_t));
1586 case IP_MULTICAST_LOOP:
1587 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1588 return (sizeof (uchar_t));
1589 }
1590 break;
1591 case IPPROTO_IPV6:
1592 switch (name) {
1593 case IPV6_MULTICAST_HOPS:
1594 *i1 = IP_DEFAULT_MULTICAST_TTL;
1595 return (sizeof (int));
1596 case IPV6_MULTICAST_LOOP:
1597 *i1 = IP_DEFAULT_MULTICAST_LOOP;
1598 return (sizeof (int));
1599 case IPV6_UNICAST_HOPS:
1600 *i1 = us->us_ipv6_hoplimit;
1601 return (sizeof (int));
1602 }
1603 break;
1604 }
1605 return (-1);
1606 }
1607
1608 /*
1609 * This routine retrieves the current status of socket options.
1610 * It returns the size of the option retrieved, or -1.
1611 */
1612 int
1613 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1614 uchar_t *ptr)
1615 {
1616 int *i1 = (int *)ptr;
1617 udp_t *udp = connp->conn_udp;
1618 int len;
1619 conn_opt_arg_t coas;
1620 int retval;
1621
1622 coas.coa_connp = connp;
1623 coas.coa_ixa = connp->conn_ixa;
1624 coas.coa_ipp = &connp->conn_xmit_ipp;
1625 coas.coa_ancillary = B_FALSE;
1626 coas.coa_changed = 0;
1627
1628 /*
1629 * We assume that the optcom framework has checked for the set
1630 * of levels and names that are supported, hence we don't worry
1631 * about rejecting based on that.
1632 * First check for UDP specific handling, then pass to common routine.
1633 */
1634 switch (level) {
1635 case IPPROTO_IP:
1636 /*
1637 * Only allow IPv4 option processing on IPv4 sockets.
1638 */
1639 if (connp->conn_family != AF_INET)
1640 return (-1);
1641
1642 switch (name) {
1643 case IP_OPTIONS:
1644 case T_IP_OPTIONS:
1645 mutex_enter(&connp->conn_lock);
1646 if (!(udp->udp_recv_ipp.ipp_fields &
1647 IPPF_IPV4_OPTIONS)) {
1648 mutex_exit(&connp->conn_lock);
1649 return (0);
1650 }
1651
1652 len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1653 ASSERT(len != 0);
1654 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1655 mutex_exit(&connp->conn_lock);
1656 return (len);
1657 }
1658 break;
1659 case IPPROTO_UDP:
1660 switch (name) {
1661 case UDP_NAT_T_ENDPOINT:
1662 mutex_enter(&connp->conn_lock);
1663 *i1 = udp->udp_nat_t_endpoint;
1664 mutex_exit(&connp->conn_lock);
1665 return (sizeof (int));
1666 case UDP_RCVHDR:
1667 mutex_enter(&connp->conn_lock);
1668 *i1 = udp->udp_rcvhdr ? 1 : 0;
1669 mutex_exit(&connp->conn_lock);
1670 return (sizeof (int));
1671 case UDP_SRCPORT_HASH:
1672 mutex_enter(&connp->conn_lock);
1673 *i1 = udp->udp_vxlanhash;
1674 mutex_exit(&connp->conn_lock);
1675 return (sizeof (int));
1676 case UDP_SND_TO_CONNECTED:
1677 mutex_enter(&connp->conn_lock);
1678 *i1 = udp->udp_snd_to_conn ? 1 : 0;
1679 mutex_exit(&connp->conn_lock);
1680 return (sizeof (int));
1681 }
1682 }
1683 mutex_enter(&connp->conn_lock);
1684 retval = conn_opt_get(&coas, level, name, ptr);
1685 mutex_exit(&connp->conn_lock);
1686 return (retval);
1687 }
1688
1689 /*
1690 * This routine retrieves the current status of socket options.
1691 * It returns the size of the option retrieved, or -1.
1692 */
1693 int
1694 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1695 {
1696 conn_t *connp = Q_TO_CONN(q);
1697 int err;
1698
1699 err = udp_opt_get(connp, level, name, ptr);
1700 return (err);
1701 }
1702
1703 /*
1704 * This routine sets socket options.
1705 */
1706 int
1707 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1708 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1709 {
1710 conn_t *connp = coa->coa_connp;
1711 ip_xmit_attr_t *ixa = coa->coa_ixa;
1712 udp_t *udp = connp->conn_udp;
1713 udp_stack_t *us = udp->udp_us;
1714 int *i1 = (int *)invalp;
1715 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1716 int error;
1717
1718 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1719 /*
1720 * First do UDP specific sanity checks and handle UDP specific
1721 * options. Note that some IPPROTO_UDP options are handled
1722 * by conn_opt_set.
1723 */
1724 switch (level) {
1725 case SOL_SOCKET:
1726 switch (name) {
1727 case SO_SNDBUF:
1728 if (*i1 > us->us_max_buf) {
1729 return (ENOBUFS);
1730 }
1731 break;
1732 case SO_RCVBUF:
1733 if (*i1 > us->us_max_buf) {
1734 return (ENOBUFS);
1735 }
1736 break;
1737
1738 case SCM_UCRED: {
1739 struct ucred_s *ucr;
1740 cred_t *newcr;
1741 ts_label_t *tsl;
1742
1743 /*
1744 * Only sockets that have proper privileges and are
1745 * bound to MLPs will have any other value here, so
1746 * this implicitly tests for privilege to set label.
1747 */
1748 if (connp->conn_mlp_type == mlptSingle)
1749 break;
1750
1751 ucr = (struct ucred_s *)invalp;
1752 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1753 ucr->uc_labeloff < sizeof (*ucr) ||
1754 ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1755 return (EINVAL);
1756 if (!checkonly) {
1757 /*
1758 * Set ixa_tsl to the new label.
1759 * We assume that crgetzoneid doesn't change
1760 * as part of the SCM_UCRED.
1761 */
1762 ASSERT(cr != NULL);
1763 if ((tsl = crgetlabel(cr)) == NULL)
1764 return (EINVAL);
1765 newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1766 tsl->tsl_doi, KM_NOSLEEP);
1767 if (newcr == NULL)
1768 return (ENOSR);
1769 ASSERT(newcr->cr_label != NULL);
1770 /*
1771 * Move the hold on the cr_label to ixa_tsl by
1772 * setting cr_label to NULL. Then release newcr.
1773 */
1774 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1775 ixa->ixa_flags |= IXAF_UCRED_TSL;
1776 newcr->cr_label = NULL;
1777 crfree(newcr);
1778 coa->coa_changed |= COA_HEADER_CHANGED;
1779 coa->coa_changed |= COA_WROFF_CHANGED;
1780 }
1781 /* Fully handled this option. */
1782 return (0);
1783 }
1784 }
1785 break;
1786 case IPPROTO_UDP:
1787 switch (name) {
1788 case UDP_NAT_T_ENDPOINT:
1789 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1790 return (error);
1791 }
1792
1793 /*
1794 * Use conn_family instead so we can avoid ambiguitites
1795 * with AF_INET6 sockets that may switch from IPv4
1796 * to IPv6.
1797 */
1798 if (connp->conn_family != AF_INET) {
1799 return (EAFNOSUPPORT);
1800 }
1801
1802 if (!checkonly) {
1803 mutex_enter(&connp->conn_lock);
1804 udp->udp_nat_t_endpoint = onoff;
1805 mutex_exit(&connp->conn_lock);
1806 coa->coa_changed |= COA_HEADER_CHANGED;
1807 coa->coa_changed |= COA_WROFF_CHANGED;
1808 }
1809 /* Fully handled this option. */
1810 return (0);
1811 case UDP_RCVHDR:
1812 mutex_enter(&connp->conn_lock);
1813 udp->udp_rcvhdr = onoff;
1814 mutex_exit(&connp->conn_lock);
1815 return (0);
1816 case UDP_SRCPORT_HASH:
1817 /*
1818 * This should have already been verified, but double
1819 * check.
1820 */
1821 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1822 return (error);
1823 }
1824
1825 /* First see if the val is something we understand */
1826 if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
1827 return (EINVAL);
1828
1829 if (!checkonly) {
1830 mutex_enter(&connp->conn_lock);
1831 udp->udp_vxlanhash = *i1;
1832 mutex_exit(&connp->conn_lock);
1833 }
1834 /* Fully handled this option. */
1835 return (0);
1836 case UDP_SND_TO_CONNECTED:
1837 mutex_enter(&connp->conn_lock);
1838 udp->udp_snd_to_conn = onoff;
1839 mutex_exit(&connp->conn_lock);
1840 return (0);
1841 }
1842 break;
1843 }
1844 error = conn_opt_set(coa, level, name, inlen, invalp,
1845 checkonly, cr);
1846 return (error);
1847 }
1848
1849 /*
1850 * This routine sets socket options.
1851 */
1852 int
1853 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1854 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1855 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1856 {
1857 udp_t *udp = connp->conn_udp;
1858 int err;
1859 conn_opt_arg_t coas, *coa;
1860 boolean_t checkonly;
1861 udp_stack_t *us = udp->udp_us;
1862
1863 switch (optset_context) {
1864 case SETFN_OPTCOM_CHECKONLY:
1865 checkonly = B_TRUE;
1866 /*
1867 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1868 * inlen != 0 implies value supplied and
1869 * we have to "pretend" to set it.
1870 * inlen == 0 implies that there is no
1871 * value part in T_CHECK request and just validation
1872 * done elsewhere should be enough, we just return here.
1873 */
1874 if (inlen == 0) {
1875 *outlenp = 0;
1876 return (0);
1877 }
1878 break;
1879 case SETFN_OPTCOM_NEGOTIATE:
1880 checkonly = B_FALSE;
1881 break;
1882 case SETFN_UD_NEGOTIATE:
1883 case SETFN_CONN_NEGOTIATE:
1884 checkonly = B_FALSE;
1885 /*
1886 * Negotiating local and "association-related" options
1887 * through T_UNITDATA_REQ.
1888 *
1889 * Following routine can filter out ones we do not
1890 * want to be "set" this way.
1891 */
1892 if (!udp_opt_allow_udr_set(level, name)) {
1893 *outlenp = 0;
1894 return (EINVAL);
1895 }
1896 break;
1897 default:
1898 /*
1899 * We should never get here
1900 */
1901 *outlenp = 0;
1902 return (EINVAL);
1903 }
1904
1905 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1906 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1907
1908 if (thisdg_attrs != NULL) {
1909 /* Options from T_UNITDATA_REQ */
1910 coa = (conn_opt_arg_t *)thisdg_attrs;
1911 ASSERT(coa->coa_connp == connp);
1912 ASSERT(coa->coa_ixa != NULL);
1913 ASSERT(coa->coa_ipp != NULL);
1914 ASSERT(coa->coa_ancillary);
1915 } else {
1916 coa = &coas;
1917 coas.coa_connp = connp;
1918 /* Get a reference on conn_ixa to prevent concurrent mods */
1919 coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1920 if (coas.coa_ixa == NULL) {
1921 *outlenp = 0;
1922 return (ENOMEM);
1923 }
1924 coas.coa_ipp = &connp->conn_xmit_ipp;
1925 coas.coa_ancillary = B_FALSE;
1926 coas.coa_changed = 0;
1927 }
1928
1929 err = udp_do_opt_set(coa, level, name, inlen, invalp,
1930 cr, checkonly);
1931 if (err != 0) {
1932 errout:
1933 if (!coa->coa_ancillary)
1934 ixa_refrele(coa->coa_ixa);
1935 *outlenp = 0;
1936 return (err);
1937 }
1938 /* Handle DHCPINIT here outside of lock */
1939 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1940 uint_t ifindex;
1941 ill_t *ill;
1942
1943 ifindex = *(uint_t *)invalp;
1944 if (ifindex == 0) {
1945 ill = NULL;
1946 } else {
1947 ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1948 coa->coa_ixa->ixa_ipst);
1949 if (ill == NULL) {
1950 err = ENXIO;
1951 goto errout;
1952 }
1953
1954 mutex_enter(&ill->ill_lock);
1955 if (ill->ill_state_flags & ILL_CONDEMNED) {
1956 mutex_exit(&ill->ill_lock);
1957 ill_refrele(ill);
1958 err = ENXIO;
1959 goto errout;
1960 }
1961 if (IS_VNI(ill)) {
1962 mutex_exit(&ill->ill_lock);
1963 ill_refrele(ill);
1964 err = EINVAL;
1965 goto errout;
1966 }
1967 }
1968 mutex_enter(&connp->conn_lock);
1969
1970 if (connp->conn_dhcpinit_ill != NULL) {
1971 /*
1972 * We've locked the conn so conn_cleanup_ill()
1973 * cannot clear conn_dhcpinit_ill -- so it's
1974 * safe to access the ill.
1975 */
1976 ill_t *oill = connp->conn_dhcpinit_ill;
1977
1978 ASSERT(oill->ill_dhcpinit != 0);
1979 atomic_dec_32(&oill->ill_dhcpinit);
1980 ill_set_inputfn(connp->conn_dhcpinit_ill);
1981 connp->conn_dhcpinit_ill = NULL;
1982 }
1983
1984 if (ill != NULL) {
1985 connp->conn_dhcpinit_ill = ill;
1986 atomic_inc_32(&ill->ill_dhcpinit);
1987 ill_set_inputfn(ill);
1988 mutex_exit(&connp->conn_lock);
1989 mutex_exit(&ill->ill_lock);
1990 ill_refrele(ill);
1991 } else {
1992 mutex_exit(&connp->conn_lock);
1993 }
1994 }
1995
1996 /*
1997 * Common case of OK return with outval same as inval.
1998 */
1999 if (invalp != outvalp) {
2000 /* don't trust bcopy for identical src/dst */
2001 (void) bcopy(invalp, outvalp, inlen);
2002 }
2003 *outlenp = inlen;
2004
2005 /*
2006 * If this was not ancillary data, then we rebuild the headers,
2007 * update the IRE/NCE, and IPsec as needed.
2008 * Since the label depends on the destination we go through
2009 * ip_set_destination first.
2010 */
2011 if (coa->coa_ancillary) {
2012 return (0);
2013 }
2014
2015 if (coa->coa_changed & COA_ROUTE_CHANGED) {
2016 in6_addr_t saddr, faddr, nexthop;
2017 in_port_t fport;
2018
2019 /*
2020 * We clear lastdst to make sure we pick up the change
2021 * next time sending.
2022 * If we are connected we re-cache the information.
2023 * We ignore errors to preserve BSD behavior.
2024 * Note that we don't redo IPsec policy lookup here
2025 * since the final destination (or source) didn't change.
2026 */
2027 mutex_enter(&connp->conn_lock);
2028 connp->conn_v6lastdst = ipv6_all_zeros;
2029
2030 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
2031 &connp->conn_faddr_v6, &nexthop);
2032 saddr = connp->conn_saddr_v6;
2033 faddr = connp->conn_faddr_v6;
2034 fport = connp->conn_fport;
2035 mutex_exit(&connp->conn_lock);
2036
2037 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
2038 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
2039 (void) ip_attr_connect(connp, coa->coa_ixa,
2040 &saddr, &faddr, &nexthop, fport, NULL, NULL,
2041 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
2042 }
2043 }
2044
2045 ixa_refrele(coa->coa_ixa);
2046
2047 if (coa->coa_changed & COA_HEADER_CHANGED) {
2048 /*
2049 * Rebuild the header template if we are connected.
2050 * Otherwise clear conn_v6lastdst so we rebuild the header
2051 * in the data path.
2052 */
2053 mutex_enter(&connp->conn_lock);
2054 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
2055 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
2056 err = udp_build_hdr_template(connp,
2057 &connp->conn_saddr_v6, &connp->conn_faddr_v6,
2058 connp->conn_fport, connp->conn_flowinfo);
2059 if (err != 0) {
2060 mutex_exit(&connp->conn_lock);
2061 return (err);
2062 }
2063 } else {
2064 connp->conn_v6lastdst = ipv6_all_zeros;
2065 }
2066 mutex_exit(&connp->conn_lock);
2067 }
2068 if (coa->coa_changed & COA_RCVBUF_CHANGED) {
2069 (void) proto_set_rx_hiwat(connp->conn_rq, connp,
2070 connp->conn_rcvbuf);
2071 }
2072 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
2073 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
2074 }
2075 if (coa->coa_changed & COA_WROFF_CHANGED) {
2076 /* Increase wroff if needed */
2077 uint_t wroff;
2078
2079 mutex_enter(&connp->conn_lock);
2080 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
2081 if (udp->udp_nat_t_endpoint)
2082 wroff += sizeof (uint32_t);
2083 if (wroff > connp->conn_wroff) {
2084 connp->conn_wroff = wroff;
2085 mutex_exit(&connp->conn_lock);
2086 (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
2087 } else {
2088 mutex_exit(&connp->conn_lock);
2089 }
2090 }
2091 return (err);
2092 }
2093
2094 /* This routine sets socket options. */
2095 int
2096 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2097 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2098 void *thisdg_attrs, cred_t *cr)
2099 {
2100 conn_t *connp = Q_TO_CONN(q);
2101 int error;
2102
2103 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
2104 outlenp, outvalp, thisdg_attrs, cr);
2105 return (error);
2106 }
2107
2108 /*
2109 * Setup IP and UDP headers.
2110 * Returns NULL on allocation failure, in which case data_mp is freed.
2111 */
2112 mblk_t *
2113 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2114 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
2115 uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2116 {
2117 mblk_t *mp;
2118 udpha_t *udpha;
2119 udp_stack_t *us = connp->conn_netstack->netstack_udp;
2120 uint_t data_len;
2121 uint32_t cksum;
2122 udp_t *udp = connp->conn_udp;
2123 boolean_t insert_spi = udp->udp_nat_t_endpoint;
2124 boolean_t hash_srcport = udp->udp_vxlanhash;
2125 uint_t ulp_hdr_len;
2126 uint16_t srcport;
2127
2128 data_len = msgdsize(data_mp);
2129 ulp_hdr_len = UDPH_SIZE;
2130 if (insert_spi)
2131 ulp_hdr_len += sizeof (uint32_t);
2132
2133 /*
2134 * If we have source port hashing going on, determine the hash before
2135 * we modify the mblk_t.
2136 */
2137 if (hash_srcport == B_TRUE) {
2138 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
2139 IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
2140 ntohs(connp->conn_lport));
2141 }
2142
2143 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2144 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2145 if (mp == NULL) {
2146 ASSERT(*errorp != 0);
2147 return (NULL);
2148 }
2149
2150 data_len += ulp_hdr_len;
2151 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2152
2153 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2154 if (hash_srcport == B_TRUE) {
2155 udpha->uha_src_port = htons(srcport);
2156 } else {
2157 udpha->uha_src_port = connp->conn_lport;
2158 }
2159 udpha->uha_dst_port = dstport;
2160 udpha->uha_checksum = 0;
2161 udpha->uha_length = htons(data_len);
2162
2163 /*
2164 * If there was a routing option/header then conn_prepend_hdr
2165 * has massaged it and placed the pseudo-header checksum difference
2166 * in the cksum argument.
2167 *
2168 * Setup header length and prepare for ULP checksum done in IP.
2169 *
2170 * We make it easy for IP to include our pseudo header
2171 * by putting our length in uha_checksum.
2172 * The IP source, destination, and length have already been set by
2173 * conn_prepend_hdr.
2174 */
2175 cksum += data_len;
2176 cksum = (cksum >> 16) + (cksum & 0xFFFF);
2177 ASSERT(cksum < 0x10000);
2178
2179 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2180 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2181
2182 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2183
2184 /* IP does the checksum if uha_checksum is non-zero */
2185 if (us->us_do_checksum) {
2186 if (cksum == 0)
2187 udpha->uha_checksum = 0xffff;
2188 else
2189 udpha->uha_checksum = htons(cksum);
2190 } else {
2191 udpha->uha_checksum = 0;
2192 }
2193 } else {
2194 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2195
2196 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2197 if (cksum == 0)
2198 udpha->uha_checksum = 0xffff;
2199 else
2200 udpha->uha_checksum = htons(cksum);
2201 }
2202
2203 /* Insert all-0s SPI now. */
2204 if (insert_spi)
2205 *((uint32_t *)(udpha + 1)) = 0;
2206
2207 return (mp);
2208 }
2209
2210 static int
2211 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2212 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2213 {
2214 udpha_t *udpha;
2215 int error;
2216
2217 ASSERT(MUTEX_HELD(&connp->conn_lock));
2218 /*
2219 * We clear lastdst to make sure we don't use the lastdst path
2220 * next time sending since we might not have set v6dst yet.
2221 */
2222 connp->conn_v6lastdst = ipv6_all_zeros;
2223
2224 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2225 flowinfo);
2226 if (error != 0)
2227 return (error);
2228
2229 /*
2230 * Any routing header/option has been massaged. The checksum difference
2231 * is stored in conn_sum.
2232 */
2233 udpha = (udpha_t *)connp->conn_ht_ulp;
2234 udpha->uha_src_port = connp->conn_lport;
2235 udpha->uha_dst_port = dstport;
2236 udpha->uha_checksum = 0;
2237 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */
2238 return (0);
2239 }
2240
2241 static mblk_t *
2242 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2243 {
2244 ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2245 if (IPCL_IS_NONSTR(udp->udp_connp)) {
2246 /*
2247 * fallback has started but messages have not been moved yet
2248 */
2249 if (udp->udp_fallback_queue_head == NULL) {
2250 ASSERT(udp->udp_fallback_queue_tail == NULL);
2251 udp->udp_fallback_queue_head = mp;
2252 udp->udp_fallback_queue_tail = mp;
2253 } else {
2254 ASSERT(udp->udp_fallback_queue_tail != NULL);
2255 udp->udp_fallback_queue_tail->b_next = mp;
2256 udp->udp_fallback_queue_tail = mp;
2257 }
2258 return (NULL);
2259 } else {
2260 /*
2261 * Fallback completed, let the caller putnext() the mblk.
2262 */
2263 return (mp);
2264 }
2265 }
2266
2267 /*
2268 * Deliver data to ULP. In case we have a socket, and it's falling back to
2269 * TPI, then we'll queue the mp for later processing.
2270 */
2271 static void
2272 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2273 {
2274 if (IPCL_IS_NONSTR(connp)) {
2275 udp_t *udp = connp->conn_udp;
2276 int error;
2277
2278 ASSERT(len == msgdsize(mp));
2279 if ((*connp->conn_upcalls->su_recv)
2280 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2281 mutex_enter(&udp->udp_recv_lock);
2282 if (error == ENOSPC) {
2283 /*
2284 * let's confirm while holding the lock
2285 */
2286 if ((*connp->conn_upcalls->su_recv)
2287 (connp->conn_upper_handle, NULL, 0, 0,
2288 &error, NULL) < 0) {
2289 ASSERT(error == ENOSPC);
2290 if (error == ENOSPC) {
2291 connp->conn_flow_cntrld =
2292 B_TRUE;
2293 }
2294 }
2295 mutex_exit(&udp->udp_recv_lock);
2296 } else {
2297 ASSERT(error == EOPNOTSUPP);
2298 mp = udp_queue_fallback(udp, mp);
2299 mutex_exit(&udp->udp_recv_lock);
2300 if (mp != NULL)
2301 putnext(connp->conn_rq, mp);
2302 }
2303 }
2304 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2305 } else {
2306 if (is_system_labeled()) {
2307 ASSERT(ira->ira_cred != NULL);
2308 /*
2309 * Provide for protocols above UDP such as RPC
2310 * NOPID leaves db_cpid unchanged.
2311 */
2312 mblk_setcred(mp, ira->ira_cred, NOPID);
2313 }
2314
2315 putnext(connp->conn_rq, mp);
2316 }
2317 }
2318
2319 /*
2320 * This is the inbound data path.
2321 * IP has already pulled up the IP plus UDP headers and verified alignment
2322 * etc.
2323 */
2324 /* ARGSUSED2 */
2325 static void
2326 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2327 {
2328 conn_t *connp = (conn_t *)arg1;
2329 struct T_unitdata_ind *tudi;
2330 uchar_t *rptr; /* Pointer to IP header */
2331 int hdr_length; /* Length of IP+UDP headers */
2332 int udi_size; /* Size of T_unitdata_ind */
2333 int pkt_len;
2334 udp_t *udp;
2335 udpha_t *udpha;
2336 ip_pkt_t ipps;
2337 ip6_t *ip6h;
2338 mblk_t *mp1;
2339 uint32_t udp_ipv4_options_len;
2340 crb_t recv_ancillary;
2341 udp_stack_t *us;
2342
2343 ASSERT(connp->conn_flags & IPCL_UDPCONN);
2344
2345 udp = connp->conn_udp;
2346 us = udp->udp_us;
2347 rptr = mp->b_rptr;
2348
2349 ASSERT(DB_TYPE(mp) == M_DATA);
2350 ASSERT(OK_32PTR(rptr));
2351 ASSERT(ira->ira_pktlen == msgdsize(mp));
2352 pkt_len = ira->ira_pktlen;
2353
2354 /*
2355 * Get a snapshot of these and allow other threads to change
2356 * them after that. We need the same recv_ancillary when determining
2357 * the size as when adding the ancillary data items.
2358 */
2359 mutex_enter(&connp->conn_lock);
2360 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2361 recv_ancillary = connp->conn_recv_ancillary;
2362 mutex_exit(&connp->conn_lock);
2363
2364 hdr_length = ira->ira_ip_hdr_length;
2365
2366 /*
2367 * IP inspected the UDP header thus all of it must be in the mblk.
2368 * UDP length check is performed for IPv6 packets and IPv4 packets
2369 * to check if the size of the packet as specified
2370 * by the UDP header is the same as the length derived from the IP
2371 * header.
2372 */
2373 udpha = (udpha_t *)(rptr + hdr_length);
2374 if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2375 goto tossit;
2376
2377 hdr_length += UDPH_SIZE;
2378 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */
2379
2380 /* Initialize regardless of IP version */
2381 ipps.ipp_fields = 0;
2382
2383 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2384 udp_ipv4_options_len > 0) &&
2385 connp->conn_family == AF_INET) {
2386 int err;
2387
2388 /*
2389 * Record/update udp_recv_ipp with the lock
2390 * held. Not needed for AF_INET6 sockets
2391 * since they don't support a getsockopt of IP_OPTIONS.
2392 */
2393 mutex_enter(&connp->conn_lock);
2394 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2395 B_TRUE);
2396 if (err != 0) {
2397 /* Allocation failed. Drop packet */
2398 mutex_exit(&connp->conn_lock);
2399 freemsg(mp);
2400 UDPS_BUMP_MIB(us, udpInErrors);
2401 return;
2402 }
2403 mutex_exit(&connp->conn_lock);
2404 }
2405
2406 if (recv_ancillary.crb_all != 0) {
2407 /*
2408 * Record packet information in the ip_pkt_t
2409 */
2410 if (ira->ira_flags & IRAF_IS_IPV4) {
2411 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2412 ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2413 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2414 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2415
2416 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2417 } else {
2418 uint8_t nexthdrp;
2419
2420 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2421 /*
2422 * IPv6 packets can only be received by applications
2423 * that are prepared to receive IPv6 addresses.
2424 * The IP fanout must ensure this.
2425 */
2426 ASSERT(connp->conn_family == AF_INET6);
2427
2428 ip6h = (ip6_t *)rptr;
2429
2430 /* We don't care about the length, but need the ipp */
2431 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2432 &nexthdrp);
2433 ASSERT(hdr_length == ira->ira_ip_hdr_length);
2434 /* Restore */
2435 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2436 ASSERT(nexthdrp == IPPROTO_UDP);
2437 }
2438 }
2439
2440 /*
2441 * This is the inbound data path. Packets are passed upstream as
2442 * T_UNITDATA_IND messages.
2443 */
2444 if (connp->conn_family == AF_INET) {
2445 sin_t *sin;
2446
2447 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2448
2449 /*
2450 * Normally only send up the source address.
2451 * If any ancillary data items are wanted we add those.
2452 */
2453 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2454 if (recv_ancillary.crb_all != 0) {
2455 udi_size += conn_recvancillary_size(connp,
2456 recv_ancillary, ira, mp, &ipps);
2457 }
2458
2459 /* Allocate a message block for the T_UNITDATA_IND structure. */
2460 mp1 = allocb(udi_size, BPRI_MED);
2461 if (mp1 == NULL) {
2462 freemsg(mp);
2463 UDPS_BUMP_MIB(us, udpInErrors);
2464 return;
2465 }
2466 mp1->b_cont = mp;
2467 mp1->b_datap->db_type = M_PROTO;
2468 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2469 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2470 tudi->PRIM_type = T_UNITDATA_IND;
2471 tudi->SRC_length = sizeof (sin_t);
2472 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2473 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2474 sizeof (sin_t);
2475 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2476 tudi->OPT_length = udi_size;
2477 sin = (sin_t *)&tudi[1];
2478 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2479 sin->sin_port = udpha->uha_src_port;
2480 sin->sin_family = connp->conn_family;
2481 *(uint32_t *)&sin->sin_zero[0] = 0;
2482 *(uint32_t *)&sin->sin_zero[4] = 0;
2483
2484 /*
2485 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
2486 * IP_RECVTTL has been set.
2487 */
2488 if (udi_size != 0) {
2489 conn_recvancillary_add(connp, recv_ancillary, ira,
2490 &ipps, (uchar_t *)&sin[1], udi_size);
2491 }
2492 } else {
2493 sin6_t *sin6;
2494
2495 /*
2496 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2497 *
2498 * Normally we only send up the address. If receiving of any
2499 * optional receive side information is enabled, we also send
2500 * that up as options.
2501 */
2502 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2503
2504 if (recv_ancillary.crb_all != 0) {
2505 udi_size += conn_recvancillary_size(connp,
2506 recv_ancillary, ira, mp, &ipps);
2507 }
2508
2509 mp1 = allocb(udi_size, BPRI_MED);
2510 if (mp1 == NULL) {
2511 freemsg(mp);
2512 UDPS_BUMP_MIB(us, udpInErrors);
2513 return;
2514 }
2515 mp1->b_cont = mp;
2516 mp1->b_datap->db_type = M_PROTO;
2517 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2518 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2519 tudi->PRIM_type = T_UNITDATA_IND;
2520 tudi->SRC_length = sizeof (sin6_t);
2521 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2522 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2523 sizeof (sin6_t);
2524 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2525 tudi->OPT_length = udi_size;
2526 sin6 = (sin6_t *)&tudi[1];
2527 if (ira->ira_flags & IRAF_IS_IPV4) {
2528 in6_addr_t v6dst;
2529
2530 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2531 &sin6->sin6_addr);
2532 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2533 &v6dst);
2534 sin6->sin6_flowinfo = 0;
2535 sin6->sin6_scope_id = 0;
2536 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2537 IPCL_ZONEID(connp), us->us_netstack);
2538 } else {
2539 ip6h = (ip6_t *)rptr;
2540
2541 sin6->sin6_addr = ip6h->ip6_src;
2542 /* No sin6_flowinfo per API */
2543 sin6->sin6_flowinfo = 0;
2544 /* For link-scope pass up scope id */
2545 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2546 sin6->sin6_scope_id = ira->ira_ruifindex;
2547 else
2548 sin6->sin6_scope_id = 0;
2549 sin6->__sin6_src_id = ip_srcid_find_addr(
2550 &ip6h->ip6_dst, IPCL_ZONEID(connp),
2551 us->us_netstack);
2552 }
2553 sin6->sin6_port = udpha->uha_src_port;
2554 sin6->sin6_family = connp->conn_family;
2555
2556 if (udi_size != 0) {
2557 conn_recvancillary_add(connp, recv_ancillary, ira,
2558 &ipps, (uchar_t *)&sin6[1], udi_size);
2559 }
2560 }
2561
2562 /*
2563 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2564 * loopback traffic).
2565 */
2566 DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
2567 void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);
2568
2569 /* Walk past the headers unless IP_RECVHDR was set. */
2570 if (!udp->udp_rcvhdr) {
2571 mp->b_rptr = rptr + hdr_length;
2572 pkt_len -= hdr_length;
2573 }
2574
2575 UDPS_BUMP_MIB(us, udpHCInDatagrams);
2576 udp_ulp_recv(connp, mp1, pkt_len, ira);
2577 return;
2578
2579 tossit:
2580 freemsg(mp);
2581 UDPS_BUMP_MIB(us, udpInErrors);
2582 }
2583
2584 /*
2585 * This routine creates a T_UDERROR_IND message and passes it upstream.
2586 * The address and options are copied from the T_UNITDATA_REQ message
2587 * passed in mp. This message is freed.
2588 */
2589 static void
2590 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2591 {
2592 struct T_unitdata_req *tudr;
2593 mblk_t *mp1;
2594 uchar_t *destaddr;
2595 t_scalar_t destlen;
2596 uchar_t *optaddr;
2597 t_scalar_t optlen;
2598
2599 if ((mp->b_wptr < mp->b_rptr) ||
2600 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2601 goto done;
2602 }
2603 tudr = (struct T_unitdata_req *)mp->b_rptr;
2604 destaddr = mp->b_rptr + tudr->DEST_offset;
2605 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2606 destaddr + tudr->DEST_length < mp->b_rptr ||
2607 destaddr + tudr->DEST_length > mp->b_wptr) {
2608 goto done;
2609 }
2610 optaddr = mp->b_rptr + tudr->OPT_offset;
2611 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2612 optaddr + tudr->OPT_length < mp->b_rptr ||
2613 optaddr + tudr->OPT_length > mp->b_wptr) {
2614 goto done;
2615 }
2616 destlen = tudr->DEST_length;
2617 optlen = tudr->OPT_length;
2618
2619 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2620 (char *)optaddr, optlen, err);
2621 if (mp1 != NULL)
2622 qreply(q, mp1);
2623
2624 done:
2625 freemsg(mp);
2626 }
2627
2628 /*
2629 * This routine removes a port number association from a stream. It
2630 * is called by udp_wput to handle T_UNBIND_REQ messages.
2631 */
2632 static void
2633 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2634 {
2635 conn_t *connp = Q_TO_CONN(q);
2636 int error;
2637
2638 error = udp_do_unbind(connp);
2639 if (error) {
2640 if (error < 0)
2641 udp_err_ack(q, mp, -error, 0);
2642 else
2643 udp_err_ack(q, mp, TSYSERR, error);
2644 return;
2645 }
2646
2647 mp = mi_tpi_ok_ack_alloc(mp);
2648 ASSERT(mp != NULL);
2649 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2650 qreply(q, mp);
2651 }
2652
2653 /*
2654 * Don't let port fall into the privileged range.
2655 * Since the extra privileged ports can be arbitrary we also
2656 * ensure that we exclude those from consideration.
2657 * us->us_epriv_ports is not sorted thus we loop over it until
2658 * there are no changes.
2659 */
2660 static in_port_t
2661 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2662 {
2663 int i, bump;
2664 in_port_t nextport;
2665 boolean_t restart = B_FALSE;
2666 udp_stack_t *us = udp->udp_us;
2667
2668 if (random && udp_random_anon_port != 0) {
2669 (void) random_get_pseudo_bytes((uint8_t *)&port,
2670 sizeof (in_port_t));
2671 /*
2672 * Unless changed by a sys admin, the smallest anon port
2673 * is 32768 and the largest anon port is 65535. It is
2674 * very likely (50%) for the random port to be smaller
2675 * than the smallest anon port. When that happens,
2676 * add port % (anon port range) to the smallest anon
2677 * port to get the random port. It should fall into the
2678 * valid anon port range.
2679 */
2680 if ((port < us->us_smallest_anon_port) ||
2681 (port > us->us_largest_anon_port)) {
2682 if (us->us_smallest_anon_port ==
2683 us->us_largest_anon_port) {
2684 bump = 0;
2685 } else {
2686 bump = port % (us->us_largest_anon_port -
2687 us->us_smallest_anon_port);
2688 }
2689
2690 port = us->us_smallest_anon_port + bump;
2691 }
2692 }
2693
2694 retry:
2695 if (port < us->us_smallest_anon_port)
2696 port = us->us_smallest_anon_port;
2697
2698 if (port > us->us_largest_anon_port) {
2699 port = us->us_smallest_anon_port;
2700 if (restart)
2701 return (0);
2702 restart = B_TRUE;
2703 }
2704
2705 if (port < us->us_smallest_nonpriv_port)
2706 port = us->us_smallest_nonpriv_port;
2707
2708 for (i = 0; i < us->us_num_epriv_ports; i++) {
2709 if (port == us->us_epriv_ports[i]) {
2710 port++;
2711 /*
2712 * Make sure that the port is in the
2713 * valid range.
2714 */
2715 goto retry;
2716 }
2717 }
2718
2719 if (is_system_labeled() &&
2720 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2721 port, IPPROTO_UDP, B_TRUE)) != 0) {
2722 port = nextport;
2723 goto retry;
2724 }
2725
2726 return (port);
2727 }
2728
2729 /*
2730 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2731 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2732 * the TPI options, otherwise we take them from msg_control.
2733 * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
2734 * Always consumes mp; never consumes tudr_mp.
2735 */
2736 static int
2737 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2738 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2739 {
2740 udp_t *udp = connp->conn_udp;
2741 udp_stack_t *us = udp->udp_us;
2742 int error;
2743 ip_xmit_attr_t *ixa;
2744 ip_pkt_t *ipp;
2745 in6_addr_t v6src;
2746 in6_addr_t v6dst;
2747 in6_addr_t v6nexthop;
2748 in_port_t dstport;
2749 uint32_t flowinfo;
2750 uint_t srcid;
2751 int is_absreq_failure = 0;
2752 conn_opt_arg_t coas, *coa;
2753
2754 ASSERT(tudr_mp != NULL || msg != NULL);
2755
2756 /*
2757 * Get ixa before checking state to handle a disconnect race.
2758 *
2759 * We need an exclusive copy of conn_ixa since the ancillary data
2760 * options might modify it. That copy has no pointers hence we
2761 * need to set them up once we've parsed the ancillary data.
2762 */
2763 ixa = conn_get_ixa_exclusive(connp);
2764 if (ixa == NULL) {
2765 UDPS_BUMP_MIB(us, udpOutErrors);
2766 freemsg(mp);
2767 return (ENOMEM);
2768 }
2769 ASSERT(cr != NULL);
2770 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2771 ixa->ixa_cred = cr;
2772 ixa->ixa_cpid = pid;
2773 if (is_system_labeled()) {
2774 /* We need to restart with a label based on the cred */
2775 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2776 }
2777
2778 /* In case previous destination was multicast or multirt */
2779 ip_attr_newdst(ixa);
2780
2781 /* Get a copy of conn_xmit_ipp since the options might change it */
2782 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2783 if (ipp == NULL) {
2784 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2785 ixa->ixa_cred = connp->conn_cred; /* Restore */
2786 ixa->ixa_cpid = connp->conn_cpid;
2787 ixa_refrele(ixa);
2788 UDPS_BUMP_MIB(us, udpOutErrors);
2789 freemsg(mp);
2790 return (ENOMEM);
2791 }
2792 mutex_enter(&connp->conn_lock);
2793 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2794 mutex_exit(&connp->conn_lock);
2795 if (error != 0) {
2796 UDPS_BUMP_MIB(us, udpOutErrors);
2797 freemsg(mp);
2798 goto done;
2799 }
2800
2801 /*
2802 * Parse the options and update ixa and ipp as a result.
2803 * Note that ixa_tsl can be updated if SCM_UCRED.
2804 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2805 */
2806
2807 coa = &coas;
2808 coa->coa_connp = connp;
2809 coa->coa_ixa = ixa;
2810 coa->coa_ipp = ipp;
2811 coa->coa_ancillary = B_TRUE;
2812 coa->coa_changed = 0;
2813
2814 if (msg != NULL) {
2815 error = process_auxiliary_options(connp, msg->msg_control,
2816 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2817 } else {
2818 struct T_unitdata_req *tudr;
2819
2820 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2821 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2822 error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2823 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2824 coa, &is_absreq_failure);
2825 }
2826 if (error != 0) {
2827 /*
2828 * Note: No special action needed in this
2829 * module for "is_absreq_failure"
2830 */
2831 freemsg(mp);
2832 UDPS_BUMP_MIB(us, udpOutErrors);
2833 goto done;
2834 }
2835 ASSERT(is_absreq_failure == 0);
2836
2837 mutex_enter(&connp->conn_lock);
2838 /*
2839 * If laddr is unspecified then we look at sin6_src_id.
2840 * We will give precedence to a source address set with IPV6_PKTINFO
2841 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2842 * want ip_attr_connect to select a source (since it can fail) when
2843 * IPV6_PKTINFO is specified.
2844 * If this doesn't result in a source address then we get a source
2845 * from ip_attr_connect() below.
2846 */
2847 v6src = connp->conn_saddr_v6;
2848 if (sin != NULL) {
2849 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2850 dstport = sin->sin_port;
2851 flowinfo = 0;
2852 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2853 ixa->ixa_flags |= IXAF_IS_IPV4;
2854 } else if (sin6 != NULL) {
2855 boolean_t v4mapped;
2856
2857 v6dst = sin6->sin6_addr;
2858 dstport = sin6->sin6_port;
2859 flowinfo = sin6->sin6_flowinfo;
2860 srcid = sin6->__sin6_src_id;
2861 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2862 ixa->ixa_scopeid = sin6->sin6_scope_id;
2863 ixa->ixa_flags |= IXAF_SCOPEID_SET;
2864 } else {
2865 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2866 }
2867 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
2868 if (v4mapped)
2869 ixa->ixa_flags |= IXAF_IS_IPV4;
2870 else
2871 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2872 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2873 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2874 v4mapped, connp->conn_netstack)) {
2875 /* Mismatch - v4mapped/v6 specified by srcid. */
2876 mutex_exit(&connp->conn_lock);
2877 error = EADDRNOTAVAIL;
2878 goto failed; /* Does freemsg() and mib. */
2879 }
2880 }
2881 } else {
2882 /* Connected case */
2883 v6dst = connp->conn_faddr_v6;
2884 dstport = connp->conn_fport;
2885 flowinfo = connp->conn_flowinfo;
2886 }
2887 mutex_exit(&connp->conn_lock);
2888
2889 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2890 if (ipp->ipp_fields & IPPF_ADDR) {
2891 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2892 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2893 v6src = ipp->ipp_addr;
2894 } else {
2895 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2896 v6src = ipp->ipp_addr;
2897 }
2898 }
2899
2900 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
2901 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2902 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2903
2904 switch (error) {
2905 case 0:
2906 break;
2907 case EADDRNOTAVAIL:
2908 /*
2909 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2910 * Don't have the application see that errno
2911 */
2912 error = ENETUNREACH;
2913 goto failed;
2914 case ENETDOWN:
2915 /*
2916 * Have !ipif_addr_ready address; drop packet silently
2917 * until we can get applications to not send until we
2918 * are ready.
2919 */
2920 error = 0;
2921 goto failed;
2922 case EHOSTUNREACH:
2923 case ENETUNREACH:
2924 if (ixa->ixa_ire != NULL) {
2925 /*
2926 * Let conn_ip_output/ire_send_noroute return
2927 * the error and send any local ICMP error.
2928 */
2929 error = 0;
2930 break;
2931 }
2932 /* FALLTHRU */
2933 default:
2934 failed:
2935 freemsg(mp);
2936 UDPS_BUMP_MIB(us, udpOutErrors);
2937 goto done;
2938 }
2939
2940 /*
2941 * We might be going to a different destination than last time,
2942 * thus check that TX allows the communication and compute any
2943 * needed label.
2944 *
2945 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2946 * don't have to worry about concurrent threads.
2947 */
2948 if (is_system_labeled()) {
2949 /* Using UDP MLP requires SCM_UCRED from user */
2950 if (connp->conn_mlp_type != mlptSingle &&
2951 !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2952 UDPS_BUMP_MIB(us, udpOutErrors);
2953 error = ECONNREFUSED;
2954 freemsg(mp);
2955 goto done;
2956 }
2957 /*
2958 * Check whether Trusted Solaris policy allows communication
2959 * with this host, and pretend that the destination is
2960 * unreachable if not.
2961 * Compute any needed label and place it in ipp_label_v4/v6.
2962 *
2963 * Later conn_build_hdr_template/conn_prepend_hdr takes
2964 * ipp_label_v4/v6 to form the packet.
2965 *
2966 * Tsol note: We have ipp structure local to this thread so
2967 * no locking is needed.
2968 */
2969 error = conn_update_label(connp, ixa, &v6dst, ipp);
2970 if (error != 0) {
2971 freemsg(mp);
2972 UDPS_BUMP_MIB(us, udpOutErrors);
2973 goto done;
2974 }
2975 }
2976 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2977 flowinfo, mp, &error);
2978 if (mp == NULL) {
2979 ASSERT(error != 0);
2980 UDPS_BUMP_MIB(us, udpOutErrors);
2981 goto done;
2982 }
2983 if (ixa->ixa_pktlen > IP_MAXPACKET) {
2984 error = EMSGSIZE;
2985 UDPS_BUMP_MIB(us, udpOutErrors);
2986 freemsg(mp);
2987 goto done;
2988 }
2989 /* We're done. Pass the packet to ip. */
2990 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2991
2992 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2993 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2994 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2995
2996 error = conn_ip_output(mp, ixa);
2997 /* No udpOutErrors if an error since IP increases its error counter */
2998 switch (error) {
2999 case 0:
3000 break;
3001 case EWOULDBLOCK:
3002 (void) ixa_check_drain_insert(connp, ixa);
3003 error = 0;
3004 break;
3005 case EADDRNOTAVAIL:
3006 /*
3007 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3008 * Don't have the application see that errno
3009 */
3010 error = ENETUNREACH;
3011 /* FALLTHRU */
3012 default:
3013 mutex_enter(&connp->conn_lock);
3014 /*
3015 * Clear the source and v6lastdst so we call ip_attr_connect
3016 * for the next packet and try to pick a better source.
3017 */
3018 if (connp->conn_mcbc_bind)
3019 connp->conn_saddr_v6 = ipv6_all_zeros;
3020 else
3021 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3022 connp->conn_v6lastdst = ipv6_all_zeros;
3023 mutex_exit(&connp->conn_lock);
3024 break;
3025 }
3026 done:
3027 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3028 ixa->ixa_cred = connp->conn_cred; /* Restore */
3029 ixa->ixa_cpid = connp->conn_cpid;
3030 ixa_refrele(ixa);
3031 ip_pkt_free(ipp);
3032 kmem_free(ipp, sizeof (*ipp));
3033 return (error);
3034 }
3035
3036 /*
3037 * Handle sending an M_DATA for a connected socket.
3038 * Handles both IPv4 and IPv6.
3039 */
3040 static int
3041 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
3042 {
3043 udp_t *udp = connp->conn_udp;
3044 udp_stack_t *us = udp->udp_us;
3045 int error;
3046 ip_xmit_attr_t *ixa;
3047
3048 /*
3049 * If no other thread is using conn_ixa this just gets a reference to
3050 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3051 */
3052 ixa = conn_get_ixa(connp, B_FALSE);
3053 if (ixa == NULL) {
3054 UDPS_BUMP_MIB(us, udpOutErrors);
3055 freemsg(mp);
3056 return (ENOMEM);
3057 }
3058
3059 ASSERT(cr != NULL);
3060 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3061 ixa->ixa_cred = cr;
3062 ixa->ixa_cpid = pid;
3063
3064 mutex_enter(&connp->conn_lock);
3065 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
3066 connp->conn_fport, connp->conn_flowinfo, &error);
3067
3068 if (mp == NULL) {
3069 ASSERT(error != 0);
3070 mutex_exit(&connp->conn_lock);
3071 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3072 ixa->ixa_cred = connp->conn_cred; /* Restore */
3073 ixa->ixa_cpid = connp->conn_cpid;
3074 ixa_refrele(ixa);
3075 UDPS_BUMP_MIB(us, udpOutErrors);
3076 freemsg(mp);
3077 return (error);
3078 }
3079
3080 /*
3081 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3082 * safe copy, then we need to fill in any pointers in it.
3083 */
3084 if (ixa->ixa_ire == NULL) {
3085 in6_addr_t faddr, saddr;
3086 in6_addr_t nexthop;
3087 in_port_t fport;
3088
3089 saddr = connp->conn_saddr_v6;
3090 faddr = connp->conn_faddr_v6;
3091 fport = connp->conn_fport;
3092 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3093 mutex_exit(&connp->conn_lock);
3094
3095 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3096 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3097 IPDF_IPSEC);
3098 switch (error) {
3099 case 0:
3100 break;
3101 case EADDRNOTAVAIL:
3102 /*
3103 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3104 * Don't have the application see that errno
3105 */
3106 error = ENETUNREACH;
3107 goto failed;
3108 case ENETDOWN:
3109 /*
3110 * Have !ipif_addr_ready address; drop packet silently
3111 * until we can get applications to not send until we
3112 * are ready.
3113 */
3114 error = 0;
3115 goto failed;
3116 case EHOSTUNREACH:
3117 case ENETUNREACH:
3118 if (ixa->ixa_ire != NULL) {
3119 /*
3120 * Let conn_ip_output/ire_send_noroute return
3121 * the error and send any local ICMP error.
3122 */
3123 error = 0;
3124 break;
3125 }
3126 /* FALLTHRU */
3127 default:
3128 failed:
3129 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3130 ixa->ixa_cred = connp->conn_cred; /* Restore */
3131 ixa->ixa_cpid = connp->conn_cpid;
3132 ixa_refrele(ixa);
3133 freemsg(mp);
3134 UDPS_BUMP_MIB(us, udpOutErrors);
3135 return (error);
3136 }
3137 } else {
3138 /* Done with conn_t */
3139 mutex_exit(&connp->conn_lock);
3140 }
3141 ASSERT(ixa->ixa_ire != NULL);
3142
3143 /* We're done. Pass the packet to ip. */
3144 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3145
3146 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3147 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3148 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3149
3150 error = conn_ip_output(mp, ixa);
3151 /* No udpOutErrors if an error since IP increases its error counter */
3152 switch (error) {
3153 case 0:
3154 break;
3155 case EWOULDBLOCK:
3156 (void) ixa_check_drain_insert(connp, ixa);
3157 error = 0;
3158 break;
3159 case EADDRNOTAVAIL:
3160 /*
3161 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3162 * Don't have the application see that errno
3163 */
3164 error = ENETUNREACH;
3165 break;
3166 }
3167 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3168 ixa->ixa_cred = connp->conn_cred; /* Restore */
3169 ixa->ixa_cpid = connp->conn_cpid;
3170 ixa_refrele(ixa);
3171 return (error);
3172 }
3173
3174 /*
3175 * Handle sending an M_DATA to the last destination.
3176 * Handles both IPv4 and IPv6.
3177 *
3178 * NOTE: The caller must hold conn_lock and we drop it here.
3179 */
3180 static int
3181 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3182 ip_xmit_attr_t *ixa)
3183 {
3184 udp_t *udp = connp->conn_udp;
3185 udp_stack_t *us = udp->udp_us;
3186 int error;
3187
3188 ASSERT(MUTEX_HELD(&connp->conn_lock));
3189 ASSERT(ixa != NULL);
3190
3191 ASSERT(cr != NULL);
3192 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3193 ixa->ixa_cred = cr;
3194 ixa->ixa_cpid = pid;
3195
3196 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3197 connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3198
3199 if (mp == NULL) {
3200 ASSERT(error != 0);
3201 mutex_exit(&connp->conn_lock);
3202 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3203 ixa->ixa_cred = connp->conn_cred; /* Restore */
3204 ixa->ixa_cpid = connp->conn_cpid;
3205 ixa_refrele(ixa);
3206 UDPS_BUMP_MIB(us, udpOutErrors);
3207 freemsg(mp);
3208 return (error);
3209 }
3210
3211 /*
3212 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3213 * safe copy, then we need to fill in any pointers in it.
3214 */
3215 if (ixa->ixa_ire == NULL) {
3216 in6_addr_t lastdst, lastsrc;
3217 in6_addr_t nexthop;
3218 in_port_t lastport;
3219
3220 lastsrc = connp->conn_v6lastsrc;
3221 lastdst = connp->conn_v6lastdst;
3222 lastport = connp->conn_lastdstport;
3223 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3224 mutex_exit(&connp->conn_lock);
3225
3226 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3227 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3228 IPDF_VERIFY_DST | IPDF_IPSEC);
3229 switch (error) {
3230 case 0:
3231 break;
3232 case EADDRNOTAVAIL:
3233 /*
3234 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3235 * Don't have the application see that errno
3236 */
3237 error = ENETUNREACH;
3238 goto failed;
3239 case ENETDOWN:
3240 /*
3241 * Have !ipif_addr_ready address; drop packet silently
3242 * until we can get applications to not send until we
3243 * are ready.
3244 */
3245 error = 0;
3246 goto failed;
3247 case EHOSTUNREACH:
3248 case ENETUNREACH:
3249 if (ixa->ixa_ire != NULL) {
3250 /*
3251 * Let conn_ip_output/ire_send_noroute return
3252 * the error and send any local ICMP error.
3253 */
3254 error = 0;
3255 break;
3256 }
3257 /* FALLTHRU */
3258 default:
3259 failed:
3260 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3261 ixa->ixa_cred = connp->conn_cred; /* Restore */
3262 ixa->ixa_cpid = connp->conn_cpid;
3263 ixa_refrele(ixa);
3264 freemsg(mp);
3265 UDPS_BUMP_MIB(us, udpOutErrors);
3266 return (error);
3267 }
3268 } else {
3269 /* Done with conn_t */
3270 mutex_exit(&connp->conn_lock);
3271 }
3272
3273 /* We're done. Pass the packet to ip. */
3274 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3275
3276 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3277 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3278 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3279
3280 error = conn_ip_output(mp, ixa);
3281 /* No udpOutErrors if an error since IP increases its error counter */
3282 switch (error) {
3283 case 0:
3284 break;
3285 case EWOULDBLOCK:
3286 (void) ixa_check_drain_insert(connp, ixa);
3287 error = 0;
3288 break;
3289 case EADDRNOTAVAIL:
3290 /*
3291 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3292 * Don't have the application see that errno
3293 */
3294 error = ENETUNREACH;
3295 /* FALLTHRU */
3296 default:
3297 mutex_enter(&connp->conn_lock);
3298 /*
3299 * Clear the source and v6lastdst so we call ip_attr_connect
3300 * for the next packet and try to pick a better source.
3301 */
3302 if (connp->conn_mcbc_bind)
3303 connp->conn_saddr_v6 = ipv6_all_zeros;
3304 else
3305 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3306 connp->conn_v6lastdst = ipv6_all_zeros;
3307 mutex_exit(&connp->conn_lock);
3308 break;
3309 }
3310 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3311 ixa->ixa_cred = connp->conn_cred; /* Restore */
3312 ixa->ixa_cpid = connp->conn_cpid;
3313 ixa_refrele(ixa);
3314 return (error);
3315 }
3316
3317
3318 /*
3319 * Prepend the header template and then fill in the source and
3320 * flowinfo. The caller needs to handle the destination address since
3321 * it's setting is different if rthdr or source route.
3322 *
3323 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3324 * When it returns NULL it sets errorp.
3325 */
3326 static mblk_t *
3327 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3328 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3329 {
3330 udp_t *udp = connp->conn_udp;
3331 udp_stack_t *us = udp->udp_us;
3332 boolean_t insert_spi = udp->udp_nat_t_endpoint;
3333 boolean_t hash_srcport = udp->udp_vxlanhash;
3334 uint_t pktlen;
3335 uint_t alloclen;
3336 uint_t copylen;
3337 uint8_t *iph;
3338 uint_t ip_hdr_length;
3339 udpha_t *udpha;
3340 uint32_t cksum;
3341 ip_pkt_t *ipp;
3342 uint16_t srcport;
3343
3344 ASSERT(MUTEX_HELD(&connp->conn_lock));
3345
3346 /*
3347 * If we have source port hashing going on, determine the hash before
3348 * we modify the mblk_t.
3349 */
3350 if (hash_srcport == B_TRUE) {
3351 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
3352 IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
3353 ntohs(connp->conn_lport));
3354 }
3355
3356 /*
3357 * Copy the header template and leave space for an SPI
3358 */
3359 copylen = connp->conn_ht_iphc_len;
3360 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3361 pktlen = alloclen + msgdsize(mp);
3362 if (pktlen > IP_MAXPACKET) {
3363 freemsg(mp);
3364 *errorp = EMSGSIZE;
3365 return (NULL);
3366 }
3367 ixa->ixa_pktlen = pktlen;
3368
3369 /* check/fix buffer config, setup pointers into it */
3370 iph = mp->b_rptr - alloclen;
3371 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3372 mblk_t *mp1;
3373
3374 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3375 if (mp1 == NULL) {
3376 freemsg(mp);
3377 *errorp = ENOMEM;
3378 return (NULL);
3379 }
3380 mp1->b_wptr = DB_LIM(mp1);
3381 mp1->b_cont = mp;
3382 mp = mp1;
3383 iph = (mp->b_wptr - alloclen);
3384 }
3385 mp->b_rptr = iph;
3386 bcopy(connp->conn_ht_iphc, iph, copylen);
3387 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3388
3389 ixa->ixa_ip_hdr_length = ip_hdr_length;
3390 udpha = (udpha_t *)(iph + ip_hdr_length);
3391
3392 /*
3393 * Setup header length and prepare for ULP checksum done in IP.
3394 * udp_build_hdr_template has already massaged any routing header
3395 * and placed the result in conn_sum.
3396 *
3397 * We make it easy for IP to include our pseudo header
3398 * by putting our length in uha_checksum.
3399 */
3400 cksum = pktlen - ip_hdr_length;
3401 udpha->uha_length = htons(cksum);
3402
3403 cksum += connp->conn_sum;
3404 cksum = (cksum >> 16) + (cksum & 0xFFFF);
3405 ASSERT(cksum < 0x10000);
3406
3407 ipp = &connp->conn_xmit_ipp;
3408 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3409 ipha_t *ipha = (ipha_t *)iph;
3410
3411 ipha->ipha_length = htons((uint16_t)pktlen);
3412
3413 /* IP does the checksum if uha_checksum is non-zero */
3414 if (us->us_do_checksum)
3415 udpha->uha_checksum = htons(cksum);
3416
3417 /* if IP_PKTINFO specified an addres it wins over bind() */
3418 if ((ipp->ipp_fields & IPPF_ADDR) &&
3419 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3420 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3421 ipha->ipha_src = ipp->ipp_addr_v4;
3422 } else {
3423 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3424 }
3425 } else {
3426 ip6_t *ip6h = (ip6_t *)iph;
3427
3428 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3429 udpha->uha_checksum = htons(cksum);
3430
3431 /* if IP_PKTINFO specified an addres it wins over bind() */
3432 if ((ipp->ipp_fields & IPPF_ADDR) &&
3433 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3434 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3435 ip6h->ip6_src = ipp->ipp_addr;
3436 } else {
3437 ip6h->ip6_src = *v6src;
3438 }
3439 ip6h->ip6_vcf =
3440 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3441 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3442 if (ipp->ipp_fields & IPPF_TCLASS) {
3443 /* Overrides the class part of flowinfo */
3444 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3445 ipp->ipp_tclass);
3446 }
3447 }
3448
3449 /* Insert all-0s SPI now. */
3450 if (insert_spi)
3451 *((uint32_t *)(udpha + 1)) = 0;
3452
3453 udpha->uha_dst_port = dstport;
3454 if (hash_srcport == B_TRUE)
3455 udpha->uha_src_port = htons(srcport);
3456
3457 return (mp);
3458 }
3459
3460 /*
3461 * Send a T_UDERR_IND in response to an M_DATA
3462 */
3463 static void
3464 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3465 {
3466 struct sockaddr_storage ss;
3467 sin_t *sin;
3468 sin6_t *sin6;
3469 struct sockaddr *addr;
3470 socklen_t addrlen;
3471 mblk_t *mp1;
3472
3473 mutex_enter(&connp->conn_lock);
3474 /* Initialize addr and addrlen as if they're passed in */
3475 if (connp->conn_family == AF_INET) {
3476 sin = (sin_t *)&ss;
3477 *sin = sin_null;
3478 sin->sin_family = AF_INET;
3479 sin->sin_port = connp->conn_fport;
3480 sin->sin_addr.s_addr = connp->conn_faddr_v4;
3481 addr = (struct sockaddr *)sin;
3482 addrlen = sizeof (*sin);
3483 } else {
3484 sin6 = (sin6_t *)&ss;
3485 *sin6 = sin6_null;
3486 sin6->sin6_family = AF_INET6;
3487 sin6->sin6_port = connp->conn_fport;
3488 sin6->sin6_flowinfo = connp->conn_flowinfo;
3489 sin6->sin6_addr = connp->conn_faddr_v6;
3490 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3491 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3492 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3493 } else {
3494 sin6->sin6_scope_id = 0;
3495 }
3496 sin6->__sin6_src_id = 0;
3497 addr = (struct sockaddr *)sin6;
3498 addrlen = sizeof (*sin6);
3499 }
3500 mutex_exit(&connp->conn_lock);
3501
3502 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3503 if (mp1 != NULL)
3504 putnext(connp->conn_rq, mp1);
3505 }
3506
3507 /*
3508 * This routine handles all messages passed downstream. It either
3509 * consumes the message or passes it downstream; it never queues a
3510 * a message.
3511 *
3512 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode
3513 * is valid when we are directly beneath the stream head, and thus sockfs
3514 * is able to bypass STREAMS and directly call us, passing along the sockaddr
3515 * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3516 * connected endpoints.
3517 */
3518 void
3519 udp_wput(queue_t *q, mblk_t *mp)
3520 {
3521 sin6_t *sin6;
3522 sin_t *sin = NULL;
3523 uint_t srcid;
3524 conn_t *connp = Q_TO_CONN(q);
3525 udp_t *udp = connp->conn_udp;
3526 int error = 0;
3527 struct sockaddr *addr = NULL;
3528 socklen_t addrlen;
3529 udp_stack_t *us = udp->udp_us;
3530 struct T_unitdata_req *tudr;
3531 mblk_t *data_mp;
3532 ushort_t ipversion;
3533 cred_t *cr;
3534 pid_t pid;
3535
3536 /*
3537 * We directly handle several cases here: T_UNITDATA_REQ message
3538 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3539 * socket.
3540 */
3541 switch (DB_TYPE(mp)) {
3542 case M_DATA:
3543 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3544 /* Not connected; address is required */
3545 UDPS_BUMP_MIB(us, udpOutErrors);
3546 UDP_DBGSTAT(us, udp_data_notconn);
3547 UDP_STAT(us, udp_out_err_notconn);
3548 freemsg(mp);
3549 return;
3550 }
3551 /*
3552 * All Solaris components should pass a db_credp
3553 * for this message, hence we ASSERT.
3554 * On production kernels we return an error to be robust against
3555 * random streams modules sitting on top of us.
3556 */
3557 cr = msg_getcred(mp, &pid);
3558 ASSERT(cr != NULL);
3559 if (cr == NULL) {
3560 UDPS_BUMP_MIB(us, udpOutErrors);
3561 freemsg(mp);
3562 return;
3563 }
3564 ASSERT(udp->udp_issocket);
3565 UDP_DBGSTAT(us, udp_data_conn);
3566 error = udp_output_connected(connp, mp, cr, pid);
3567 if (error != 0) {
3568 UDP_STAT(us, udp_out_err_output);
3569 if (connp->conn_rq != NULL)
3570 udp_ud_err_connected(connp, (t_scalar_t)error);
3571 #ifdef DEBUG
3572 printf("udp_output_connected returned %d\n", error);
3573 #endif
3574 }
3575 return;
3576
3577 case M_PROTO:
3578 case M_PCPROTO:
3579 tudr = (struct T_unitdata_req *)mp->b_rptr;
3580 if (MBLKL(mp) < sizeof (*tudr) ||
3581 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3582 udp_wput_other(q, mp);
3583 return;
3584 }
3585 break;
3586
3587 default:
3588 udp_wput_other(q, mp);
3589 return;
3590 }
3591
3592 /* Handle valid T_UNITDATA_REQ here */
3593 data_mp = mp->b_cont;
3594 if (data_mp == NULL) {
3595 error = EPROTO;
3596 goto ud_error2;
3597 }
3598 mp->b_cont = NULL;
3599
3600 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3601 error = EADDRNOTAVAIL;
3602 goto ud_error2;
3603 }
3604
3605 /*
3606 * All Solaris components should pass a db_credp
3607 * for this TPI message, hence we should ASSERT.
3608 * However, RPC (svc_clts_ksend) does this odd thing where it
3609 * passes the options from a T_UNITDATA_IND unchanged in a
3610 * T_UNITDATA_REQ. While that is the right thing to do for
3611 * some options, SCM_UCRED being the key one, this also makes it
3612 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3613 */
3614 cr = msg_getcred(mp, &pid);
3615 if (cr == NULL) {
3616 cr = connp->conn_cred;
3617 pid = connp->conn_cpid;
3618 }
3619
3620 /*
3621 * If a port has not been bound to the stream, fail.
3622 * This is not a problem when sockfs is directly
3623 * above us, because it will ensure that the socket
3624 * is first bound before allowing data to be sent.
3625 */
3626 if (udp->udp_state == TS_UNBND) {
3627 error = EPROTO;
3628 goto ud_error2;
3629 }
3630 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3631 addrlen = tudr->DEST_length;
3632
3633 switch (connp->conn_family) {
3634 case AF_INET6:
3635 sin6 = (sin6_t *)addr;
3636 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3637 (sin6->sin6_family != AF_INET6)) {
3638 error = EADDRNOTAVAIL;
3639 goto ud_error2;
3640 }
3641
3642 srcid = sin6->__sin6_src_id;
3643 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3644 /*
3645 * Destination is a non-IPv4-compatible IPv6 address.
3646 * Send out an IPv6 format packet.
3647 */
3648
3649 /*
3650 * If the local address is a mapped address return
3651 * an error.
3652 * It would be possible to send an IPv6 packet but the
3653 * response would never make it back to the application
3654 * since it is bound to a mapped address.
3655 */
3656 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3657 error = EADDRNOTAVAIL;
3658 goto ud_error2;
3659 }
3660
3661 UDP_DBGSTAT(us, udp_out_ipv6);
3662
3663 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3664 sin6->sin6_addr = ipv6_loopback;
3665 ipversion = IPV6_VERSION;
3666 } else {
3667 if (connp->conn_ipv6_v6only) {
3668 error = EADDRNOTAVAIL;
3669 goto ud_error2;
3670 }
3671
3672 /*
3673 * If the local address is not zero or a mapped address
3674 * return an error. It would be possible to send an
3675 * IPv4 packet but the response would never make it
3676 * back to the application since it is bound to a
3677 * non-mapped address.
3678 */
3679 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3680 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3681 error = EADDRNOTAVAIL;
3682 goto ud_error2;
3683 }
3684 UDP_DBGSTAT(us, udp_out_mapped);
3685
3686 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3687 V4_PART_OF_V6(sin6->sin6_addr) =
3688 htonl(INADDR_LOOPBACK);
3689 }
3690 ipversion = IPV4_VERSION;
3691 }
3692
3693 if (tudr->OPT_length != 0) {
3694 /*
3695 * If we are connected then the destination needs to be
3696 * the same as the connected one.
3697 */
3698 if (udp->udp_state == TS_DATA_XFER &&
3699 !conn_same_as_last_v6(connp, sin6)) {
3700 error = EISCONN;
3701 goto ud_error2;
3702 }
3703 UDP_STAT(us, udp_out_opt);
3704 error = udp_output_ancillary(connp, NULL, sin6,
3705 data_mp, mp, NULL, cr, pid);
3706 } else {
3707 ip_xmit_attr_t *ixa;
3708
3709 /*
3710 * We have to allocate an ip_xmit_attr_t before we grab
3711 * conn_lock and we need to hold conn_lock once we've
3712 * checked conn_same_as_last_v6 to handle concurrent
3713 * send* calls on a socket.
3714 */
3715 ixa = conn_get_ixa(connp, B_FALSE);
3716 if (ixa == NULL) {
3717 error = ENOMEM;
3718 goto ud_error2;
3719 }
3720 mutex_enter(&connp->conn_lock);
3721
3722 if (conn_same_as_last_v6(connp, sin6) &&
3723 connp->conn_lastsrcid == srcid &&
3724 ipsec_outbound_policy_current(ixa)) {
3725 UDP_DBGSTAT(us, udp_out_lastdst);
3726 /* udp_output_lastdst drops conn_lock */
3727 error = udp_output_lastdst(connp, data_mp, cr,
3728 pid, ixa);
3729 } else {
3730 UDP_DBGSTAT(us, udp_out_diffdst);
3731 /* udp_output_newdst drops conn_lock */
3732 error = udp_output_newdst(connp, data_mp, NULL,
3733 sin6, ipversion, cr, pid, ixa);
3734 }
3735 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3736 }
3737 if (error == 0) {
3738 freeb(mp);
3739 return;
3740 }
3741 break;
3742
3743 case AF_INET:
3744 sin = (sin_t *)addr;
3745 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3746 (sin->sin_family != AF_INET)) {
3747 error = EADDRNOTAVAIL;
3748 goto ud_error2;
3749 }
3750 UDP_DBGSTAT(us, udp_out_ipv4);
3751 if (sin->sin_addr.s_addr == INADDR_ANY)
3752 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3753 ipversion = IPV4_VERSION;
3754
3755 srcid = 0;
3756 if (tudr->OPT_length != 0) {
3757 /*
3758 * If we are connected then the destination needs to be
3759 * the same as the connected one.
3760 */
3761 if (udp->udp_state == TS_DATA_XFER &&
3762 !conn_same_as_last_v4(connp, sin)) {
3763 error = EISCONN;
3764 goto ud_error2;
3765 }
3766 UDP_STAT(us, udp_out_opt);
3767 error = udp_output_ancillary(connp, sin, NULL,
3768 data_mp, mp, NULL, cr, pid);
3769 } else {
3770 ip_xmit_attr_t *ixa;
3771
3772 /*
3773 * We have to allocate an ip_xmit_attr_t before we grab
3774 * conn_lock and we need to hold conn_lock once we've
3775 * checked conn_same_as_last_v4 to handle concurrent
3776 * send* calls on a socket.
3777 */
3778 ixa = conn_get_ixa(connp, B_FALSE);
3779 if (ixa == NULL) {
3780 error = ENOMEM;
3781 goto ud_error2;
3782 }
3783 mutex_enter(&connp->conn_lock);
3784
3785 if (conn_same_as_last_v4(connp, sin) &&
3786 ipsec_outbound_policy_current(ixa)) {
3787 UDP_DBGSTAT(us, udp_out_lastdst);
3788 /* udp_output_lastdst drops conn_lock */
3789 error = udp_output_lastdst(connp, data_mp, cr,
3790 pid, ixa);
3791 } else {
3792 UDP_DBGSTAT(us, udp_out_diffdst);
3793 /* udp_output_newdst drops conn_lock */
3794 error = udp_output_newdst(connp, data_mp, sin,
3795 NULL, ipversion, cr, pid, ixa);
3796 }
3797 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3798 }
3799 if (error == 0) {
3800 freeb(mp);
3801 return;
3802 }
3803 break;
3804 }
3805 UDP_STAT(us, udp_out_err_output);
3806 ASSERT(mp != NULL);
3807 /* mp is freed by the following routine */
3808 udp_ud_err(q, mp, (t_scalar_t)error);
3809 return;
3810
3811 ud_error2:
3812 UDPS_BUMP_MIB(us, udpOutErrors);
3813 freemsg(data_mp);
3814 UDP_STAT(us, udp_out_err_output);
3815 ASSERT(mp != NULL);
3816 /* mp is freed by the following routine */
3817 udp_ud_err(q, mp, (t_scalar_t)error);
3818 }
3819
3820 /*
3821 * Handle the case of the IP address, port, flow label being different
3822 * for both IPv4 and IPv6.
3823 *
3824 * NOTE: The caller must hold conn_lock and we drop it here.
3825 */
3826 static int
3827 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3828 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3829 {
3830 uint_t srcid;
3831 uint32_t flowinfo;
3832 udp_t *udp = connp->conn_udp;
3833 int error = 0;
3834 ip_xmit_attr_t *oldixa;
3835 udp_stack_t *us = udp->udp_us;
3836 in6_addr_t v6src;
3837 in6_addr_t v6dst;
3838 in6_addr_t v6nexthop;
3839 in_port_t dstport;
3840
3841 ASSERT(MUTEX_HELD(&connp->conn_lock));
3842 ASSERT(ixa != NULL);
3843 /*
3844 * We hold conn_lock across all the use and modifications of
3845 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3846 * stay consistent.
3847 */
3848
3849 ASSERT(cr != NULL);
3850 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3851 ixa->ixa_cred = cr;
3852 ixa->ixa_cpid = pid;
3853 if (is_system_labeled()) {
3854 /* We need to restart with a label based on the cred */
3855 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3856 }
3857
3858 /*
3859 * If we are connected then the destination needs to be the
3860 * same as the connected one, which is not the case here since we
3861 * checked for that above.
3862 */
3863 if (udp->udp_state == TS_DATA_XFER) {
3864 mutex_exit(&connp->conn_lock);
3865 error = EISCONN;
3866 goto ud_error;
3867 }
3868
3869 /* In case previous destination was multicast or multirt */
3870 ip_attr_newdst(ixa);
3871
3872 /*
3873 * If laddr is unspecified then we look at sin6_src_id.
3874 * We will give precedence to a source address set with IPV6_PKTINFO
3875 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3876 * want ip_attr_connect to select a source (since it can fail) when
3877 * IPV6_PKTINFO is specified.
3878 * If this doesn't result in a source address then we get a source
3879 * from ip_attr_connect() below.
3880 */
3881 v6src = connp->conn_saddr_v6;
3882 if (sin != NULL) {
3883 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3884 dstport = sin->sin_port;
3885 flowinfo = 0;
3886 /* Don't bother with ip_srcid_find_id(), but indicate anyway. */
3887 srcid = 0;
3888 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3889 ixa->ixa_flags |= IXAF_IS_IPV4;
3890 } else {
3891 boolean_t v4mapped;
3892
3893 v6dst = sin6->sin6_addr;
3894 dstport = sin6->sin6_port;
3895 flowinfo = sin6->sin6_flowinfo;
3896 srcid = sin6->__sin6_src_id;
3897 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3898 ixa->ixa_scopeid = sin6->sin6_scope_id;
3899 ixa->ixa_flags |= IXAF_SCOPEID_SET;
3900 } else {
3901 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3902 }
3903 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
3904 if (v4mapped)
3905 ixa->ixa_flags |= IXAF_IS_IPV4;
3906 else
3907 ixa->ixa_flags &= ~IXAF_IS_IPV4;
3908 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3909 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3910 v4mapped, connp->conn_netstack)) {
3911 /* Mismatched v4mapped/v6 specified by srcid. */
3912 mutex_exit(&connp->conn_lock);
3913 error = EADDRNOTAVAIL;
3914 goto ud_error;
3915 }
3916 }
3917 }
3918 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3919 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3920 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3921
3922 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3923 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3924 v6src = ipp->ipp_addr;
3925 } else {
3926 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3927 v6src = ipp->ipp_addr;
3928 }
3929 }
3930
3931 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3932 mutex_exit(&connp->conn_lock);
3933
3934 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3935 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3936 switch (error) {
3937 case 0:
3938 break;
3939 case EADDRNOTAVAIL:
3940 /*
3941 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3942 * Don't have the application see that errno
3943 */
3944 error = ENETUNREACH;
3945 goto failed;
3946 case ENETDOWN:
3947 /*
3948 * Have !ipif_addr_ready address; drop packet silently
3949 * until we can get applications to not send until we
3950 * are ready.
3951 */
3952 error = 0;
3953 goto failed;
3954 case EHOSTUNREACH:
3955 case ENETUNREACH:
3956 if (ixa->ixa_ire != NULL) {
3957 /*
3958 * Let conn_ip_output/ire_send_noroute return
3959 * the error and send any local ICMP error.
3960 */
3961 error = 0;
3962 break;
3963 }
3964 /* FALLTHRU */
3965 failed:
3966 default:
3967 goto ud_error;
3968 }
3969
3970
3971 /*
3972 * Cluster note: we let the cluster hook know that we are sending to a
3973 * new address and/or port.
3974 */
3975 if (cl_inet_connect2 != NULL) {
3976 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3977 if (error != 0) {
3978 error = EHOSTUNREACH;
3979 goto ud_error;
3980 }
3981 }
3982
3983 mutex_enter(&connp->conn_lock);
3984 /*
3985 * While we dropped the lock some other thread might have connected
3986 * this socket. If so we bail out with EISCONN to ensure that the
3987 * connecting thread is the one that updates conn_ixa, conn_ht_*
3988 * and conn_*last*.
3989 */
3990 if (udp->udp_state == TS_DATA_XFER) {
3991 mutex_exit(&connp->conn_lock);
3992 error = EISCONN;
3993 goto ud_error;
3994 }
3995
3996 /*
3997 * We need to rebuild the headers if
3998 * - we are labeling packets (could be different for different
3999 * destinations)
4000 * - we have a source route (or routing header) since we need to
4001 * massage that to get the pseudo-header checksum
4002 * - the IP version is different than the last time
4003 * - a socket option with COA_HEADER_CHANGED has been set which
4004 * set conn_v6lastdst to zero.
4005 *
4006 * Otherwise the prepend function will just update the src, dst,
4007 * dstport, and flow label.
4008 */
4009 if (is_system_labeled()) {
4010 /* TX MLP requires SCM_UCRED and don't have that here */
4011 if (connp->conn_mlp_type != mlptSingle) {
4012 mutex_exit(&connp->conn_lock);
4013 error = ECONNREFUSED;
4014 goto ud_error;
4015 }
4016 /*
4017 * Check whether Trusted Solaris policy allows communication
4018 * with this host, and pretend that the destination is
4019 * unreachable if not.
4020 * Compute any needed label and place it in ipp_label_v4/v6.
4021 *
4022 * Later conn_build_hdr_template/conn_prepend_hdr takes
4023 * ipp_label_v4/v6 to form the packet.
4024 *
4025 * Tsol note: Since we hold conn_lock we know no other
4026 * thread manipulates conn_xmit_ipp.
4027 */
4028 error = conn_update_label(connp, ixa, &v6dst,
4029 &connp->conn_xmit_ipp);
4030 if (error != 0) {
4031 mutex_exit(&connp->conn_lock);
4032 goto ud_error;
4033 }
4034 /* Rebuild the header template */
4035 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4036 flowinfo);
4037 if (error != 0) {
4038 mutex_exit(&connp->conn_lock);
4039 goto ud_error;
4040 }
4041 } else if ((connp->conn_xmit_ipp.ipp_fields &
4042 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
4043 ipversion != connp->conn_lastipversion ||
4044 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
4045 /* Rebuild the header template */
4046 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4047 flowinfo);
4048 if (error != 0) {
4049 mutex_exit(&connp->conn_lock);
4050 goto ud_error;
4051 }
4052 } else {
4053 /* Simply update the destination address if no source route */
4054 if (ixa->ixa_flags & IXAF_IS_IPV4) {
4055 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc;
4056
4057 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4058 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4059 ipha->ipha_fragment_offset_and_flags |=
4060 IPH_DF_HTONS;
4061 } else {
4062 ipha->ipha_fragment_offset_and_flags &=
4063 ~IPH_DF_HTONS;
4064 }
4065 } else {
4066 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4067 ip6h->ip6_dst = v6dst;
4068 }
4069 }
4070
4071 /*
4072 * Remember the dst/dstport etc which corresponds to the built header
4073 * template and conn_ixa.
4074 */
4075 oldixa = conn_replace_ixa(connp, ixa);
4076 connp->conn_v6lastdst = v6dst;
4077 connp->conn_lastipversion = ipversion;
4078 connp->conn_lastdstport = dstport;
4079 connp->conn_lastflowinfo = flowinfo;
4080 connp->conn_lastscopeid = ixa->ixa_scopeid;
4081 connp->conn_lastsrcid = srcid;
4082 /* Also remember a source to use together with lastdst */
4083 connp->conn_v6lastsrc = v6src;
4084
4085 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
4086 dstport, flowinfo, &error);
4087
4088 /* Done with conn_t */
4089 mutex_exit(&connp->conn_lock);
4090 ixa_refrele(oldixa);
4091
4092 if (data_mp == NULL) {
4093 ASSERT(error != 0);
4094 goto ud_error;
4095 }
4096
4097 /* We're done. Pass the packet to ip. */
4098 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
4099
4100 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
4101 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
4102 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
4103
4104 error = conn_ip_output(data_mp, ixa);
4105 /* No udpOutErrors if an error since IP increases its error counter */
4106 switch (error) {
4107 case 0:
4108 break;
4109 case EWOULDBLOCK:
4110 (void) ixa_check_drain_insert(connp, ixa);
4111 error = 0;
4112 break;
4113 case EADDRNOTAVAIL:
4114 /*
4115 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4116 * Don't have the application see that errno
4117 */
4118 error = ENETUNREACH;
4119 /* FALLTHRU */
4120 default:
4121 mutex_enter(&connp->conn_lock);
4122 /*
4123 * Clear the source and v6lastdst so we call ip_attr_connect
4124 * for the next packet and try to pick a better source.
4125 */
4126 if (connp->conn_mcbc_bind)
4127 connp->conn_saddr_v6 = ipv6_all_zeros;
4128 else
4129 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4130 connp->conn_v6lastdst = ipv6_all_zeros;
4131 mutex_exit(&connp->conn_lock);
4132 break;
4133 }
4134 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4135 ixa->ixa_cred = connp->conn_cred; /* Restore */
4136 ixa->ixa_cpid = connp->conn_cpid;
4137 ixa_refrele(ixa);
4138 return (error);
4139
4140 ud_error:
4141 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4142 ixa->ixa_cred = connp->conn_cred; /* Restore */
4143 ixa->ixa_cpid = connp->conn_cpid;
4144 ixa_refrele(ixa);
4145
4146 freemsg(data_mp);
4147 UDPS_BUMP_MIB(us, udpOutErrors);
4148 UDP_STAT(us, udp_out_err_output);
4149 return (error);
4150 }
4151
4152 /* ARGSUSED */
4153 static void
4154 udp_wput_fallback(queue_t *wq, mblk_t *mp)
4155 {
4156 #ifdef DEBUG
4157 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4158 #endif
4159 freemsg(mp);
4160 }
4161
4162
4163 /*
4164 * Handle special out-of-band ioctl requests (see PSARC/2008/265).
4165 */
4166 static void
4167 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4168 {
4169 void *data;
4170 mblk_t *datamp = mp->b_cont;
4171 conn_t *connp = Q_TO_CONN(q);
4172 udp_t *udp = connp->conn_udp;
4173 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4174
4175 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4176 cmdp->cb_error = EPROTO;
4177 qreply(q, mp);
4178 return;
4179 }
4180 data = datamp->b_rptr;
4181
4182 mutex_enter(&connp->conn_lock);
4183 switch (cmdp->cb_cmd) {
4184 case TI_GETPEERNAME:
4185 if (udp->udp_state != TS_DATA_XFER)
4186 cmdp->cb_error = ENOTCONN;
4187 else
4188 cmdp->cb_error = conn_getpeername(connp, data,
4189 &cmdp->cb_len);
4190 break;
4191 case TI_GETMYNAME:
4192 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4193 break;
4194 default:
4195 cmdp->cb_error = EINVAL;
4196 break;
4197 }
4198 mutex_exit(&connp->conn_lock);
4199
4200 qreply(q, mp);
4201 }
4202
4203 static void
4204 udp_use_pure_tpi(udp_t *udp)
4205 {
4206 conn_t *connp = udp->udp_connp;
4207
4208 mutex_enter(&connp->conn_lock);
4209 udp->udp_issocket = B_FALSE;
4210 mutex_exit(&connp->conn_lock);
4211 UDP_STAT(udp->udp_us, udp_sock_fallback);
4212 }
4213
4214 static void
4215 udp_wput_other(queue_t *q, mblk_t *mp)
4216 {
4217 uchar_t *rptr = mp->b_rptr;
4218 struct iocblk *iocp;
4219 conn_t *connp = Q_TO_CONN(q);
4220 udp_t *udp = connp->conn_udp;
4221 cred_t *cr;
4222
4223 switch (mp->b_datap->db_type) {
4224 case M_CMD:
4225 udp_wput_cmdblk(q, mp);
4226 return;
4227
4228 case M_PROTO:
4229 case M_PCPROTO:
4230 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4231 /*
4232 * If the message does not contain a PRIM_type,
4233 * throw it away.
4234 */
4235 freemsg(mp);
4236 return;
4237 }
4238 switch (((t_primp_t)rptr)->type) {
4239 case T_ADDR_REQ:
4240 udp_addr_req(q, mp);
4241 return;
4242 case O_T_BIND_REQ:
4243 case T_BIND_REQ:
4244 udp_tpi_bind(q, mp);
4245 return;
4246 case T_CONN_REQ:
4247 udp_tpi_connect(q, mp);
4248 return;
4249 case T_CAPABILITY_REQ:
4250 udp_capability_req(q, mp);
4251 return;
4252 case T_INFO_REQ:
4253 udp_info_req(q, mp);
4254 return;
4255 case T_UNITDATA_REQ:
4256 /*
4257 * If a T_UNITDATA_REQ gets here, the address must
4258 * be bad. Valid T_UNITDATA_REQs are handled
4259 * in udp_wput.
4260 */
4261 udp_ud_err(q, mp, EADDRNOTAVAIL);
4262 return;
4263 case T_UNBIND_REQ:
4264 udp_tpi_unbind(q, mp);
4265 return;
4266 case T_SVR4_OPTMGMT_REQ:
4267 /*
4268 * All Solaris components should pass a db_credp
4269 * for this TPI message, hence we ASSERT.
4270 * But in case there is some other M_PROTO that looks
4271 * like a TPI message sent by some other kernel
4272 * component, we check and return an error.
4273 */
4274 cr = msg_getcred(mp, NULL);
4275 ASSERT(cr != NULL);
4276 if (cr == NULL) {
4277 udp_err_ack(q, mp, TSYSERR, EINVAL);
4278 return;
4279 }
4280 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4281 cr)) {
4282 svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4283 }
4284 return;
4285
4286 case T_OPTMGMT_REQ:
4287 /*
4288 * All Solaris components should pass a db_credp
4289 * for this TPI message, hence we ASSERT.
4290 * But in case there is some other M_PROTO that looks
4291 * like a TPI message sent by some other kernel
4292 * component, we check and return an error.
4293 */
4294 cr = msg_getcred(mp, NULL);
4295 ASSERT(cr != NULL);
4296 if (cr == NULL) {
4297 udp_err_ack(q, mp, TSYSERR, EINVAL);
4298 return;
4299 }
4300 tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4301 return;
4302
4303 case T_DISCON_REQ:
4304 udp_tpi_disconnect(q, mp);
4305 return;
4306
4307 /* The following TPI message is not supported by udp. */
4308 case O_T_CONN_RES:
4309 case T_CONN_RES:
4310 udp_err_ack(q, mp, TNOTSUPPORT, 0);
4311 return;
4312
4313 /* The following 3 TPI requests are illegal for udp. */
4314 case T_DATA_REQ:
4315 case T_EXDATA_REQ:
4316 case T_ORDREL_REQ:
4317 udp_err_ack(q, mp, TNOTSUPPORT, 0);
4318 return;
4319 default:
4320 break;
4321 }
4322 break;
4323 case M_FLUSH:
4324 if (*rptr & FLUSHW)
4325 flushq(q, FLUSHDATA);
4326 break;
4327 case M_IOCTL:
4328 iocp = (struct iocblk *)mp->b_rptr;
4329 switch (iocp->ioc_cmd) {
4330 case TI_GETPEERNAME:
4331 if (udp->udp_state != TS_DATA_XFER) {
4332 /*
4333 * If a default destination address has not
4334 * been associated with the stream, then we
4335 * don't know the peer's name.
4336 */
4337 iocp->ioc_error = ENOTCONN;
4338 iocp->ioc_count = 0;
4339 mp->b_datap->db_type = M_IOCACK;
4340 qreply(q, mp);
4341 return;
4342 }
4343 /* FALLTHRU */
4344 case TI_GETMYNAME:
4345 /*
4346 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4347 * need to copyin the user's strbuf structure.
4348 * Processing will continue in the M_IOCDATA case
4349 * below.
4350 */
4351 mi_copyin(q, mp, NULL,
4352 SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4353 return;
4354 case _SIOCSOCKFALLBACK:
4355 /*
4356 * Either sockmod is about to be popped and the
4357 * socket would now be treated as a plain stream,
4358 * or a module is about to be pushed so we have
4359 * to follow pure TPI semantics.
4360 */
4361 if (!udp->udp_issocket) {
4362 DB_TYPE(mp) = M_IOCNAK;
4363 iocp->ioc_error = EINVAL;
4364 } else {
4365 udp_use_pure_tpi(udp);
4366
4367 DB_TYPE(mp) = M_IOCACK;
4368 iocp->ioc_error = 0;
4369 }
4370 iocp->ioc_count = 0;
4371 iocp->ioc_rval = 0;
4372 qreply(q, mp);
4373 return;
4374 default:
4375 break;
4376 }
4377 break;
4378 case M_IOCDATA:
4379 udp_wput_iocdata(q, mp);
4380 return;
4381 default:
4382 /* Unrecognized messages are passed through without change. */
4383 break;
4384 }
4385 ip_wput_nondata(q, mp);
4386 }
4387
4388 /*
4389 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4390 * messages.
4391 */
4392 static void
4393 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4394 {
4395 mblk_t *mp1;
4396 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
4397 STRUCT_HANDLE(strbuf, sb);
4398 uint_t addrlen;
4399 conn_t *connp = Q_TO_CONN(q);
4400 udp_t *udp = connp->conn_udp;
4401
4402 /* Make sure it is one of ours. */
4403 switch (iocp->ioc_cmd) {
4404 case TI_GETMYNAME:
4405 case TI_GETPEERNAME:
4406 break;
4407 default:
4408 ip_wput_nondata(q, mp);
4409 return;
4410 }
4411
4412 switch (mi_copy_state(q, mp, &mp1)) {
4413 case -1:
4414 return;
4415 case MI_COPY_CASE(MI_COPY_IN, 1):
4416 break;
4417 case MI_COPY_CASE(MI_COPY_OUT, 1):
4418 /*
4419 * The address has been copied out, so now
4420 * copyout the strbuf.
4421 */
4422 mi_copyout(q, mp);
4423 return;
4424 case MI_COPY_CASE(MI_COPY_OUT, 2):
4425 /*
4426 * The address and strbuf have been copied out.
4427 * We're done, so just acknowledge the original
4428 * M_IOCTL.
4429 */
4430 mi_copy_done(q, mp, 0);
4431 return;
4432 default:
4433 /*
4434 * Something strange has happened, so acknowledge
4435 * the original M_IOCTL with an EPROTO error.
4436 */
4437 mi_copy_done(q, mp, EPROTO);
4438 return;
4439 }
4440
4441 /*
4442 * Now we have the strbuf structure for TI_GETMYNAME
4443 * and TI_GETPEERNAME. Next we copyout the requested
4444 * address and then we'll copyout the strbuf.
4445 */
4446 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4447
4448 if (connp->conn_family == AF_INET)
4449 addrlen = sizeof (sin_t);
4450 else
4451 addrlen = sizeof (sin6_t);
4452
4453 if (STRUCT_FGET(sb, maxlen) < addrlen) {
4454 mi_copy_done(q, mp, EINVAL);
4455 return;
4456 }
4457
4458 switch (iocp->ioc_cmd) {
4459 case TI_GETMYNAME:
4460 break;
4461 case TI_GETPEERNAME:
4462 if (udp->udp_state != TS_DATA_XFER) {
4463 mi_copy_done(q, mp, ENOTCONN);
4464 return;
4465 }
4466 break;
4467 }
4468 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4469 if (!mp1)
4470 return;
4471
4472 STRUCT_FSET(sb, len, addrlen);
4473 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4474 case TI_GETMYNAME:
4475 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4476 &addrlen);
4477 break;
4478 case TI_GETPEERNAME:
4479 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4480 &addrlen);
4481 break;
4482 }
4483 mp1->b_wptr += addrlen;
4484 /* Copy out the address */
4485 mi_copyout(q, mp);
4486 }
4487
4488 void
4489 udp_ddi_g_init(void)
4490 {
4491 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4492 udp_opt_obj.odb_opt_arr_cnt);
4493
4494 /*
4495 * We want to be informed each time a stack is created or
4496 * destroyed in the kernel, so we can maintain the
4497 * set of udp_stack_t's.
4498 */
4499 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4500 }
4501
4502 void
4503 udp_ddi_g_destroy(void)
4504 {
4505 netstack_unregister(NS_UDP);
4506 }
4507
4508 #define INET_NAME "ip"
4509
4510 /*
4511 * Initialize the UDP stack instance.
4512 */
4513 static void *
4514 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4515 {
4516 udp_stack_t *us;
4517 int i;
4518 int error = 0;
4519 major_t major;
4520 size_t arrsz;
4521
4522 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4523 us->us_netstack = ns;
4524
4525 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4526 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4527 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4528 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4529
4530 /*
4531 * The smallest anonymous port in the priviledged port range which UDP
4532 * looks for free port. Use in the option UDP_ANONPRIVBIND.
4533 */
4534 us->us_min_anonpriv_port = 512;
4535
4536 us->us_bind_fanout_size = udp_bind_fanout_size;
4537
4538 /* Roundup variable that might have been modified in /etc/system */
4539 if (!ISP2(us->us_bind_fanout_size)) {
4540 /* Not a power of two. Round up to nearest power of two */
4541 for (i = 0; i < 31; i++) {
4542 if (us->us_bind_fanout_size < (1 << i))
4543 break;
4544 }
4545 us->us_bind_fanout_size = 1 << i;
4546 }
4547 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4548 sizeof (udp_fanout_t), KM_SLEEP);
4549 for (i = 0; i < us->us_bind_fanout_size; i++) {
4550 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4551 NULL);
4552 }
4553
4554 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4555 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4556 KM_SLEEP);
4557 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4558
4559 /* Allocate the per netstack stats */
4560 mutex_enter(&cpu_lock);
4561 us->us_sc_cnt = MAX(ncpus, boot_ncpus);
4562 mutex_exit(&cpu_lock);
4563 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *),
4564 KM_SLEEP);
4565 for (i = 0; i < us->us_sc_cnt; i++) {
4566 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4567 KM_SLEEP);
4568 }
4569
4570 us->us_kstat = udp_kstat2_init(stackid);
4571 us->us_mibkp = udp_kstat_init(stackid);
4572
4573 major = mod_name_to_major(INET_NAME);
4574 error = ldi_ident_from_major(major, &us->us_ldi_ident);
4575 ASSERT(error == 0);
4576 return (us);
4577 }
4578
4579 /*
4580 * Free the UDP stack instance.
4581 */
4582 static void
4583 udp_stack_fini(netstackid_t stackid, void *arg)
4584 {
4585 udp_stack_t *us = (udp_stack_t *)arg;
4586 int i;
4587
4588 for (i = 0; i < us->us_bind_fanout_size; i++) {
4589 mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4590 }
4591
4592 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4593 sizeof (udp_fanout_t));
4594
4595 us->us_bind_fanout = NULL;
4596
4597 for (i = 0; i < us->us_sc_cnt; i++)
4598 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4599 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4600
4601 kmem_free(us->us_propinfo_tbl,
4602 udp_propinfo_count * sizeof (mod_prop_info_t));
4603 us->us_propinfo_tbl = NULL;
4604
4605 udp_kstat_fini(stackid, us->us_mibkp);
4606 us->us_mibkp = NULL;
4607
4608 udp_kstat2_fini(stackid, us->us_kstat);
4609 us->us_kstat = NULL;
4610
4611 mutex_destroy(&us->us_epriv_port_lock);
4612 ldi_ident_release(us->us_ldi_ident);
4613 kmem_free(us, sizeof (*us));
4614 }
4615
4616 static size_t
4617 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4618 {
4619 udp_stack_t *us = udp->udp_us;
4620
4621 /* We add a bit of extra buffering */
4622 size += size >> 1;
4623 if (size > us->us_max_buf)
4624 size = us->us_max_buf;
4625
4626 udp->udp_rcv_hiwat = size;
4627 return (size);
4628 }
4629
4630 /*
4631 * For the lower queue so that UDP can be a dummy mux.
4632 * Nobody should be sending
4633 * packets up this stream
4634 */
4635 static void
4636 udp_lrput(queue_t *q, mblk_t *mp)
4637 {
4638 switch (mp->b_datap->db_type) {
4639 case M_FLUSH:
4640 /* Turn around */
4641 if (*mp->b_rptr & FLUSHW) {
4642 *mp->b_rptr &= ~FLUSHR;
4643 qreply(q, mp);
4644 return;
4645 }
4646 break;
4647 }
4648 freemsg(mp);
4649 }
4650
4651 /*
4652 * For the lower queue so that UDP can be a dummy mux.
4653 * Nobody should be sending packets down this stream.
4654 */
4655 /* ARGSUSED */
4656 void
4657 udp_lwput(queue_t *q, mblk_t *mp)
4658 {
4659 freemsg(mp);
4660 }
4661
4662 /*
4663 * When a CPU is added, we need to allocate the per CPU stats struct.
4664 */
4665 void
4666 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4667 {
4668 int i;
4669
4670 if (cpu_seqid < us->us_sc_cnt)
4671 return;
4672 for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4673 ASSERT(us->us_sc[i] == NULL);
4674 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4675 KM_SLEEP);
4676 }
4677 membar_producer();
4678 us->us_sc_cnt = cpu_seqid + 1;
4679 }
4680
4681 /*
 * The routines below are for the UDP socket module.
4683 */
4684
4685 static conn_t *
4686 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4687 {
4688 udp_t *udp;
4689 conn_t *connp;
4690 zoneid_t zoneid;
4691 netstack_t *ns;
4692 udp_stack_t *us;
4693 int len;
4694
4695 ASSERT(errorp != NULL);
4696
4697 if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4698 return (NULL);
4699
4700 ns = netstack_find_by_cred(credp);
4701 ASSERT(ns != NULL);
4702 us = ns->netstack_udp;
4703 ASSERT(us != NULL);
4704
4705 /*
4706 * For exclusive stacks we set the zoneid to zero
4707 * to make UDP operate as if in the global zone.
4708 */
4709 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4710 zoneid = GLOBAL_ZONEID;
4711 else
4712 zoneid = crgetzoneid(credp);
4713
4714 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4715
4716 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4717 if (connp == NULL) {
4718 netstack_rele(ns);
4719 *errorp = ENOMEM;
4720 return (NULL);
4721 }
4722 udp = connp->conn_udp;
4723
4724 /*
4725 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4726 * done by netstack_find_by_cred()
4727 */
4728 netstack_rele(ns);
4729
4730 /*
4731 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4732 * need to lock anything.
4733 */
4734 ASSERT(connp->conn_proto == IPPROTO_UDP);
4735 ASSERT(connp->conn_udp == udp);
4736 ASSERT(udp->udp_connp == connp);
4737
4738 /* Set the initial state of the stream and the privilege status. */
4739 udp->udp_state = TS_UNBND;
4740 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4741 if (isv6) {
4742 connp->conn_family = AF_INET6;
4743 connp->conn_ipversion = IPV6_VERSION;
4744 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4745 connp->conn_default_ttl = us->us_ipv6_hoplimit;
4746 len = sizeof (ip6_t) + UDPH_SIZE;
4747 } else {
4748 connp->conn_family = AF_INET;
4749 connp->conn_ipversion = IPV4_VERSION;
4750 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4751 connp->conn_default_ttl = us->us_ipv4_ttl;
4752 len = sizeof (ipha_t) + UDPH_SIZE;
4753 }
4754
4755 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4756 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4757
4758 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4759 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4760 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4761 connp->conn_ixa->ixa_zoneid = zoneid;
4762
4763 connp->conn_zoneid = zoneid;
4764
4765 /*
4766 * If the caller has the process-wide flag set, then default to MAC
4767 * exempt mode. This allows read-down to unlabeled hosts.
4768 */
4769 if (getpflags(NET_MAC_AWARE, credp) != 0)
4770 connp->conn_mac_mode = CONN_MAC_AWARE;
4771
4772 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4773
4774 udp->udp_us = us;
4775
4776 connp->conn_rcvbuf = us->us_recv_hiwat;
4777 connp->conn_sndbuf = us->us_xmit_hiwat;
4778 connp->conn_sndlowat = us->us_xmit_lowat;
4779 connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4780
4781 connp->conn_wroff = len + us->us_wroff_extra;
4782 connp->conn_so_type = SOCK_DGRAM;
4783
4784 connp->conn_recv = udp_input;
4785 connp->conn_recvicmp = udp_icmp_input;
4786 crhold(credp);
4787 connp->conn_cred = credp;
4788 connp->conn_cpid = curproc->p_pid;
4789 connp->conn_open_time = ddi_get_lbolt64();
4790 /* Cache things in ixa without an extra refhold */
4791 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4792 connp->conn_ixa->ixa_cred = connp->conn_cred;
4793 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4794 if (is_system_labeled())
4795 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4796
4797 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4798
4799 if (us->us_pmtu_discovery)
4800 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4801
4802 return (connp);
4803 }
4804
4805 sock_lower_handle_t
4806 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4807 uint_t *smodep, int *errorp, int flags, cred_t *credp)
4808 {
4809 udp_t *udp = NULL;
4810 udp_stack_t *us;
4811 conn_t *connp;
4812 boolean_t isv6;
4813
4814 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4815 (proto != 0 && proto != IPPROTO_UDP)) {
4816 *errorp = EPROTONOSUPPORT;
4817 return (NULL);
4818 }
4819
4820 if (family == AF_INET6)
4821 isv6 = B_TRUE;
4822 else
4823 isv6 = B_FALSE;
4824
4825 connp = udp_do_open(credp, isv6, flags, errorp);
4826 if (connp == NULL)
4827 return (NULL);
4828
4829 udp = connp->conn_udp;
4830 ASSERT(udp != NULL);
4831 us = udp->udp_us;
4832 ASSERT(us != NULL);
4833
4834 udp->udp_issocket = B_TRUE;
4835 connp->conn_flags |= IPCL_NONSTR;
4836
4837 /*
4838 * Set flow control
4839 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4840 * need to lock anything.
4841 */
4842 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4843 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4844
4845 connp->conn_flow_cntrld = B_FALSE;
4846
4847 mutex_enter(&connp->conn_lock);
4848 connp->conn_state_flags &= ~CONN_INCIPIENT;
4849 mutex_exit(&connp->conn_lock);
4850
4851 *errorp = 0;
4852 *smodep = SM_ATOMIC;
4853 *sock_downcalls = &sock_udp_downcalls;
4854 return ((sock_lower_handle_t)connp);
4855 }
4856
4857 /* ARGSUSED3 */
4858 void
4859 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4860 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4861 {
4862 conn_t *connp = (conn_t *)proto_handle;
4863 struct sock_proto_props sopp;
4864
4865 /* All Solaris components should pass a cred for this operation. */
4866 ASSERT(cr != NULL);
4867
4868 connp->conn_upcalls = sock_upcalls;
4869 connp->conn_upper_handle = sock_handle;
4870
4871 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4872 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4873 sopp.sopp_wroff = connp->conn_wroff;
4874 sopp.sopp_maxblk = INFPSZ;
4875 sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4876 sopp.sopp_rxlowat = connp->conn_rcvlowat;
4877 sopp.sopp_maxaddrlen = sizeof (sin6_t);
4878 sopp.sopp_maxpsz =
4879 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4880 UDP_MAXPACKET_IPV6;
4881 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4882 udp_mod_info.mi_minpsz;
4883
4884 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4885 &sopp);
4886 }
4887
4888 static void
4889 udp_do_close(conn_t *connp)
4890 {
4891 udp_t *udp;
4892
4893 ASSERT(connp != NULL && IPCL_IS_UDP(connp));
4894 udp = connp->conn_udp;
4895
4896 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
4897 /*
4898 * Running in cluster mode - register unbind information
4899 */
4900 if (connp->conn_ipversion == IPV4_VERSION) {
4901 (*cl_inet_unbind)(
4902 connp->conn_netstack->netstack_stackid,
4903 IPPROTO_UDP, AF_INET,
4904 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
4905 (in_port_t)connp->conn_lport, NULL);
4906 } else {
4907 (*cl_inet_unbind)(
4908 connp->conn_netstack->netstack_stackid,
4909 IPPROTO_UDP, AF_INET6,
4910 (uint8_t *)&(connp->conn_laddr_v6),
4911 (in_port_t)connp->conn_lport, NULL);
4912 }
4913 }
4914
4915 udp_bind_hash_remove(udp, B_FALSE);
4916
4917 ip_quiesce_conn(connp);
4918
4919 if (!IPCL_IS_NONSTR(connp)) {
4920 ASSERT(connp->conn_wq != NULL);
4921 ASSERT(connp->conn_rq != NULL);
4922 qprocsoff(connp->conn_rq);
4923 }
4924
4925 udp_close_free(connp);
4926
4927 /*
4928 * Now we are truly single threaded on this stream, and can
4929 * delete the things hanging off the connp, and finally the connp.
4930 * We removed this connp from the fanout list, it cannot be
4931 * accessed thru the fanouts, and we already waited for the
4932 * conn_ref to drop to 0. We are already in close, so
4933 * there cannot be any other thread from the top. qprocsoff
4934 * has completed, and service has completed or won't run in
4935 * future.
4936 */
4937 ASSERT(connp->conn_ref == 1);
4938
4939 if (!IPCL_IS_NONSTR(connp)) {
4940 inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
4941 } else {
4942 ip_free_helper_stream(connp);
4943 }
4944
4945 connp->conn_ref--;
4946 ipcl_conn_destroy(connp);
4947 }
4948
4949 /* ARGSUSED1 */
4950 int
4951 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4952 {
4953 conn_t *connp = (conn_t *)proto_handle;
4954
4955 /* All Solaris components should pass a cred for this operation. */
4956 ASSERT(cr != NULL);
4957
4958 udp_do_close(connp);
4959 return (0);
4960 }
4961
4962 static int
4963 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
4964 boolean_t bind_to_req_port_only)
4965 {
4966 sin_t *sin;
4967 sin6_t *sin6;
4968 udp_t *udp = connp->conn_udp;
4969 int error = 0;
4970 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
4971 in_port_t port; /* Host byte order */
4972 in_port_t requested_port; /* Host byte order */
4973 int count;
4974 ipaddr_t v4src; /* Set if AF_INET */
4975 in6_addr_t v6src;
4976 int loopmax;
4977 udp_fanout_t *udpf;
4978 in_port_t lport; /* Network byte order */
4979 uint_t scopeid = 0;
4980 zoneid_t zoneid = IPCL_ZONEID(connp);
4981 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
4982 boolean_t is_inaddr_any;
4983 mlp_type_t addrtype, mlptype;
4984 udp_stack_t *us = udp->udp_us;
4985
4986 switch (len) {
4987 case sizeof (sin_t): /* Complete IPv4 address */
4988 sin = (sin_t *)sa;
4989
4990 if (sin == NULL || !OK_32PTR((char *)sin))
4991 return (EINVAL);
4992
4993 if (connp->conn_family != AF_INET ||
4994 sin->sin_family != AF_INET) {
4995 return (EAFNOSUPPORT);
4996 }
4997 v4src = sin->sin_addr.s_addr;
4998 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
4999 if (v4src != INADDR_ANY) {
5000 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
5001 B_TRUE);
5002 }
5003 port = ntohs(sin->sin_port);
5004 break;
5005
5006 case sizeof (sin6_t): /* complete IPv6 address */
5007 sin6 = (sin6_t *)sa;
5008
5009 if (sin6 == NULL || !OK_32PTR((char *)sin6))
5010 return (EINVAL);
5011
5012 if (connp->conn_family != AF_INET6 ||
5013 sin6->sin6_family != AF_INET6) {
5014 return (EAFNOSUPPORT);
5015 }
5016 v6src = sin6->sin6_addr;
5017 if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5018 if (connp->conn_ipv6_v6only)
5019 return (EADDRNOTAVAIL);
5020
5021 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
5022 if (v4src != INADDR_ANY) {
5023 laddr_type = ip_laddr_verify_v4(v4src,
5024 zoneid, ipst, B_FALSE);
5025 }
5026 } else {
5027 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5028 if (IN6_IS_ADDR_LINKSCOPE(&v6src))
5029 scopeid = sin6->sin6_scope_id;
5030 laddr_type = ip_laddr_verify_v6(&v6src,
5031 zoneid, ipst, B_TRUE, scopeid);
5032 }
5033 }
5034 port = ntohs(sin6->sin6_port);
5035 break;
5036
5037 default: /* Invalid request */
5038 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5039 "udp_bind: bad ADDR_length length %u", len);
5040 return (-TBADADDR);
5041 }
5042
5043 /* Is the local address a valid unicast, multicast, or broadcast? */
5044 if (laddr_type == IPVL_BAD)
5045 return (EADDRNOTAVAIL);
5046
5047 requested_port = port;
5048
5049 if (requested_port == 0 || !bind_to_req_port_only)
5050 bind_to_req_port_only = B_FALSE;
5051 else /* T_BIND_REQ and requested_port != 0 */
5052 bind_to_req_port_only = B_TRUE;
5053
5054 if (requested_port == 0) {
5055 /*
5056 * If the application passed in zero for the port number, it
5057 * doesn't care which port number we bind to. Get one in the
5058 * valid range.
5059 */
5060 if (connp->conn_anon_priv_bind) {
5061 port = udp_get_next_priv_port(udp);
5062 } else {
5063 port = udp_update_next_port(udp,
5064 us->us_next_port_to_try, B_TRUE);
5065 }
5066 } else {
5067 /*
5068 * If the port is in the well-known privileged range,
5069 * make sure the caller was privileged.
5070 */
5071 int i;
5072 boolean_t priv = B_FALSE;
5073
5074 if (port < us->us_smallest_nonpriv_port) {
5075 priv = B_TRUE;
5076 } else {
5077 for (i = 0; i < us->us_num_epriv_ports; i++) {
5078 if (port == us->us_epriv_ports[i]) {
5079 priv = B_TRUE;
5080 break;
5081 }
5082 }
5083 }
5084
5085 if (priv) {
5086 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
5087 return (-TACCES);
5088 }
5089 }
5090
5091 if (port == 0)
5092 return (-TNOADDR);
5093
5094 /*
5095 * The state must be TS_UNBND. TPI mandates that users must send
5096 * TPI primitives only 1 at a time and wait for the response before
5097 * sending the next primitive.
5098 */
5099 mutex_enter(&connp->conn_lock);
5100 if (udp->udp_state != TS_UNBND) {
5101 mutex_exit(&connp->conn_lock);
5102 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5103 "udp_bind: bad state, %u", udp->udp_state);
5104 return (-TOUTSTATE);
5105 }
5106 /*
5107 * Copy the source address into our udp structure. This address
5108 * may still be zero; if so, IP will fill in the correct address
5109 * each time an outbound packet is passed to it. Since the udp is
5110 * not yet in the bind hash list, we don't grab the uf_lock to
5111 * change conn_ipversion
5112 */
5113 if (connp->conn_family == AF_INET) {
5114 ASSERT(sin != NULL);
5115 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
5116 } else {
5117 if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5118 /*
5119 * no need to hold the uf_lock to set the conn_ipversion
5120 * since we are not yet in the fanout list
5121 */
5122 connp->conn_ipversion = IPV4_VERSION;
5123 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
5124 } else {
5125 connp->conn_ipversion = IPV6_VERSION;
5126 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
5127 }
5128 }
5129
5130 /*
5131 * If conn_reuseaddr is not set, then we have to make sure that
5132 * the IP address and port number the application requested
5133 * (or we selected for the application) is not being used by
5134 * another stream. If another stream is already using the
5135 * requested IP address and port, the behavior depends on
5136 * "bind_to_req_port_only". If set the bind fails; otherwise we
5137 * search for any an unused port to bind to the stream.
5138 *
5139 * As per the BSD semantics, as modified by the Deering multicast
5140 * changes, if udp_reuseaddr is set, then we allow multiple binds
5141 * to the same port independent of the local IP address.
5142 *
5143 * This is slightly different than in SunOS 4.X which did not
5144 * support IP multicast. Note that the change implemented by the
5145 * Deering multicast code effects all binds - not only binding
5146 * to IP multicast addresses.
5147 *
5148 * Note that when binding to port zero we ignore SO_REUSEADDR in
5149 * order to guarantee a unique port.
5150 */
5151
5152 count = 0;
5153 if (connp->conn_anon_priv_bind) {
5154 /*
5155 * loopmax = (IPPORT_RESERVED-1) -
5156 * us->us_min_anonpriv_port + 1
5157 */
5158 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
5159 } else {
5160 loopmax = us->us_largest_anon_port -
5161 us->us_smallest_anon_port + 1;
5162 }
5163
5164 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
5165
5166 for (;;) {
5167 udp_t *udp1;
5168 boolean_t found_exclbind = B_FALSE;
5169 conn_t *connp1;
5170
5171 /*
5172 * Walk through the list of udp streams bound to
5173 * requested port with the same IP address.
5174 */
5175 lport = htons(port);
5176 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5177 us->us_bind_fanout_size)];
5178 mutex_enter(&udpf->uf_lock);
5179 for (udp1 = udpf->uf_udp; udp1 != NULL;
5180 udp1 = udp1->udp_bind_hash) {
5181 connp1 = udp1->udp_connp;
5182
5183 if (lport != connp1->conn_lport)
5184 continue;
5185
5186 /*
5187 * On a labeled system, we must treat bindings to ports
5188 * on shared IP addresses by sockets with MAC exemption
5189 * privilege as being in all zones, as there's
5190 * otherwise no way to identify the right receiver.
5191 */
5192 if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5193 continue;
5194
5195 /*
5196 * If UDP_EXCLBIND is set for either the bound or
5197 * binding endpoint, the semantics of bind
5198 * is changed according to the following chart.
5199 *
5200 * spec = specified address (v4 or v6)
5201 * unspec = unspecified address (v4 or v6)
5202 * A = specified addresses are different for endpoints
5203 *
5204 * bound bind to allowed?
5205 * -------------------------------------
5206 * unspec unspec no
5207 * unspec spec no
5208 * spec unspec no
5209 * spec spec yes if A
5210 *
5211 * For labeled systems, SO_MAC_EXEMPT behaves the same
5212 * as UDP_EXCLBIND, except that zoneid is ignored.
5213 */
5214 if (connp1->conn_exclbind || connp->conn_exclbind ||
5215 IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5216 if (V6_OR_V4_INADDR_ANY(
5217 connp1->conn_bound_addr_v6) ||
5218 is_inaddr_any ||
5219 IN6_ARE_ADDR_EQUAL(
5220 &connp1->conn_bound_addr_v6,
5221 &v6src)) {
5222 found_exclbind = B_TRUE;
5223 break;
5224 }
5225 continue;
5226 }
5227
5228 /*
5229 * Check ipversion to allow IPv4 and IPv6 sockets to
5230 * have disjoint port number spaces.
5231 */
5232 if (connp->conn_ipversion != connp1->conn_ipversion) {
5233
5234 /*
5235 * On the first time through the loop, if the
5236 * the user intentionally specified a
5237 * particular port number, then ignore any
5238 * bindings of the other protocol that may
5239 * conflict. This allows the user to bind IPv6
5240 * alone and get both v4 and v6, or bind both
5241 * both and get each seperately. On subsequent
5242 * times through the loop, we're checking a
5243 * port that we chose (not the user) and thus
5244 * we do not allow casual duplicate bindings.
5245 */
5246 if (count == 0 && requested_port != 0)
5247 continue;
5248 }
5249
5250 /*
5251 * No difference depending on SO_REUSEADDR.
5252 *
5253 * If existing port is bound to a
5254 * non-wildcard IP address and
5255 * the requesting stream is bound to
5256 * a distinct different IP addresses
5257 * (non-wildcard, also), keep going.
5258 */
5259 if (!is_inaddr_any &&
5260 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5261 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5262 &v6src)) {
5263 continue;
5264 }
5265 break;
5266 }
5267
5268 if (!found_exclbind &&
5269 (connp->conn_reuseaddr && requested_port != 0)) {
5270 break;
5271 }
5272
5273 if (udp1 == NULL) {
5274 /*
5275 * No other stream has this IP address
5276 * and port number. We can use it.
5277 */
5278 break;
5279 }
5280 mutex_exit(&udpf->uf_lock);
5281 if (bind_to_req_port_only) {
5282 /*
5283 * We get here only when requested port
5284 * is bound (and only first of the for()
5285 * loop iteration).
5286 *
5287 * The semantics of this bind request
5288 * require it to fail so we return from
5289 * the routine (and exit the loop).
5290 *
5291 */
5292 mutex_exit(&connp->conn_lock);
5293 return (-TADDRBUSY);
5294 }
5295
5296 if (connp->conn_anon_priv_bind) {
5297 port = udp_get_next_priv_port(udp);
5298 } else {
5299 if ((count == 0) && (requested_port != 0)) {
5300 /*
5301 * If the application wants us to find
5302 * a port, get one to start with. Set
5303 * requested_port to 0, so that we will
5304 * update us->us_next_port_to_try below.
5305 */
5306 port = udp_update_next_port(udp,
5307 us->us_next_port_to_try, B_TRUE);
5308 requested_port = 0;
5309 } else {
5310 port = udp_update_next_port(udp, port + 1,
5311 B_FALSE);
5312 }
5313 }
5314
5315 if (port == 0 || ++count >= loopmax) {
5316 /*
5317 * We've tried every possible port number and
5318 * there are none available, so send an error
5319 * to the user.
5320 */
5321 mutex_exit(&connp->conn_lock);
5322 return (-TNOADDR);
5323 }
5324 }
5325
5326 /*
5327 * Copy the source address into our udp structure. This address
5328 * may still be zero; if so, ip_attr_connect will fill in the correct
5329 * address when a packet is about to be sent.
5330 * If we are binding to a broadcast or multicast address then
5331 * we just set the conn_bound_addr since we don't want to use
5332 * that as the source address when sending.
5333 */
5334 connp->conn_bound_addr_v6 = v6src;
5335 connp->conn_laddr_v6 = v6src;
5336 if (scopeid != 0) {
5337 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5338 connp->conn_ixa->ixa_scopeid = scopeid;
5339 connp->conn_incoming_ifindex = scopeid;
5340 } else {
5341 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5342 connp->conn_incoming_ifindex = connp->conn_bound_if;
5343 }
5344
5345 switch (laddr_type) {
5346 case IPVL_UNICAST_UP:
5347 case IPVL_UNICAST_DOWN:
5348 connp->conn_saddr_v6 = v6src;
5349 connp->conn_mcbc_bind = B_FALSE;
5350 break;
5351 case IPVL_MCAST:
5352 case IPVL_BCAST:
5353 /* ip_set_destination will pick a source address later */
5354 connp->conn_saddr_v6 = ipv6_all_zeros;
5355 connp->conn_mcbc_bind = B_TRUE;
5356 break;
5357 }
5358
5359 /* Any errors after this point should use late_error */
5360 connp->conn_lport = lport;
5361
5362 /*
5363 * Now reset the next anonymous port if the application requested
5364 * an anonymous port, or we handed out the next anonymous port.
5365 */
5366 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5367 us->us_next_port_to_try = port + 1;
5368 }
5369
5370 /* Initialize the T_BIND_ACK. */
5371 if (connp->conn_family == AF_INET) {
5372 sin->sin_port = connp->conn_lport;
5373 } else {
5374 sin6->sin6_port = connp->conn_lport;
5375 }
5376 udp->udp_state = TS_IDLE;
5377 udp_bind_hash_insert(udpf, udp);
5378 mutex_exit(&udpf->uf_lock);
5379 mutex_exit(&connp->conn_lock);
5380
5381 if (cl_inet_bind) {
5382 /*
5383 * Running in cluster mode - register bind information
5384 */
5385 if (connp->conn_ipversion == IPV4_VERSION) {
5386 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5387 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5388 (in_port_t)connp->conn_lport, NULL);
5389 } else {
5390 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5391 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5392 (in_port_t)connp->conn_lport, NULL);
5393 }
5394 }
5395
5396 mutex_enter(&connp->conn_lock);
5397 connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5398 if (is_system_labeled() && (!connp->conn_anon_port ||
5399 connp->conn_anon_mlp)) {
5400 uint16_t mlpport;
5401 zone_t *zone;
5402
5403 zone = crgetzone(cr);
5404 connp->conn_mlp_type =
5405 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5406 mlptSingle;
5407 addrtype = tsol_mlp_addr_type(
5408 connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5409 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5410 if (addrtype == mlptSingle) {
5411 error = -TNOADDR;
5412 mutex_exit(&connp->conn_lock);
5413 goto late_error;
5414 }
5415 mlpport = connp->conn_anon_port ? PMAPPORT : port;
5416 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5417 addrtype);
5418
5419 /*
5420 * It is a coding error to attempt to bind an MLP port
5421 * without first setting SOL_SOCKET/SCM_UCRED.
5422 */
5423 if (mlptype != mlptSingle &&
5424 connp->conn_mlp_type == mlptSingle) {
5425 error = EINVAL;
5426 mutex_exit(&connp->conn_lock);
5427 goto late_error;
5428 }
5429
5430 /*
5431 * It is an access violation to attempt to bind an MLP port
5432 * without NET_BINDMLP privilege.
5433 */
5434 if (mlptype != mlptSingle &&
5435 secpolicy_net_bindmlp(cr) != 0) {
5436 if (connp->conn_debug) {
5437 (void) strlog(UDP_MOD_ID, 0, 1,
5438 SL_ERROR|SL_TRACE,
5439 "udp_bind: no priv for multilevel port %d",
5440 mlpport);
5441 }
5442 error = -TACCES;
5443 mutex_exit(&connp->conn_lock);
5444 goto late_error;
5445 }
5446
5447 /*
5448 * If we're specifically binding a shared IP address and the
5449 * port is MLP on shared addresses, then check to see if this
5450 * zone actually owns the MLP. Reject if not.
5451 */
5452 if (mlptype == mlptShared && addrtype == mlptShared) {
5453 /*
5454 * No need to handle exclusive-stack zones since
5455 * ALL_ZONES only applies to the shared stack.
5456 */
5457 zoneid_t mlpzone;
5458
5459 mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5460 htons(mlpport));
5461 if (connp->conn_zoneid != mlpzone) {
5462 if (connp->conn_debug) {
5463 (void) strlog(UDP_MOD_ID, 0, 1,
5464 SL_ERROR|SL_TRACE,
5465 "udp_bind: attempt to bind port "
5466 "%d on shared addr in zone %d "
5467 "(should be %d)",
5468 mlpport, connp->conn_zoneid,
5469 mlpzone);
5470 }
5471 error = -TACCES;
5472 mutex_exit(&connp->conn_lock);
5473 goto late_error;
5474 }
5475 }
5476 if (connp->conn_anon_port) {
5477 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5478 port, B_TRUE);
5479 if (error != 0) {
5480 if (connp->conn_debug) {
5481 (void) strlog(UDP_MOD_ID, 0, 1,
5482 SL_ERROR|SL_TRACE,
5483 "udp_bind: cannot establish anon "
5484 "MLP for port %d", port);
5485 }
5486 error = -TACCES;
5487 mutex_exit(&connp->conn_lock);
5488 goto late_error;
5489 }
5490 }
5491 connp->conn_mlp_type = mlptype;
5492 }
5493
5494 /*
5495 * We create an initial header template here to make a subsequent
5496 * sendto have a starting point. Since conn_last_dst is zero the
5497 * first sendto will always follow the 'dst changed' code path.
5498 * Note that we defer massaging options and the related checksum
5499 * adjustment until we have a destination address.
5500 */
5501 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5502 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5503 if (error != 0) {
5504 mutex_exit(&connp->conn_lock);
5505 goto late_error;
5506 }
5507 /* Just in case */
5508 connp->conn_faddr_v6 = ipv6_all_zeros;
5509 connp->conn_fport = 0;
5510 connp->conn_v6lastdst = ipv6_all_zeros;
5511 mutex_exit(&connp->conn_lock);
5512
5513 error = ip_laddr_fanout_insert(connp);
5514 if (error != 0)
5515 goto late_error;
5516
5517 /* Bind succeeded */
5518 return (0);
5519
5520 late_error:
5521 /* We had already picked the port number, and then the bind failed */
5522 mutex_enter(&connp->conn_lock);
5523 udpf = &us->us_bind_fanout[
5524 UDP_BIND_HASH(connp->conn_lport,
5525 us->us_bind_fanout_size)];
5526 mutex_enter(&udpf->uf_lock);
5527 connp->conn_saddr_v6 = ipv6_all_zeros;
5528 connp->conn_bound_addr_v6 = ipv6_all_zeros;
5529 connp->conn_laddr_v6 = ipv6_all_zeros;
5530 if (scopeid != 0) {
5531 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5532 connp->conn_incoming_ifindex = connp->conn_bound_if;
5533 }
5534 udp->udp_state = TS_UNBND;
5535 udp_bind_hash_remove(udp, B_TRUE);
5536 connp->conn_lport = 0;
5537 mutex_exit(&udpf->uf_lock);
5538 connp->conn_anon_port = B_FALSE;
5539 connp->conn_mlp_type = mlptSingle;
5540
5541 connp->conn_v6lastdst = ipv6_all_zeros;
5542
5543 /* Restore the header that was built above - different source address */
5544 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5545 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5546 mutex_exit(&connp->conn_lock);
5547 return (error);
5548 }
5549
5550 int
5551 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5552 socklen_t len, cred_t *cr)
5553 {
5554 int error;
5555 conn_t *connp;
5556
5557 /* All Solaris components should pass a cred for this operation. */
5558 ASSERT(cr != NULL);
5559
5560 connp = (conn_t *)proto_handle;
5561
5562 if (sa == NULL)
5563 error = udp_do_unbind(connp);
5564 else
5565 error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5566
5567 if (error < 0) {
5568 if (error == -TOUTSTATE)
5569 error = EINVAL;
5570 else
5571 error = proto_tlitosyserr(-error);
5572 }
5573
5574 return (error);
5575 }
5576
5577 static int
5578 udp_implicit_bind(conn_t *connp, cred_t *cr)
5579 {
5580 sin6_t sin6addr;
5581 sin_t *sin;
5582 sin6_t *sin6;
5583 socklen_t len;
5584 int error;
5585
5586 /* All Solaris components should pass a cred for this operation. */
5587 ASSERT(cr != NULL);
5588
5589 if (connp->conn_family == AF_INET) {
5590 len = sizeof (struct sockaddr_in);
5591 sin = (sin_t *)&sin6addr;
5592 *sin = sin_null;
5593 sin->sin_family = AF_INET;
5594 sin->sin_addr.s_addr = INADDR_ANY;
5595 } else {
5596 ASSERT(connp->conn_family == AF_INET6);
5597 len = sizeof (sin6_t);
5598 sin6 = (sin6_t *)&sin6addr;
5599 *sin6 = sin6_null;
5600 sin6->sin6_family = AF_INET6;
5601 V6_SET_ZERO(sin6->sin6_addr);
5602 }
5603
5604 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5605 cr, B_FALSE);
5606 return ((error < 0) ? proto_tlitosyserr(-error) : error);
5607 }
5608
5609 /*
5610 * This routine removes a port number association from a stream. It
5611 * is called by udp_unbind and udp_tpi_unbind.
5612 */
5613 static int
5614 udp_do_unbind(conn_t *connp)
5615 {
5616 udp_t *udp = connp->conn_udp;
5617 udp_fanout_t *udpf;
5618 udp_stack_t *us = udp->udp_us;
5619
5620 if (cl_inet_unbind != NULL) {
5621 /*
5622 * Running in cluster mode - register unbind information
5623 */
5624 if (connp->conn_ipversion == IPV4_VERSION) {
5625 (*cl_inet_unbind)(
5626 connp->conn_netstack->netstack_stackid,
5627 IPPROTO_UDP, AF_INET,
5628 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5629 (in_port_t)connp->conn_lport, NULL);
5630 } else {
5631 (*cl_inet_unbind)(
5632 connp->conn_netstack->netstack_stackid,
5633 IPPROTO_UDP, AF_INET6,
5634 (uint8_t *)&(connp->conn_laddr_v6),
5635 (in_port_t)connp->conn_lport, NULL);
5636 }
5637 }
5638
5639 mutex_enter(&connp->conn_lock);
5640 /* If a bind has not been done, we can't unbind. */
5641 if (udp->udp_state == TS_UNBND) {
5642 mutex_exit(&connp->conn_lock);
5643 return (-TOUTSTATE);
5644 }
5645 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5646 us->us_bind_fanout_size)];
5647 mutex_enter(&udpf->uf_lock);
5648 udp_bind_hash_remove(udp, B_TRUE);
5649 connp->conn_saddr_v6 = ipv6_all_zeros;
5650 connp->conn_bound_addr_v6 = ipv6_all_zeros;
5651 connp->conn_laddr_v6 = ipv6_all_zeros;
5652 connp->conn_mcbc_bind = B_FALSE;
5653 connp->conn_lport = 0;
5654 /* In case we were also connected */
5655 connp->conn_faddr_v6 = ipv6_all_zeros;
5656 connp->conn_fport = 0;
5657 mutex_exit(&udpf->uf_lock);
5658
5659 connp->conn_v6lastdst = ipv6_all_zeros;
5660 udp->udp_state = TS_UNBND;
5661
5662 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5663 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5664 mutex_exit(&connp->conn_lock);
5665
5666 ip_unbind(connp);
5667
5668 return (0);
5669 }
5670
/*
 * It associates a default destination address with the stream.
 *
 * `sa'/`len' carry the destination; only sizeof (sin_t) and
 * sizeof (sin6_t) are accepted, any other length yields EINVAL.
 * `cr' and `pid' are recorded on the conn and on the transmit
 * attributes as the identity of the caller of connect.
 *
 * Return value: 0 on success.  Failures are a mix of positive errno
 * values (EINVAL, EADDRNOTAVAIL, ENOMEM, or whatever conn_connect(),
 * ipcl_conn_insert() or udp_build_hdr_template() returned) and negative
 * TLI errors (-TBADADDR, -TOUTSTATE); callers must handle both signs.
 *
 * Any failure after the connection state has been touched goes through
 * the connect_failed label, which puts the endpoint back in TS_IDLE,
 * clears the peer address/port and restores the source address from the
 * bound address.
 *
 * NOTE(review): `sa' is const-qualified, yet the zero-destination
 * handling below writes the loopback address back through the
 * de-constified sin/sin6 pointers.
 */
static int
udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
    cred_t *cr, pid_t pid)
{
	sin6_t *sin6;
	sin_t *sin;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	udp_fanout_t *udpf;
	udp_t *udp, *udp1;
	ushort_t ipversion;
	udp_stack_t *us;
	int error;
	conn_t *connp1;
	ip_xmit_attr_t *ixa;
	ip_xmit_attr_t *oldixa;
	uint_t scopeid = 0;
	uint_t srcid = 0;
	in6_addr_t v6src = connp->conn_saddr_v6;
	boolean_t v4mapped;

	udp = connp->conn_udp;
	us = udp->udp_us;

	/*
	 * Address has been verified by the caller
	 */
	switch (len) {
	default:
		/*
		 * Should never happen
		 */
		return (EINVAL);

	case sizeof (sin_t):
		sin = (sin_t *)sa;
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(connp->conn_ipversion == IPV4_VERSION);
		ipversion = IPV4_VERSION;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)sa;
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		srcid = sin6->__sin6_src_id;
		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
		/*
		 * A source id is only resolved into v6src when no source
		 * address has been set on the conn yet.
		 */
		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
			    v4mapped, connp->conn_netstack)) {
				/* Mismatch v4mapped/v6 specified by srcid. */
				return (EADDRNOTAVAIL);
			}
		}
		if (v4mapped) {
			if (connp->conn_ipv6_v6only)
				return (EADDRNOTAVAIL);

			/*
			 * Destination address is mapped IPv6 address.
			 * Source bound address should be unspecified or
			 * IPv6 mapped address as well.
			 */
			if (!IN6_IS_ADDR_UNSPECIFIED(
			    &connp->conn_bound_addr_v6) &&
			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
				return (EADDRNOTAVAIL);
			}
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			ipversion = IPV4_VERSION;
			flowinfo = 0;
		} else {
			ipversion = IPV6_VERSION;
			flowinfo = sin6->sin6_flowinfo;
			/* The scope id is only honoured for link-locals. */
			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
				scopeid = sin6->sin6_scope_id;
		}
		break;
	}

	/* A destination port of zero is rejected with a TLI error. */
	if (dstport == 0)
		return (-TBADADDR);

	/*
	 * If there is a different thread using conn_ixa then we get a new
	 * copy and cut the old one loose from conn_ixa. Otherwise we use
	 * conn_ixa and prevent any other thread from using/changing it.
	 * Once connect() is done other threads can use conn_ixa since the
	 * refcnt will be back at one.
	 * We defer updating conn_ixa until later to handle any concurrent
	 * conn_ixa_cleanup thread.
	 */
	ixa = conn_get_ixa(connp, B_FALSE);
	if (ixa == NULL)
		return (ENOMEM);

	mutex_enter(&connp->conn_lock);
	/*
	 * This udp_t must have bound to a port already before doing a connect.
	 * Reject if a connect is in progress (we drop conn_lock during
	 * udp_do_connect).
	 */
	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
		mutex_exit(&connp->conn_lock);
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		ixa_refrele(ixa);
		return (-TOUTSTATE);
	}
	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);

	/* Bind-hash bucket for our local port; also used on the error path. */
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];

	mutex_enter(&udpf->uf_lock);
	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_faddr_v6 = ipv6_all_zeros;
		connp->conn_fport = 0;
		udp->udp_state = TS_IDLE;
	}

	connp->conn_fport = dstport;
	connp->conn_ipversion = ipversion;
	if (ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (connp->conn_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = 0;
	} else {
		ASSERT(connp->conn_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = flowinfo;
	}
	mutex_exit(&udpf->uf_lock);

	/*
	 * We update our cred/cpid based on the caller of connect
	 */
	if (connp->conn_cred != cr) {
		/* Take the new hold before dropping the old one. */
		crhold(cr);
		crfree(connp->conn_cred);
		connp->conn_cred = cr;
	}
	connp->conn_cpid = pid;
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	if (scopeid != 0) {
		ixa->ixa_flags |= IXAF_SCOPEID_SET;
		ixa->ixa_scopeid = scopeid;
		connp->conn_incoming_ifindex = scopeid;
	} else {
		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}
	/*
	 * conn_connect will drop conn_lock and reacquire it.
	 * To prevent a send* from messing with this udp_t while the lock
	 * is dropped we set udp_state and clear conn_v6lastdst.
	 * That will make all send* fail with EISCONN.
	 */
	connp->conn_v6lastdst = ipv6_all_zeros;
	udp->udp_state = TS_WCON_CREQ;

	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		goto connect_failed;

	/*
	 * The addresses have been verified. Time to insert in
	 * the correct fanout list.
	 */
	error = ipcl_conn_insert(connp);
	if (error != 0)
		goto connect_failed;

	mutex_enter(&connp->conn_lock);
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		goto connect_failed;
	}

	udp->udp_state = TS_DATA_XFER;
	/* Record this as the "last" send even though we haven't sent any */
	connp->conn_v6lastdst = connp->conn_faddr_v6;
	connp->conn_lastipversion = connp->conn_ipversion;
	connp->conn_lastdstport = connp->conn_fport;
	connp->conn_lastflowinfo = connp->conn_flowinfo;
	connp->conn_lastscopeid = scopeid;
	connp->conn_lastsrcid = srcid;
	/* Also remember a source to use together with lastdst */
	connp->conn_v6lastsrc = v6src;

	/*
	 * Install our ixa on the conn; the reference on whatever was there
	 * before is dropped once conn_lock has been released.
	 */
	oldixa = conn_replace_ixa(connp, ixa);
	mutex_exit(&connp->conn_lock);
	ixa_refrele(oldixa);

	/*
	 * We've picked a source address above. Now we can
	 * verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER, skipping ourselves.
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;

		if (udp1 == udp)
			continue;

		connp1 = udp1->udp_connp;
		/* Any differing field means this entry is not a duplicate. */
		if (connp->conn_lport != connp1->conn_lport ||
		    connp->conn_ipversion != connp1->conn_ipversion ||
		    dstport != connp1->conn_fport ||
		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
		    &connp1->conn_laddr_v6) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
			continue;
		/* Identical 4-tuple already connected: refuse this connect. */
		mutex_exit(&udpf->uf_lock);
		error = -TBADADDR;
		goto connect_failed;
	}
	if (cl_inet_connect2 != NULL) {
		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
		if (error != 0) {
			/* Whatever the hook reported is mapped to -TBADADDR. */
			mutex_exit(&udpf->uf_lock);
			error = -TBADADDR;
			goto connect_failed;
		}
	}
	mutex_exit(&udpf->uf_lock);

	/* Drop the reference obtained from conn_get_ixa() above. */
	ixa_refrele(ixa);
	return (0);

connect_failed:
	/*
	 * Entered with neither conn_lock nor uf_lock held.  Undo the
	 * connection state and rebuild the header template to match.
	 */
	if (ixa != NULL)
		ixa_refrele(ixa);
	mutex_enter(&connp->conn_lock);
	mutex_enter(&udpf->uf_lock);
	udp->udp_state = TS_IDLE;
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	/* In case the source address was set above */
	if (connp->conn_mcbc_bind)
		connp->conn_saddr_v6 = ipv6_all_zeros;
	else
		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
	mutex_exit(&udpf->uf_lock);

	connp->conn_v6lastdst = ipv6_all_zeros;
	connp->conn_flowinfo = 0;

	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	return (error);
}
5974
5975 static int
5976 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5977 socklen_t len, sock_connid_t *id, cred_t *cr)
5978 {
5979 conn_t *connp = (conn_t *)proto_handle;
5980 udp_t *udp = connp->conn_udp;
5981 int error;
5982 boolean_t did_bind = B_FALSE;
5983 pid_t pid = curproc->p_pid;
5984
5985 /* All Solaris components should pass a cred for this operation. */
5986 ASSERT(cr != NULL);
5987
5988 if (sa == NULL) {
5989 /*
5990 * Disconnect
5991 * Make sure we are connected
5992 */
5993 if (udp->udp_state != TS_DATA_XFER)
5994 return (EINVAL);
5995
5996 error = udp_disconnect(connp);
5997 return (error);
5998 }
5999
6000 error = proto_verify_ip_addr(connp->conn_family, sa, len);
6001 if (error != 0)
6002 goto done;
6003
6004 /* do an implicit bind if necessary */
6005 if (udp->udp_state == TS_UNBND) {
6006 error = udp_implicit_bind(connp, cr);
6007 /*
6008 * We could be racing with an actual bind, in which case
6009 * we would see EPROTO. We cross our fingers and try
6010 * to connect.
6011 */
6012 if (!(error == 0 || error == EPROTO))
6013 goto done;
6014 did_bind = B_TRUE;
6015 }
6016 /*
6017 * set SO_DGRAM_ERRIND
6018 */
6019 connp->conn_dgram_errind = B_TRUE;
6020
6021 error = udp_do_connect(connp, sa, len, cr, pid);
6022
6023 if (error != 0 && did_bind) {
6024 int unbind_err;
6025
6026 unbind_err = udp_do_unbind(connp);
6027 ASSERT(unbind_err == 0);
6028 }
6029
6030 if (error == 0) {
6031 *id = 0;
6032 (*connp->conn_upcalls->su_connected)
6033 (connp->conn_upper_handle, 0, NULL, -1);
6034 } else if (error < 0) {
6035 error = proto_tlitosyserr(-error);
6036 }
6037
6038 done:
6039 if (error != 0 && udp->udp_state == TS_DATA_XFER) {
6040 /*
6041 * No need to hold locks to set state
6042 * after connect failure socket state is undefined
6043 * We set the state only to imitate old sockfs behavior
6044 */
6045 udp->udp_state = TS_IDLE;
6046 }
6047 return (error);
6048 }
6049
6050 int
6051 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6052 cred_t *cr)
6053 {
6054 sin6_t *sin6;
6055 sin_t *sin = NULL;
6056 uint_t srcid;
6057 conn_t *connp = (conn_t *)proto_handle;
6058 udp_t *udp = connp->conn_udp;
6059 int error = 0;
6060 udp_stack_t *us = udp->udp_us;
6061 ushort_t ipversion;
6062 pid_t pid = curproc->p_pid;
6063 ip_xmit_attr_t *ixa;
6064
6065 ASSERT(DB_TYPE(mp) == M_DATA);
6066
6067 /* All Solaris components should pass a cred for this operation. */
6068 ASSERT(cr != NULL);
6069
6070 /* do an implicit bind if necessary */
6071 if (udp->udp_state == TS_UNBND) {
6072 error = udp_implicit_bind(connp, cr);
6073 /*
6074 * We could be racing with an actual bind, in which case
6075 * we would see EPROTO. We cross our fingers and try
6076 * to connect.
6077 */
6078 if (!(error == 0 || error == EPROTO)) {
6079 freemsg(mp);
6080 return (error);
6081 }
6082 }
6083
6084 /* Connected? */
6085 if (msg->msg_name == NULL) {
6086 if (udp->udp_state != TS_DATA_XFER) {
6087 UDPS_BUMP_MIB(us, udpOutErrors);
6088 return (EDESTADDRREQ);
6089 }
6090 if (msg->msg_controllen != 0) {
6091 error = udp_output_ancillary(connp, NULL, NULL, mp,
6092 NULL, msg, cr, pid);
6093 } else {
6094 error = udp_output_connected(connp, mp, cr, pid);
6095 }
6096 if (us->us_sendto_ignerr)
6097 return (0);
6098 else
6099 return (error);
6100 }
6101
6102 /*
6103 * Check if we're allowed to send to a connection on which we've
6104 * already called 'connect'. The posix spec. allows both behaviors but
6105 * historically we've returned an error if already connected. The
6106 * client can allow this via a sockopt.
6107 */
6108 if (udp->udp_state == TS_DATA_XFER && !udp->udp_snd_to_conn) {
6109 UDPS_BUMP_MIB(us, udpOutErrors);
6110 return (EISCONN);
6111 }
6112
6113 error = proto_verify_ip_addr(connp->conn_family,
6114 (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6115 if (error != 0) {
6116 UDPS_BUMP_MIB(us, udpOutErrors);
6117 return (error);
6118 }
6119 switch (connp->conn_family) {
6120 case AF_INET6:
6121 sin6 = (sin6_t *)msg->msg_name;
6122
6123 srcid = sin6->__sin6_src_id;
6124
6125 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6126 /*
6127 * Destination is a non-IPv4-compatible IPv6 address.
6128 * Send out an IPv6 format packet.
6129 */
6130
6131 /*
6132 * If the local address is a mapped address return
6133 * an error.
6134 * It would be possible to send an IPv6 packet but the
6135 * response would never make it back to the application
6136 * since it is bound to a mapped address.
6137 */
6138 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
6139 UDPS_BUMP_MIB(us, udpOutErrors);
6140 return (EADDRNOTAVAIL);
6141 }
6142 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6143 sin6->sin6_addr = ipv6_loopback;
6144 ipversion = IPV6_VERSION;
6145 } else {
6146 if (connp->conn_ipv6_v6only) {
6147 UDPS_BUMP_MIB(us, udpOutErrors);
6148 return (EADDRNOTAVAIL);
6149 }
6150
6151 /*
6152 * If the local address is not zero or a mapped address
6153 * return an error. It would be possible to send an
6154 * IPv4 packet but the response would never make it
6155 * back to the application since it is bound to a
6156 * non-mapped address.
6157 */
6158 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
6159 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
6160 UDPS_BUMP_MIB(us, udpOutErrors);
6161 return (EADDRNOTAVAIL);
6162 }
6163
6164 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
6165 V4_PART_OF_V6(sin6->sin6_addr) =
6166 htonl(INADDR_LOOPBACK);
6167 }
6168 ipversion = IPV4_VERSION;
6169 }
6170
6171 /*
6172 * We have to allocate an ip_xmit_attr_t before we grab
6173 * conn_lock and we need to hold conn_lock once we've check
6174 * conn_same_as_last_v6 to handle concurrent send* calls on a
6175 * socket.
6176 */
6177 if (msg->msg_controllen == 0) {
6178 ixa = conn_get_ixa(connp, B_FALSE);
6179 if (ixa == NULL) {
6180 UDPS_BUMP_MIB(us, udpOutErrors);
6181 return (ENOMEM);
6182 }
6183 } else {
6184 ixa = NULL;
6185 }
6186 mutex_enter(&connp->conn_lock);
6187 if (udp->udp_delayed_error != 0) {
6188 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6189
6190 error = udp->udp_delayed_error;
6191 udp->udp_delayed_error = 0;
6192
6193 /* Compare IP address, port, and family */
6194
6195 if (sin6->sin6_port == sin2->sin6_port &&
6196 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6197 &sin2->sin6_addr) &&
6198 sin6->sin6_family == sin2->sin6_family) {
6199 mutex_exit(&connp->conn_lock);
6200 UDPS_BUMP_MIB(us, udpOutErrors);
6201 if (ixa != NULL)
6202 ixa_refrele(ixa);
6203 return (error);
6204 }
6205 }
6206
6207 if (msg->msg_controllen != 0) {
6208 mutex_exit(&connp->conn_lock);
6209 ASSERT(ixa == NULL);
6210 error = udp_output_ancillary(connp, NULL, sin6, mp,
6211 NULL, msg, cr, pid);
6212 } else if (conn_same_as_last_v6(connp, sin6) &&
6213 connp->conn_lastsrcid == srcid &&
6214 ipsec_outbound_policy_current(ixa)) {
6215 /* udp_output_lastdst drops conn_lock */
6216 error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6217 } else {
6218 /* udp_output_newdst drops conn_lock */
6219 error = udp_output_newdst(connp, mp, NULL, sin6,
6220 ipversion, cr, pid, ixa);
6221 }
6222 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6223 if (us->us_sendto_ignerr)
6224 return (0);
6225 else
6226 return (error);
6227 case AF_INET:
6228 sin = (sin_t *)msg->msg_name;
6229
6230 ipversion = IPV4_VERSION;
6231
6232 if (sin->sin_addr.s_addr == INADDR_ANY)
6233 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6234
6235 /*
6236 * We have to allocate an ip_xmit_attr_t before we grab
6237 * conn_lock and we need to hold conn_lock once we've check
6238 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6239 */
6240 if (msg->msg_controllen == 0) {
6241 ixa = conn_get_ixa(connp, B_FALSE);
6242 if (ixa == NULL) {
6243 UDPS_BUMP_MIB(us, udpOutErrors);
6244 return (ENOMEM);
6245 }
6246 } else {
6247 ixa = NULL;
6248 }
6249 mutex_enter(&connp->conn_lock);
6250 if (udp->udp_delayed_error != 0) {
6251 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
6252
6253 error = udp->udp_delayed_error;
6254 udp->udp_delayed_error = 0;
6255
6256 /* Compare IP address and port */
6257
6258 if (sin->sin_port == sin2->sin_port &&
6259 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6260 mutex_exit(&connp->conn_lock);
6261 UDPS_BUMP_MIB(us, udpOutErrors);
6262 if (ixa != NULL)
6263 ixa_refrele(ixa);
6264 return (error);
6265 }
6266 }
6267 if (msg->msg_controllen != 0) {
6268 mutex_exit(&connp->conn_lock);
6269 ASSERT(ixa == NULL);
6270 error = udp_output_ancillary(connp, sin, NULL, mp,
6271 NULL, msg, cr, pid);
6272 } else if (conn_same_as_last_v4(connp, sin) &&
6273 ipsec_outbound_policy_current(ixa)) {
6274 /* udp_output_lastdst drops conn_lock */
6275 error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6276 } else {
6277 /* udp_output_newdst drops conn_lock */
6278 error = udp_output_newdst(connp, mp, sin, NULL,
6279 ipversion, cr, pid, ixa);
6280 }
6281 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6282 if (us->us_sendto_ignerr)
6283 return (0);
6284 else
6285 return (error);
6286 default:
6287 return (EINVAL);
6288 }
6289 }
6290
/*
 * Switch `connp' from a STREAMS-less socket to one backed by the
 * STREAMS queue pair `q'.
 *
 * The queue is wired to the conn, the stream head is told about the
 * write offset and high-water mark, the helper stream is freed, and the
 * endpoint state (capability ack, local and peer address, a small set
 * of socket options) is collected and passed to `quiesced_cb' together
 * with `arg'.  Whatever message chain the callback returns is sent
 * upstream first, followed by anything queued on the udp_t's fallback
 * queue.  Finally IPCL_NONSTR is cleared on the conn.
 *
 * When `issocket' is false udp_use_pure_tpi() is applied to the
 * endpoint.
 *
 * Always returns 0.
 */
int
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
    sock_quiesce_arg_t *arg)
{
	conn_t *connp = (conn_t *)proto_handle;
	udp_t *udp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *mp, *stropt_mp;
	int error;

	udp = connp->conn_udp;

	/* The result is used below without a NULL check. */
	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

	/*
	 * setup the fallback stream that was allocated
	 */
	/* The q_ptr values must be read before they are overwritten below. */
	connp->conn_dev = (dev_t)RD(q)->q_ptr;
	connp->conn_minor_arena = WR(q)->q_ptr;

	RD(q)->q_ptr = WR(q)->q_ptr = connp;

	WR(q)->q_qinfo = &udp_winit;

	connp->conn_rq = RD(q);
	connp->conn_wq = WR(q);

	/* Notify stream head about options before sending up data */
	stropt_mp->b_datap->db_type = M_SETOPTS;
	stropt_mp->b_wptr += sizeof (*stropt);
	stropt = (struct stroptions *)stropt_mp->b_rptr;
	stropt->so_flags = SO_WROFF | SO_HIWAT;
	stropt->so_wroff = connp->conn_wroff;
	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
	putnext(RD(q), stropt_mp);

	/*
	 * Free the helper stream
	 */
	ip_free_helper_stream(connp);

	if (!issocket)
		udp_use_pure_tpi(udp);

	/*
	 * Collect the information needed to sync with the sonode
	 */
	udp_do_capability_ack(udp, &tca, TC1_INFO);

	laddrlen = faddrlen = sizeof (sin6_t);
	(void) udp_getsockname((sock_lower_handle_t)connp,
	    (struct sockaddr *)&laddr, &laddrlen, CRED());
	error = udp_getpeername((sock_lower_handle_t)connp,
	    (struct sockaddr *)&faddr, &faddrlen, CRED());
	/* No peer address available: report a zero-length one. */
	if (error != 0)
		faddrlen = 0;

	opts = 0;
	if (connp->conn_dgram_errind)
		opts |= SO_DGRAM_ERRIND;
	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
		opts |= SO_DONTROUTE;

	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
	    (struct sockaddr *)&laddr, laddrlen,
	    (struct sockaddr *)&faddr, faddrlen, opts);

	mutex_enter(&udp->udp_recv_lock);
	/*
	 * Attempts to send data up during fallback will result in it being
	 * queued in udp_t. First push up the datagrams obtained from the
	 * socket, then any packets queued in udp_t.
	 */
	if (mp != NULL) {
		/* Prepend the callback's message to the fallback queue. */
		mp->b_next = udp->udp_fallback_queue_head;
		udp->udp_fallback_queue_head = mp;
	}
	while (udp->udp_fallback_queue_head != NULL) {
		mp = udp->udp_fallback_queue_head;
		udp->udp_fallback_queue_head = mp->b_next;
		/* udp_recv_lock is not held across putnext(). */
		mutex_exit(&udp->udp_recv_lock);
		mp->b_next = NULL;
		putnext(RD(q), mp);
		mutex_enter(&udp->udp_recv_lock);
	}
	/* The head is NULL once the loop exits, so this clears the tail. */
	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
	/*
	 * No longer a streams less socket
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_flags &= ~IPCL_NONSTR;
	mutex_exit(&connp->conn_lock);

	mutex_exit(&udp->udp_recv_lock);

	ASSERT(connp->conn_ref >= 1);

	return (0);
}
6395
6396 /* ARGSUSED3 */
6397 int
6398 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6399 socklen_t *salenp, cred_t *cr)
6400 {
6401 conn_t *connp = (conn_t *)proto_handle;
6402 udp_t *udp = connp->conn_udp;
6403 int error;
6404
6405 /* All Solaris components should pass a cred for this operation. */
6406 ASSERT(cr != NULL);
6407
6408 mutex_enter(&connp->conn_lock);
6409 if (udp->udp_state != TS_DATA_XFER)
6410 error = ENOTCONN;
6411 else
6412 error = conn_getpeername(connp, sa, salenp);
6413 mutex_exit(&connp->conn_lock);
6414 return (error);
6415 }
6416
6417 /* ARGSUSED3 */
6418 int
6419 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6420 socklen_t *salenp, cred_t *cr)
6421 {
6422 conn_t *connp = (conn_t *)proto_handle;
6423 int error;
6424
6425 /* All Solaris components should pass a cred for this operation. */
6426 ASSERT(cr != NULL);
6427
6428 mutex_enter(&connp->conn_lock);
6429 error = conn_getsockname(connp, sa, salenp);
6430 mutex_exit(&connp->conn_lock);
6431 return (error);
6432 }
6433
6434 int
6435 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6436 void *optvalp, socklen_t *optlen, cred_t *cr)
6437 {
6438 conn_t *connp = (conn_t *)proto_handle;
6439 int error;
6440 t_uscalar_t max_optbuf_len;
6441 void *optvalp_buf;
6442 int len;
6443
6444 /* All Solaris components should pass a cred for this operation. */
6445 ASSERT(cr != NULL);
6446
6447 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6448 udp_opt_obj.odb_opt_des_arr,
6449 udp_opt_obj.odb_opt_arr_cnt,
6450 B_FALSE, B_TRUE, cr);
6451 if (error != 0) {
6452 if (error < 0)
6453 error = proto_tlitosyserr(-error);
6454 return (error);
6455 }
6456
6457 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6458 len = udp_opt_get(connp, level, option_name, optvalp_buf);
6459 if (len == -1) {
6460 kmem_free(optvalp_buf, max_optbuf_len);
6461 return (EINVAL);
6462 }
6463
6464 /*
6465 * update optlen and copy option value
6466 */
6467 t_uscalar_t size = MIN(len, *optlen);
6468
6469 bcopy(optvalp_buf, optvalp, size);
6470 bcopy(&size, optlen, sizeof (size));
6471
6472 kmem_free(optvalp_buf, max_optbuf_len);
6473 return (0);
6474 }
6475
6476 int
6477 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6478 const void *optvalp, socklen_t optlen, cred_t *cr)
6479 {
6480 conn_t *connp = (conn_t *)proto_handle;
6481 int error;
6482
6483 /* All Solaris components should pass a cred for this operation. */
6484 ASSERT(cr != NULL);
6485
6486 error = proto_opt_check(level, option_name, optlen, NULL,
6487 udp_opt_obj.odb_opt_des_arr,
6488 udp_opt_obj.odb_opt_arr_cnt,
6489 B_TRUE, B_FALSE, cr);
6490
6491 if (error != 0) {
6492 if (error < 0)
6493 error = proto_tlitosyserr(-error);
6494 return (error);
6495 }
6496
6497 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6498 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6499 NULL, cr);
6500
6501 ASSERT(error >= 0);
6502
6503 return (error);
6504 }
6505
6506 void
6507 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6508 {
6509 conn_t *connp = (conn_t *)proto_handle;
6510 udp_t *udp = connp->conn_udp;
6511
6512 mutex_enter(&udp->udp_recv_lock);
6513 connp->conn_flow_cntrld = B_FALSE;
6514 mutex_exit(&udp->udp_recv_lock);
6515 }
6516
6517 /* ARGSUSED2 */
6518 int
6519 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6520 {
6521 conn_t *connp = (conn_t *)proto_handle;
6522
6523 /* All Solaris components should pass a cred for this operation. */
6524 ASSERT(cr != NULL);
6525
6526 /* shut down the send side */
6527 if (how != SHUT_RD)
6528 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6529 SOCK_OPCTL_SHUT_SEND, 0);
6530 /* shut down the recv side */
6531 if (how != SHUT_WR)
6532 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6533 SOCK_OPCTL_SHUT_RECV, 0);
6534 return (0);
6535 }
6536
6537 int
6538 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6539 int mode, int32_t *rvalp, cred_t *cr)
6540 {
6541 conn_t *connp = (conn_t *)proto_handle;
6542 int error;
6543
6544 /* All Solaris components should pass a cred for this operation. */
6545 ASSERT(cr != NULL);
6546
6547 /*
6548 * If we don't have a helper stream then create one.
6549 * ip_create_helper_stream takes care of locking the conn_t,
6550 * so this check for NULL is just a performance optimization.
6551 */
6552 if (connp->conn_helper_info == NULL) {
6553 udp_stack_t *us = connp->conn_udp->udp_us;
6554
6555 ASSERT(us->us_ldi_ident != NULL);
6556
6557 /*
6558 * Create a helper stream for non-STREAMS socket.
6559 */
6560 error = ip_create_helper_stream(connp, us->us_ldi_ident);
6561 if (error != 0) {
6562 ip0dbg(("tcp_ioctl: create of IP helper stream "
6563 "failed %d\n", error));
6564 return (error);
6565 }
6566 }
6567
6568 switch (cmd) {
6569 case _SIOCSOCKFALLBACK:
6570 case TI_GETPEERNAME:
6571 case TI_GETMYNAME:
6572 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6573 cmd));
6574 error = EINVAL;
6575 break;
6576 default:
6577 /*
6578 * Pass on to IP using helper stream
6579 */
6580 error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6581 cmd, arg, mode, cr, rvalp);
6582 break;
6583 }
6584 return (error);
6585 }
6586
6587 /* ARGSUSED */
6588 int
6589 udp_accept(sock_lower_handle_t lproto_handle,
6590 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6591 cred_t *cr)
6592 {
6593 return (EOPNOTSUPP);
6594 }
6595
6596 /* ARGSUSED */
6597 int
6598 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6599 {
6600 return (EOPNOTSUPP);
6601 }
6602
6603 sock_downcalls_t sock_udp_downcalls = {
6604 udp_activate, /* sd_activate */
6605 udp_accept, /* sd_accept */
6606 udp_bind, /* sd_bind */
6607 udp_listen, /* sd_listen */
6608 udp_connect, /* sd_connect */
6609 udp_getpeername, /* sd_getpeername */
6610 udp_getsockname, /* sd_getsockname */
6611 udp_getsockopt, /* sd_getsockopt */
6612 udp_setsockopt, /* sd_setsockopt */
6613 udp_send, /* sd_send */
6614 NULL, /* sd_send_uio */
6615 NULL, /* sd_recv_uio */
6616 NULL, /* sd_poll */
6617 udp_shutdown, /* sd_shutdown */
6618 udp_clr_flowctrl, /* sd_setflowctrl */
6619 udp_ioctl, /* sd_ioctl */
6620 udp_close /* sd_close */
6621 };