1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 */
25 /* Copyright (c) 1990 Mentat Inc. */
26
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/stropts.h>
30 #include <sys/strlog.h>
31 #include <sys/strsun.h>
32 #define _SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/timod.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/strsubr.h>
38 #include <sys/suntpi.h>
39 #include <sys/xti_inet.h>
40 #include <sys/kmem.h>
41 #include <sys/cred_impl.h>
42 #include <sys/policy.h>
43 #include <sys/priv.h>
44 #include <sys/ucred.h>
45 #include <sys/zone.h>
46
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/sockio.h>
50 #include <sys/vtrace.h>
51 #include <sys/sdt.h>
52 #include <sys/debug.h>
53 #include <sys/isa_defs.h>
54 #include <sys/random.h>
55 #include <netinet/in.h>
56 #include <netinet/ip6.h>
57 #include <netinet/icmp6.h>
58 #include <netinet/udp.h>
59
60 #include <inet/common.h>
61 #include <inet/ip.h>
62 #include <inet/ip_impl.h>
63 #include <inet/ipsec_impl.h>
64 #include <inet/ip6.h>
65 #include <inet/ip_ire.h>
66 #include <inet/ip_if.h>
67 #include <inet/ip_multi.h>
68 #include <inet/ip_ndp.h>
69 #include <inet/proto_set.h>
70 #include <inet/mib2.h>
71 #include <inet/optcom.h>
72 #include <inet/snmpcom.h>
73 #include <inet/kstatcom.h>
74 #include <inet/ipclassifier.h>
75 #include <sys/squeue_impl.h>
76 #include <inet/ipnet.h>
77 #include <sys/ethernet.h>
78
79 #include <sys/tsol/label.h>
80 #include <sys/tsol/tnet.h>
81 #include <rpc/pmap_prot.h>
82
83 #include <inet/udp_impl.h>
84
85 /*
86 * Synchronization notes:
87 *
88 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
89 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
90 * protects the contents of the udp_t. uf_lock protects the address and the
91 * fanout information.
92 * The lock order is conn_lock -> uf_lock.
93 *
94 * The fanout lock uf_lock:
95 * When a UDP endpoint is bound to a local port, it is inserted into
96 * a bind hash list. The list consists of an array of udp_fanout_t buckets.
97 * The size of the array is controlled by the udp_bind_fanout_size variable.
98 * This variable can be changed in /etc/system if the default value is
99 * not large enough. Each bind hash bucket is protected by a per bucket
100 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
101 * structure and a few other fields in the udp_t. A UDP endpoint is removed
102 * from the bind hash list only when it is being unbound or being closed.
103 * The per bucket lock also protects a UDP endpoint's state changes.
104 *
105 * Plumbing notes:
106 * UDP is always a device driver. For compatibility with mibopen() code
107 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
108 * dummy module.
109 *
110 * The above implies that we don't support any intermediate module to
111 * reside in between /dev/ip and udp -- in fact, we never supported such
112 * scenario in the past as the inter-layer communication semantics have
113 * always been private.
114 */
115
116 /* For /etc/system control */
117 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
118
119 static void udp_addr_req(queue_t *q, mblk_t *mp);
120 static void udp_tpi_bind(queue_t *q, mblk_t *mp);
121 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
122 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
123 static int udp_build_hdr_template(conn_t *, const in6_addr_t *,
124 const in6_addr_t *, in_port_t, uint32_t);
125 static void udp_capability_req(queue_t *q, mblk_t *mp);
126 static int udp_tpi_close(queue_t *q, int flags);
127 static void udp_close_free(conn_t *);
128 static void udp_tpi_connect(queue_t *q, mblk_t *mp);
129 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp);
130 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
131 int sys_error);
132 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
133 t_scalar_t tlierr, int sys_error);
134 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
135 cred_t *cr);
136 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
137 char *value, caddr_t cp, cred_t *cr);
138 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
139 char *value, caddr_t cp, cred_t *cr);
140 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
141 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
142 ip_recv_attr_t *ira);
143 static void udp_info_req(queue_t *q, mblk_t *mp);
144 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
145 static void udp_lrput(queue_t *, mblk_t *);
146 static void udp_lwput(queue_t *, mblk_t *);
147 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
148 cred_t *credp, boolean_t isv6);
149 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
150 cred_t *credp);
151 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
152 cred_t *credp);
153 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
154 int udp_opt_set(conn_t *connp, uint_t optset_context,
155 int level, int name, uint_t inlen,
156 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
157 void *thisdg_attrs, cred_t *cr);
158 int udp_opt_get(conn_t *connp, int level, int name,
159 uchar_t *ptr);
160 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
161 pid_t pid);
162 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
163 pid_t pid, ip_xmit_attr_t *ixa);
164 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
165 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
166 ip_xmit_attr_t *ixa);
167 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
168 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
169 int *);
170 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
171 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
172 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
173 static void udp_ud_err_connected(conn_t *, t_scalar_t);
174 static void udp_tpi_unbind(queue_t *q, mblk_t *mp);
175 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
176 boolean_t random);
177 static void udp_wput_other(queue_t *q, mblk_t *mp);
178 static void udp_wput_iocdata(queue_t *q, mblk_t *mp);
179 static void udp_wput_fallback(queue_t *q, mblk_t *mp);
180 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size);
181
182 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns);
183 static void udp_stack_fini(netstackid_t stackid, void *arg);
184
185 /* Common routines for TPI and socket module */
186 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
187
188 /* Common routine for TPI and socket module */
189 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *);
190 static void udp_do_close(conn_t *);
191 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
192 boolean_t);
193 static int udp_do_unbind(conn_t *);
194
195 int udp_getsockname(sock_lower_handle_t,
196 struct sockaddr *, socklen_t *, cred_t *);
197 int udp_getpeername(sock_lower_handle_t,
198 struct sockaddr *, socklen_t *, cred_t *);
199 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
200 cred_t *, pid_t);
201
202 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
203
204 /*
205 * Checks if the given destination addr/port is allowed out.
206 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
207 * Called for each connect() and for sendto()/sendmsg() to a different
208 * destination.
209 * For connect(), called in udp_connect().
210 * For sendto()/sendmsg(), called in udp_output_newdst().
211 *
212 * This macro assumes that the cl_inet_connect2 hook is not NULL.
213 * Please check this before calling this macro.
214 *
215 * void
216 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing,
217 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
218 */
219 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \
220 (err) = 0; \
221 /* \
222 * Running in cluster mode - check and register active \
223 * "connection" information \
224 */ \
225 if ((cp)->conn_ipversion == IPV4_VERSION) \
226 (err) = (*cl_inet_connect2)( \
227 (cp)->conn_netstack->netstack_stackid, \
228 IPPROTO_UDP, is_outgoing, AF_INET, \
229 (uint8_t *)&((cp)->conn_laddr_v4), \
230 (cp)->conn_lport, \
231 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \
232 (in_port_t)(fport), NULL); \
233 else \
234 (err) = (*cl_inet_connect2)( \
235 (cp)->conn_netstack->netstack_stackid, \
236 IPPROTO_UDP, is_outgoing, AF_INET6, \
237 (uint8_t *)&((cp)->conn_laddr_v6), \
238 (cp)->conn_lport, \
239 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \
240 }
241
242 static struct module_info udp_mod_info = {
243 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
244 };
245
246 /*
247 * Entry points for UDP as a device.
248 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
249 */
250 static struct qinit udp_rinitv4 = {
251 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
252 };
253
254 static struct qinit udp_rinitv6 = {
255 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
256 };
257
258 static struct qinit udp_winit = {
259 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
260 };
261
262 /* UDP entry point during fallback */
263 struct qinit udp_fallback_sock_winit = {
264 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
265 };
266
267 /*
268 * UDP needs to handle I_LINK and I_PLINK since ifconfig
269 * likes to use it as a place to hang the various streams.
270 */
271 static struct qinit udp_lrinit = {
272 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
273 };
274
275 static struct qinit udp_lwinit = {
276 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
277 };
278
279 /* For AF_INET aka /dev/udp */
280 struct streamtab udpinfov4 = {
281 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
282 };
283
284 /* For AF_INET6 aka /dev/udp6 */
285 struct streamtab udpinfov6 = {
286 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
287 };
288
289 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
290
291 /* Default structure copied into T_INFO_ACK messages */
292 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
293 T_INFO_ACK,
294 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */
295 T_INVALID, /* ETSU_size. udp does not support expedited data. */
296 T_INVALID, /* CDATA_size. udp does not support connect data. */
297 T_INVALID, /* DDATA_size. udp does not support disconnect data. */
298 sizeof (sin_t), /* ADDR_size. */
299 0, /* OPT_size - not initialized here */
300 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */
301 T_CLTS, /* SERV_type. udp supports connection-less. */
302 TS_UNBND, /* CURRENT_state. This is set from udp_state. */
303 (XPG4_1|SENDZERO) /* PROVIDER_flag */
304 };
305
306 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
307
308 static struct T_info_ack udp_g_t_info_ack_ipv6 = {
309 T_INFO_ACK,
310 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */
311 T_INVALID, /* ETSU_size. udp does not support expedited data. */
312 T_INVALID, /* CDATA_size. udp does not support connect data. */
313 T_INVALID, /* DDATA_size. udp does not support disconnect data. */
314 sizeof (sin6_t), /* ADDR_size. */
315 0, /* OPT_size - not initialized here */
316 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */
317 T_CLTS, /* SERV_type. udp supports connection-less. */
318 TS_UNBND, /* CURRENT_state. This is set from udp_state. */
319 (XPG4_1|SENDZERO) /* PROVIDER_flag */
320 };
321
322 /*
323 * UDP tunables related declarations. Definitions are in udp_tunables.c
324 */
325 extern mod_prop_info_t udp_propinfo_tbl[];
326 extern int udp_propinfo_count;
327
328 /* Setable in /etc/system */
329 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
330 uint32_t udp_random_anon_port = 1;
331
332 /*
333 * Hook functions to enable cluster networking.
334 * On non-clustered systems these vectors must always be NULL
335 */
336
337 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
338 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
339 void *args) = NULL;
340 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
341 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
342 void *args) = NULL;
343
344 typedef union T_primitives *t_primp_t;
345
346 /*
347 * Return the next anonymous port in the privileged port range for
348 * bind checking.
349 *
350 * Trusted Extension (TX) notes: TX allows administrator to mark or
351 * reserve ports as Multilevel ports (MLP). MLP has special function
352 * on TX systems. Once a port is made MLP, it's not available as
353 * ordinary port. This creates "holes" in the port name space. It
354 * may be necessary to skip the "holes" find a suitable anon port.
355 */
356 static in_port_t
357 udp_get_next_priv_port(udp_t *udp)
358 {
359 static in_port_t next_priv_port = IPPORT_RESERVED - 1;
360 in_port_t nextport;
361 boolean_t restart = B_FALSE;
362 udp_stack_t *us = udp->udp_us;
363
364 retry:
365 if (next_priv_port < us->us_min_anonpriv_port ||
366 next_priv_port >= IPPORT_RESERVED) {
367 next_priv_port = IPPORT_RESERVED - 1;
368 if (restart)
369 return (0);
370 restart = B_TRUE;
371 }
372
373 if (is_system_labeled() &&
374 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
375 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
376 next_priv_port = nextport;
377 goto retry;
378 }
379
380 return (next_priv_port--);
381 }
382
383 /*
384 * Hash list removal routine for udp_t structures.
385 */
386 static void
387 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
388 {
389 udp_t *udpnext;
390 kmutex_t *lockp;
391 udp_stack_t *us = udp->udp_us;
392 conn_t *connp = udp->udp_connp;
393
394 if (udp->udp_ptpbhn == NULL)
395 return;
396
397 /*
398 * Extract the lock pointer in case there are concurrent
399 * hash_remove's for this instance.
400 */
401 ASSERT(connp->conn_lport != 0);
402 if (!caller_holds_lock) {
403 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
404 us->us_bind_fanout_size)].uf_lock;
405 ASSERT(lockp != NULL);
406 mutex_enter(lockp);
407 }
408 if (udp->udp_ptpbhn != NULL) {
409 udpnext = udp->udp_bind_hash;
410 if (udpnext != NULL) {
411 udpnext->udp_ptpbhn = udp->udp_ptpbhn;
412 udp->udp_bind_hash = NULL;
413 }
414 *udp->udp_ptpbhn = udpnext;
415 udp->udp_ptpbhn = NULL;
416 }
417 if (!caller_holds_lock) {
418 mutex_exit(lockp);
419 }
420 }
421
422 static void
423 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
424 {
425 conn_t *connp = udp->udp_connp;
426 udp_t **udpp;
427 udp_t *udpnext;
428 conn_t *connext;
429
430 ASSERT(MUTEX_HELD(&uf->uf_lock));
431 ASSERT(udp->udp_ptpbhn == NULL);
432 udpp = &uf->uf_udp;
433 udpnext = udpp[0];
434 if (udpnext != NULL) {
435 /*
436 * If the new udp bound to the INADDR_ANY address
437 * and the first one in the list is not bound to
438 * INADDR_ANY we skip all entries until we find the
439 * first one bound to INADDR_ANY.
440 * This makes sure that applications binding to a
441 * specific address get preference over those binding to
442 * INADDR_ANY.
443 */
444 connext = udpnext->udp_connp;
445 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
446 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
447 while ((udpnext = udpp[0]) != NULL &&
448 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
449 udpp = &(udpnext->udp_bind_hash);
450 }
451 if (udpnext != NULL)
452 udpnext->udp_ptpbhn = &udp->udp_bind_hash;
453 } else {
454 udpnext->udp_ptpbhn = &udp->udp_bind_hash;
455 }
456 }
457 udp->udp_bind_hash = udpnext;
458 udp->udp_ptpbhn = udpp;
459 udpp[0] = udp;
460 }
461
462 /*
463 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
464 * passed to udp_wput.
465 * It associates a port number and local address with the stream.
466 * It calls IP to verify the local IP address, and calls IP to insert
467 * the conn_t in the fanout table.
468 * If everything is ok it then sends the T_BIND_ACK back up.
469 *
470 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
471 * without setting SO_REUSEADDR. This is needed so that they
472 * can be viewed as two independent transport protocols.
473 * However, anonymouns ports are allocated from the same range to avoid
474 * duplicating the us->us_next_port_to_try.
475 */
476 static void
477 udp_tpi_bind(queue_t *q, mblk_t *mp)
478 {
479 sin_t *sin;
480 sin6_t *sin6;
481 mblk_t *mp1;
482 struct T_bind_req *tbr;
483 conn_t *connp;
484 udp_t *udp;
485 int error;
486 struct sockaddr *sa;
487 cred_t *cr;
488
489 /*
490 * All Solaris components should pass a db_credp
491 * for this TPI message, hence we ASSERT.
492 * But in case there is some other M_PROTO that looks
493 * like a TPI message sent by some other kernel
494 * component, we check and return an error.
495 */
496 cr = msg_getcred(mp, NULL);
497 ASSERT(cr != NULL);
498 if (cr == NULL) {
499 udp_err_ack(q, mp, TSYSERR, EINVAL);
500 return;
501 }
502
503 connp = Q_TO_CONN(q);
504 udp = connp->conn_udp;
505 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
506 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
507 "udp_bind: bad req, len %u",
508 (uint_t)(mp->b_wptr - mp->b_rptr));
509 udp_err_ack(q, mp, TPROTO, 0);
510 return;
511 }
512 if (udp->udp_state != TS_UNBND) {
513 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
514 "udp_bind: bad state, %u", udp->udp_state);
515 udp_err_ack(q, mp, TOUTSTATE, 0);
516 return;
517 }
518 /*
519 * Reallocate the message to make sure we have enough room for an
520 * address.
521 */
522 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
523 if (mp1 == NULL) {
524 udp_err_ack(q, mp, TSYSERR, ENOMEM);
525 return;
526 }
527
528 mp = mp1;
529
530 /* Reset the message type in preparation for shipping it back. */
531 DB_TYPE(mp) = M_PCPROTO;
532
533 tbr = (struct T_bind_req *)mp->b_rptr;
534 switch (tbr->ADDR_length) {
535 case 0: /* Request for a generic port */
536 tbr->ADDR_offset = sizeof (struct T_bind_req);
537 if (connp->conn_family == AF_INET) {
538 tbr->ADDR_length = sizeof (sin_t);
539 sin = (sin_t *)&tbr[1];
540 *sin = sin_null;
541 sin->sin_family = AF_INET;
542 mp->b_wptr = (uchar_t *)&sin[1];
543 sa = (struct sockaddr *)sin;
544 } else {
545 ASSERT(connp->conn_family == AF_INET6);
546 tbr->ADDR_length = sizeof (sin6_t);
547 sin6 = (sin6_t *)&tbr[1];
548 *sin6 = sin6_null;
549 sin6->sin6_family = AF_INET6;
550 mp->b_wptr = (uchar_t *)&sin6[1];
551 sa = (struct sockaddr *)sin6;
552 }
553 break;
554
555 case sizeof (sin_t): /* Complete IPv4 address */
556 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
557 sizeof (sin_t));
558 if (sa == NULL || !OK_32PTR((char *)sa)) {
559 udp_err_ack(q, mp, TSYSERR, EINVAL);
560 return;
561 }
562 if (connp->conn_family != AF_INET ||
563 sa->sa_family != AF_INET) {
564 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
565 return;
566 }
567 break;
568
569 case sizeof (sin6_t): /* complete IPv6 address */
570 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
571 sizeof (sin6_t));
572 if (sa == NULL || !OK_32PTR((char *)sa)) {
573 udp_err_ack(q, mp, TSYSERR, EINVAL);
574 return;
575 }
576 if (connp->conn_family != AF_INET6 ||
577 sa->sa_family != AF_INET6) {
578 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
579 return;
580 }
581 break;
582
583 default: /* Invalid request */
584 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
585 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
586 udp_err_ack(q, mp, TBADADDR, 0);
587 return;
588 }
589
590 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
591 tbr->PRIM_type != O_T_BIND_REQ);
592
593 if (error != 0) {
594 if (error > 0) {
595 udp_err_ack(q, mp, TSYSERR, error);
596 } else {
597 udp_err_ack(q, mp, -error, 0);
598 }
599 } else {
600 tbr->PRIM_type = T_BIND_ACK;
601 qreply(q, mp);
602 }
603 }
604
605 /*
606 * This routine handles each T_CONN_REQ message passed to udp. It
607 * associates a default destination address with the stream.
608 *
609 * After various error checks are completed, udp_connect() lays
610 * the target address and port into the composite header template.
611 * Then we ask IP for information, including a source address if we didn't
612 * already have one. Finally we send up the T_OK_ACK reply message.
613 */
614 static void
615 udp_tpi_connect(queue_t *q, mblk_t *mp)
616 {
617 conn_t *connp = Q_TO_CONN(q);
618 int error;
619 socklen_t len;
620 struct sockaddr *sa;
621 struct T_conn_req *tcr;
622 cred_t *cr;
623 pid_t pid;
624 /*
625 * All Solaris components should pass a db_credp
626 * for this TPI message, hence we ASSERT.
627 * But in case there is some other M_PROTO that looks
628 * like a TPI message sent by some other kernel
629 * component, we check and return an error.
630 */
631 cr = msg_getcred(mp, &pid);
632 ASSERT(cr != NULL);
633 if (cr == NULL) {
634 udp_err_ack(q, mp, TSYSERR, EINVAL);
635 return;
636 }
637
638 tcr = (struct T_conn_req *)mp->b_rptr;
639
640 /* A bit of sanity checking */
641 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
642 udp_err_ack(q, mp, TPROTO, 0);
643 return;
644 }
645
646 if (tcr->OPT_length != 0) {
647 udp_err_ack(q, mp, TBADOPT, 0);
648 return;
649 }
650
651 /*
652 * Determine packet type based on type of address passed in
653 * the request should contain an IPv4 or IPv6 address.
654 * Make sure that address family matches the type of
655 * family of the address passed down.
656 */
657 len = tcr->DEST_length;
658 switch (tcr->DEST_length) {
659 default:
660 udp_err_ack(q, mp, TBADADDR, 0);
661 return;
662
663 case sizeof (sin_t):
664 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
665 sizeof (sin_t));
666 break;
667
668 case sizeof (sin6_t):
669 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
670 sizeof (sin6_t));
671 break;
672 }
673
674 error = proto_verify_ip_addr(connp->conn_family, sa, len);
675 if (error != 0) {
676 udp_err_ack(q, mp, TSYSERR, error);
677 return;
678 }
679
680 error = udp_do_connect(connp, sa, len, cr, pid);
681 if (error != 0) {
682 if (error < 0)
683 udp_err_ack(q, mp, -error, 0);
684 else
685 udp_err_ack(q, mp, TSYSERR, error);
686 } else {
687 mblk_t *mp1;
688 /*
689 * We have to send a connection confirmation to
690 * keep TLI happy.
691 */
692 if (connp->conn_family == AF_INET) {
693 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
694 sizeof (sin_t), NULL, 0);
695 } else {
696 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
697 sizeof (sin6_t), NULL, 0);
698 }
699 if (mp1 == NULL) {
700 udp_err_ack(q, mp, TSYSERR, ENOMEM);
701 return;
702 }
703
704 /*
705 * Send ok_ack for T_CONN_REQ
706 */
707 mp = mi_tpi_ok_ack_alloc(mp);
708 if (mp == NULL) {
709 /* Unable to reuse the T_CONN_REQ for the ack. */
710 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
711 return;
712 }
713
714 putnext(connp->conn_rq, mp);
715 putnext(connp->conn_rq, mp1);
716 }
717 }
718
719 static int
720 udp_tpi_close(queue_t *q, int flags)
721 {
722 conn_t *connp;
723
724 if (flags & SO_FALLBACK) {
725 /*
726 * stream is being closed while in fallback
727 * simply free the resources that were allocated
728 */
729 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
730 qprocsoff(q);
731 goto done;
732 }
733
734 connp = Q_TO_CONN(q);
735 udp_do_close(connp);
736 done:
737 q->q_ptr = WR(q)->q_ptr = NULL;
738 return (0);
739 }
740
741 static void
742 udp_close_free(conn_t *connp)
743 {
744 udp_t *udp = connp->conn_udp;
745
746 /* If there are any options associated with the stream, free them. */
747 if (udp->udp_recv_ipp.ipp_fields != 0)
748 ip_pkt_free(&udp->udp_recv_ipp);
749
750 /*
751 * Clear any fields which the kmem_cache constructor clears.
752 * Only udp_connp needs to be preserved.
753 * TBD: We should make this more efficient to avoid clearing
754 * everything.
755 */
756 ASSERT(udp->udp_connp == connp);
757 bzero(udp, sizeof (udp_t));
758 udp->udp_connp = connp;
759 }
760
761 static int
762 udp_do_disconnect(conn_t *connp)
763 {
764 udp_t *udp;
765 udp_fanout_t *udpf;
766 udp_stack_t *us;
767 int error;
768
769 udp = connp->conn_udp;
770 us = udp->udp_us;
771 mutex_enter(&connp->conn_lock);
772 if (udp->udp_state != TS_DATA_XFER) {
773 mutex_exit(&connp->conn_lock);
774 return (-TOUTSTATE);
775 }
776 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
777 us->us_bind_fanout_size)];
778 mutex_enter(&udpf->uf_lock);
779 if (connp->conn_mcbc_bind)
780 connp->conn_saddr_v6 = ipv6_all_zeros;
781 else
782 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
783 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
784 connp->conn_faddr_v6 = ipv6_all_zeros;
785 connp->conn_fport = 0;
786 udp->udp_state = TS_IDLE;
787 mutex_exit(&udpf->uf_lock);
788
789 /* Remove any remnants of mapped address binding */
790 if (connp->conn_family == AF_INET6)
791 connp->conn_ipversion = IPV6_VERSION;
792
793 connp->conn_v6lastdst = ipv6_all_zeros;
794 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
795 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
796 mutex_exit(&connp->conn_lock);
797 if (error != 0)
798 return (error);
799
800 /*
801 * Tell IP to remove the full binding and revert
802 * to the local address binding.
803 */
804 return (ip_laddr_fanout_insert(connp));
805 }
806
807 static void
808 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
809 {
810 conn_t *connp = Q_TO_CONN(q);
811 int error;
812
813 /*
814 * Allocate the largest primitive we need to send back
815 * T_error_ack is > than T_ok_ack
816 */
817 mp = reallocb(mp, sizeof (struct T_error_ack), 1);
818 if (mp == NULL) {
819 /* Unable to reuse the T_DISCON_REQ for the ack. */
820 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
821 return;
822 }
823
824 error = udp_do_disconnect(connp);
825
826 if (error != 0) {
827 if (error < 0) {
828 udp_err_ack(q, mp, -error, 0);
829 } else {
830 udp_err_ack(q, mp, TSYSERR, error);
831 }
832 } else {
833 mp = mi_tpi_ok_ack_alloc(mp);
834 ASSERT(mp != NULL);
835 qreply(q, mp);
836 }
837 }
838
839 int
840 udp_disconnect(conn_t *connp)
841 {
842 int error;
843
844 connp->conn_dgram_errind = B_FALSE;
845 error = udp_do_disconnect(connp);
846 if (error < 0)
847 error = proto_tlitosyserr(-error);
848
849 return (error);
850 }
851
852 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
853 static void
854 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
855 {
856 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
857 qreply(q, mp);
858 }
859
860 /* Shorthand to generate and send TPI error acks to our client */
861 static void
862 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
863 t_scalar_t t_error, int sys_error)
864 {
865 struct T_error_ack *teackp;
866
867 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
868 M_PCPROTO, T_ERROR_ACK)) != NULL) {
869 teackp = (struct T_error_ack *)mp->b_rptr;
870 teackp->ERROR_prim = primitive;
871 teackp->TLI_error = t_error;
872 teackp->UNIX_error = sys_error;
873 qreply(q, mp);
874 }
875 }
876
877 /* At minimum we need 4 bytes of UDP header */
878 #define ICMP_MIN_UDP_HDR 4
879
880 /*
881 * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
882 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
883 * Assumes that IP has pulled up everything up to and including the ICMP header.
884 */
885 /* ARGSUSED2 */
886 static void
887 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
888 {
889 conn_t *connp = (conn_t *)arg1;
890 icmph_t *icmph;
891 ipha_t *ipha;
892 int iph_hdr_length;
893 udpha_t *udpha;
894 sin_t sin;
895 sin6_t sin6;
896 mblk_t *mp1;
897 int error = 0;
898 udp_t *udp = connp->conn_udp;
899
900 ipha = (ipha_t *)mp->b_rptr;
901
902 ASSERT(OK_32PTR(mp->b_rptr));
903
904 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
905 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
906 udp_icmp_error_ipv6(connp, mp, ira);
907 return;
908 }
909 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
910
911 /* Skip past the outer IP and ICMP headers */
912 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
913 iph_hdr_length = ira->ira_ip_hdr_length;
914 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
915 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */
916
917 /* Skip past the inner IP and find the ULP header */
918 iph_hdr_length = IPH_HDR_LENGTH(ipha);
919 udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
920
921 switch (icmph->icmph_type) {
922 case ICMP_DEST_UNREACHABLE:
923 switch (icmph->icmph_code) {
924 case ICMP_FRAGMENTATION_NEEDED: {
925 ipha_t *ipha;
926 ip_xmit_attr_t *ixa;
927 /*
928 * IP has already adjusted the path MTU.
929 * But we need to adjust DF for IPv4.
930 */
931 if (connp->conn_ipversion != IPV4_VERSION)
932 break;
933
934 ixa = conn_get_ixa(connp, B_FALSE);
935 if (ixa == NULL || ixa->ixa_ire == NULL) {
936 /*
937 * Some other thread holds conn_ixa. We will
938 * redo this on the next ICMP too big.
939 */
940 if (ixa != NULL)
941 ixa_refrele(ixa);
942 break;
943 }
944 (void) ip_get_pmtu(ixa);
945
946 mutex_enter(&connp->conn_lock);
947 ipha = (ipha_t *)connp->conn_ht_iphc;
948 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
949 ipha->ipha_fragment_offset_and_flags |=
950 IPH_DF_HTONS;
951 } else {
952 ipha->ipha_fragment_offset_and_flags &=
953 ~IPH_DF_HTONS;
954 }
955 mutex_exit(&connp->conn_lock);
956 ixa_refrele(ixa);
957 break;
958 }
959 case ICMP_PORT_UNREACHABLE:
960 case ICMP_PROTOCOL_UNREACHABLE:
961 error = ECONNREFUSED;
962 break;
963 default:
964 /* Transient errors */
965 break;
966 }
967 break;
968 default:
969 /* Transient errors */
970 break;
971 }
972 if (error == 0) {
973 freemsg(mp);
974 return;
975 }
976
977 /*
978 * Deliver T_UDERROR_IND when the application has asked for it.
979 * The socket layer enables this automatically when connected.
980 */
981 if (!connp->conn_dgram_errind) {
982 freemsg(mp);
983 return;
984 }
985
986 switch (connp->conn_family) {
987 case AF_INET:
988 sin = sin_null;
989 sin.sin_family = AF_INET;
990 sin.sin_addr.s_addr = ipha->ipha_dst;
991 sin.sin_port = udpha->uha_dst_port;
992 if (IPCL_IS_NONSTR(connp)) {
993 mutex_enter(&connp->conn_lock);
994 if (udp->udp_state == TS_DATA_XFER) {
995 if (sin.sin_port == connp->conn_fport &&
996 sin.sin_addr.s_addr ==
997 connp->conn_faddr_v4) {
998 mutex_exit(&connp->conn_lock);
999 (*connp->conn_upcalls->su_set_error)
1000 (connp->conn_upper_handle, error);
1001 goto done;
1002 }
1003 } else {
1004 udp->udp_delayed_error = error;
1005 *((sin_t *)&udp->udp_delayed_addr) = sin;
1006 }
1007 mutex_exit(&connp->conn_lock);
1008 } else {
1009 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1010 NULL, 0, error);
1011 if (mp1 != NULL)
1012 putnext(connp->conn_rq, mp1);
1013 }
1014 break;
1015 case AF_INET6:
1016 sin6 = sin6_null;
1017 sin6.sin6_family = AF_INET6;
1018 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1019 sin6.sin6_port = udpha->uha_dst_port;
1020 if (IPCL_IS_NONSTR(connp)) {
1021 mutex_enter(&connp->conn_lock);
1022 if (udp->udp_state == TS_DATA_XFER) {
1023 if (sin6.sin6_port == connp->conn_fport &&
1024 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1025 &connp->conn_faddr_v6)) {
1026 mutex_exit(&connp->conn_lock);
1027 (*connp->conn_upcalls->su_set_error)
1028 (connp->conn_upper_handle, error);
1029 goto done;
1030 }
1031 } else {
1032 udp->udp_delayed_error = error;
1033 *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1034 }
1035 mutex_exit(&connp->conn_lock);
1036 } else {
1037 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1038 NULL, 0, error);
1039 if (mp1 != NULL)
1040 putnext(connp->conn_rq, mp1);
1041 }
1042 break;
1043 }
1044 done:
1045 freemsg(mp);
1046 }
1047
1048 /*
1049 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1050 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1051 * Assumes that IP has pulled up all the extension headers as well as the
1052 * ICMPv6 header.
1053 */
1054 static void
1055 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1056 {
1057 icmp6_t *icmp6;
1058 ip6_t *ip6h, *outer_ip6h;
1059 uint16_t iph_hdr_length;
1060 uint8_t *nexthdrp;
1061 udpha_t *udpha;
1062 sin6_t sin6;
1063 mblk_t *mp1;
1064 int error = 0;
1065 udp_t *udp = connp->conn_udp;
1066 udp_stack_t *us = udp->udp_us;
1067
1068 outer_ip6h = (ip6_t *)mp->b_rptr;
1069 #ifdef DEBUG
1070 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1071 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1072 else
1073 iph_hdr_length = IPV6_HDR_LEN;
1074 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1075 #endif
1076 /* Skip past the outer IP and ICMP headers */
1077 iph_hdr_length = ira->ira_ip_hdr_length;
1078 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1079
1080 /* Skip past the inner IP and find the ULP header */
1081 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */
1082 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1083 freemsg(mp);
1084 return;
1085 }
1086 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1087
1088 switch (icmp6->icmp6_type) {
1089 case ICMP6_DST_UNREACH:
1090 switch (icmp6->icmp6_code) {
1091 case ICMP6_DST_UNREACH_NOPORT:
1092 error = ECONNREFUSED;
1093 break;
1094 case ICMP6_DST_UNREACH_ADMIN:
1095 case ICMP6_DST_UNREACH_NOROUTE:
1096 case ICMP6_DST_UNREACH_BEYONDSCOPE:
1097 case ICMP6_DST_UNREACH_ADDR:
1098 /* Transient errors */
1099 break;
1100 default:
1101 break;
1102 }
1103 break;
1104 case ICMP6_PACKET_TOO_BIG: {
1105 struct T_unitdata_ind *tudi;
1106 struct T_opthdr *toh;
1107 size_t udi_size;
1108 mblk_t *newmp;
1109 t_scalar_t opt_length = sizeof (struct T_opthdr) +
1110 sizeof (struct ip6_mtuinfo);
1111 sin6_t *sin6;
1112 struct ip6_mtuinfo *mtuinfo;
1113
1114 /*
1115 * If the application has requested to receive path mtu
1116 * information, send up an empty message containing an
1117 * IPV6_PATHMTU ancillary data item.
1118 */
1119 if (!connp->conn_ipv6_recvpathmtu)
1120 break;
1121
1122 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1123 opt_length;
1124 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1125 UDPS_BUMP_MIB(us, udpInErrors);
1126 break;
1127 }
1128
1129 /*
1130 * newmp->b_cont is left to NULL on purpose. This is an
1131 * empty message containing only ancillary data.
1132 */
1133 newmp->b_datap->db_type = M_PROTO;
1134 tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1135 newmp->b_wptr = (uchar_t *)tudi + udi_size;
1136 tudi->PRIM_type = T_UNITDATA_IND;
1137 tudi->SRC_length = sizeof (sin6_t);
1138 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1139 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1140 tudi->OPT_length = opt_length;
1141
1142 sin6 = (sin6_t *)&tudi[1];
1143 bzero(sin6, sizeof (sin6_t));
1144 sin6->sin6_family = AF_INET6;
1145 sin6->sin6_addr = connp->conn_faddr_v6;
1146
1147 toh = (struct T_opthdr *)&sin6[1];
1148 toh->level = IPPROTO_IPV6;
1149 toh->name = IPV6_PATHMTU;
1150 toh->len = opt_length;
1151 toh->status = 0;
1152
1153 mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1154 bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1155 mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1156 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1157 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1158 /*
1159 * We've consumed everything we need from the original
1160 * message. Free it, then send our empty message.
1161 */
1162 freemsg(mp);
1163 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1164 return;
1165 }
1166 case ICMP6_TIME_EXCEEDED:
1167 /* Transient errors */
1168 break;
1169 case ICMP6_PARAM_PROB:
1170 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1171 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1172 (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1173 (uchar_t *)nexthdrp) {
1174 error = ECONNREFUSED;
1175 break;
1176 }
1177 break;
1178 }
1179 if (error == 0) {
1180 freemsg(mp);
1181 return;
1182 }
1183
1184 /*
1185 * Deliver T_UDERROR_IND when the application has asked for it.
1186 * The socket layer enables this automatically when connected.
1187 */
1188 if (!connp->conn_dgram_errind) {
1189 freemsg(mp);
1190 return;
1191 }
1192
1193 sin6 = sin6_null;
1194 sin6.sin6_family = AF_INET6;
1195 sin6.sin6_addr = ip6h->ip6_dst;
1196 sin6.sin6_port = udpha->uha_dst_port;
1197 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1198
1199 if (IPCL_IS_NONSTR(connp)) {
1200 mutex_enter(&connp->conn_lock);
1201 if (udp->udp_state == TS_DATA_XFER) {
1202 if (sin6.sin6_port == connp->conn_fport &&
1203 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1204 &connp->conn_faddr_v6)) {
1205 mutex_exit(&connp->conn_lock);
1206 (*connp->conn_upcalls->su_set_error)
1207 (connp->conn_upper_handle, error);
1208 goto done;
1209 }
1210 } else {
1211 udp->udp_delayed_error = error;
1212 *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1213 }
1214 mutex_exit(&connp->conn_lock);
1215 } else {
1216 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1217 NULL, 0, error);
1218 if (mp1 != NULL)
1219 putnext(connp->conn_rq, mp1);
1220 }
1221 done:
1222 freemsg(mp);
1223 }
1224
1225 /*
1226 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput.
1227 * The local address is filled in if endpoint is bound. The remote address
1228 * is filled in if remote address has been precified ("connected endpoint")
1229 * (The concept of connected CLTS sockets is alien to published TPI
1230 * but we support it anyway).
1231 */
1232 static void
1233 udp_addr_req(queue_t *q, mblk_t *mp)
1234 {
1235 struct sockaddr *sa;
1236 mblk_t *ackmp;
1237 struct T_addr_ack *taa;
1238 udp_t *udp = Q_TO_UDP(q);
1239 conn_t *connp = udp->udp_connp;
1240 uint_t addrlen;
1241
1242 /* Make it large enough for worst case */
1243 ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1244 2 * sizeof (sin6_t), 1);
1245 if (ackmp == NULL) {
1246 udp_err_ack(q, mp, TSYSERR, ENOMEM);
1247 return;
1248 }
1249 taa = (struct T_addr_ack *)ackmp->b_rptr;
1250
1251 bzero(taa, sizeof (struct T_addr_ack));
1252 ackmp->b_wptr = (uchar_t *)&taa[1];
1253
1254 taa->PRIM_type = T_ADDR_ACK;
1255 ackmp->b_datap->db_type = M_PCPROTO;
1256
1257 if (connp->conn_family == AF_INET)
1258 addrlen = sizeof (sin_t);
1259 else
1260 addrlen = sizeof (sin6_t);
1261
1262 mutex_enter(&connp->conn_lock);
1263 /*
1264 * Note: Following code assumes 32 bit alignment of basic
1265 * data structures like sin_t and struct T_addr_ack.
1266 */
1267 if (udp->udp_state != TS_UNBND) {
1268 /*
1269 * Fill in local address first
1270 */
1271 taa->LOCADDR_offset = sizeof (*taa);
1272 taa->LOCADDR_length = addrlen;
1273 sa = (struct sockaddr *)&taa[1];
1274 (void) conn_getsockname(connp, sa, &addrlen);
1275 ackmp->b_wptr += addrlen;
1276 }
1277 if (udp->udp_state == TS_DATA_XFER) {
1278 /*
1279 * connected, fill remote address too
1280 */
1281 taa->REMADDR_length = addrlen;
1282 /* assumed 32-bit alignment */
1283 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1284 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1285 (void) conn_getpeername(connp, sa, &addrlen);
1286 ackmp->b_wptr += addrlen;
1287 }
1288 mutex_exit(&connp->conn_lock);
1289 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1290 qreply(q, ackmp);
1291 }
1292
1293 static void
1294 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1295 {
1296 conn_t *connp = udp->udp_connp;
1297
1298 if (connp->conn_family == AF_INET) {
1299 *tap = udp_g_t_info_ack_ipv4;
1300 } else {
1301 *tap = udp_g_t_info_ack_ipv6;
1302 }
1303 tap->CURRENT_state = udp->udp_state;
1304 tap->OPT_size = udp_max_optsize;
1305 }
1306
1307 static void
1308 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1309 t_uscalar_t cap_bits1)
1310 {
1311 tcap->CAP_bits1 = 0;
1312
1313 if (cap_bits1 & TC1_INFO) {
1314 udp_copy_info(&tcap->INFO_ack, udp);
1315 tcap->CAP_bits1 |= TC1_INFO;
1316 }
1317 }
1318
1319 /*
1320 * This routine responds to T_CAPABILITY_REQ messages. It is called by
1321 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from
1322 * udp_g_t_info_ack. The current state of the stream is copied from
1323 * udp_state.
1324 */
1325 static void
1326 udp_capability_req(queue_t *q, mblk_t *mp)
1327 {
1328 t_uscalar_t cap_bits1;
1329 struct T_capability_ack *tcap;
1330 udp_t *udp = Q_TO_UDP(q);
1331
1332 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1333
1334 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1335 mp->b_datap->db_type, T_CAPABILITY_ACK);
1336 if (!mp)
1337 return;
1338
1339 tcap = (struct T_capability_ack *)mp->b_rptr;
1340 udp_do_capability_ack(udp, tcap, cap_bits1);
1341
1342 qreply(q, mp);
1343 }
1344
1345 /*
1346 * This routine responds to T_INFO_REQ messages. It is called by udp_wput.
1347 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1348 * The current state of the stream is copied from udp_state.
1349 */
1350 static void
1351 udp_info_req(queue_t *q, mblk_t *mp)
1352 {
1353 udp_t *udp = Q_TO_UDP(q);
1354
1355 /* Create a T_INFO_ACK message. */
1356 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1357 T_INFO_ACK);
1358 if (!mp)
1359 return;
1360 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1361 qreply(q, mp);
1362 }
1363
1364 /* For /dev/udp aka AF_INET open */
1365 static int
1366 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1367 {
1368 return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1369 }
1370
1371 /* For /dev/udp6 aka AF_INET6 open */
1372 static int
1373 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1374 {
1375 return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1376 }
1377
1378 /*
1379 * This is the open routine for udp. It allocates a udp_t structure for
1380 * the stream and, on the first open of the module, creates an ND table.
1381 */
1382 static int
1383 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1384 boolean_t isv6)
1385 {
1386 udp_t *udp;
1387 conn_t *connp;
1388 dev_t conn_dev;
1389 vmem_t *minor_arena;
1390 int err;
1391
1392 /* If the stream is already open, return immediately. */
1393 if (q->q_ptr != NULL)
1394 return (0);
1395
1396 if (sflag == MODOPEN)
1397 return (EINVAL);
1398
1399 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1400 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1401 minor_arena = ip_minor_arena_la;
1402 } else {
1403 /*
1404 * Either minor numbers in the large arena were exhausted
1405 * or a non socket application is doing the open.
1406 * Try to allocate from the small arena.
1407 */
1408 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1409 return (EBUSY);
1410
1411 minor_arena = ip_minor_arena_sa;
1412 }
1413
1414 if (flag & SO_FALLBACK) {
1415 /*
1416 * Non streams socket needs a stream to fallback to
1417 */
1418 RD(q)->q_ptr = (void *)conn_dev;
1419 WR(q)->q_qinfo = &udp_fallback_sock_winit;
1420 WR(q)->q_ptr = (void *)minor_arena;
1421 qprocson(q);
1422 return (0);
1423 }
1424
1425 connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1426 if (connp == NULL) {
1427 inet_minor_free(minor_arena, conn_dev);
1428 return (err);
1429 }
1430 udp = connp->conn_udp;
1431
1432 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1433 connp->conn_dev = conn_dev;
1434 connp->conn_minor_arena = minor_arena;
1435
1436 /*
1437 * Initialize the udp_t structure for this stream.
1438 */
1439 q->q_ptr = connp;
1440 WR(q)->q_ptr = connp;
1441 connp->conn_rq = q;
1442 connp->conn_wq = WR(q);
1443
1444 /*
1445 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1446 * need to lock anything.
1447 */
1448 ASSERT(connp->conn_proto == IPPROTO_UDP);
1449 ASSERT(connp->conn_udp == udp);
1450 ASSERT(udp->udp_connp == connp);
1451
1452 if (flag & SO_SOCKSTR) {
1453 udp->udp_issocket = B_TRUE;
1454 }
1455
1456 WR(q)->q_hiwat = connp->conn_sndbuf;
1457 WR(q)->q_lowat = connp->conn_sndlowat;
1458
1459 qprocson(q);
1460
1461 /* Set the Stream head write offset and high watermark. */
1462 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1463 (void) proto_set_rx_hiwat(q, connp,
1464 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1465
1466 mutex_enter(&connp->conn_lock);
1467 connp->conn_state_flags &= ~CONN_INCIPIENT;
1468 mutex_exit(&connp->conn_lock);
1469 return (0);
1470 }
1471
1472 /*
1473 * Which UDP options OK to set through T_UNITDATA_REQ...
1474 */
1475 /* ARGSUSED */
1476 static boolean_t
1477 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1478 {
1479 return (B_TRUE);
1480 }
1481
1482 /*
1483 * This routine gets default values of certain options whose default
1484 * values are maintained by protcol specific code
1485 */
1486 int
1487 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1488 {
1489 udp_t *udp = Q_TO_UDP(q);
1490 udp_stack_t *us = udp->udp_us;
1491 int *i1 = (int *)ptr;
1492
1493 switch (level) {
1494 case IPPROTO_IP:
1495 switch (name) {
1496 case IP_MULTICAST_TTL:
1497 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1498 return (sizeof (uchar_t));
1499 case IP_MULTICAST_LOOP:
1500 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1501 return (sizeof (uchar_t));
1502 }
1503 break;
1504 case IPPROTO_IPV6:
1505 switch (name) {
1506 case IPV6_MULTICAST_HOPS:
1507 *i1 = IP_DEFAULT_MULTICAST_TTL;
1508 return (sizeof (int));
1509 case IPV6_MULTICAST_LOOP:
1510 *i1 = IP_DEFAULT_MULTICAST_LOOP;
1511 return (sizeof (int));
1512 case IPV6_UNICAST_HOPS:
1513 *i1 = us->us_ipv6_hoplimit;
1514 return (sizeof (int));
1515 }
1516 break;
1517 }
1518 return (-1);
1519 }
1520
1521 /*
1522 * This routine retrieves the current status of socket options.
1523 * It returns the size of the option retrieved, or -1.
1524 */
1525 int
1526 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1527 uchar_t *ptr)
1528 {
1529 int *i1 = (int *)ptr;
1530 udp_t *udp = connp->conn_udp;
1531 int len;
1532 conn_opt_arg_t coas;
1533 int retval;
1534
1535 coas.coa_connp = connp;
1536 coas.coa_ixa = connp->conn_ixa;
1537 coas.coa_ipp = &connp->conn_xmit_ipp;
1538 coas.coa_ancillary = B_FALSE;
1539 coas.coa_changed = 0;
1540
1541 /*
1542 * We assume that the optcom framework has checked for the set
1543 * of levels and names that are supported, hence we don't worry
1544 * about rejecting based on that.
1545 * First check for UDP specific handling, then pass to common routine.
1546 */
1547 switch (level) {
1548 case IPPROTO_IP:
1549 /*
1550 * Only allow IPv4 option processing on IPv4 sockets.
1551 */
1552 if (connp->conn_family != AF_INET)
1553 return (-1);
1554
1555 switch (name) {
1556 case IP_OPTIONS:
1557 case T_IP_OPTIONS:
1558 mutex_enter(&connp->conn_lock);
1559 if (!(udp->udp_recv_ipp.ipp_fields &
1560 IPPF_IPV4_OPTIONS)) {
1561 mutex_exit(&connp->conn_lock);
1562 return (0);
1563 }
1564
1565 len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1566 ASSERT(len != 0);
1567 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1568 mutex_exit(&connp->conn_lock);
1569 return (len);
1570 }
1571 break;
1572 case IPPROTO_UDP:
1573 switch (name) {
1574 case UDP_NAT_T_ENDPOINT:
1575 mutex_enter(&connp->conn_lock);
1576 *i1 = udp->udp_nat_t_endpoint;
1577 mutex_exit(&connp->conn_lock);
1578 return (sizeof (int));
1579 case UDP_RCVHDR:
1580 mutex_enter(&connp->conn_lock);
1581 *i1 = udp->udp_rcvhdr ? 1 : 0;
1582 mutex_exit(&connp->conn_lock);
1583 return (sizeof (int));
1584 }
1585 }
1586 mutex_enter(&connp->conn_lock);
1587 retval = conn_opt_get(&coas, level, name, ptr);
1588 mutex_exit(&connp->conn_lock);
1589 return (retval);
1590 }
1591
1592 /*
1593 * This routine retrieves the current status of socket options.
1594 * It returns the size of the option retrieved, or -1.
1595 */
1596 int
1597 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1598 {
1599 conn_t *connp = Q_TO_CONN(q);
1600 int err;
1601
1602 err = udp_opt_get(connp, level, name, ptr);
1603 return (err);
1604 }
1605
1606 /*
1607 * This routine sets socket options.
1608 */
1609 int
1610 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1611 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1612 {
1613 conn_t *connp = coa->coa_connp;
1614 ip_xmit_attr_t *ixa = coa->coa_ixa;
1615 udp_t *udp = connp->conn_udp;
1616 udp_stack_t *us = udp->udp_us;
1617 int *i1 = (int *)invalp;
1618 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1619 int error;
1620
1621 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1622 /*
1623 * First do UDP specific sanity checks and handle UDP specific
1624 * options. Note that some IPPROTO_UDP options are handled
1625 * by conn_opt_set.
1626 */
1627 switch (level) {
1628 case SOL_SOCKET:
1629 switch (name) {
1630 case SO_SNDBUF:
1631 if (*i1 > us->us_max_buf) {
1632 return (ENOBUFS);
1633 }
1634 break;
1635 case SO_RCVBUF:
1636 if (*i1 > us->us_max_buf) {
1637 return (ENOBUFS);
1638 }
1639 break;
1640
1641 case SCM_UCRED: {
1642 struct ucred_s *ucr;
1643 cred_t *newcr;
1644 ts_label_t *tsl;
1645
1646 /*
1647 * Only sockets that have proper privileges and are
1648 * bound to MLPs will have any other value here, so
1649 * this implicitly tests for privilege to set label.
1650 */
1651 if (connp->conn_mlp_type == mlptSingle)
1652 break;
1653
1654 ucr = (struct ucred_s *)invalp;
1655 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1656 ucr->uc_labeloff < sizeof (*ucr) ||
1657 ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1658 return (EINVAL);
1659 if (!checkonly) {
1660 /*
1661 * Set ixa_tsl to the new label.
1662 * We assume that crgetzoneid doesn't change
1663 * as part of the SCM_UCRED.
1664 */
1665 ASSERT(cr != NULL);
1666 if ((tsl = crgetlabel(cr)) == NULL)
1667 return (EINVAL);
1668 newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1669 tsl->tsl_doi, KM_NOSLEEP);
1670 if (newcr == NULL)
1671 return (ENOSR);
1672 ASSERT(newcr->cr_label != NULL);
1673 /*
1674 * Move the hold on the cr_label to ixa_tsl by
1675 * setting cr_label to NULL. Then release newcr.
1676 */
1677 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1678 ixa->ixa_flags |= IXAF_UCRED_TSL;
1679 newcr->cr_label = NULL;
1680 crfree(newcr);
1681 coa->coa_changed |= COA_HEADER_CHANGED;
1682 coa->coa_changed |= COA_WROFF_CHANGED;
1683 }
1684 /* Fully handled this option. */
1685 return (0);
1686 }
1687 }
1688 break;
1689 case IPPROTO_UDP:
1690 switch (name) {
1691 case UDP_NAT_T_ENDPOINT:
1692 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1693 return (error);
1694 }
1695
1696 /*
1697 * Use conn_family instead so we can avoid ambiguitites
1698 * with AF_INET6 sockets that may switch from IPv4
1699 * to IPv6.
1700 */
1701 if (connp->conn_family != AF_INET) {
1702 return (EAFNOSUPPORT);
1703 }
1704
1705 if (!checkonly) {
1706 mutex_enter(&connp->conn_lock);
1707 udp->udp_nat_t_endpoint = onoff;
1708 mutex_exit(&connp->conn_lock);
1709 coa->coa_changed |= COA_HEADER_CHANGED;
1710 coa->coa_changed |= COA_WROFF_CHANGED;
1711 }
1712 /* Fully handled this option. */
1713 return (0);
1714 case UDP_RCVHDR:
1715 mutex_enter(&connp->conn_lock);
1716 udp->udp_rcvhdr = onoff;
1717 mutex_exit(&connp->conn_lock);
1718 return (0);
1719 }
1720 break;
1721 }
1722 error = conn_opt_set(coa, level, name, inlen, invalp,
1723 checkonly, cr);
1724 return (error);
1725 }
1726
1727 /*
1728 * This routine sets socket options.
1729 */
1730 int
1731 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1732 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1733 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1734 {
1735 udp_t *udp = connp->conn_udp;
1736 int err;
1737 conn_opt_arg_t coas, *coa;
1738 boolean_t checkonly;
1739 udp_stack_t *us = udp->udp_us;
1740
1741 switch (optset_context) {
1742 case SETFN_OPTCOM_CHECKONLY:
1743 checkonly = B_TRUE;
1744 /*
1745 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1746 * inlen != 0 implies value supplied and
1747 * we have to "pretend" to set it.
1748 * inlen == 0 implies that there is no
1749 * value part in T_CHECK request and just validation
1750 * done elsewhere should be enough, we just return here.
1751 */
1752 if (inlen == 0) {
1753 *outlenp = 0;
1754 return (0);
1755 }
1756 break;
1757 case SETFN_OPTCOM_NEGOTIATE:
1758 checkonly = B_FALSE;
1759 break;
1760 case SETFN_UD_NEGOTIATE:
1761 case SETFN_CONN_NEGOTIATE:
1762 checkonly = B_FALSE;
1763 /*
1764 * Negotiating local and "association-related" options
1765 * through T_UNITDATA_REQ.
1766 *
1767 * Following routine can filter out ones we do not
1768 * want to be "set" this way.
1769 */
1770 if (!udp_opt_allow_udr_set(level, name)) {
1771 *outlenp = 0;
1772 return (EINVAL);
1773 }
1774 break;
1775 default:
1776 /*
1777 * We should never get here
1778 */
1779 *outlenp = 0;
1780 return (EINVAL);
1781 }
1782
1783 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1784 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1785
1786 if (thisdg_attrs != NULL) {
1787 /* Options from T_UNITDATA_REQ */
1788 coa = (conn_opt_arg_t *)thisdg_attrs;
1789 ASSERT(coa->coa_connp == connp);
1790 ASSERT(coa->coa_ixa != NULL);
1791 ASSERT(coa->coa_ipp != NULL);
1792 ASSERT(coa->coa_ancillary);
1793 } else {
1794 coa = &coas;
1795 coas.coa_connp = connp;
1796 /* Get a reference on conn_ixa to prevent concurrent mods */
1797 coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1798 if (coas.coa_ixa == NULL) {
1799 *outlenp = 0;
1800 return (ENOMEM);
1801 }
1802 coas.coa_ipp = &connp->conn_xmit_ipp;
1803 coas.coa_ancillary = B_FALSE;
1804 coas.coa_changed = 0;
1805 }
1806
1807 err = udp_do_opt_set(coa, level, name, inlen, invalp,
1808 cr, checkonly);
1809 if (err != 0) {
1810 errout:
1811 if (!coa->coa_ancillary)
1812 ixa_refrele(coa->coa_ixa);
1813 *outlenp = 0;
1814 return (err);
1815 }
1816 /* Handle DHCPINIT here outside of lock */
1817 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1818 uint_t ifindex;
1819 ill_t *ill;
1820
1821 ifindex = *(uint_t *)invalp;
1822 if (ifindex == 0) {
1823 ill = NULL;
1824 } else {
1825 ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1826 coa->coa_ixa->ixa_ipst);
1827 if (ill == NULL) {
1828 err = ENXIO;
1829 goto errout;
1830 }
1831
1832 mutex_enter(&ill->ill_lock);
1833 if (ill->ill_state_flags & ILL_CONDEMNED) {
1834 mutex_exit(&ill->ill_lock);
1835 ill_refrele(ill);
1836 err = ENXIO;
1837 goto errout;
1838 }
1839 if (IS_VNI(ill)) {
1840 mutex_exit(&ill->ill_lock);
1841 ill_refrele(ill);
1842 err = EINVAL;
1843 goto errout;
1844 }
1845 }
1846 mutex_enter(&connp->conn_lock);
1847
1848 if (connp->conn_dhcpinit_ill != NULL) {
1849 /*
1850 * We've locked the conn so conn_cleanup_ill()
1851 * cannot clear conn_dhcpinit_ill -- so it's
1852 * safe to access the ill.
1853 */
1854 ill_t *oill = connp->conn_dhcpinit_ill;
1855
1856 ASSERT(oill->ill_dhcpinit != 0);
1857 atomic_dec_32(&oill->ill_dhcpinit);
1858 ill_set_inputfn(connp->conn_dhcpinit_ill);
1859 connp->conn_dhcpinit_ill = NULL;
1860 }
1861
1862 if (ill != NULL) {
1863 connp->conn_dhcpinit_ill = ill;
1864 atomic_inc_32(&ill->ill_dhcpinit);
1865 ill_set_inputfn(ill);
1866 mutex_exit(&connp->conn_lock);
1867 mutex_exit(&ill->ill_lock);
1868 ill_refrele(ill);
1869 } else {
1870 mutex_exit(&connp->conn_lock);
1871 }
1872 }
1873
1874 /*
1875 * Common case of OK return with outval same as inval.
1876 */
1877 if (invalp != outvalp) {
1878 /* don't trust bcopy for identical src/dst */
1879 (void) bcopy(invalp, outvalp, inlen);
1880 }
1881 *outlenp = inlen;
1882
1883 /*
1884 * If this was not ancillary data, then we rebuild the headers,
1885 * update the IRE/NCE, and IPsec as needed.
1886 * Since the label depends on the destination we go through
1887 * ip_set_destination first.
1888 */
1889 if (coa->coa_ancillary) {
1890 return (0);
1891 }
1892
1893 if (coa->coa_changed & COA_ROUTE_CHANGED) {
1894 in6_addr_t saddr, faddr, nexthop;
1895 in_port_t fport;
1896
1897 /*
1898 * We clear lastdst to make sure we pick up the change
1899 * next time sending.
1900 * If we are connected we re-cache the information.
1901 * We ignore errors to preserve BSD behavior.
1902 * Note that we don't redo IPsec policy lookup here
1903 * since the final destination (or source) didn't change.
1904 */
1905 mutex_enter(&connp->conn_lock);
1906 connp->conn_v6lastdst = ipv6_all_zeros;
1907
1908 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
1909 &connp->conn_faddr_v6, &nexthop);
1910 saddr = connp->conn_saddr_v6;
1911 faddr = connp->conn_faddr_v6;
1912 fport = connp->conn_fport;
1913 mutex_exit(&connp->conn_lock);
1914
1915 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
1916 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
1917 (void) ip_attr_connect(connp, coa->coa_ixa,
1918 &saddr, &faddr, &nexthop, fport, NULL, NULL,
1919 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
1920 }
1921 }
1922
1923 ixa_refrele(coa->coa_ixa);
1924
1925 if (coa->coa_changed & COA_HEADER_CHANGED) {
1926 /*
1927 * Rebuild the header template if we are connected.
1928 * Otherwise clear conn_v6lastdst so we rebuild the header
1929 * in the data path.
1930 */
1931 mutex_enter(&connp->conn_lock);
1932 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1933 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1934 err = udp_build_hdr_template(connp,
1935 &connp->conn_saddr_v6, &connp->conn_faddr_v6,
1936 connp->conn_fport, connp->conn_flowinfo);
1937 if (err != 0) {
1938 mutex_exit(&connp->conn_lock);
1939 return (err);
1940 }
1941 } else {
1942 connp->conn_v6lastdst = ipv6_all_zeros;
1943 }
1944 mutex_exit(&connp->conn_lock);
1945 }
1946 if (coa->coa_changed & COA_RCVBUF_CHANGED) {
1947 (void) proto_set_rx_hiwat(connp->conn_rq, connp,
1948 connp->conn_rcvbuf);
1949 }
1950 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1951 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1952 }
1953 if (coa->coa_changed & COA_WROFF_CHANGED) {
1954 /* Increase wroff if needed */
1955 uint_t wroff;
1956
1957 mutex_enter(&connp->conn_lock);
1958 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
1959 if (udp->udp_nat_t_endpoint)
1960 wroff += sizeof (uint32_t);
1961 if (wroff > connp->conn_wroff) {
1962 connp->conn_wroff = wroff;
1963 mutex_exit(&connp->conn_lock);
1964 (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
1965 } else {
1966 mutex_exit(&connp->conn_lock);
1967 }
1968 }
1969 return (err);
1970 }
1971
1972 /* This routine sets socket options. */
1973 int
1974 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
1975 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
1976 void *thisdg_attrs, cred_t *cr)
1977 {
1978 conn_t *connp = Q_TO_CONN(q);
1979 int error;
1980
1981 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
1982 outlenp, outvalp, thisdg_attrs, cr);
1983 return (error);
1984 }
1985
1986 /*
1987 * Setup IP and UDP headers.
1988 * Returns NULL on allocation failure, in which case data_mp is freed.
1989 */
1990 mblk_t *
1991 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
1992 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
1993 uint32_t flowinfo, mblk_t *data_mp, int *errorp)
1994 {
1995 mblk_t *mp;
1996 udpha_t *udpha;
1997 udp_stack_t *us = connp->conn_netstack->netstack_udp;
1998 uint_t data_len;
1999 uint32_t cksum;
2000 udp_t *udp = connp->conn_udp;
2001 boolean_t insert_spi = udp->udp_nat_t_endpoint;
2002 uint_t ulp_hdr_len;
2003
2004 data_len = msgdsize(data_mp);
2005 ulp_hdr_len = UDPH_SIZE;
2006 if (insert_spi)
2007 ulp_hdr_len += sizeof (uint32_t);
2008
2009 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2010 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2011 if (mp == NULL) {
2012 ASSERT(*errorp != 0);
2013 return (NULL);
2014 }
2015
2016 data_len += ulp_hdr_len;
2017 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2018
2019 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2020 udpha->uha_src_port = connp->conn_lport;
2021 udpha->uha_dst_port = dstport;
2022 udpha->uha_checksum = 0;
2023 udpha->uha_length = htons(data_len);
2024
2025 /*
2026 * If there was a routing option/header then conn_prepend_hdr
2027 * has massaged it and placed the pseudo-header checksum difference
2028 * in the cksum argument.
2029 *
2030 * Setup header length and prepare for ULP checksum done in IP.
2031 *
2032 * We make it easy for IP to include our pseudo header
2033 * by putting our length in uha_checksum.
2034 * The IP source, destination, and length have already been set by
2035 * conn_prepend_hdr.
2036 */
2037 cksum += data_len;
2038 cksum = (cksum >> 16) + (cksum & 0xFFFF);
2039 ASSERT(cksum < 0x10000);
2040
2041 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2042 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2043
2044 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2045
2046 /* IP does the checksum if uha_checksum is non-zero */
2047 if (us->us_do_checksum) {
2048 if (cksum == 0)
2049 udpha->uha_checksum = 0xffff;
2050 else
2051 udpha->uha_checksum = htons(cksum);
2052 } else {
2053 udpha->uha_checksum = 0;
2054 }
2055 } else {
2056 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2057
2058 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2059 if (cksum == 0)
2060 udpha->uha_checksum = 0xffff;
2061 else
2062 udpha->uha_checksum = htons(cksum);
2063 }
2064
2065 /* Insert all-0s SPI now. */
2066 if (insert_spi)
2067 *((uint32_t *)(udpha + 1)) = 0;
2068
2069 return (mp);
2070 }
2071
2072 static int
2073 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2074 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2075 {
2076 udpha_t *udpha;
2077 int error;
2078
2079 ASSERT(MUTEX_HELD(&connp->conn_lock));
2080 /*
2081 * We clear lastdst to make sure we don't use the lastdst path
2082 * next time sending since we might not have set v6dst yet.
2083 */
2084 connp->conn_v6lastdst = ipv6_all_zeros;
2085
2086 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2087 flowinfo);
2088 if (error != 0)
2089 return (error);
2090
2091 /*
2092 * Any routing header/option has been massaged. The checksum difference
2093 * is stored in conn_sum.
2094 */
2095 udpha = (udpha_t *)connp->conn_ht_ulp;
2096 udpha->uha_src_port = connp->conn_lport;
2097 udpha->uha_dst_port = dstport;
2098 udpha->uha_checksum = 0;
2099 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */
2100 return (0);
2101 }
2102
2103 static mblk_t *
2104 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2105 {
2106 ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2107 if (IPCL_IS_NONSTR(udp->udp_connp)) {
2108 /*
2109 * fallback has started but messages have not been moved yet
2110 */
2111 if (udp->udp_fallback_queue_head == NULL) {
2112 ASSERT(udp->udp_fallback_queue_tail == NULL);
2113 udp->udp_fallback_queue_head = mp;
2114 udp->udp_fallback_queue_tail = mp;
2115 } else {
2116 ASSERT(udp->udp_fallback_queue_tail != NULL);
2117 udp->udp_fallback_queue_tail->b_next = mp;
2118 udp->udp_fallback_queue_tail = mp;
2119 }
2120 return (NULL);
2121 } else {
2122 /*
2123 * Fallback completed, let the caller putnext() the mblk.
2124 */
2125 return (mp);
2126 }
2127 }
2128
2129 /*
2130 * Deliver data to ULP. In case we have a socket, and it's falling back to
2131 * TPI, then we'll queue the mp for later processing.
2132 */
2133 static void
2134 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2135 {
2136 if (IPCL_IS_NONSTR(connp)) {
2137 udp_t *udp = connp->conn_udp;
2138 int error;
2139
2140 ASSERT(len == msgdsize(mp));
2141 if ((*connp->conn_upcalls->su_recv)
2142 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2143 mutex_enter(&udp->udp_recv_lock);
2144 if (error == ENOSPC) {
2145 /*
2146 * let's confirm while holding the lock
2147 */
2148 if ((*connp->conn_upcalls->su_recv)
2149 (connp->conn_upper_handle, NULL, 0, 0,
2150 &error, NULL) < 0) {
2151 ASSERT(error == ENOSPC);
2152 if (error == ENOSPC) {
2153 connp->conn_flow_cntrld =
2154 B_TRUE;
2155 }
2156 }
2157 mutex_exit(&udp->udp_recv_lock);
2158 } else {
2159 ASSERT(error == EOPNOTSUPP);
2160 mp = udp_queue_fallback(udp, mp);
2161 mutex_exit(&udp->udp_recv_lock);
2162 if (mp != NULL)
2163 putnext(connp->conn_rq, mp);
2164 }
2165 }
2166 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2167 } else {
2168 if (is_system_labeled()) {
2169 ASSERT(ira->ira_cred != NULL);
2170 /*
2171 * Provide for protocols above UDP such as RPC
2172 * NOPID leaves db_cpid unchanged.
2173 */
2174 mblk_setcred(mp, ira->ira_cred, NOPID);
2175 }
2176
2177 putnext(connp->conn_rq, mp);
2178 }
2179 }
2180
2181 /*
2182 * This is the inbound data path.
2183 * IP has already pulled up the IP plus UDP headers and verified alignment
2184 * etc.
2185 */
2186 /* ARGSUSED2 */
2187 static void
2188 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2189 {
2190 conn_t *connp = (conn_t *)arg1;
2191 struct T_unitdata_ind *tudi;
2192 uchar_t *rptr; /* Pointer to IP header */
2193 int hdr_length; /* Length of IP+UDP headers */
2194 int udi_size; /* Size of T_unitdata_ind */
2195 int pkt_len;
2196 udp_t *udp;
2197 udpha_t *udpha;
2198 ip_pkt_t ipps;
2199 ip6_t *ip6h;
2200 mblk_t *mp1;
2201 uint32_t udp_ipv4_options_len;
2202 crb_t recv_ancillary;
2203 udp_stack_t *us;
2204
2205 ASSERT(connp->conn_flags & IPCL_UDPCONN);
2206
2207 udp = connp->conn_udp;
2208 us = udp->udp_us;
2209 rptr = mp->b_rptr;
2210
2211 ASSERT(DB_TYPE(mp) == M_DATA);
2212 ASSERT(OK_32PTR(rptr));
2213 ASSERT(ira->ira_pktlen == msgdsize(mp));
2214 pkt_len = ira->ira_pktlen;
2215
2216 /*
2217 * Get a snapshot of these and allow other threads to change
2218 * them after that. We need the same recv_ancillary when determining
2219 * the size as when adding the ancillary data items.
2220 */
2221 mutex_enter(&connp->conn_lock);
2222 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2223 recv_ancillary = connp->conn_recv_ancillary;
2224 mutex_exit(&connp->conn_lock);
2225
2226 hdr_length = ira->ira_ip_hdr_length;
2227
2228 /*
2229 * IP inspected the UDP header thus all of it must be in the mblk.
2230 * UDP length check is performed for IPv6 packets and IPv4 packets
2231 * to check if the size of the packet as specified
2232 * by the UDP header is the same as the length derived from the IP
2233 * header.
2234 */
2235 udpha = (udpha_t *)(rptr + hdr_length);
2236 if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2237 goto tossit;
2238
2239 hdr_length += UDPH_SIZE;
2240 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */
2241
2242 /* Initialize regardless of IP version */
2243 ipps.ipp_fields = 0;
2244
2245 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2246 udp_ipv4_options_len > 0) &&
2247 connp->conn_family == AF_INET) {
2248 int err;
2249
2250 /*
2251 * Record/update udp_recv_ipp with the lock
2252 * held. Not needed for AF_INET6 sockets
2253 * since they don't support a getsockopt of IP_OPTIONS.
2254 */
2255 mutex_enter(&connp->conn_lock);
2256 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2257 B_TRUE);
2258 if (err != 0) {
2259 /* Allocation failed. Drop packet */
2260 mutex_exit(&connp->conn_lock);
2261 freemsg(mp);
2262 UDPS_BUMP_MIB(us, udpInErrors);
2263 return;
2264 }
2265 mutex_exit(&connp->conn_lock);
2266 }
2267
2268 if (recv_ancillary.crb_all != 0) {
2269 /*
2270 * Record packet information in the ip_pkt_t
2271 */
2272 if (ira->ira_flags & IRAF_IS_IPV4) {
2273 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2274 ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2275 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2276 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2277
2278 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2279 } else {
2280 uint8_t nexthdrp;
2281
2282 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2283 /*
2284 * IPv6 packets can only be received by applications
2285 * that are prepared to receive IPv6 addresses.
2286 * The IP fanout must ensure this.
2287 */
2288 ASSERT(connp->conn_family == AF_INET6);
2289
2290 ip6h = (ip6_t *)rptr;
2291
2292 /* We don't care about the length, but need the ipp */
2293 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2294 &nexthdrp);
2295 ASSERT(hdr_length == ira->ira_ip_hdr_length);
2296 /* Restore */
2297 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2298 ASSERT(nexthdrp == IPPROTO_UDP);
2299 }
2300 }
2301
2302 /*
2303 * This is the inbound data path. Packets are passed upstream as
2304 * T_UNITDATA_IND messages.
2305 */
2306 if (connp->conn_family == AF_INET) {
2307 sin_t *sin;
2308
2309 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2310
2311 /*
2312 * Normally only send up the source address.
2313 * If any ancillary data items are wanted we add those.
2314 */
2315 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2316 if (recv_ancillary.crb_all != 0) {
2317 udi_size += conn_recvancillary_size(connp,
2318 recv_ancillary, ira, mp, &ipps);
2319 }
2320
2321 /* Allocate a message block for the T_UNITDATA_IND structure. */
2322 mp1 = allocb(udi_size, BPRI_MED);
2323 if (mp1 == NULL) {
2324 freemsg(mp);
2325 UDPS_BUMP_MIB(us, udpInErrors);
2326 return;
2327 }
2328 mp1->b_cont = mp;
2329 mp1->b_datap->db_type = M_PROTO;
2330 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2331 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2332 tudi->PRIM_type = T_UNITDATA_IND;
2333 tudi->SRC_length = sizeof (sin_t);
2334 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2335 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2336 sizeof (sin_t);
2337 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2338 tudi->OPT_length = udi_size;
2339 sin = (sin_t *)&tudi[1];
2340 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2341 sin->sin_port = udpha->uha_src_port;
2342 sin->sin_family = connp->conn_family;
2343 *(uint32_t *)&sin->sin_zero[0] = 0;
2344 *(uint32_t *)&sin->sin_zero[4] = 0;
2345
2346 /*
2347 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
2348 * IP_RECVTTL has been set.
2349 */
2350 if (udi_size != 0) {
2351 conn_recvancillary_add(connp, recv_ancillary, ira,
2352 &ipps, (uchar_t *)&sin[1], udi_size);
2353 }
2354 } else {
2355 sin6_t *sin6;
2356
2357 /*
2358 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2359 *
2360 * Normally we only send up the address. If receiving of any
2361 * optional receive side information is enabled, we also send
2362 * that up as options.
2363 */
2364 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2365
2366 if (recv_ancillary.crb_all != 0) {
2367 udi_size += conn_recvancillary_size(connp,
2368 recv_ancillary, ira, mp, &ipps);
2369 }
2370
2371 mp1 = allocb(udi_size, BPRI_MED);
2372 if (mp1 == NULL) {
2373 freemsg(mp);
2374 UDPS_BUMP_MIB(us, udpInErrors);
2375 return;
2376 }
2377 mp1->b_cont = mp;
2378 mp1->b_datap->db_type = M_PROTO;
2379 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2380 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2381 tudi->PRIM_type = T_UNITDATA_IND;
2382 tudi->SRC_length = sizeof (sin6_t);
2383 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2384 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2385 sizeof (sin6_t);
2386 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2387 tudi->OPT_length = udi_size;
2388 sin6 = (sin6_t *)&tudi[1];
2389 if (ira->ira_flags & IRAF_IS_IPV4) {
2390 in6_addr_t v6dst;
2391
2392 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2393 &sin6->sin6_addr);
2394 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2395 &v6dst);
2396 sin6->sin6_flowinfo = 0;
2397 sin6->sin6_scope_id = 0;
2398 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2399 IPCL_ZONEID(connp), us->us_netstack);
2400 } else {
2401 ip6h = (ip6_t *)rptr;
2402
2403 sin6->sin6_addr = ip6h->ip6_src;
2404 /* No sin6_flowinfo per API */
2405 sin6->sin6_flowinfo = 0;
2406 /* For link-scope pass up scope id */
2407 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2408 sin6->sin6_scope_id = ira->ira_ruifindex;
2409 else
2410 sin6->sin6_scope_id = 0;
2411 sin6->__sin6_src_id = ip_srcid_find_addr(
2412 &ip6h->ip6_dst, IPCL_ZONEID(connp),
2413 us->us_netstack);
2414 }
2415 sin6->sin6_port = udpha->uha_src_port;
2416 sin6->sin6_family = connp->conn_family;
2417
2418 if (udi_size != 0) {
2419 conn_recvancillary_add(connp, recv_ancillary, ira,
2420 &ipps, (uchar_t *)&sin6[1], udi_size);
2421 }
2422 }
2423
2424 /*
2425 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2426 * loopback traffic).
2427 */
2428 DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
2429 void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);
2430
2431 /* Walk past the headers unless IP_RECVHDR was set. */
2432 if (!udp->udp_rcvhdr) {
2433 mp->b_rptr = rptr + hdr_length;
2434 pkt_len -= hdr_length;
2435 }
2436
2437 UDPS_BUMP_MIB(us, udpHCInDatagrams);
2438 udp_ulp_recv(connp, mp1, pkt_len, ira);
2439 return;
2440
2441 tossit:
2442 freemsg(mp);
2443 UDPS_BUMP_MIB(us, udpInErrors);
2444 }
2445
2446 /*
2447 * This routine creates a T_UDERROR_IND message and passes it upstream.
2448 * The address and options are copied from the T_UNITDATA_REQ message
2449 * passed in mp. This message is freed.
2450 */
2451 static void
2452 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2453 {
2454 struct T_unitdata_req *tudr;
2455 mblk_t *mp1;
2456 uchar_t *destaddr;
2457 t_scalar_t destlen;
2458 uchar_t *optaddr;
2459 t_scalar_t optlen;
2460
2461 if ((mp->b_wptr < mp->b_rptr) ||
2462 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2463 goto done;
2464 }
2465 tudr = (struct T_unitdata_req *)mp->b_rptr;
2466 destaddr = mp->b_rptr + tudr->DEST_offset;
2467 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2468 destaddr + tudr->DEST_length < mp->b_rptr ||
2469 destaddr + tudr->DEST_length > mp->b_wptr) {
2470 goto done;
2471 }
2472 optaddr = mp->b_rptr + tudr->OPT_offset;
2473 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2474 optaddr + tudr->OPT_length < mp->b_rptr ||
2475 optaddr + tudr->OPT_length > mp->b_wptr) {
2476 goto done;
2477 }
2478 destlen = tudr->DEST_length;
2479 optlen = tudr->OPT_length;
2480
2481 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2482 (char *)optaddr, optlen, err);
2483 if (mp1 != NULL)
2484 qreply(q, mp1);
2485
2486 done:
2487 freemsg(mp);
2488 }
2489
2490 /*
2491 * This routine removes a port number association from a stream. It
2492 * is called by udp_wput to handle T_UNBIND_REQ messages.
2493 */
2494 static void
2495 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2496 {
2497 conn_t *connp = Q_TO_CONN(q);
2498 int error;
2499
2500 error = udp_do_unbind(connp);
2501 if (error) {
2502 if (error < 0)
2503 udp_err_ack(q, mp, -error, 0);
2504 else
2505 udp_err_ack(q, mp, TSYSERR, error);
2506 return;
2507 }
2508
2509 mp = mi_tpi_ok_ack_alloc(mp);
2510 ASSERT(mp != NULL);
2511 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2512 qreply(q, mp);
2513 }
2514
2515 /*
2516 * Don't let port fall into the privileged range.
2517 * Since the extra privileged ports can be arbitrary we also
2518 * ensure that we exclude those from consideration.
2519 * us->us_epriv_ports is not sorted thus we loop over it until
2520 * there are no changes.
2521 */
2522 static in_port_t
2523 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2524 {
2525 int i, bump;
2526 in_port_t nextport;
2527 boolean_t restart = B_FALSE;
2528 udp_stack_t *us = udp->udp_us;
2529
2530 if (random && udp_random_anon_port != 0) {
2531 (void) random_get_pseudo_bytes((uint8_t *)&port,
2532 sizeof (in_port_t));
2533 /*
2534 * Unless changed by a sys admin, the smallest anon port
2535 * is 32768 and the largest anon port is 65535. It is
2536 * very likely (50%) for the random port to be smaller
2537 * than the smallest anon port. When that happens,
2538 * add port % (anon port range) to the smallest anon
2539 * port to get the random port. It should fall into the
2540 * valid anon port range.
2541 */
2542 if ((port < us->us_smallest_anon_port) ||
2543 (port > us->us_largest_anon_port)) {
2544 if (us->us_smallest_anon_port ==
2545 us->us_largest_anon_port) {
2546 bump = 0;
2547 } else {
2548 bump = port % (us->us_largest_anon_port -
2549 us->us_smallest_anon_port);
2550 }
2551
2552 port = us->us_smallest_anon_port + bump;
2553 }
2554 }
2555
2556 retry:
2557 if (port < us->us_smallest_anon_port)
2558 port = us->us_smallest_anon_port;
2559
2560 if (port > us->us_largest_anon_port) {
2561 port = us->us_smallest_anon_port;
2562 if (restart)
2563 return (0);
2564 restart = B_TRUE;
2565 }
2566
2567 if (port < us->us_smallest_nonpriv_port)
2568 port = us->us_smallest_nonpriv_port;
2569
2570 for (i = 0; i < us->us_num_epriv_ports; i++) {
2571 if (port == us->us_epriv_ports[i]) {
2572 port++;
2573 /*
2574 * Make sure that the port is in the
2575 * valid range.
2576 */
2577 goto retry;
2578 }
2579 }
2580
2581 if (is_system_labeled() &&
2582 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2583 port, IPPROTO_UDP, B_TRUE)) != 0) {
2584 port = nextport;
2585 goto retry;
2586 }
2587
2588 return (port);
2589 }
2590
2591 /*
2592 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2593 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2594 * the TPI options, otherwise we take them from msg_control.
2595 * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
2596 * Always consumes mp; never consumes tudr_mp.
2597 */
2598 static int
2599 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2600 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2601 {
2602 udp_t *udp = connp->conn_udp;
2603 udp_stack_t *us = udp->udp_us;
2604 int error;
2605 ip_xmit_attr_t *ixa;
2606 ip_pkt_t *ipp;
2607 in6_addr_t v6src;
2608 in6_addr_t v6dst;
2609 in6_addr_t v6nexthop;
2610 in_port_t dstport;
2611 uint32_t flowinfo;
2612 uint_t srcid;
2613 int is_absreq_failure = 0;
2614 conn_opt_arg_t coas, *coa;
2615
2616 ASSERT(tudr_mp != NULL || msg != NULL);
2617
2618 /*
2619 * Get ixa before checking state to handle a disconnect race.
2620 *
2621 * We need an exclusive copy of conn_ixa since the ancillary data
2622 * options might modify it. That copy has no pointers hence we
2623 * need to set them up once we've parsed the ancillary data.
2624 */
2625 ixa = conn_get_ixa_exclusive(connp);
2626 if (ixa == NULL) {
2627 UDPS_BUMP_MIB(us, udpOutErrors);
2628 freemsg(mp);
2629 return (ENOMEM);
2630 }
2631 ASSERT(cr != NULL);
2632 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2633 ixa->ixa_cred = cr;
2634 ixa->ixa_cpid = pid;
2635 if (is_system_labeled()) {
2636 /* We need to restart with a label based on the cred */
2637 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2638 }
2639
2640 /* In case previous destination was multicast or multirt */
2641 ip_attr_newdst(ixa);
2642
2643 /* Get a copy of conn_xmit_ipp since the options might change it */
2644 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2645 if (ipp == NULL) {
2646 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2647 ixa->ixa_cred = connp->conn_cred; /* Restore */
2648 ixa->ixa_cpid = connp->conn_cpid;
2649 ixa_refrele(ixa);
2650 UDPS_BUMP_MIB(us, udpOutErrors);
2651 freemsg(mp);
2652 return (ENOMEM);
2653 }
2654 mutex_enter(&connp->conn_lock);
2655 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2656 mutex_exit(&connp->conn_lock);
2657 if (error != 0) {
2658 UDPS_BUMP_MIB(us, udpOutErrors);
2659 freemsg(mp);
2660 goto done;
2661 }
2662
2663 /*
2664 * Parse the options and update ixa and ipp as a result.
2665 * Note that ixa_tsl can be updated if SCM_UCRED.
2666 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2667 */
2668
2669 coa = &coas;
2670 coa->coa_connp = connp;
2671 coa->coa_ixa = ixa;
2672 coa->coa_ipp = ipp;
2673 coa->coa_ancillary = B_TRUE;
2674 coa->coa_changed = 0;
2675
2676 if (msg != NULL) {
2677 error = process_auxiliary_options(connp, msg->msg_control,
2678 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2679 } else {
2680 struct T_unitdata_req *tudr;
2681
2682 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2683 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2684 error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2685 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2686 coa, &is_absreq_failure);
2687 }
2688 if (error != 0) {
2689 /*
2690 * Note: No special action needed in this
2691 * module for "is_absreq_failure"
2692 */
2693 freemsg(mp);
2694 UDPS_BUMP_MIB(us, udpOutErrors);
2695 goto done;
2696 }
2697 ASSERT(is_absreq_failure == 0);
2698
2699 mutex_enter(&connp->conn_lock);
2700 /*
2701 * If laddr is unspecified then we look at sin6_src_id.
2702 * We will give precedence to a source address set with IPV6_PKTINFO
2703 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2704 * want ip_attr_connect to select a source (since it can fail) when
2705 * IPV6_PKTINFO is specified.
2706 * If this doesn't result in a source address then we get a source
2707 * from ip_attr_connect() below.
2708 */
2709 v6src = connp->conn_saddr_v6;
2710 if (sin != NULL) {
2711 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2712 dstport = sin->sin_port;
2713 flowinfo = 0;
2714 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2715 ixa->ixa_flags |= IXAF_IS_IPV4;
2716 } else if (sin6 != NULL) {
2717 v6dst = sin6->sin6_addr;
2718 dstport = sin6->sin6_port;
2719 flowinfo = sin6->sin6_flowinfo;
2720 srcid = sin6->__sin6_src_id;
2721 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2722 ixa->ixa_scopeid = sin6->sin6_scope_id;
2723 ixa->ixa_flags |= IXAF_SCOPEID_SET;
2724 } else {
2725 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2726 }
2727 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2728 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2729 connp->conn_netstack);
2730 }
2731 if (IN6_IS_ADDR_V4MAPPED(&v6dst))
2732 ixa->ixa_flags |= IXAF_IS_IPV4;
2733 else
2734 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2735 } else {
2736 /* Connected case */
2737 v6dst = connp->conn_faddr_v6;
2738 dstport = connp->conn_fport;
2739 flowinfo = connp->conn_flowinfo;
2740 }
2741 mutex_exit(&connp->conn_lock);
2742
2743 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2744 if (ipp->ipp_fields & IPPF_ADDR) {
2745 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2746 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2747 v6src = ipp->ipp_addr;
2748 } else {
2749 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2750 v6src = ipp->ipp_addr;
2751 }
2752 }
2753
2754 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
2755 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2756 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2757
2758 switch (error) {
2759 case 0:
2760 break;
2761 case EADDRNOTAVAIL:
2762 /*
2763 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2764 * Don't have the application see that errno
2765 */
2766 error = ENETUNREACH;
2767 goto failed;
2768 case ENETDOWN:
2769 /*
2770 * Have !ipif_addr_ready address; drop packet silently
2771 * until we can get applications to not send until we
2772 * are ready.
2773 */
2774 error = 0;
2775 goto failed;
2776 case EHOSTUNREACH:
2777 case ENETUNREACH:
2778 if (ixa->ixa_ire != NULL) {
2779 /*
2780 * Let conn_ip_output/ire_send_noroute return
2781 * the error and send any local ICMP error.
2782 */
2783 error = 0;
2784 break;
2785 }
2786 /* FALLTHRU */
2787 default:
2788 failed:
2789 freemsg(mp);
2790 UDPS_BUMP_MIB(us, udpOutErrors);
2791 goto done;
2792 }
2793
2794 /*
2795 * We might be going to a different destination than last time,
2796 * thus check that TX allows the communication and compute any
2797 * needed label.
2798 *
2799 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2800 * don't have to worry about concurrent threads.
2801 */
2802 if (is_system_labeled()) {
2803 /* Using UDP MLP requires SCM_UCRED from user */
2804 if (connp->conn_mlp_type != mlptSingle &&
2805 !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2806 UDPS_BUMP_MIB(us, udpOutErrors);
2807 error = ECONNREFUSED;
2808 freemsg(mp);
2809 goto done;
2810 }
2811 /*
2812 * Check whether Trusted Solaris policy allows communication
2813 * with this host, and pretend that the destination is
2814 * unreachable if not.
2815 * Compute any needed label and place it in ipp_label_v4/v6.
2816 *
2817 * Later conn_build_hdr_template/conn_prepend_hdr takes
2818 * ipp_label_v4/v6 to form the packet.
2819 *
2820 * Tsol note: We have ipp structure local to this thread so
2821 * no locking is needed.
2822 */
2823 error = conn_update_label(connp, ixa, &v6dst, ipp);
2824 if (error != 0) {
2825 freemsg(mp);
2826 UDPS_BUMP_MIB(us, udpOutErrors);
2827 goto done;
2828 }
2829 }
2830 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2831 flowinfo, mp, &error);
2832 if (mp == NULL) {
2833 ASSERT(error != 0);
2834 UDPS_BUMP_MIB(us, udpOutErrors);
2835 goto done;
2836 }
2837 if (ixa->ixa_pktlen > IP_MAXPACKET) {
2838 error = EMSGSIZE;
2839 UDPS_BUMP_MIB(us, udpOutErrors);
2840 freemsg(mp);
2841 goto done;
2842 }
2843 /* We're done. Pass the packet to ip. */
2844 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2845
2846 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2847 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2848 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2849
2850 error = conn_ip_output(mp, ixa);
2851 /* No udpOutErrors if an error since IP increases its error counter */
2852 switch (error) {
2853 case 0:
2854 break;
2855 case EWOULDBLOCK:
2856 (void) ixa_check_drain_insert(connp, ixa);
2857 error = 0;
2858 break;
2859 case EADDRNOTAVAIL:
2860 /*
2861 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2862 * Don't have the application see that errno
2863 */
2864 error = ENETUNREACH;
2865 /* FALLTHRU */
2866 default:
2867 mutex_enter(&connp->conn_lock);
2868 /*
2869 * Clear the source and v6lastdst so we call ip_attr_connect
2870 * for the next packet and try to pick a better source.
2871 */
2872 if (connp->conn_mcbc_bind)
2873 connp->conn_saddr_v6 = ipv6_all_zeros;
2874 else
2875 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
2876 connp->conn_v6lastdst = ipv6_all_zeros;
2877 mutex_exit(&connp->conn_lock);
2878 break;
2879 }
2880 done:
2881 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2882 ixa->ixa_cred = connp->conn_cred; /* Restore */
2883 ixa->ixa_cpid = connp->conn_cpid;
2884 ixa_refrele(ixa);
2885 ip_pkt_free(ipp);
2886 kmem_free(ipp, sizeof (*ipp));
2887 return (error);
2888 }
2889
2890 /*
2891 * Handle sending an M_DATA for a connected socket.
2892 * Handles both IPv4 and IPv6.
2893 */
2894 static int
2895 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
2896 {
2897 udp_t *udp = connp->conn_udp;
2898 udp_stack_t *us = udp->udp_us;
2899 int error;
2900 ip_xmit_attr_t *ixa;
2901
2902 /*
2903 * If no other thread is using conn_ixa this just gets a reference to
2904 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
2905 */
2906 ixa = conn_get_ixa(connp, B_FALSE);
2907 if (ixa == NULL) {
2908 UDPS_BUMP_MIB(us, udpOutErrors);
2909 freemsg(mp);
2910 return (ENOMEM);
2911 }
2912
2913 ASSERT(cr != NULL);
2914 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2915 ixa->ixa_cred = cr;
2916 ixa->ixa_cpid = pid;
2917
2918 mutex_enter(&connp->conn_lock);
2919 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
2920 connp->conn_fport, connp->conn_flowinfo, &error);
2921
2922 if (mp == NULL) {
2923 ASSERT(error != 0);
2924 mutex_exit(&connp->conn_lock);
2925 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2926 ixa->ixa_cred = connp->conn_cred; /* Restore */
2927 ixa->ixa_cpid = connp->conn_cpid;
2928 ixa_refrele(ixa);
2929 UDPS_BUMP_MIB(us, udpOutErrors);
2930 freemsg(mp);
2931 return (error);
2932 }
2933
2934 /*
2935 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
2936 * safe copy, then we need to fill in any pointers in it.
2937 */
2938 if (ixa->ixa_ire == NULL) {
2939 in6_addr_t faddr, saddr;
2940 in6_addr_t nexthop;
2941 in_port_t fport;
2942
2943 saddr = connp->conn_saddr_v6;
2944 faddr = connp->conn_faddr_v6;
2945 fport = connp->conn_fport;
2946 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
2947 mutex_exit(&connp->conn_lock);
2948
2949 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
2950 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
2951 IPDF_IPSEC);
2952 switch (error) {
2953 case 0:
2954 break;
2955 case EADDRNOTAVAIL:
2956 /*
2957 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2958 * Don't have the application see that errno
2959 */
2960 error = ENETUNREACH;
2961 goto failed;
2962 case ENETDOWN:
2963 /*
2964 * Have !ipif_addr_ready address; drop packet silently
2965 * until we can get applications to not send until we
2966 * are ready.
2967 */
2968 error = 0;
2969 goto failed;
2970 case EHOSTUNREACH:
2971 case ENETUNREACH:
2972 if (ixa->ixa_ire != NULL) {
2973 /*
2974 * Let conn_ip_output/ire_send_noroute return
2975 * the error and send any local ICMP error.
2976 */
2977 error = 0;
2978 break;
2979 }
2980 /* FALLTHRU */
2981 default:
2982 failed:
2983 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2984 ixa->ixa_cred = connp->conn_cred; /* Restore */
2985 ixa->ixa_cpid = connp->conn_cpid;
2986 ixa_refrele(ixa);
2987 freemsg(mp);
2988 UDPS_BUMP_MIB(us, udpOutErrors);
2989 return (error);
2990 }
2991 } else {
2992 /* Done with conn_t */
2993 mutex_exit(&connp->conn_lock);
2994 }
2995 ASSERT(ixa->ixa_ire != NULL);
2996
2997 /* We're done. Pass the packet to ip. */
2998 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2999
3000 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3001 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3002 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3003
3004 error = conn_ip_output(mp, ixa);
3005 /* No udpOutErrors if an error since IP increases its error counter */
3006 switch (error) {
3007 case 0:
3008 break;
3009 case EWOULDBLOCK:
3010 (void) ixa_check_drain_insert(connp, ixa);
3011 error = 0;
3012 break;
3013 case EADDRNOTAVAIL:
3014 /*
3015 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3016 * Don't have the application see that errno
3017 */
3018 error = ENETUNREACH;
3019 break;
3020 }
3021 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3022 ixa->ixa_cred = connp->conn_cred; /* Restore */
3023 ixa->ixa_cpid = connp->conn_cpid;
3024 ixa_refrele(ixa);
3025 return (error);
3026 }
3027
3028 /*
3029 * Handle sending an M_DATA to the last destination.
3030 * Handles both IPv4 and IPv6.
3031 *
3032 * NOTE: The caller must hold conn_lock and we drop it here.
3033 */
3034 static int
3035 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3036 ip_xmit_attr_t *ixa)
3037 {
3038 udp_t *udp = connp->conn_udp;
3039 udp_stack_t *us = udp->udp_us;
3040 int error;
3041
3042 ASSERT(MUTEX_HELD(&connp->conn_lock));
3043 ASSERT(ixa != NULL);
3044
3045 ASSERT(cr != NULL);
3046 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3047 ixa->ixa_cred = cr;
3048 ixa->ixa_cpid = pid;
3049
3050 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3051 connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3052
3053 if (mp == NULL) {
3054 ASSERT(error != 0);
3055 mutex_exit(&connp->conn_lock);
3056 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3057 ixa->ixa_cred = connp->conn_cred; /* Restore */
3058 ixa->ixa_cpid = connp->conn_cpid;
3059 ixa_refrele(ixa);
3060 UDPS_BUMP_MIB(us, udpOutErrors);
3061 freemsg(mp);
3062 return (error);
3063 }
3064
3065 /*
3066 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3067 * safe copy, then we need to fill in any pointers in it.
3068 */
3069 if (ixa->ixa_ire == NULL) {
3070 in6_addr_t lastdst, lastsrc;
3071 in6_addr_t nexthop;
3072 in_port_t lastport;
3073
3074 lastsrc = connp->conn_v6lastsrc;
3075 lastdst = connp->conn_v6lastdst;
3076 lastport = connp->conn_lastdstport;
3077 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3078 mutex_exit(&connp->conn_lock);
3079
3080 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3081 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3082 IPDF_VERIFY_DST | IPDF_IPSEC);
3083 switch (error) {
3084 case 0:
3085 break;
3086 case EADDRNOTAVAIL:
3087 /*
3088 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3089 * Don't have the application see that errno
3090 */
3091 error = ENETUNREACH;
3092 goto failed;
3093 case ENETDOWN:
3094 /*
3095 * Have !ipif_addr_ready address; drop packet silently
3096 * until we can get applications to not send until we
3097 * are ready.
3098 */
3099 error = 0;
3100 goto failed;
3101 case EHOSTUNREACH:
3102 case ENETUNREACH:
3103 if (ixa->ixa_ire != NULL) {
3104 /*
3105 * Let conn_ip_output/ire_send_noroute return
3106 * the error and send any local ICMP error.
3107 */
3108 error = 0;
3109 break;
3110 }
3111 /* FALLTHRU */
3112 default:
3113 failed:
3114 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3115 ixa->ixa_cred = connp->conn_cred; /* Restore */
3116 ixa->ixa_cpid = connp->conn_cpid;
3117 ixa_refrele(ixa);
3118 freemsg(mp);
3119 UDPS_BUMP_MIB(us, udpOutErrors);
3120 return (error);
3121 }
3122 } else {
3123 /* Done with conn_t */
3124 mutex_exit(&connp->conn_lock);
3125 }
3126
3127 /* We're done. Pass the packet to ip. */
3128 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3129
3130 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3131 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3132 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3133
3134 error = conn_ip_output(mp, ixa);
3135 /* No udpOutErrors if an error since IP increases its error counter */
3136 switch (error) {
3137 case 0:
3138 break;
3139 case EWOULDBLOCK:
3140 (void) ixa_check_drain_insert(connp, ixa);
3141 error = 0;
3142 break;
3143 case EADDRNOTAVAIL:
3144 /*
3145 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3146 * Don't have the application see that errno
3147 */
3148 error = ENETUNREACH;
3149 /* FALLTHRU */
3150 default:
3151 mutex_enter(&connp->conn_lock);
3152 /*
3153 * Clear the source and v6lastdst so we call ip_attr_connect
3154 * for the next packet and try to pick a better source.
3155 */
3156 if (connp->conn_mcbc_bind)
3157 connp->conn_saddr_v6 = ipv6_all_zeros;
3158 else
3159 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3160 connp->conn_v6lastdst = ipv6_all_zeros;
3161 mutex_exit(&connp->conn_lock);
3162 break;
3163 }
3164 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3165 ixa->ixa_cred = connp->conn_cred; /* Restore */
3166 ixa->ixa_cpid = connp->conn_cpid;
3167 ixa_refrele(ixa);
3168 return (error);
3169 }
3170
3171
3172 /*
3173 * Prepend the header template and then fill in the source and
3174 * flowinfo. The caller needs to handle the destination address since
3175 * it's setting is different if rthdr or source route.
3176 *
3177 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3178 * When it returns NULL it sets errorp.
3179 */
3180 static mblk_t *
3181 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3182 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3183 {
3184 udp_t *udp = connp->conn_udp;
3185 udp_stack_t *us = udp->udp_us;
3186 boolean_t insert_spi = udp->udp_nat_t_endpoint;
3187 uint_t pktlen;
3188 uint_t alloclen;
3189 uint_t copylen;
3190 uint8_t *iph;
3191 uint_t ip_hdr_length;
3192 udpha_t *udpha;
3193 uint32_t cksum;
3194 ip_pkt_t *ipp;
3195
3196 ASSERT(MUTEX_HELD(&connp->conn_lock));
3197
3198 /*
3199 * Copy the header template and leave space for an SPI
3200 */
3201 copylen = connp->conn_ht_iphc_len;
3202 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3203 pktlen = alloclen + msgdsize(mp);
3204 if (pktlen > IP_MAXPACKET) {
3205 freemsg(mp);
3206 *errorp = EMSGSIZE;
3207 return (NULL);
3208 }
3209 ixa->ixa_pktlen = pktlen;
3210
3211 /* check/fix buffer config, setup pointers into it */
3212 iph = mp->b_rptr - alloclen;
3213 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3214 mblk_t *mp1;
3215
3216 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3217 if (mp1 == NULL) {
3218 freemsg(mp);
3219 *errorp = ENOMEM;
3220 return (NULL);
3221 }
3222 mp1->b_wptr = DB_LIM(mp1);
3223 mp1->b_cont = mp;
3224 mp = mp1;
3225 iph = (mp->b_wptr - alloclen);
3226 }
3227 mp->b_rptr = iph;
3228 bcopy(connp->conn_ht_iphc, iph, copylen);
3229 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3230
3231 ixa->ixa_ip_hdr_length = ip_hdr_length;
3232 udpha = (udpha_t *)(iph + ip_hdr_length);
3233
3234 /*
3235 * Setup header length and prepare for ULP checksum done in IP.
3236 * udp_build_hdr_template has already massaged any routing header
3237 * and placed the result in conn_sum.
3238 *
3239 * We make it easy for IP to include our pseudo header
3240 * by putting our length in uha_checksum.
3241 */
3242 cksum = pktlen - ip_hdr_length;
3243 udpha->uha_length = htons(cksum);
3244
3245 cksum += connp->conn_sum;
3246 cksum = (cksum >> 16) + (cksum & 0xFFFF);
3247 ASSERT(cksum < 0x10000);
3248
3249 ipp = &connp->conn_xmit_ipp;
3250 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3251 ipha_t *ipha = (ipha_t *)iph;
3252
3253 ipha->ipha_length = htons((uint16_t)pktlen);
3254
3255 /* IP does the checksum if uha_checksum is non-zero */
3256 if (us->us_do_checksum)
3257 udpha->uha_checksum = htons(cksum);
3258
3259 /* if IP_PKTINFO specified an addres it wins over bind() */
3260 if ((ipp->ipp_fields & IPPF_ADDR) &&
3261 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3262 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3263 ipha->ipha_src = ipp->ipp_addr_v4;
3264 } else {
3265 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3266 }
3267 } else {
3268 ip6_t *ip6h = (ip6_t *)iph;
3269
3270 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3271 udpha->uha_checksum = htons(cksum);
3272
3273 /* if IP_PKTINFO specified an addres it wins over bind() */
3274 if ((ipp->ipp_fields & IPPF_ADDR) &&
3275 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3276 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3277 ip6h->ip6_src = ipp->ipp_addr;
3278 } else {
3279 ip6h->ip6_src = *v6src;
3280 }
3281 ip6h->ip6_vcf =
3282 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3283 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3284 if (ipp->ipp_fields & IPPF_TCLASS) {
3285 /* Overrides the class part of flowinfo */
3286 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3287 ipp->ipp_tclass);
3288 }
3289 }
3290
3291 /* Insert all-0s SPI now. */
3292 if (insert_spi)
3293 *((uint32_t *)(udpha + 1)) = 0;
3294
3295 udpha->uha_dst_port = dstport;
3296 return (mp);
3297 }
3298
3299 /*
3300 * Send a T_UDERR_IND in response to an M_DATA
3301 */
3302 static void
3303 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3304 {
3305 struct sockaddr_storage ss;
3306 sin_t *sin;
3307 sin6_t *sin6;
3308 struct sockaddr *addr;
3309 socklen_t addrlen;
3310 mblk_t *mp1;
3311
3312 mutex_enter(&connp->conn_lock);
3313 /* Initialize addr and addrlen as if they're passed in */
3314 if (connp->conn_family == AF_INET) {
3315 sin = (sin_t *)&ss;
3316 *sin = sin_null;
3317 sin->sin_family = AF_INET;
3318 sin->sin_port = connp->conn_fport;
3319 sin->sin_addr.s_addr = connp->conn_faddr_v4;
3320 addr = (struct sockaddr *)sin;
3321 addrlen = sizeof (*sin);
3322 } else {
3323 sin6 = (sin6_t *)&ss;
3324 *sin6 = sin6_null;
3325 sin6->sin6_family = AF_INET6;
3326 sin6->sin6_port = connp->conn_fport;
3327 sin6->sin6_flowinfo = connp->conn_flowinfo;
3328 sin6->sin6_addr = connp->conn_faddr_v6;
3329 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3330 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3331 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3332 } else {
3333 sin6->sin6_scope_id = 0;
3334 }
3335 sin6->__sin6_src_id = 0;
3336 addr = (struct sockaddr *)sin6;
3337 addrlen = sizeof (*sin6);
3338 }
3339 mutex_exit(&connp->conn_lock);
3340
3341 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3342 if (mp1 != NULL)
3343 putnext(connp->conn_rq, mp1);
3344 }
3345
3346 /*
3347 * This routine handles all messages passed downstream. It either
3348 * consumes the message or passes it downstream; it never queues a
3349 * a message.
3350 *
3351 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode
3352 * is valid when we are directly beneath the stream head, and thus sockfs
3353 * is able to bypass STREAMS and directly call us, passing along the sockaddr
3354 * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3355 * connected endpoints.
3356 */
3357 void
3358 udp_wput(queue_t *q, mblk_t *mp)
3359 {
3360 sin6_t *sin6;
3361 sin_t *sin = NULL;
3362 uint_t srcid;
3363 conn_t *connp = Q_TO_CONN(q);
3364 udp_t *udp = connp->conn_udp;
3365 int error = 0;
3366 struct sockaddr *addr = NULL;
3367 socklen_t addrlen;
3368 udp_stack_t *us = udp->udp_us;
3369 struct T_unitdata_req *tudr;
3370 mblk_t *data_mp;
3371 ushort_t ipversion;
3372 cred_t *cr;
3373 pid_t pid;
3374
3375 /*
3376 * We directly handle several cases here: T_UNITDATA_REQ message
3377 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3378 * socket.
3379 */
3380 switch (DB_TYPE(mp)) {
3381 case M_DATA:
3382 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3383 /* Not connected; address is required */
3384 UDPS_BUMP_MIB(us, udpOutErrors);
3385 UDP_DBGSTAT(us, udp_data_notconn);
3386 UDP_STAT(us, udp_out_err_notconn);
3387 freemsg(mp);
3388 return;
3389 }
3390 /*
3391 * All Solaris components should pass a db_credp
3392 * for this message, hence we ASSERT.
3393 * On production kernels we return an error to be robust against
3394 * random streams modules sitting on top of us.
3395 */
3396 cr = msg_getcred(mp, &pid);
3397 ASSERT(cr != NULL);
3398 if (cr == NULL) {
3399 UDPS_BUMP_MIB(us, udpOutErrors);
3400 freemsg(mp);
3401 return;
3402 }
3403 ASSERT(udp->udp_issocket);
3404 UDP_DBGSTAT(us, udp_data_conn);
3405 error = udp_output_connected(connp, mp, cr, pid);
3406 if (error != 0) {
3407 UDP_STAT(us, udp_out_err_output);
3408 if (connp->conn_rq != NULL)
3409 udp_ud_err_connected(connp, (t_scalar_t)error);
3410 #ifdef DEBUG
3411 printf("udp_output_connected returned %d\n", error);
3412 #endif
3413 }
3414 return;
3415
3416 case M_PROTO:
3417 case M_PCPROTO:
3418 tudr = (struct T_unitdata_req *)mp->b_rptr;
3419 if (MBLKL(mp) < sizeof (*tudr) ||
3420 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3421 udp_wput_other(q, mp);
3422 return;
3423 }
3424 break;
3425
3426 default:
3427 udp_wput_other(q, mp);
3428 return;
3429 }
3430
3431 /* Handle valid T_UNITDATA_REQ here */
3432 data_mp = mp->b_cont;
3433 if (data_mp == NULL) {
3434 error = EPROTO;
3435 goto ud_error2;
3436 }
3437 mp->b_cont = NULL;
3438
3439 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3440 error = EADDRNOTAVAIL;
3441 goto ud_error2;
3442 }
3443
3444 /*
3445 * All Solaris components should pass a db_credp
3446 * for this TPI message, hence we should ASSERT.
3447 * However, RPC (svc_clts_ksend) does this odd thing where it
3448 * passes the options from a T_UNITDATA_IND unchanged in a
3449 * T_UNITDATA_REQ. While that is the right thing to do for
3450 * some options, SCM_UCRED being the key one, this also makes it
3451 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3452 */
3453 cr = msg_getcred(mp, &pid);
3454 if (cr == NULL) {
3455 cr = connp->conn_cred;
3456 pid = connp->conn_cpid;
3457 }
3458
3459 /*
3460 * If a port has not been bound to the stream, fail.
3461 * This is not a problem when sockfs is directly
3462 * above us, because it will ensure that the socket
3463 * is first bound before allowing data to be sent.
3464 */
3465 if (udp->udp_state == TS_UNBND) {
3466 error = EPROTO;
3467 goto ud_error2;
3468 }
3469 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3470 addrlen = tudr->DEST_length;
3471
3472 switch (connp->conn_family) {
3473 case AF_INET6:
3474 sin6 = (sin6_t *)addr;
3475 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3476 (sin6->sin6_family != AF_INET6)) {
3477 error = EADDRNOTAVAIL;
3478 goto ud_error2;
3479 }
3480
3481 srcid = sin6->__sin6_src_id;
3482 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3483 /*
3484 * Destination is a non-IPv4-compatible IPv6 address.
3485 * Send out an IPv6 format packet.
3486 */
3487
3488 /*
3489 * If the local address is a mapped address return
3490 * an error.
3491 * It would be possible to send an IPv6 packet but the
3492 * response would never make it back to the application
3493 * since it is bound to a mapped address.
3494 */
3495 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3496 error = EADDRNOTAVAIL;
3497 goto ud_error2;
3498 }
3499
3500 UDP_DBGSTAT(us, udp_out_ipv6);
3501
3502 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3503 sin6->sin6_addr = ipv6_loopback;
3504 ipversion = IPV6_VERSION;
3505 } else {
3506 if (connp->conn_ipv6_v6only) {
3507 error = EADDRNOTAVAIL;
3508 goto ud_error2;
3509 }
3510
3511 /*
3512 * If the local address is not zero or a mapped address
3513 * return an error. It would be possible to send an
3514 * IPv4 packet but the response would never make it
3515 * back to the application since it is bound to a
3516 * non-mapped address.
3517 */
3518 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3519 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3520 error = EADDRNOTAVAIL;
3521 goto ud_error2;
3522 }
3523 UDP_DBGSTAT(us, udp_out_mapped);
3524
3525 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3526 V4_PART_OF_V6(sin6->sin6_addr) =
3527 htonl(INADDR_LOOPBACK);
3528 }
3529 ipversion = IPV4_VERSION;
3530 }
3531
3532 if (tudr->OPT_length != 0) {
3533 /*
3534 * If we are connected then the destination needs to be
3535 * the same as the connected one.
3536 */
3537 if (udp->udp_state == TS_DATA_XFER &&
3538 !conn_same_as_last_v6(connp, sin6)) {
3539 error = EISCONN;
3540 goto ud_error2;
3541 }
3542 UDP_STAT(us, udp_out_opt);
3543 error = udp_output_ancillary(connp, NULL, sin6,
3544 data_mp, mp, NULL, cr, pid);
3545 } else {
3546 ip_xmit_attr_t *ixa;
3547
3548 /*
3549 * We have to allocate an ip_xmit_attr_t before we grab
3550 * conn_lock and we need to hold conn_lock once we've
3551 * checked conn_same_as_last_v6 to handle concurrent
3552 * send* calls on a socket.
3553 */
3554 ixa = conn_get_ixa(connp, B_FALSE);
3555 if (ixa == NULL) {
3556 error = ENOMEM;
3557 goto ud_error2;
3558 }
3559 mutex_enter(&connp->conn_lock);
3560
3561 if (conn_same_as_last_v6(connp, sin6) &&
3562 connp->conn_lastsrcid == srcid &&
3563 ipsec_outbound_policy_current(ixa)) {
3564 UDP_DBGSTAT(us, udp_out_lastdst);
3565 /* udp_output_lastdst drops conn_lock */
3566 error = udp_output_lastdst(connp, data_mp, cr,
3567 pid, ixa);
3568 } else {
3569 UDP_DBGSTAT(us, udp_out_diffdst);
3570 /* udp_output_newdst drops conn_lock */
3571 error = udp_output_newdst(connp, data_mp, NULL,
3572 sin6, ipversion, cr, pid, ixa);
3573 }
3574 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3575 }
3576 if (error == 0) {
3577 freeb(mp);
3578 return;
3579 }
3580 break;
3581
3582 case AF_INET:
3583 sin = (sin_t *)addr;
3584 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3585 (sin->sin_family != AF_INET)) {
3586 error = EADDRNOTAVAIL;
3587 goto ud_error2;
3588 }
3589 UDP_DBGSTAT(us, udp_out_ipv4);
3590 if (sin->sin_addr.s_addr == INADDR_ANY)
3591 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3592 ipversion = IPV4_VERSION;
3593
3594 srcid = 0;
3595 if (tudr->OPT_length != 0) {
3596 /*
3597 * If we are connected then the destination needs to be
3598 * the same as the connected one.
3599 */
3600 if (udp->udp_state == TS_DATA_XFER &&
3601 !conn_same_as_last_v4(connp, sin)) {
3602 error = EISCONN;
3603 goto ud_error2;
3604 }
3605 UDP_STAT(us, udp_out_opt);
3606 error = udp_output_ancillary(connp, sin, NULL,
3607 data_mp, mp, NULL, cr, pid);
3608 } else {
3609 ip_xmit_attr_t *ixa;
3610
3611 /*
3612 * We have to allocate an ip_xmit_attr_t before we grab
3613 * conn_lock and we need to hold conn_lock once we've
3614 * checked conn_same_as_last_v4 to handle concurrent
3615 * send* calls on a socket.
3616 */
3617 ixa = conn_get_ixa(connp, B_FALSE);
3618 if (ixa == NULL) {
3619 error = ENOMEM;
3620 goto ud_error2;
3621 }
3622 mutex_enter(&connp->conn_lock);
3623
3624 if (conn_same_as_last_v4(connp, sin) &&
3625 ipsec_outbound_policy_current(ixa)) {
3626 UDP_DBGSTAT(us, udp_out_lastdst);
3627 /* udp_output_lastdst drops conn_lock */
3628 error = udp_output_lastdst(connp, data_mp, cr,
3629 pid, ixa);
3630 } else {
3631 UDP_DBGSTAT(us, udp_out_diffdst);
3632 /* udp_output_newdst drops conn_lock */
3633 error = udp_output_newdst(connp, data_mp, sin,
3634 NULL, ipversion, cr, pid, ixa);
3635 }
3636 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3637 }
3638 if (error == 0) {
3639 freeb(mp);
3640 return;
3641 }
3642 break;
3643 }
3644 UDP_STAT(us, udp_out_err_output);
3645 ASSERT(mp != NULL);
3646 /* mp is freed by the following routine */
3647 udp_ud_err(q, mp, (t_scalar_t)error);
3648 return;
3649
3650 ud_error2:
3651 UDPS_BUMP_MIB(us, udpOutErrors);
3652 freemsg(data_mp);
3653 UDP_STAT(us, udp_out_err_output);
3654 ASSERT(mp != NULL);
3655 /* mp is freed by the following routine */
3656 udp_ud_err(q, mp, (t_scalar_t)error);
3657 }
3658
3659 /*
3660 * Handle the case of the IP address, port, flow label being different
3661 * for both IPv4 and IPv6.
3662 *
3663 * NOTE: The caller must hold conn_lock and we drop it here.
3664 */
3665 static int
3666 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3667 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3668 {
3669 uint_t srcid;
3670 uint32_t flowinfo;
3671 udp_t *udp = connp->conn_udp;
3672 int error = 0;
3673 ip_xmit_attr_t *oldixa;
3674 udp_stack_t *us = udp->udp_us;
3675 in6_addr_t v6src;
3676 in6_addr_t v6dst;
3677 in6_addr_t v6nexthop;
3678 in_port_t dstport;
3679
3680 ASSERT(MUTEX_HELD(&connp->conn_lock));
3681 ASSERT(ixa != NULL);
3682 /*
3683 * We hold conn_lock across all the use and modifications of
3684 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3685 * stay consistent.
3686 */
3687
3688 ASSERT(cr != NULL);
3689 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3690 ixa->ixa_cred = cr;
3691 ixa->ixa_cpid = pid;
3692 if (is_system_labeled()) {
3693 /* We need to restart with a label based on the cred */
3694 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3695 }
3696
3697 /*
3698 * If we are connected then the destination needs to be the
3699 * same as the connected one, which is not the case here since we
3700 * checked for that above.
3701 */
3702 if (udp->udp_state == TS_DATA_XFER) {
3703 mutex_exit(&connp->conn_lock);
3704 error = EISCONN;
3705 goto ud_error;
3706 }
3707
3708 /* In case previous destination was multicast or multirt */
3709 ip_attr_newdst(ixa);
3710
3711 /*
3712 * If laddr is unspecified then we look at sin6_src_id.
3713 * We will give precedence to a source address set with IPV6_PKTINFO
3714 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3715 * want ip_attr_connect to select a source (since it can fail) when
3716 * IPV6_PKTINFO is specified.
3717 * If this doesn't result in a source address then we get a source
3718 * from ip_attr_connect() below.
3719 */
3720 v6src = connp->conn_saddr_v6;
3721 if (sin != NULL) {
3722 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3723 dstport = sin->sin_port;
3724 flowinfo = 0;
3725 srcid = 0;
3726 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3727 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) {
3728 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3729 connp->conn_netstack);
3730 }
3731 ixa->ixa_flags |= IXAF_IS_IPV4;
3732 } else {
3733 v6dst = sin6->sin6_addr;
3734 dstport = sin6->sin6_port;
3735 flowinfo = sin6->sin6_flowinfo;
3736 srcid = sin6->__sin6_src_id;
3737 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3738 ixa->ixa_scopeid = sin6->sin6_scope_id;
3739 ixa->ixa_flags |= IXAF_SCOPEID_SET;
3740 } else {
3741 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3742 }
3743 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3744 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3745 connp->conn_netstack);
3746 }
3747 if (IN6_IS_ADDR_V4MAPPED(&v6dst))
3748 ixa->ixa_flags |= IXAF_IS_IPV4;
3749 else
3750 ixa->ixa_flags &= ~IXAF_IS_IPV4;
3751 }
3752 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3753 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3754 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3755
3756 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3757 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3758 v6src = ipp->ipp_addr;
3759 } else {
3760 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3761 v6src = ipp->ipp_addr;
3762 }
3763 }
3764
3765 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3766 mutex_exit(&connp->conn_lock);
3767
3768 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3769 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3770 switch (error) {
3771 case 0:
3772 break;
3773 case EADDRNOTAVAIL:
3774 /*
3775 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3776 * Don't have the application see that errno
3777 */
3778 error = ENETUNREACH;
3779 goto failed;
3780 case ENETDOWN:
3781 /*
3782 * Have !ipif_addr_ready address; drop packet silently
3783 * until we can get applications to not send until we
3784 * are ready.
3785 */
3786 error = 0;
3787 goto failed;
3788 case EHOSTUNREACH:
3789 case ENETUNREACH:
3790 if (ixa->ixa_ire != NULL) {
3791 /*
3792 * Let conn_ip_output/ire_send_noroute return
3793 * the error and send any local ICMP error.
3794 */
3795 error = 0;
3796 break;
3797 }
3798 /* FALLTHRU */
3799 failed:
3800 default:
3801 goto ud_error;
3802 }
3803
3804
3805 /*
3806 * Cluster note: we let the cluster hook know that we are sending to a
3807 * new address and/or port.
3808 */
3809 if (cl_inet_connect2 != NULL) {
3810 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3811 if (error != 0) {
3812 error = EHOSTUNREACH;
3813 goto ud_error;
3814 }
3815 }
3816
3817 mutex_enter(&connp->conn_lock);
3818 /*
3819 * While we dropped the lock some other thread might have connected
3820 * this socket. If so we bail out with EISCONN to ensure that the
3821 * connecting thread is the one that updates conn_ixa, conn_ht_*
3822 * and conn_*last*.
3823 */
3824 if (udp->udp_state == TS_DATA_XFER) {
3825 mutex_exit(&connp->conn_lock);
3826 error = EISCONN;
3827 goto ud_error;
3828 }
3829
3830 /*
3831 * We need to rebuild the headers if
3832 * - we are labeling packets (could be different for different
3833 * destinations)
3834 * - we have a source route (or routing header) since we need to
3835 * massage that to get the pseudo-header checksum
3836 * - the IP version is different than the last time
3837 * - a socket option with COA_HEADER_CHANGED has been set which
3838 * set conn_v6lastdst to zero.
3839 *
3840 * Otherwise the prepend function will just update the src, dst,
3841 * dstport, and flow label.
3842 */
3843 if (is_system_labeled()) {
3844 /* TX MLP requires SCM_UCRED and don't have that here */
3845 if (connp->conn_mlp_type != mlptSingle) {
3846 mutex_exit(&connp->conn_lock);
3847 error = ECONNREFUSED;
3848 goto ud_error;
3849 }
3850 /*
3851 * Check whether Trusted Solaris policy allows communication
3852 * with this host, and pretend that the destination is
3853 * unreachable if not.
3854 * Compute any needed label and place it in ipp_label_v4/v6.
3855 *
3856 * Later conn_build_hdr_template/conn_prepend_hdr takes
3857 * ipp_label_v4/v6 to form the packet.
3858 *
3859 * Tsol note: Since we hold conn_lock we know no other
3860 * thread manipulates conn_xmit_ipp.
3861 */
3862 error = conn_update_label(connp, ixa, &v6dst,
3863 &connp->conn_xmit_ipp);
3864 if (error != 0) {
3865 mutex_exit(&connp->conn_lock);
3866 goto ud_error;
3867 }
3868 /* Rebuild the header template */
3869 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
3870 flowinfo);
3871 if (error != 0) {
3872 mutex_exit(&connp->conn_lock);
3873 goto ud_error;
3874 }
3875 } else if ((connp->conn_xmit_ipp.ipp_fields &
3876 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
3877 ipversion != connp->conn_lastipversion ||
3878 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
3879 /* Rebuild the header template */
3880 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
3881 flowinfo);
3882 if (error != 0) {
3883 mutex_exit(&connp->conn_lock);
3884 goto ud_error;
3885 }
3886 } else {
3887 /* Simply update the destination address if no source route */
3888 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3889 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc;
3890
3891 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
3892 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
3893 ipha->ipha_fragment_offset_and_flags |=
3894 IPH_DF_HTONS;
3895 } else {
3896 ipha->ipha_fragment_offset_and_flags &=
3897 ~IPH_DF_HTONS;
3898 }
3899 } else {
3900 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
3901 ip6h->ip6_dst = v6dst;
3902 }
3903 }
3904
3905 /*
3906 * Remember the dst/dstport etc which corresponds to the built header
3907 * template and conn_ixa.
3908 */
3909 oldixa = conn_replace_ixa(connp, ixa);
3910 connp->conn_v6lastdst = v6dst;
3911 connp->conn_lastipversion = ipversion;
3912 connp->conn_lastdstport = dstport;
3913 connp->conn_lastflowinfo = flowinfo;
3914 connp->conn_lastscopeid = ixa->ixa_scopeid;
3915 connp->conn_lastsrcid = srcid;
3916 /* Also remember a source to use together with lastdst */
3917 connp->conn_v6lastsrc = v6src;
3918
3919 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
3920 dstport, flowinfo, &error);
3921
3922 /* Done with conn_t */
3923 mutex_exit(&connp->conn_lock);
3924 ixa_refrele(oldixa);
3925
3926 if (data_mp == NULL) {
3927 ASSERT(error != 0);
3928 goto ud_error;
3929 }
3930
3931 /* We're done. Pass the packet to ip. */
3932 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3933
3934 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3935 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
3936 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
3937
3938 error = conn_ip_output(data_mp, ixa);
3939 /* No udpOutErrors if an error since IP increases its error counter */
3940 switch (error) {
3941 case 0:
3942 break;
3943 case EWOULDBLOCK:
3944 (void) ixa_check_drain_insert(connp, ixa);
3945 error = 0;
3946 break;
3947 case EADDRNOTAVAIL:
3948 /*
3949 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3950 * Don't have the application see that errno
3951 */
3952 error = ENETUNREACH;
3953 /* FALLTHRU */
3954 default:
3955 mutex_enter(&connp->conn_lock);
3956 /*
3957 * Clear the source and v6lastdst so we call ip_attr_connect
3958 * for the next packet and try to pick a better source.
3959 */
3960 if (connp->conn_mcbc_bind)
3961 connp->conn_saddr_v6 = ipv6_all_zeros;
3962 else
3963 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3964 connp->conn_v6lastdst = ipv6_all_zeros;
3965 mutex_exit(&connp->conn_lock);
3966 break;
3967 }
3968 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3969 ixa->ixa_cred = connp->conn_cred; /* Restore */
3970 ixa->ixa_cpid = connp->conn_cpid;
3971 ixa_refrele(ixa);
3972 return (error);
3973
3974 ud_error:
3975 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3976 ixa->ixa_cred = connp->conn_cred; /* Restore */
3977 ixa->ixa_cpid = connp->conn_cpid;
3978 ixa_refrele(ixa);
3979
3980 freemsg(data_mp);
3981 UDPS_BUMP_MIB(us, udpOutErrors);
3982 UDP_STAT(us, udp_out_err_output);
3983 return (error);
3984 }
3985
3986 /* ARGSUSED */
3987 static void
3988 udp_wput_fallback(queue_t *wq, mblk_t *mp)
3989 {
3990 #ifdef DEBUG
3991 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
3992 #endif
3993 freemsg(mp);
3994 }
3995
3996
3997 /*
3998 * Handle special out-of-band ioctl requests (see PSARC/2008/265).
3999 */
4000 static void
4001 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4002 {
4003 void *data;
4004 mblk_t *datamp = mp->b_cont;
4005 conn_t *connp = Q_TO_CONN(q);
4006 udp_t *udp = connp->conn_udp;
4007 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4008
4009 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4010 cmdp->cb_error = EPROTO;
4011 qreply(q, mp);
4012 return;
4013 }
4014 data = datamp->b_rptr;
4015
4016 mutex_enter(&connp->conn_lock);
4017 switch (cmdp->cb_cmd) {
4018 case TI_GETPEERNAME:
4019 if (udp->udp_state != TS_DATA_XFER)
4020 cmdp->cb_error = ENOTCONN;
4021 else
4022 cmdp->cb_error = conn_getpeername(connp, data,
4023 &cmdp->cb_len);
4024 break;
4025 case TI_GETMYNAME:
4026 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4027 break;
4028 default:
4029 cmdp->cb_error = EINVAL;
4030 break;
4031 }
4032 mutex_exit(&connp->conn_lock);
4033
4034 qreply(q, mp);
4035 }
4036
4037 static void
4038 udp_use_pure_tpi(udp_t *udp)
4039 {
4040 conn_t *connp = udp->udp_connp;
4041
4042 mutex_enter(&connp->conn_lock);
4043 udp->udp_issocket = B_FALSE;
4044 mutex_exit(&connp->conn_lock);
4045 UDP_STAT(udp->udp_us, udp_sock_fallback);
4046 }
4047
4048 static void
4049 udp_wput_other(queue_t *q, mblk_t *mp)
4050 {
4051 uchar_t *rptr = mp->b_rptr;
4052 struct iocblk *iocp;
4053 conn_t *connp = Q_TO_CONN(q);
4054 udp_t *udp = connp->conn_udp;
4055 cred_t *cr;
4056
4057 switch (mp->b_datap->db_type) {
4058 case M_CMD:
4059 udp_wput_cmdblk(q, mp);
4060 return;
4061
4062 case M_PROTO:
4063 case M_PCPROTO:
4064 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4065 /*
4066 * If the message does not contain a PRIM_type,
4067 * throw it away.
4068 */
4069 freemsg(mp);
4070 return;
4071 }
4072 switch (((t_primp_t)rptr)->type) {
4073 case T_ADDR_REQ:
4074 udp_addr_req(q, mp);
4075 return;
4076 case O_T_BIND_REQ:
4077 case T_BIND_REQ:
4078 udp_tpi_bind(q, mp);
4079 return;
4080 case T_CONN_REQ:
4081 udp_tpi_connect(q, mp);
4082 return;
4083 case T_CAPABILITY_REQ:
4084 udp_capability_req(q, mp);
4085 return;
4086 case T_INFO_REQ:
4087 udp_info_req(q, mp);
4088 return;
4089 case T_UNITDATA_REQ:
4090 /*
4091 * If a T_UNITDATA_REQ gets here, the address must
4092 * be bad. Valid T_UNITDATA_REQs are handled
4093 * in udp_wput.
4094 */
4095 udp_ud_err(q, mp, EADDRNOTAVAIL);
4096 return;
4097 case T_UNBIND_REQ:
4098 udp_tpi_unbind(q, mp);
4099 return;
4100 case T_SVR4_OPTMGMT_REQ:
4101 /*
4102 * All Solaris components should pass a db_credp
4103 * for this TPI message, hence we ASSERT.
4104 * But in case there is some other M_PROTO that looks
4105 * like a TPI message sent by some other kernel
4106 * component, we check and return an error.
4107 */
4108 cr = msg_getcred(mp, NULL);
4109 ASSERT(cr != NULL);
4110 if (cr == NULL) {
4111 udp_err_ack(q, mp, TSYSERR, EINVAL);
4112 return;
4113 }
4114 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4115 cr)) {
4116 svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4117 }
4118 return;
4119
4120 case T_OPTMGMT_REQ:
4121 /*
4122 * All Solaris components should pass a db_credp
4123 * for this TPI message, hence we ASSERT.
4124 * But in case there is some other M_PROTO that looks
4125 * like a TPI message sent by some other kernel
4126 * component, we check and return an error.
4127 */
4128 cr = msg_getcred(mp, NULL);
4129 ASSERT(cr != NULL);
4130 if (cr == NULL) {
4131 udp_err_ack(q, mp, TSYSERR, EINVAL);
4132 return;
4133 }
4134 tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4135 return;
4136
4137 case T_DISCON_REQ:
4138 udp_tpi_disconnect(q, mp);
4139 return;
4140
4141 /* The following TPI message is not supported by udp. */
4142 case O_T_CONN_RES:
4143 case T_CONN_RES:
4144 udp_err_ack(q, mp, TNOTSUPPORT, 0);
4145 return;
4146
4147 /* The following 3 TPI requests are illegal for udp. */
4148 case T_DATA_REQ:
4149 case T_EXDATA_REQ:
4150 case T_ORDREL_REQ:
4151 udp_err_ack(q, mp, TNOTSUPPORT, 0);
4152 return;
4153 default:
4154 break;
4155 }
4156 break;
4157 case M_FLUSH:
4158 if (*rptr & FLUSHW)
4159 flushq(q, FLUSHDATA);
4160 break;
4161 case M_IOCTL:
4162 iocp = (struct iocblk *)mp->b_rptr;
4163 switch (iocp->ioc_cmd) {
4164 case TI_GETPEERNAME:
4165 if (udp->udp_state != TS_DATA_XFER) {
4166 /*
4167 * If a default destination address has not
4168 * been associated with the stream, then we
4169 * don't know the peer's name.
4170 */
4171 iocp->ioc_error = ENOTCONN;
4172 iocp->ioc_count = 0;
4173 mp->b_datap->db_type = M_IOCACK;
4174 qreply(q, mp);
4175 return;
4176 }
4177 /* FALLTHRU */
4178 case TI_GETMYNAME:
4179 /*
4180 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4181 * need to copyin the user's strbuf structure.
4182 * Processing will continue in the M_IOCDATA case
4183 * below.
4184 */
4185 mi_copyin(q, mp, NULL,
4186 SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4187 return;
4188 case _SIOCSOCKFALLBACK:
4189 /*
4190 * Either sockmod is about to be popped and the
4191 * socket would now be treated as a plain stream,
4192 * or a module is about to be pushed so we have
4193 * to follow pure TPI semantics.
4194 */
4195 if (!udp->udp_issocket) {
4196 DB_TYPE(mp) = M_IOCNAK;
4197 iocp->ioc_error = EINVAL;
4198 } else {
4199 udp_use_pure_tpi(udp);
4200
4201 DB_TYPE(mp) = M_IOCACK;
4202 iocp->ioc_error = 0;
4203 }
4204 iocp->ioc_count = 0;
4205 iocp->ioc_rval = 0;
4206 qreply(q, mp);
4207 return;
4208 default:
4209 break;
4210 }
4211 break;
4212 case M_IOCDATA:
4213 udp_wput_iocdata(q, mp);
4214 return;
4215 default:
4216 /* Unrecognized messages are passed through without change. */
4217 break;
4218 }
4219 ip_wput_nondata(q, mp);
4220 }
4221
4222 /*
4223 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4224 * messages.
4225 */
4226 static void
4227 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4228 {
4229 mblk_t *mp1;
4230 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
4231 STRUCT_HANDLE(strbuf, sb);
4232 uint_t addrlen;
4233 conn_t *connp = Q_TO_CONN(q);
4234 udp_t *udp = connp->conn_udp;
4235
4236 /* Make sure it is one of ours. */
4237 switch (iocp->ioc_cmd) {
4238 case TI_GETMYNAME:
4239 case TI_GETPEERNAME:
4240 break;
4241 default:
4242 ip_wput_nondata(q, mp);
4243 return;
4244 }
4245
4246 switch (mi_copy_state(q, mp, &mp1)) {
4247 case -1:
4248 return;
4249 case MI_COPY_CASE(MI_COPY_IN, 1):
4250 break;
4251 case MI_COPY_CASE(MI_COPY_OUT, 1):
4252 /*
4253 * The address has been copied out, so now
4254 * copyout the strbuf.
4255 */
4256 mi_copyout(q, mp);
4257 return;
4258 case MI_COPY_CASE(MI_COPY_OUT, 2):
4259 /*
4260 * The address and strbuf have been copied out.
4261 * We're done, so just acknowledge the original
4262 * M_IOCTL.
4263 */
4264 mi_copy_done(q, mp, 0);
4265 return;
4266 default:
4267 /*
4268 * Something strange has happened, so acknowledge
4269 * the original M_IOCTL with an EPROTO error.
4270 */
4271 mi_copy_done(q, mp, EPROTO);
4272 return;
4273 }
4274
4275 /*
4276 * Now we have the strbuf structure for TI_GETMYNAME
4277 * and TI_GETPEERNAME. Next we copyout the requested
4278 * address and then we'll copyout the strbuf.
4279 */
4280 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4281
4282 if (connp->conn_family == AF_INET)
4283 addrlen = sizeof (sin_t);
4284 else
4285 addrlen = sizeof (sin6_t);
4286
4287 if (STRUCT_FGET(sb, maxlen) < addrlen) {
4288 mi_copy_done(q, mp, EINVAL);
4289 return;
4290 }
4291
4292 switch (iocp->ioc_cmd) {
4293 case TI_GETMYNAME:
4294 break;
4295 case TI_GETPEERNAME:
4296 if (udp->udp_state != TS_DATA_XFER) {
4297 mi_copy_done(q, mp, ENOTCONN);
4298 return;
4299 }
4300 break;
4301 }
4302 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4303 if (!mp1)
4304 return;
4305
4306 STRUCT_FSET(sb, len, addrlen);
4307 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4308 case TI_GETMYNAME:
4309 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4310 &addrlen);
4311 break;
4312 case TI_GETPEERNAME:
4313 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4314 &addrlen);
4315 break;
4316 }
4317 mp1->b_wptr += addrlen;
4318 /* Copy out the address */
4319 mi_copyout(q, mp);
4320 }
4321
4322 void
4323 udp_ddi_g_init(void)
4324 {
4325 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4326 udp_opt_obj.odb_opt_arr_cnt);
4327
4328 /*
4329 * We want to be informed each time a stack is created or
4330 * destroyed in the kernel, so we can maintain the
4331 * set of udp_stack_t's.
4332 */
4333 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4334 }
4335
4336 void
4337 udp_ddi_g_destroy(void)
4338 {
4339 netstack_unregister(NS_UDP);
4340 }
4341
4342 #define INET_NAME "ip"
4343
4344 /*
4345 * Initialize the UDP stack instance.
4346 */
4347 static void *
4348 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4349 {
4350 udp_stack_t *us;
4351 int i;
4352 int error = 0;
4353 major_t major;
4354 size_t arrsz;
4355
4356 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4357 us->us_netstack = ns;
4358
4359 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4360 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4361 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4362 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4363
4364 /*
4365 * The smallest anonymous port in the priviledged port range which UDP
4366 * looks for free port. Use in the option UDP_ANONPRIVBIND.
4367 */
4368 us->us_min_anonpriv_port = 512;
4369
4370 us->us_bind_fanout_size = udp_bind_fanout_size;
4371
4372 /* Roundup variable that might have been modified in /etc/system */
4373 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
4374 /* Not a power of two. Round up to nearest power of two */
4375 for (i = 0; i < 31; i++) {
4376 if (us->us_bind_fanout_size < (1 << i))
4377 break;
4378 }
4379 us->us_bind_fanout_size = 1 << i;
4380 }
4381 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4382 sizeof (udp_fanout_t), KM_SLEEP);
4383 for (i = 0; i < us->us_bind_fanout_size; i++) {
4384 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4385 NULL);
4386 }
4387
4388 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4389 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4390 KM_SLEEP);
4391 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4392
4393 /* Allocate the per netstack stats */
4394 mutex_enter(&cpu_lock);
4395 us->us_sc_cnt = MAX(ncpus, boot_ncpus);
4396 mutex_exit(&cpu_lock);
4397 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *),
4398 KM_SLEEP);
4399 for (i = 0; i < us->us_sc_cnt; i++) {
4400 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4401 KM_SLEEP);
4402 }
4403
4404 us->us_kstat = udp_kstat2_init(stackid);
4405 us->us_mibkp = udp_kstat_init(stackid);
4406
4407 major = mod_name_to_major(INET_NAME);
4408 error = ldi_ident_from_major(major, &us->us_ldi_ident);
4409 ASSERT(error == 0);
4410 return (us);
4411 }
4412
4413 /*
4414 * Free the UDP stack instance.
4415 */
4416 static void
4417 udp_stack_fini(netstackid_t stackid, void *arg)
4418 {
4419 udp_stack_t *us = (udp_stack_t *)arg;
4420 int i;
4421
4422 for (i = 0; i < us->us_bind_fanout_size; i++) {
4423 mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4424 }
4425
4426 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4427 sizeof (udp_fanout_t));
4428
4429 us->us_bind_fanout = NULL;
4430
4431 for (i = 0; i < us->us_sc_cnt; i++)
4432 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4433 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4434
4435 kmem_free(us->us_propinfo_tbl,
4436 udp_propinfo_count * sizeof (mod_prop_info_t));
4437 us->us_propinfo_tbl = NULL;
4438
4439 udp_kstat_fini(stackid, us->us_mibkp);
4440 us->us_mibkp = NULL;
4441
4442 udp_kstat2_fini(stackid, us->us_kstat);
4443 us->us_kstat = NULL;
4444
4445 mutex_destroy(&us->us_epriv_port_lock);
4446 ldi_ident_release(us->us_ldi_ident);
4447 kmem_free(us, sizeof (*us));
4448 }
4449
4450 static size_t
4451 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4452 {
4453 udp_stack_t *us = udp->udp_us;
4454
4455 /* We add a bit of extra buffering */
4456 size += size >> 1;
4457 if (size > us->us_max_buf)
4458 size = us->us_max_buf;
4459
4460 udp->udp_rcv_hiwat = size;
4461 return (size);
4462 }
4463
4464 /*
4465 * For the lower queue so that UDP can be a dummy mux.
4466 * Nobody should be sending
4467 * packets up this stream
4468 */
4469 static void
4470 udp_lrput(queue_t *q, mblk_t *mp)
4471 {
4472 switch (mp->b_datap->db_type) {
4473 case M_FLUSH:
4474 /* Turn around */
4475 if (*mp->b_rptr & FLUSHW) {
4476 *mp->b_rptr &= ~FLUSHR;
4477 qreply(q, mp);
4478 return;
4479 }
4480 break;
4481 }
4482 freemsg(mp);
4483 }
4484
4485 /*
4486 * For the lower queue so that UDP can be a dummy mux.
4487 * Nobody should be sending packets down this stream.
4488 */
4489 /* ARGSUSED */
4490 void
4491 udp_lwput(queue_t *q, mblk_t *mp)
4492 {
4493 freemsg(mp);
4494 }
4495
4496 /*
4497 * When a CPU is added, we need to allocate the per CPU stats struct.
4498 */
4499 void
4500 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4501 {
4502 int i;
4503
4504 if (cpu_seqid < us->us_sc_cnt)
4505 return;
4506 for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4507 ASSERT(us->us_sc[i] == NULL);
4508 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4509 KM_SLEEP);
4510 }
4511 membar_producer();
4512 us->us_sc_cnt = cpu_seqid + 1;
4513 }
4514
4515 /*
4516 * Below routines for UDP socket module.
4517 */
4518
4519 static conn_t *
4520 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4521 {
4522 udp_t *udp;
4523 conn_t *connp;
4524 zoneid_t zoneid;
4525 netstack_t *ns;
4526 udp_stack_t *us;
4527 int len;
4528
4529 ASSERT(errorp != NULL);
4530
4531 if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4532 return (NULL);
4533
4534 ns = netstack_find_by_cred(credp);
4535 ASSERT(ns != NULL);
4536 us = ns->netstack_udp;
4537 ASSERT(us != NULL);
4538
4539 /*
4540 * For exclusive stacks we set the zoneid to zero
4541 * to make UDP operate as if in the global zone.
4542 */
4543 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4544 zoneid = GLOBAL_ZONEID;
4545 else
4546 zoneid = crgetzoneid(credp);
4547
4548 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4549
4550 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4551 if (connp == NULL) {
4552 netstack_rele(ns);
4553 *errorp = ENOMEM;
4554 return (NULL);
4555 }
4556 udp = connp->conn_udp;
4557
4558 /*
4559 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4560 * done by netstack_find_by_cred()
4561 */
4562 netstack_rele(ns);
4563
4564 /*
4565 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4566 * need to lock anything.
4567 */
4568 ASSERT(connp->conn_proto == IPPROTO_UDP);
4569 ASSERT(connp->conn_udp == udp);
4570 ASSERT(udp->udp_connp == connp);
4571
4572 /* Set the initial state of the stream and the privilege status. */
4573 udp->udp_state = TS_UNBND;
4574 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4575 if (isv6) {
4576 connp->conn_family = AF_INET6;
4577 connp->conn_ipversion = IPV6_VERSION;
4578 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4579 connp->conn_default_ttl = us->us_ipv6_hoplimit;
4580 len = sizeof (ip6_t) + UDPH_SIZE;
4581 } else {
4582 connp->conn_family = AF_INET;
4583 connp->conn_ipversion = IPV4_VERSION;
4584 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4585 connp->conn_default_ttl = us->us_ipv4_ttl;
4586 len = sizeof (ipha_t) + UDPH_SIZE;
4587 }
4588
4589 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4590 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4591
4592 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4593 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4594 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4595 connp->conn_ixa->ixa_zoneid = zoneid;
4596
4597 connp->conn_zoneid = zoneid;
4598
4599 /*
4600 * If the caller has the process-wide flag set, then default to MAC
4601 * exempt mode. This allows read-down to unlabeled hosts.
4602 */
4603 if (getpflags(NET_MAC_AWARE, credp) != 0)
4604 connp->conn_mac_mode = CONN_MAC_AWARE;
4605
4606 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4607
4608 udp->udp_us = us;
4609
4610 connp->conn_rcvbuf = us->us_recv_hiwat;
4611 connp->conn_sndbuf = us->us_xmit_hiwat;
4612 connp->conn_sndlowat = us->us_xmit_lowat;
4613 connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4614
4615 connp->conn_wroff = len + us->us_wroff_extra;
4616 connp->conn_so_type = SOCK_DGRAM;
4617
4618 connp->conn_recv = udp_input;
4619 connp->conn_recvicmp = udp_icmp_input;
4620 crhold(credp);
4621 connp->conn_cred = credp;
4622 connp->conn_cpid = curproc->p_pid;
4623 connp->conn_open_time = ddi_get_lbolt64();
4624 /* Cache things in ixa without an extra refhold */
4625 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4626 connp->conn_ixa->ixa_cred = connp->conn_cred;
4627 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4628 if (is_system_labeled())
4629 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4630
4631 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4632
4633 if (us->us_pmtu_discovery)
4634 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4635
4636 return (connp);
4637 }
4638
4639 sock_lower_handle_t
4640 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4641 uint_t *smodep, int *errorp, int flags, cred_t *credp)
4642 {
4643 udp_t *udp = NULL;
4644 udp_stack_t *us;
4645 conn_t *connp;
4646 boolean_t isv6;
4647
4648 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4649 (proto != 0 && proto != IPPROTO_UDP)) {
4650 *errorp = EPROTONOSUPPORT;
4651 return (NULL);
4652 }
4653
4654 if (family == AF_INET6)
4655 isv6 = B_TRUE;
4656 else
4657 isv6 = B_FALSE;
4658
4659 connp = udp_do_open(credp, isv6, flags, errorp);
4660 if (connp == NULL)
4661 return (NULL);
4662
4663 udp = connp->conn_udp;
4664 ASSERT(udp != NULL);
4665 us = udp->udp_us;
4666 ASSERT(us != NULL);
4667
4668 udp->udp_issocket = B_TRUE;
4669 connp->conn_flags |= IPCL_NONSTR;
4670
4671 /*
4672 * Set flow control
4673 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4674 * need to lock anything.
4675 */
4676 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4677 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4678
4679 connp->conn_flow_cntrld = B_FALSE;
4680
4681 mutex_enter(&connp->conn_lock);
4682 connp->conn_state_flags &= ~CONN_INCIPIENT;
4683 mutex_exit(&connp->conn_lock);
4684
4685 *errorp = 0;
4686 *smodep = SM_ATOMIC;
4687 *sock_downcalls = &sock_udp_downcalls;
4688 return ((sock_lower_handle_t)connp);
4689 }
4690
4691 /* ARGSUSED3 */
4692 void
4693 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4694 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4695 {
4696 conn_t *connp = (conn_t *)proto_handle;
4697 struct sock_proto_props sopp;
4698
4699 /* All Solaris components should pass a cred for this operation. */
4700 ASSERT(cr != NULL);
4701
4702 connp->conn_upcalls = sock_upcalls;
4703 connp->conn_upper_handle = sock_handle;
4704
4705 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4706 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4707 sopp.sopp_wroff = connp->conn_wroff;
4708 sopp.sopp_maxblk = INFPSZ;
4709 sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4710 sopp.sopp_rxlowat = connp->conn_rcvlowat;
4711 sopp.sopp_maxaddrlen = sizeof (sin6_t);
4712 sopp.sopp_maxpsz =
4713 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4714 UDP_MAXPACKET_IPV6;
4715 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4716 udp_mod_info.mi_minpsz;
4717
4718 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4719 &sopp);
4720 }
4721
4722 static void
4723 udp_do_close(conn_t *connp)
4724 {
4725 udp_t *udp;
4726
4727 ASSERT(connp != NULL && IPCL_IS_UDP(connp));
4728 udp = connp->conn_udp;
4729
4730 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
4731 /*
4732 * Running in cluster mode - register unbind information
4733 */
4734 if (connp->conn_ipversion == IPV4_VERSION) {
4735 (*cl_inet_unbind)(
4736 connp->conn_netstack->netstack_stackid,
4737 IPPROTO_UDP, AF_INET,
4738 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
4739 (in_port_t)connp->conn_lport, NULL);
4740 } else {
4741 (*cl_inet_unbind)(
4742 connp->conn_netstack->netstack_stackid,
4743 IPPROTO_UDP, AF_INET6,
4744 (uint8_t *)&(connp->conn_laddr_v6),
4745 (in_port_t)connp->conn_lport, NULL);
4746 }
4747 }
4748
4749 udp_bind_hash_remove(udp, B_FALSE);
4750
4751 ip_quiesce_conn(connp);
4752
4753 if (!IPCL_IS_NONSTR(connp)) {
4754 ASSERT(connp->conn_wq != NULL);
4755 ASSERT(connp->conn_rq != NULL);
4756 qprocsoff(connp->conn_rq);
4757 }
4758
4759 udp_close_free(connp);
4760
4761 /*
4762 * Now we are truly single threaded on this stream, and can
4763 * delete the things hanging off the connp, and finally the connp.
4764 * We removed this connp from the fanout list, it cannot be
4765 * accessed thru the fanouts, and we already waited for the
4766 * conn_ref to drop to 0. We are already in close, so
4767 * there cannot be any other thread from the top. qprocsoff
4768 * has completed, and service has completed or won't run in
4769 * future.
4770 */
4771 ASSERT(connp->conn_ref == 1);
4772
4773 if (!IPCL_IS_NONSTR(connp)) {
4774 inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
4775 } else {
4776 ip_free_helper_stream(connp);
4777 }
4778
4779 connp->conn_ref--;
4780 ipcl_conn_destroy(connp);
4781 }
4782
4783 /* ARGSUSED1 */
4784 int
4785 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4786 {
4787 conn_t *connp = (conn_t *)proto_handle;
4788
4789 /* All Solaris components should pass a cred for this operation. */
4790 ASSERT(cr != NULL);
4791
4792 udp_do_close(connp);
4793 return (0);
4794 }
4795
4796 static int
4797 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
4798 boolean_t bind_to_req_port_only)
4799 {
4800 sin_t *sin;
4801 sin6_t *sin6;
4802 udp_t *udp = connp->conn_udp;
4803 int error = 0;
4804 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
4805 in_port_t port; /* Host byte order */
4806 in_port_t requested_port; /* Host byte order */
4807 int count;
4808 ipaddr_t v4src; /* Set if AF_INET */
4809 in6_addr_t v6src;
4810 int loopmax;
4811 udp_fanout_t *udpf;
4812 in_port_t lport; /* Network byte order */
4813 uint_t scopeid = 0;
4814 zoneid_t zoneid = IPCL_ZONEID(connp);
4815 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
4816 boolean_t is_inaddr_any;
4817 mlp_type_t addrtype, mlptype;
4818 udp_stack_t *us = udp->udp_us;
4819
4820 switch (len) {
4821 case sizeof (sin_t): /* Complete IPv4 address */
4822 sin = (sin_t *)sa;
4823
4824 if (sin == NULL || !OK_32PTR((char *)sin))
4825 return (EINVAL);
4826
4827 if (connp->conn_family != AF_INET ||
4828 sin->sin_family != AF_INET) {
4829 return (EAFNOSUPPORT);
4830 }
4831 v4src = sin->sin_addr.s_addr;
4832 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
4833 if (v4src != INADDR_ANY) {
4834 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
4835 B_TRUE);
4836 }
4837 port = ntohs(sin->sin_port);
4838 break;
4839
4840 case sizeof (sin6_t): /* complete IPv6 address */
4841 sin6 = (sin6_t *)sa;
4842
4843 if (sin6 == NULL || !OK_32PTR((char *)sin6))
4844 return (EINVAL);
4845
4846 if (connp->conn_family != AF_INET6 ||
4847 sin6->sin6_family != AF_INET6) {
4848 return (EAFNOSUPPORT);
4849 }
4850 v6src = sin6->sin6_addr;
4851 if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
4852 if (connp->conn_ipv6_v6only)
4853 return (EADDRNOTAVAIL);
4854
4855 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
4856 if (v4src != INADDR_ANY) {
4857 laddr_type = ip_laddr_verify_v4(v4src,
4858 zoneid, ipst, B_FALSE);
4859 }
4860 } else {
4861 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
4862 if (IN6_IS_ADDR_LINKSCOPE(&v6src))
4863 scopeid = sin6->sin6_scope_id;
4864 laddr_type = ip_laddr_verify_v6(&v6src,
4865 zoneid, ipst, B_TRUE, scopeid);
4866 }
4867 }
4868 port = ntohs(sin6->sin6_port);
4869 break;
4870
4871 default: /* Invalid request */
4872 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
4873 "udp_bind: bad ADDR_length length %u", len);
4874 return (-TBADADDR);
4875 }
4876
4877 /* Is the local address a valid unicast, multicast, or broadcast? */
4878 if (laddr_type == IPVL_BAD)
4879 return (EADDRNOTAVAIL);
4880
4881 requested_port = port;
4882
4883 if (requested_port == 0 || !bind_to_req_port_only)
4884 bind_to_req_port_only = B_FALSE;
4885 else /* T_BIND_REQ and requested_port != 0 */
4886 bind_to_req_port_only = B_TRUE;
4887
4888 if (requested_port == 0) {
4889 /*
4890 * If the application passed in zero for the port number, it
4891 * doesn't care which port number we bind to. Get one in the
4892 * valid range.
4893 */
4894 if (connp->conn_anon_priv_bind) {
4895 port = udp_get_next_priv_port(udp);
4896 } else {
4897 port = udp_update_next_port(udp,
4898 us->us_next_port_to_try, B_TRUE);
4899 }
4900 } else {
4901 /*
4902 * If the port is in the well-known privileged range,
4903 * make sure the caller was privileged.
4904 */
4905 int i;
4906 boolean_t priv = B_FALSE;
4907
4908 if (port < us->us_smallest_nonpriv_port) {
4909 priv = B_TRUE;
4910 } else {
4911 for (i = 0; i < us->us_num_epriv_ports; i++) {
4912 if (port == us->us_epriv_ports[i]) {
4913 priv = B_TRUE;
4914 break;
4915 }
4916 }
4917 }
4918
4919 if (priv) {
4920 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
4921 return (-TACCES);
4922 }
4923 }
4924
4925 if (port == 0)
4926 return (-TNOADDR);
4927
4928 /*
4929 * The state must be TS_UNBND. TPI mandates that users must send
4930 * TPI primitives only 1 at a time and wait for the response before
4931 * sending the next primitive.
4932 */
4933 mutex_enter(&connp->conn_lock);
4934 if (udp->udp_state != TS_UNBND) {
4935 mutex_exit(&connp->conn_lock);
4936 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
4937 "udp_bind: bad state, %u", udp->udp_state);
4938 return (-TOUTSTATE);
4939 }
4940 /*
4941 * Copy the source address into our udp structure. This address
4942 * may still be zero; if so, IP will fill in the correct address
4943 * each time an outbound packet is passed to it. Since the udp is
4944 * not yet in the bind hash list, we don't grab the uf_lock to
4945 * change conn_ipversion
4946 */
4947 if (connp->conn_family == AF_INET) {
4948 ASSERT(sin != NULL);
4949 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
4950 } else {
4951 if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
4952 /*
4953 * no need to hold the uf_lock to set the conn_ipversion
4954 * since we are not yet in the fanout list
4955 */
4956 connp->conn_ipversion = IPV4_VERSION;
4957 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4958 } else {
4959 connp->conn_ipversion = IPV6_VERSION;
4960 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4961 }
4962 }
4963
4964 /*
4965 * If conn_reuseaddr is not set, then we have to make sure that
4966 * the IP address and port number the application requested
4967 * (or we selected for the application) is not being used by
4968 * another stream. If another stream is already using the
4969 * requested IP address and port, the behavior depends on
4970 * "bind_to_req_port_only". If set the bind fails; otherwise we
4971 * search for any an unused port to bind to the stream.
4972 *
4973 * As per the BSD semantics, as modified by the Deering multicast
4974 * changes, if udp_reuseaddr is set, then we allow multiple binds
4975 * to the same port independent of the local IP address.
4976 *
4977 * This is slightly different than in SunOS 4.X which did not
4978 * support IP multicast. Note that the change implemented by the
4979 * Deering multicast code effects all binds - not only binding
4980 * to IP multicast addresses.
4981 *
4982 * Note that when binding to port zero we ignore SO_REUSEADDR in
4983 * order to guarantee a unique port.
4984 */
4985
4986 count = 0;
4987 if (connp->conn_anon_priv_bind) {
4988 /*
4989 * loopmax = (IPPORT_RESERVED-1) -
4990 * us->us_min_anonpriv_port + 1
4991 */
4992 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
4993 } else {
4994 loopmax = us->us_largest_anon_port -
4995 us->us_smallest_anon_port + 1;
4996 }
4997
4998 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
4999
5000 for (;;) {
5001 udp_t *udp1;
5002 boolean_t found_exclbind = B_FALSE;
5003 conn_t *connp1;
5004
5005 /*
5006 * Walk through the list of udp streams bound to
5007 * requested port with the same IP address.
5008 */
5009 lport = htons(port);
5010 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5011 us->us_bind_fanout_size)];
5012 mutex_enter(&udpf->uf_lock);
5013 for (udp1 = udpf->uf_udp; udp1 != NULL;
5014 udp1 = udp1->udp_bind_hash) {
5015 connp1 = udp1->udp_connp;
5016
5017 if (lport != connp1->conn_lport)
5018 continue;
5019
5020 /*
5021 * On a labeled system, we must treat bindings to ports
5022 * on shared IP addresses by sockets with MAC exemption
5023 * privilege as being in all zones, as there's
5024 * otherwise no way to identify the right receiver.
5025 */
5026 if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5027 continue;
5028
5029 /*
5030 * If UDP_EXCLBIND is set for either the bound or
5031 * binding endpoint, the semantics of bind
5032 * is changed according to the following chart.
5033 *
5034 * spec = specified address (v4 or v6)
5035 * unspec = unspecified address (v4 or v6)
5036 * A = specified addresses are different for endpoints
5037 *
5038 * bound bind to allowed?
5039 * -------------------------------------
5040 * unspec unspec no
5041 * unspec spec no
5042 * spec unspec no
5043 * spec spec yes if A
5044 *
5045 * For labeled systems, SO_MAC_EXEMPT behaves the same
5046 * as UDP_EXCLBIND, except that zoneid is ignored.
5047 */
5048 if (connp1->conn_exclbind || connp->conn_exclbind ||
5049 IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5050 if (V6_OR_V4_INADDR_ANY(
5051 connp1->conn_bound_addr_v6) ||
5052 is_inaddr_any ||
5053 IN6_ARE_ADDR_EQUAL(
5054 &connp1->conn_bound_addr_v6,
5055 &v6src)) {
5056 found_exclbind = B_TRUE;
5057 break;
5058 }
5059 continue;
5060 }
5061
5062 /*
5063 * Check ipversion to allow IPv4 and IPv6 sockets to
5064 * have disjoint port number spaces.
5065 */
5066 if (connp->conn_ipversion != connp1->conn_ipversion) {
5067
5068 /*
5069 * On the first time through the loop, if the
5070 * the user intentionally specified a
5071 * particular port number, then ignore any
5072 * bindings of the other protocol that may
5073 * conflict. This allows the user to bind IPv6
5074 * alone and get both v4 and v6, or bind both
5075 * both and get each seperately. On subsequent
5076 * times through the loop, we're checking a
5077 * port that we chose (not the user) and thus
5078 * we do not allow casual duplicate bindings.
5079 */
5080 if (count == 0 && requested_port != 0)
5081 continue;
5082 }
5083
5084 /*
5085 * No difference depending on SO_REUSEADDR.
5086 *
5087 * If existing port is bound to a
5088 * non-wildcard IP address and
5089 * the requesting stream is bound to
5090 * a distinct different IP addresses
5091 * (non-wildcard, also), keep going.
5092 */
5093 if (!is_inaddr_any &&
5094 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5095 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5096 &v6src)) {
5097 continue;
5098 }
5099 break;
5100 }
5101
5102 if (!found_exclbind &&
5103 (connp->conn_reuseaddr && requested_port != 0)) {
5104 break;
5105 }
5106
5107 if (udp1 == NULL) {
5108 /*
5109 * No other stream has this IP address
5110 * and port number. We can use it.
5111 */
5112 break;
5113 }
5114 mutex_exit(&udpf->uf_lock);
5115 if (bind_to_req_port_only) {
5116 /*
5117 * We get here only when requested port
5118 * is bound (and only first of the for()
5119 * loop iteration).
5120 *
5121 * The semantics of this bind request
5122 * require it to fail so we return from
5123 * the routine (and exit the loop).
5124 *
5125 */
5126 mutex_exit(&connp->conn_lock);
5127 return (-TADDRBUSY);
5128 }
5129
5130 if (connp->conn_anon_priv_bind) {
5131 port = udp_get_next_priv_port(udp);
5132 } else {
5133 if ((count == 0) && (requested_port != 0)) {
5134 /*
5135 * If the application wants us to find
5136 * a port, get one to start with. Set
5137 * requested_port to 0, so that we will
5138 * update us->us_next_port_to_try below.
5139 */
5140 port = udp_update_next_port(udp,
5141 us->us_next_port_to_try, B_TRUE);
5142 requested_port = 0;
5143 } else {
5144 port = udp_update_next_port(udp, port + 1,
5145 B_FALSE);
5146 }
5147 }
5148
5149 if (port == 0 || ++count >= loopmax) {
5150 /*
5151 * We've tried every possible port number and
5152 * there are none available, so send an error
5153 * to the user.
5154 */
5155 mutex_exit(&connp->conn_lock);
5156 return (-TNOADDR);
5157 }
5158 }
5159
5160 /*
5161 * Copy the source address into our udp structure. This address
5162 * may still be zero; if so, ip_attr_connect will fill in the correct
5163 * address when a packet is about to be sent.
5164 * If we are binding to a broadcast or multicast address then
5165 * we just set the conn_bound_addr since we don't want to use
5166 * that as the source address when sending.
5167 */
5168 connp->conn_bound_addr_v6 = v6src;
5169 connp->conn_laddr_v6 = v6src;
5170 if (scopeid != 0) {
5171 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5172 connp->conn_ixa->ixa_scopeid = scopeid;
5173 connp->conn_incoming_ifindex = scopeid;
5174 } else {
5175 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5176 connp->conn_incoming_ifindex = connp->conn_bound_if;
5177 }
5178
5179 switch (laddr_type) {
5180 case IPVL_UNICAST_UP:
5181 case IPVL_UNICAST_DOWN:
5182 connp->conn_saddr_v6 = v6src;
5183 connp->conn_mcbc_bind = B_FALSE;
5184 break;
5185 case IPVL_MCAST:
5186 case IPVL_BCAST:
5187 /* ip_set_destination will pick a source address later */
5188 connp->conn_saddr_v6 = ipv6_all_zeros;
5189 connp->conn_mcbc_bind = B_TRUE;
5190 break;
5191 }
5192
5193 /* Any errors after this point should use late_error */
5194 connp->conn_lport = lport;
5195
5196 /*
5197 * Now reset the next anonymous port if the application requested
5198 * an anonymous port, or we handed out the next anonymous port.
5199 */
5200 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5201 us->us_next_port_to_try = port + 1;
5202 }
5203
5204 /* Initialize the T_BIND_ACK. */
5205 if (connp->conn_family == AF_INET) {
5206 sin->sin_port = connp->conn_lport;
5207 } else {
5208 sin6->sin6_port = connp->conn_lport;
5209 }
5210 udp->udp_state = TS_IDLE;
5211 udp_bind_hash_insert(udpf, udp);
5212 mutex_exit(&udpf->uf_lock);
5213 mutex_exit(&connp->conn_lock);
5214
5215 if (cl_inet_bind) {
5216 /*
5217 * Running in cluster mode - register bind information
5218 */
5219 if (connp->conn_ipversion == IPV4_VERSION) {
5220 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5221 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5222 (in_port_t)connp->conn_lport, NULL);
5223 } else {
5224 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5225 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5226 (in_port_t)connp->conn_lport, NULL);
5227 }
5228 }
5229
5230 mutex_enter(&connp->conn_lock);
5231 connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5232 if (is_system_labeled() && (!connp->conn_anon_port ||
5233 connp->conn_anon_mlp)) {
5234 uint16_t mlpport;
5235 zone_t *zone;
5236
5237 zone = crgetzone(cr);
5238 connp->conn_mlp_type =
5239 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5240 mlptSingle;
5241 addrtype = tsol_mlp_addr_type(
5242 connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5243 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5244 if (addrtype == mlptSingle) {
5245 error = -TNOADDR;
5246 mutex_exit(&connp->conn_lock);
5247 goto late_error;
5248 }
5249 mlpport = connp->conn_anon_port ? PMAPPORT : port;
5250 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5251 addrtype);
5252
5253 /*
5254 * It is a coding error to attempt to bind an MLP port
5255 * without first setting SOL_SOCKET/SCM_UCRED.
5256 */
5257 if (mlptype != mlptSingle &&
5258 connp->conn_mlp_type == mlptSingle) {
5259 error = EINVAL;
5260 mutex_exit(&connp->conn_lock);
5261 goto late_error;
5262 }
5263
5264 /*
5265 * It is an access violation to attempt to bind an MLP port
5266 * without NET_BINDMLP privilege.
5267 */
5268 if (mlptype != mlptSingle &&
5269 secpolicy_net_bindmlp(cr) != 0) {
5270 if (connp->conn_debug) {
5271 (void) strlog(UDP_MOD_ID, 0, 1,
5272 SL_ERROR|SL_TRACE,
5273 "udp_bind: no priv for multilevel port %d",
5274 mlpport);
5275 }
5276 error = -TACCES;
5277 mutex_exit(&connp->conn_lock);
5278 goto late_error;
5279 }
5280
5281 /*
5282 * If we're specifically binding a shared IP address and the
5283 * port is MLP on shared addresses, then check to see if this
5284 * zone actually owns the MLP. Reject if not.
5285 */
5286 if (mlptype == mlptShared && addrtype == mlptShared) {
5287 /*
5288 * No need to handle exclusive-stack zones since
5289 * ALL_ZONES only applies to the shared stack.
5290 */
5291 zoneid_t mlpzone;
5292
5293 mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5294 htons(mlpport));
5295 if (connp->conn_zoneid != mlpzone) {
5296 if (connp->conn_debug) {
5297 (void) strlog(UDP_MOD_ID, 0, 1,
5298 SL_ERROR|SL_TRACE,
5299 "udp_bind: attempt to bind port "
5300 "%d on shared addr in zone %d "
5301 "(should be %d)",
5302 mlpport, connp->conn_zoneid,
5303 mlpzone);
5304 }
5305 error = -TACCES;
5306 mutex_exit(&connp->conn_lock);
5307 goto late_error;
5308 }
5309 }
5310 if (connp->conn_anon_port) {
5311 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5312 port, B_TRUE);
5313 if (error != 0) {
5314 if (connp->conn_debug) {
5315 (void) strlog(UDP_MOD_ID, 0, 1,
5316 SL_ERROR|SL_TRACE,
5317 "udp_bind: cannot establish anon "
5318 "MLP for port %d", port);
5319 }
5320 error = -TACCES;
5321 mutex_exit(&connp->conn_lock);
5322 goto late_error;
5323 }
5324 }
5325 connp->conn_mlp_type = mlptype;
5326 }
5327
5328 /*
5329 * We create an initial header template here to make a subsequent
5330 * sendto have a starting point. Since conn_last_dst is zero the
5331 * first sendto will always follow the 'dst changed' code path.
5332 * Note that we defer massaging options and the related checksum
5333 * adjustment until we have a destination address.
5334 */
5335 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5336 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5337 if (error != 0) {
5338 mutex_exit(&connp->conn_lock);
5339 goto late_error;
5340 }
5341 /* Just in case */
5342 connp->conn_faddr_v6 = ipv6_all_zeros;
5343 connp->conn_fport = 0;
5344 connp->conn_v6lastdst = ipv6_all_zeros;
5345 mutex_exit(&connp->conn_lock);
5346
5347 error = ip_laddr_fanout_insert(connp);
5348 if (error != 0)
5349 goto late_error;
5350
5351 /* Bind succeeded */
5352 return (0);
5353
5354 late_error:
5355 /* We had already picked the port number, and then the bind failed */
5356 mutex_enter(&connp->conn_lock);
5357 udpf = &us->us_bind_fanout[
5358 UDP_BIND_HASH(connp->conn_lport,
5359 us->us_bind_fanout_size)];
5360 mutex_enter(&udpf->uf_lock);
5361 connp->conn_saddr_v6 = ipv6_all_zeros;
5362 connp->conn_bound_addr_v6 = ipv6_all_zeros;
5363 connp->conn_laddr_v6 = ipv6_all_zeros;
5364 if (scopeid != 0) {
5365 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5366 connp->conn_incoming_ifindex = connp->conn_bound_if;
5367 }
5368 udp->udp_state = TS_UNBND;
5369 udp_bind_hash_remove(udp, B_TRUE);
5370 connp->conn_lport = 0;
5371 mutex_exit(&udpf->uf_lock);
5372 connp->conn_anon_port = B_FALSE;
5373 connp->conn_mlp_type = mlptSingle;
5374
5375 connp->conn_v6lastdst = ipv6_all_zeros;
5376
5377 /* Restore the header that was built above - different source address */
5378 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5379 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5380 mutex_exit(&connp->conn_lock);
5381 return (error);
5382 }
5383
5384 int
5385 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5386 socklen_t len, cred_t *cr)
5387 {
5388 int error;
5389 conn_t *connp;
5390
5391 /* All Solaris components should pass a cred for this operation. */
5392 ASSERT(cr != NULL);
5393
5394 connp = (conn_t *)proto_handle;
5395
5396 if (sa == NULL)
5397 error = udp_do_unbind(connp);
5398 else
5399 error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5400
5401 if (error < 0) {
5402 if (error == -TOUTSTATE)
5403 error = EINVAL;
5404 else
5405 error = proto_tlitosyserr(-error);
5406 }
5407
5408 return (error);
5409 }
5410
5411 static int
5412 udp_implicit_bind(conn_t *connp, cred_t *cr)
5413 {
5414 sin6_t sin6addr;
5415 sin_t *sin;
5416 sin6_t *sin6;
5417 socklen_t len;
5418 int error;
5419
5420 /* All Solaris components should pass a cred for this operation. */
5421 ASSERT(cr != NULL);
5422
5423 if (connp->conn_family == AF_INET) {
5424 len = sizeof (struct sockaddr_in);
5425 sin = (sin_t *)&sin6addr;
5426 *sin = sin_null;
5427 sin->sin_family = AF_INET;
5428 sin->sin_addr.s_addr = INADDR_ANY;
5429 } else {
5430 ASSERT(connp->conn_family == AF_INET6);
5431 len = sizeof (sin6_t);
5432 sin6 = (sin6_t *)&sin6addr;
5433 *sin6 = sin6_null;
5434 sin6->sin6_family = AF_INET6;
5435 V6_SET_ZERO(sin6->sin6_addr);
5436 }
5437
5438 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5439 cr, B_FALSE);
5440 return ((error < 0) ? proto_tlitosyserr(-error) : error);
5441 }
5442
5443 /*
5444 * This routine removes a port number association from a stream. It
5445 * is called by udp_unbind and udp_tpi_unbind.
5446 */
5447 static int
5448 udp_do_unbind(conn_t *connp)
5449 {
5450 udp_t *udp = connp->conn_udp;
5451 udp_fanout_t *udpf;
5452 udp_stack_t *us = udp->udp_us;
5453
5454 if (cl_inet_unbind != NULL) {
5455 /*
5456 * Running in cluster mode - register unbind information
5457 */
5458 if (connp->conn_ipversion == IPV4_VERSION) {
5459 (*cl_inet_unbind)(
5460 connp->conn_netstack->netstack_stackid,
5461 IPPROTO_UDP, AF_INET,
5462 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5463 (in_port_t)connp->conn_lport, NULL);
5464 } else {
5465 (*cl_inet_unbind)(
5466 connp->conn_netstack->netstack_stackid,
5467 IPPROTO_UDP, AF_INET6,
5468 (uint8_t *)&(connp->conn_laddr_v6),
5469 (in_port_t)connp->conn_lport, NULL);
5470 }
5471 }
5472
5473 mutex_enter(&connp->conn_lock);
5474 /* If a bind has not been done, we can't unbind. */
5475 if (udp->udp_state == TS_UNBND) {
5476 mutex_exit(&connp->conn_lock);
5477 return (-TOUTSTATE);
5478 }
5479 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5480 us->us_bind_fanout_size)];
5481 mutex_enter(&udpf->uf_lock);
5482 udp_bind_hash_remove(udp, B_TRUE);
5483 connp->conn_saddr_v6 = ipv6_all_zeros;
5484 connp->conn_bound_addr_v6 = ipv6_all_zeros;
5485 connp->conn_laddr_v6 = ipv6_all_zeros;
5486 connp->conn_mcbc_bind = B_FALSE;
5487 connp->conn_lport = 0;
5488 /* In case we were also connected */
5489 connp->conn_faddr_v6 = ipv6_all_zeros;
5490 connp->conn_fport = 0;
5491 mutex_exit(&udpf->uf_lock);
5492
5493 connp->conn_v6lastdst = ipv6_all_zeros;
5494 udp->udp_state = TS_UNBND;
5495
5496 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5497 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5498 mutex_exit(&connp->conn_lock);
5499
5500 ip_unbind(connp);
5501
5502 return (0);
5503 }
5504
5505 /*
5506 * It associates a default destination address with the stream.
5507 */
5508 static int
5509 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5510 cred_t *cr, pid_t pid)
5511 {
5512 sin6_t *sin6;
5513 sin_t *sin;
5514 in6_addr_t v6dst;
5515 ipaddr_t v4dst;
5516 uint16_t dstport;
5517 uint32_t flowinfo;
5518 udp_fanout_t *udpf;
5519 udp_t *udp, *udp1;
5520 ushort_t ipversion;
5521 udp_stack_t *us;
5522 int error;
5523 conn_t *connp1;
5524 ip_xmit_attr_t *ixa;
5525 ip_xmit_attr_t *oldixa;
5526 uint_t scopeid = 0;
5527 uint_t srcid = 0;
5528 in6_addr_t v6src = connp->conn_saddr_v6;
5529
5530 udp = connp->conn_udp;
5531 us = udp->udp_us;
5532
5533 /*
5534 * Address has been verified by the caller
5535 */
5536 switch (len) {
5537 default:
5538 /*
5539 * Should never happen
5540 */
5541 return (EINVAL);
5542
5543 case sizeof (sin_t):
5544 sin = (sin_t *)sa;
5545 v4dst = sin->sin_addr.s_addr;
5546 dstport = sin->sin_port;
5547 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5548 ASSERT(connp->conn_ipversion == IPV4_VERSION);
5549 ipversion = IPV4_VERSION;
5550 break;
5551
5552 case sizeof (sin6_t):
5553 sin6 = (sin6_t *)sa;
5554 v6dst = sin6->sin6_addr;
5555 dstport = sin6->sin6_port;
5556 srcid = sin6->__sin6_src_id;
5557 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5558 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5559 connp->conn_netstack);
5560 }
5561 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
5562 if (connp->conn_ipv6_v6only)
5563 return (EADDRNOTAVAIL);
5564
5565 /*
5566 * Destination adress is mapped IPv6 address.
5567 * Source bound address should be unspecified or
5568 * IPv6 mapped address as well.
5569 */
5570 if (!IN6_IS_ADDR_UNSPECIFIED(
5571 &connp->conn_bound_addr_v6) &&
5572 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5573 return (EADDRNOTAVAIL);
5574 }
5575 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5576 ipversion = IPV4_VERSION;
5577 flowinfo = 0;
5578 } else {
5579 ipversion = IPV6_VERSION;
5580 flowinfo = sin6->sin6_flowinfo;
5581 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5582 scopeid = sin6->sin6_scope_id;
5583 }
5584 break;
5585 }
5586
5587 if (dstport == 0)
5588 return (-TBADADDR);
5589
5590 /*
5591 * If there is a different thread using conn_ixa then we get a new
5592 * copy and cut the old one loose from conn_ixa. Otherwise we use
5593 * conn_ixa and prevent any other thread from using/changing it.
5594 * Once connect() is done other threads can use conn_ixa since the
5595 * refcnt will be back at one.
5596 * We defer updating conn_ixa until later to handle any concurrent
5597 * conn_ixa_cleanup thread.
5598 */
5599 ixa = conn_get_ixa(connp, B_FALSE);
5600 if (ixa == NULL)
5601 return (ENOMEM);
5602
5603 mutex_enter(&connp->conn_lock);
5604 /*
5605 * This udp_t must have bound to a port already before doing a connect.
5606 * Reject if a connect is in progress (we drop conn_lock during
5607 * udp_do_connect).
5608 */
5609 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5610 mutex_exit(&connp->conn_lock);
5611 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5612 "udp_connect: bad state, %u", udp->udp_state);
5613 ixa_refrele(ixa);
5614 return (-TOUTSTATE);
5615 }
5616 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5617
5618 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5619 us->us_bind_fanout_size)];
5620
5621 mutex_enter(&udpf->uf_lock);
5622 if (udp->udp_state == TS_DATA_XFER) {
5623 /* Already connected - clear out state */
5624 if (connp->conn_mcbc_bind)
5625 connp->conn_saddr_v6 = ipv6_all_zeros;
5626 else
5627 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5628 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5629 connp->conn_faddr_v6 = ipv6_all_zeros;
5630 connp->conn_fport = 0;
5631 udp->udp_state = TS_IDLE;
5632 }
5633
5634 connp->conn_fport = dstport;
5635 connp->conn_ipversion = ipversion;
5636 if (ipversion == IPV4_VERSION) {
5637 /*
5638 * Interpret a zero destination to mean loopback.
5639 * Update the T_CONN_REQ (sin/sin6) since it is used to
5640 * generate the T_CONN_CON.
5641 */
5642 if (v4dst == INADDR_ANY) {
5643 v4dst = htonl(INADDR_LOOPBACK);
5644 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5645 if (connp->conn_family == AF_INET) {
5646 sin->sin_addr.s_addr = v4dst;
5647 } else {
5648 sin6->sin6_addr = v6dst;
5649 }
5650 }
5651 connp->conn_faddr_v6 = v6dst;
5652 connp->conn_flowinfo = 0;
5653 } else {
5654 ASSERT(connp->conn_ipversion == IPV6_VERSION);
5655 /*
5656 * Interpret a zero destination to mean loopback.
5657 * Update the T_CONN_REQ (sin/sin6) since it is used to
5658 * generate the T_CONN_CON.
5659 */
5660 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
5661 v6dst = ipv6_loopback;
5662 sin6->sin6_addr = v6dst;
5663 }
5664 connp->conn_faddr_v6 = v6dst;
5665 connp->conn_flowinfo = flowinfo;
5666 }
5667 mutex_exit(&udpf->uf_lock);
5668
5669 /*
5670 * We update our cred/cpid based on the caller of connect
5671 */
5672 if (connp->conn_cred != cr) {
5673 crhold(cr);
5674 crfree(connp->conn_cred);
5675 connp->conn_cred = cr;
5676 }
5677 connp->conn_cpid = pid;
5678 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
5679 ixa->ixa_cred = cr;
5680 ixa->ixa_cpid = pid;
5681 if (is_system_labeled()) {
5682 /* We need to restart with a label based on the cred */
5683 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
5684 }
5685
5686 if (scopeid != 0) {
5687 ixa->ixa_flags |= IXAF_SCOPEID_SET;
5688 ixa->ixa_scopeid = scopeid;
5689 connp->conn_incoming_ifindex = scopeid;
5690 } else {
5691 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5692 connp->conn_incoming_ifindex = connp->conn_bound_if;
5693 }
5694 /*
5695 * conn_connect will drop conn_lock and reacquire it.
5696 * To prevent a send* from messing with this udp_t while the lock
5697 * is dropped we set udp_state and clear conn_v6lastdst.
5698 * That will make all send* fail with EISCONN.
5699 */
5700 connp->conn_v6lastdst = ipv6_all_zeros;
5701 udp->udp_state = TS_WCON_CREQ;
5702
5703 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
5704 mutex_exit(&connp->conn_lock);
5705 if (error != 0)
5706 goto connect_failed;
5707
5708 /*
5709 * The addresses have been verified. Time to insert in
5710 * the correct fanout list.
5711 */
5712 error = ipcl_conn_insert(connp);
5713 if (error != 0)
5714 goto connect_failed;
5715
5716 mutex_enter(&connp->conn_lock);
5717 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5718 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5719 if (error != 0) {
5720 mutex_exit(&connp->conn_lock);
5721 goto connect_failed;
5722 }
5723
5724 udp->udp_state = TS_DATA_XFER;
5725 /* Record this as the "last" send even though we haven't sent any */
5726 connp->conn_v6lastdst = connp->conn_faddr_v6;
5727 connp->conn_lastipversion = connp->conn_ipversion;
5728 connp->conn_lastdstport = connp->conn_fport;
5729 connp->conn_lastflowinfo = connp->conn_flowinfo;
5730 connp->conn_lastscopeid = scopeid;
5731 connp->conn_lastsrcid = srcid;
5732 /* Also remember a source to use together with lastdst */
5733 connp->conn_v6lastsrc = v6src;
5734
5735 oldixa = conn_replace_ixa(connp, ixa);
5736 mutex_exit(&connp->conn_lock);
5737 ixa_refrele(oldixa);
5738
5739 /*
5740 * We've picked a source address above. Now we can
5741 * verify that the src/port/dst/port is unique for all
5742 * connections in TS_DATA_XFER, skipping ourselves.
5743 */
5744 mutex_enter(&udpf->uf_lock);
5745 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
5746 if (udp1->udp_state != TS_DATA_XFER)
5747 continue;
5748
5749 if (udp1 == udp)
5750 continue;
5751
5752 connp1 = udp1->udp_connp;
5753 if (connp->conn_lport != connp1->conn_lport ||
5754 connp->conn_ipversion != connp1->conn_ipversion ||
5755 dstport != connp1->conn_fport ||
5756 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
5757 &connp1->conn_laddr_v6) ||
5758 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
5759 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
5760 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
5761 continue;
5762 mutex_exit(&udpf->uf_lock);
5763 error = -TBADADDR;
5764 goto connect_failed;
5765 }
5766 if (cl_inet_connect2 != NULL) {
5767 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
5768 if (error != 0) {
5769 mutex_exit(&udpf->uf_lock);
5770 error = -TBADADDR;
5771 goto connect_failed;
5772 }
5773 }
5774 mutex_exit(&udpf->uf_lock);
5775
5776 ixa_refrele(ixa);
5777 return (0);
5778
5779 connect_failed:
5780 if (ixa != NULL)
5781 ixa_refrele(ixa);
5782 mutex_enter(&connp->conn_lock);
5783 mutex_enter(&udpf->uf_lock);
5784 udp->udp_state = TS_IDLE;
5785 connp->conn_faddr_v6 = ipv6_all_zeros;
5786 connp->conn_fport = 0;
5787 /* In case the source address was set above */
5788 if (connp->conn_mcbc_bind)
5789 connp->conn_saddr_v6 = ipv6_all_zeros;
5790 else
5791 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5792 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5793 mutex_exit(&udpf->uf_lock);
5794
5795 connp->conn_v6lastdst = ipv6_all_zeros;
5796 connp->conn_flowinfo = 0;
5797
5798 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5799 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5800 mutex_exit(&connp->conn_lock);
5801 return (error);
5802 }
5803
5804 static int
5805 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5806 socklen_t len, sock_connid_t *id, cred_t *cr)
5807 {
5808 conn_t *connp = (conn_t *)proto_handle;
5809 udp_t *udp = connp->conn_udp;
5810 int error;
5811 boolean_t did_bind = B_FALSE;
5812 pid_t pid = curproc->p_pid;
5813
5814 /* All Solaris components should pass a cred for this operation. */
5815 ASSERT(cr != NULL);
5816
5817 if (sa == NULL) {
5818 /*
5819 * Disconnect
5820 * Make sure we are connected
5821 */
5822 if (udp->udp_state != TS_DATA_XFER)
5823 return (EINVAL);
5824
5825 error = udp_disconnect(connp);
5826 return (error);
5827 }
5828
5829 error = proto_verify_ip_addr(connp->conn_family, sa, len);
5830 if (error != 0)
5831 goto done;
5832
5833 /* do an implicit bind if necessary */
5834 if (udp->udp_state == TS_UNBND) {
5835 error = udp_implicit_bind(connp, cr);
5836 /*
5837 * We could be racing with an actual bind, in which case
5838 * we would see EPROTO. We cross our fingers and try
5839 * to connect.
5840 */
5841 if (!(error == 0 || error == EPROTO))
5842 goto done;
5843 did_bind = B_TRUE;
5844 }
5845 /*
5846 * set SO_DGRAM_ERRIND
5847 */
5848 connp->conn_dgram_errind = B_TRUE;
5849
5850 error = udp_do_connect(connp, sa, len, cr, pid);
5851
5852 if (error != 0 && did_bind) {
5853 int unbind_err;
5854
5855 unbind_err = udp_do_unbind(connp);
5856 ASSERT(unbind_err == 0);
5857 }
5858
5859 if (error == 0) {
5860 *id = 0;
5861 (*connp->conn_upcalls->su_connected)
5862 (connp->conn_upper_handle, 0, NULL, -1);
5863 } else if (error < 0) {
5864 error = proto_tlitosyserr(-error);
5865 }
5866
5867 done:
5868 if (error != 0 && udp->udp_state == TS_DATA_XFER) {
5869 /*
5870 * No need to hold locks to set state
5871 * after connect failure socket state is undefined
5872 * We set the state only to imitate old sockfs behavior
5873 */
5874 udp->udp_state = TS_IDLE;
5875 }
5876 return (error);
5877 }
5878
5879 int
5880 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
5881 cred_t *cr)
5882 {
5883 sin6_t *sin6;
5884 sin_t *sin = NULL;
5885 uint_t srcid;
5886 conn_t *connp = (conn_t *)proto_handle;
5887 udp_t *udp = connp->conn_udp;
5888 int error = 0;
5889 udp_stack_t *us = udp->udp_us;
5890 ushort_t ipversion;
5891 pid_t pid = curproc->p_pid;
5892 ip_xmit_attr_t *ixa;
5893
5894 ASSERT(DB_TYPE(mp) == M_DATA);
5895
5896 /* All Solaris components should pass a cred for this operation. */
5897 ASSERT(cr != NULL);
5898
5899 /* do an implicit bind if necessary */
5900 if (udp->udp_state == TS_UNBND) {
5901 error = udp_implicit_bind(connp, cr);
5902 /*
5903 * We could be racing with an actual bind, in which case
5904 * we would see EPROTO. We cross our fingers and try
5905 * to connect.
5906 */
5907 if (!(error == 0 || error == EPROTO)) {
5908 freemsg(mp);
5909 return (error);
5910 }
5911 }
5912
5913 /* Connected? */
5914 if (msg->msg_name == NULL) {
5915 if (udp->udp_state != TS_DATA_XFER) {
5916 UDPS_BUMP_MIB(us, udpOutErrors);
5917 return (EDESTADDRREQ);
5918 }
5919 if (msg->msg_controllen != 0) {
5920 error = udp_output_ancillary(connp, NULL, NULL, mp,
5921 NULL, msg, cr, pid);
5922 } else {
5923 error = udp_output_connected(connp, mp, cr, pid);
5924 }
5925 if (us->us_sendto_ignerr)
5926 return (0);
5927 else
5928 return (error);
5929 }
5930 if (udp->udp_state == TS_DATA_XFER) {
5931 UDPS_BUMP_MIB(us, udpOutErrors);
5932 return (EISCONN);
5933 }
5934 error = proto_verify_ip_addr(connp->conn_family,
5935 (struct sockaddr *)msg->msg_name, msg->msg_namelen);
5936 if (error != 0) {
5937 UDPS_BUMP_MIB(us, udpOutErrors);
5938 return (error);
5939 }
5940 switch (connp->conn_family) {
5941 case AF_INET6:
5942 sin6 = (sin6_t *)msg->msg_name;
5943
5944 srcid = sin6->__sin6_src_id;
5945
5946 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
5947 /*
5948 * Destination is a non-IPv4-compatible IPv6 address.
5949 * Send out an IPv6 format packet.
5950 */
5951
5952 /*
5953 * If the local address is a mapped address return
5954 * an error.
5955 * It would be possible to send an IPv6 packet but the
5956 * response would never make it back to the application
5957 * since it is bound to a mapped address.
5958 */
5959 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
5960 UDPS_BUMP_MIB(us, udpOutErrors);
5961 return (EADDRNOTAVAIL);
5962 }
5963 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
5964 sin6->sin6_addr = ipv6_loopback;
5965 ipversion = IPV6_VERSION;
5966 } else {
5967 if (connp->conn_ipv6_v6only) {
5968 UDPS_BUMP_MIB(us, udpOutErrors);
5969 return (EADDRNOTAVAIL);
5970 }
5971
5972 /*
5973 * If the local address is not zero or a mapped address
5974 * return an error. It would be possible to send an
5975 * IPv4 packet but the response would never make it
5976 * back to the application since it is bound to a
5977 * non-mapped address.
5978 */
5979 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
5980 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
5981 UDPS_BUMP_MIB(us, udpOutErrors);
5982 return (EADDRNOTAVAIL);
5983 }
5984
5985 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
5986 V4_PART_OF_V6(sin6->sin6_addr) =
5987 htonl(INADDR_LOOPBACK);
5988 }
5989 ipversion = IPV4_VERSION;
5990 }
5991
5992 /*
5993 * We have to allocate an ip_xmit_attr_t before we grab
5994 * conn_lock and we need to hold conn_lock once we've check
5995 * conn_same_as_last_v6 to handle concurrent send* calls on a
5996 * socket.
5997 */
5998 if (msg->msg_controllen == 0) {
5999 ixa = conn_get_ixa(connp, B_FALSE);
6000 if (ixa == NULL) {
6001 UDPS_BUMP_MIB(us, udpOutErrors);
6002 return (ENOMEM);
6003 }
6004 } else {
6005 ixa = NULL;
6006 }
6007 mutex_enter(&connp->conn_lock);
6008 if (udp->udp_delayed_error != 0) {
6009 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6010
6011 error = udp->udp_delayed_error;
6012 udp->udp_delayed_error = 0;
6013
6014 /* Compare IP address, port, and family */
6015
6016 if (sin6->sin6_port == sin2->sin6_port &&
6017 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6018 &sin2->sin6_addr) &&
6019 sin6->sin6_family == sin2->sin6_family) {
6020 mutex_exit(&connp->conn_lock);
6021 UDPS_BUMP_MIB(us, udpOutErrors);
6022 if (ixa != NULL)
6023 ixa_refrele(ixa);
6024 return (error);
6025 }
6026 }
6027
6028 if (msg->msg_controllen != 0) {
6029 mutex_exit(&connp->conn_lock);
6030 ASSERT(ixa == NULL);
6031 error = udp_output_ancillary(connp, NULL, sin6, mp,
6032 NULL, msg, cr, pid);
6033 } else if (conn_same_as_last_v6(connp, sin6) &&
6034 connp->conn_lastsrcid == srcid &&
6035 ipsec_outbound_policy_current(ixa)) {
6036 /* udp_output_lastdst drops conn_lock */
6037 error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6038 } else {
6039 /* udp_output_newdst drops conn_lock */
6040 error = udp_output_newdst(connp, mp, NULL, sin6,
6041 ipversion, cr, pid, ixa);
6042 }
6043 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6044 if (us->us_sendto_ignerr)
6045 return (0);
6046 else
6047 return (error);
6048 case AF_INET:
6049 sin = (sin_t *)msg->msg_name;
6050
6051 ipversion = IPV4_VERSION;
6052
6053 if (sin->sin_addr.s_addr == INADDR_ANY)
6054 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6055
6056 /*
6057 * We have to allocate an ip_xmit_attr_t before we grab
6058 * conn_lock and we need to hold conn_lock once we've check
6059 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6060 */
6061 if (msg->msg_controllen == 0) {
6062 ixa = conn_get_ixa(connp, B_FALSE);
6063 if (ixa == NULL) {
6064 UDPS_BUMP_MIB(us, udpOutErrors);
6065 return (ENOMEM);
6066 }
6067 } else {
6068 ixa = NULL;
6069 }
6070 mutex_enter(&connp->conn_lock);
6071 if (udp->udp_delayed_error != 0) {
6072 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
6073
6074 error = udp->udp_delayed_error;
6075 udp->udp_delayed_error = 0;
6076
6077 /* Compare IP address and port */
6078
6079 if (sin->sin_port == sin2->sin_port &&
6080 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6081 mutex_exit(&connp->conn_lock);
6082 UDPS_BUMP_MIB(us, udpOutErrors);
6083 if (ixa != NULL)
6084 ixa_refrele(ixa);
6085 return (error);
6086 }
6087 }
6088 if (msg->msg_controllen != 0) {
6089 mutex_exit(&connp->conn_lock);
6090 ASSERT(ixa == NULL);
6091 error = udp_output_ancillary(connp, sin, NULL, mp,
6092 NULL, msg, cr, pid);
6093 } else if (conn_same_as_last_v4(connp, sin) &&
6094 ipsec_outbound_policy_current(ixa)) {
6095 /* udp_output_lastdst drops conn_lock */
6096 error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6097 } else {
6098 /* udp_output_newdst drops conn_lock */
6099 error = udp_output_newdst(connp, mp, sin, NULL,
6100 ipversion, cr, pid, ixa);
6101 }
6102 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6103 if (us->us_sendto_ignerr)
6104 return (0);
6105 else
6106 return (error);
6107 default:
6108 return (EINVAL);
6109 }
6110 }
6111
6112 int
6113 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6114 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
6115 sock_quiesce_arg_t *arg)
6116 {
6117 conn_t *connp = (conn_t *)proto_handle;
6118 udp_t *udp;
6119 struct T_capability_ack tca;
6120 struct sockaddr_in6 laddr, faddr;
6121 socklen_t laddrlen, faddrlen;
6122 short opts;
6123 struct stroptions *stropt;
6124 mblk_t *mp, *stropt_mp;
6125 int error;
6126
6127 udp = connp->conn_udp;
6128
6129 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6130
6131 /*
6132 * setup the fallback stream that was allocated
6133 */
6134 connp->conn_dev = (dev_t)RD(q)->q_ptr;
6135 connp->conn_minor_arena = WR(q)->q_ptr;
6136
6137 RD(q)->q_ptr = WR(q)->q_ptr = connp;
6138
6139 WR(q)->q_qinfo = &udp_winit;
6140
6141 connp->conn_rq = RD(q);
6142 connp->conn_wq = WR(q);
6143
6144 /* Notify stream head about options before sending up data */
6145 stropt_mp->b_datap->db_type = M_SETOPTS;
6146 stropt_mp->b_wptr += sizeof (*stropt);
6147 stropt = (struct stroptions *)stropt_mp->b_rptr;
6148 stropt->so_flags = SO_WROFF | SO_HIWAT;
6149 stropt->so_wroff = connp->conn_wroff;
6150 stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6151 putnext(RD(q), stropt_mp);
6152
6153 /*
6154 * Free the helper stream
6155 */
6156 ip_free_helper_stream(connp);
6157
6158 if (!issocket)
6159 udp_use_pure_tpi(udp);
6160
6161 /*
6162 * Collect the information needed to sync with the sonode
6163 */
6164 udp_do_capability_ack(udp, &tca, TC1_INFO);
6165
6166 laddrlen = faddrlen = sizeof (sin6_t);
6167 (void) udp_getsockname((sock_lower_handle_t)connp,
6168 (struct sockaddr *)&laddr, &laddrlen, CRED());
6169 error = udp_getpeername((sock_lower_handle_t)connp,
6170 (struct sockaddr *)&faddr, &faddrlen, CRED());
6171 if (error != 0)
6172 faddrlen = 0;
6173
6174 opts = 0;
6175 if (connp->conn_dgram_errind)
6176 opts |= SO_DGRAM_ERRIND;
6177 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6178 opts |= SO_DONTROUTE;
6179
6180 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
6181 (struct sockaddr *)&laddr, laddrlen,
6182 (struct sockaddr *)&faddr, faddrlen, opts);
6183
6184 mutex_enter(&udp->udp_recv_lock);
6185 /*
6186 * Attempts to send data up during fallback will result in it being
6187 * queued in udp_t. First push up the datagrams obtained from the
6188 * socket, then any packets queued in udp_t.
6189 */
6190 if (mp != NULL) {
6191 mp->b_next = udp->udp_fallback_queue_head;
6192 udp->udp_fallback_queue_head = mp;
6193 }
6194 while (udp->udp_fallback_queue_head != NULL) {
6195 mp = udp->udp_fallback_queue_head;
6196 udp->udp_fallback_queue_head = mp->b_next;
6197 mutex_exit(&udp->udp_recv_lock);
6198 mp->b_next = NULL;
6199 putnext(RD(q), mp);
6200 mutex_enter(&udp->udp_recv_lock);
6201 }
6202 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6203 /*
6204 * No longer a streams less socket
6205 */
6206 mutex_enter(&connp->conn_lock);
6207 connp->conn_flags &= ~IPCL_NONSTR;
6208 mutex_exit(&connp->conn_lock);
6209
6210 mutex_exit(&udp->udp_recv_lock);
6211
6212 ASSERT(connp->conn_ref >= 1);
6213
6214 return (0);
6215 }
6216
6217 /* ARGSUSED3 */
6218 int
6219 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6220 socklen_t *salenp, cred_t *cr)
6221 {
6222 conn_t *connp = (conn_t *)proto_handle;
6223 udp_t *udp = connp->conn_udp;
6224 int error;
6225
6226 /* All Solaris components should pass a cred for this operation. */
6227 ASSERT(cr != NULL);
6228
6229 mutex_enter(&connp->conn_lock);
6230 if (udp->udp_state != TS_DATA_XFER)
6231 error = ENOTCONN;
6232 else
6233 error = conn_getpeername(connp, sa, salenp);
6234 mutex_exit(&connp->conn_lock);
6235 return (error);
6236 }
6237
6238 /* ARGSUSED3 */
6239 int
6240 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6241 socklen_t *salenp, cred_t *cr)
6242 {
6243 conn_t *connp = (conn_t *)proto_handle;
6244 int error;
6245
6246 /* All Solaris components should pass a cred for this operation. */
6247 ASSERT(cr != NULL);
6248
6249 mutex_enter(&connp->conn_lock);
6250 error = conn_getsockname(connp, sa, salenp);
6251 mutex_exit(&connp->conn_lock);
6252 return (error);
6253 }
6254
6255 int
6256 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6257 void *optvalp, socklen_t *optlen, cred_t *cr)
6258 {
6259 conn_t *connp = (conn_t *)proto_handle;
6260 int error;
6261 t_uscalar_t max_optbuf_len;
6262 void *optvalp_buf;
6263 int len;
6264
6265 /* All Solaris components should pass a cred for this operation. */
6266 ASSERT(cr != NULL);
6267
6268 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6269 udp_opt_obj.odb_opt_des_arr,
6270 udp_opt_obj.odb_opt_arr_cnt,
6271 B_FALSE, B_TRUE, cr);
6272 if (error != 0) {
6273 if (error < 0)
6274 error = proto_tlitosyserr(-error);
6275 return (error);
6276 }
6277
6278 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6279 len = udp_opt_get(connp, level, option_name, optvalp_buf);
6280 if (len == -1) {
6281 kmem_free(optvalp_buf, max_optbuf_len);
6282 return (EINVAL);
6283 }
6284
6285 /*
6286 * update optlen and copy option value
6287 */
6288 t_uscalar_t size = MIN(len, *optlen);
6289
6290 bcopy(optvalp_buf, optvalp, size);
6291 bcopy(&size, optlen, sizeof (size));
6292
6293 kmem_free(optvalp_buf, max_optbuf_len);
6294 return (0);
6295 }
6296
6297 int
6298 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6299 const void *optvalp, socklen_t optlen, cred_t *cr)
6300 {
6301 conn_t *connp = (conn_t *)proto_handle;
6302 int error;
6303
6304 /* All Solaris components should pass a cred for this operation. */
6305 ASSERT(cr != NULL);
6306
6307 error = proto_opt_check(level, option_name, optlen, NULL,
6308 udp_opt_obj.odb_opt_des_arr,
6309 udp_opt_obj.odb_opt_arr_cnt,
6310 B_TRUE, B_FALSE, cr);
6311
6312 if (error != 0) {
6313 if (error < 0)
6314 error = proto_tlitosyserr(-error);
6315 return (error);
6316 }
6317
6318 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6319 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6320 NULL, cr);
6321
6322 ASSERT(error >= 0);
6323
6324 return (error);
6325 }
6326
6327 void
6328 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6329 {
6330 conn_t *connp = (conn_t *)proto_handle;
6331 udp_t *udp = connp->conn_udp;
6332
6333 mutex_enter(&udp->udp_recv_lock);
6334 connp->conn_flow_cntrld = B_FALSE;
6335 mutex_exit(&udp->udp_recv_lock);
6336 }
6337
6338 /* ARGSUSED2 */
6339 int
6340 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6341 {
6342 conn_t *connp = (conn_t *)proto_handle;
6343
6344 /* All Solaris components should pass a cred for this operation. */
6345 ASSERT(cr != NULL);
6346
6347 /* shut down the send side */
6348 if (how != SHUT_RD)
6349 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6350 SOCK_OPCTL_SHUT_SEND, 0);
6351 /* shut down the recv side */
6352 if (how != SHUT_WR)
6353 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6354 SOCK_OPCTL_SHUT_RECV, 0);
6355 return (0);
6356 }
6357
6358 int
6359 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6360 int mode, int32_t *rvalp, cred_t *cr)
6361 {
6362 conn_t *connp = (conn_t *)proto_handle;
6363 int error;
6364
6365 /* All Solaris components should pass a cred for this operation. */
6366 ASSERT(cr != NULL);
6367
6368 /*
6369 * If we don't have a helper stream then create one.
6370 * ip_create_helper_stream takes care of locking the conn_t,
6371 * so this check for NULL is just a performance optimization.
6372 */
6373 if (connp->conn_helper_info == NULL) {
6374 udp_stack_t *us = connp->conn_udp->udp_us;
6375
6376 ASSERT(us->us_ldi_ident != NULL);
6377
6378 /*
6379 * Create a helper stream for non-STREAMS socket.
6380 */
6381 error = ip_create_helper_stream(connp, us->us_ldi_ident);
6382 if (error != 0) {
6383 ip0dbg(("tcp_ioctl: create of IP helper stream "
6384 "failed %d\n", error));
6385 return (error);
6386 }
6387 }
6388
6389 switch (cmd) {
6390 case _SIOCSOCKFALLBACK:
6391 case TI_GETPEERNAME:
6392 case TI_GETMYNAME:
6393 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6394 cmd));
6395 error = EINVAL;
6396 break;
6397 default:
6398 /*
6399 * Pass on to IP using helper stream
6400 */
6401 error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6402 cmd, arg, mode, cr, rvalp);
6403 break;
6404 }
6405 return (error);
6406 }
6407
6408 /* ARGSUSED */
6409 int
6410 udp_accept(sock_lower_handle_t lproto_handle,
6411 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6412 cred_t *cr)
6413 {
6414 return (EOPNOTSUPP);
6415 }
6416
6417 /* ARGSUSED */
6418 int
6419 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6420 {
6421 return (EOPNOTSUPP);
6422 }
6423
6424 sock_downcalls_t sock_udp_downcalls = {
6425 udp_activate, /* sd_activate */
6426 udp_accept, /* sd_accept */
6427 udp_bind, /* sd_bind */
6428 udp_listen, /* sd_listen */
6429 udp_connect, /* sd_connect */
6430 udp_getpeername, /* sd_getpeername */
6431 udp_getsockname, /* sd_getsockname */
6432 udp_getsockopt, /* sd_getsockopt */
6433 udp_setsockopt, /* sd_setsockopt */
6434 udp_send, /* sd_send */
6435 NULL, /* sd_send_uio */
6436 NULL, /* sd_recv_uio */
6437 NULL, /* sd_poll */
6438 udp_shutdown, /* sd_shutdown */
6439 udp_clr_flowctrl, /* sd_setflowctrl */
6440 udp_ioctl, /* sd_ioctl */
6441 udp_close /* sd_close */
6442 };