Print this page
4596 Callers of ip_srcid_find_id() need to be more careful
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/udp/udp.c
+++ new/usr/src/uts/common/inet/udp/udp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 + * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
24 25 */
25 26 /* Copyright (c) 1990 Mentat Inc. */
26 27
27 28 #include <sys/types.h>
28 29 #include <sys/stream.h>
29 30 #include <sys/stropts.h>
30 31 #include <sys/strlog.h>
31 32 #include <sys/strsun.h>
32 33 #define _SUN_TPI_VERSION 2
33 34 #include <sys/tihdr.h>
34 35 #include <sys/timod.h>
35 36 #include <sys/ddi.h>
36 37 #include <sys/sunddi.h>
37 38 #include <sys/strsubr.h>
38 39 #include <sys/suntpi.h>
39 40 #include <sys/xti_inet.h>
40 41 #include <sys/kmem.h>
41 42 #include <sys/cred_impl.h>
42 43 #include <sys/policy.h>
43 44 #include <sys/priv.h>
44 45 #include <sys/ucred.h>
45 46 #include <sys/zone.h>
46 47
47 48 #include <sys/socket.h>
48 49 #include <sys/socketvar.h>
49 50 #include <sys/sockio.h>
50 51 #include <sys/vtrace.h>
51 52 #include <sys/sdt.h>
52 53 #include <sys/debug.h>
53 54 #include <sys/isa_defs.h>
54 55 #include <sys/random.h>
55 56 #include <netinet/in.h>
56 57 #include <netinet/ip6.h>
57 58 #include <netinet/icmp6.h>
58 59 #include <netinet/udp.h>
59 60
60 61 #include <inet/common.h>
61 62 #include <inet/ip.h>
62 63 #include <inet/ip_impl.h>
63 64 #include <inet/ipsec_impl.h>
64 65 #include <inet/ip6.h>
65 66 #include <inet/ip_ire.h>
66 67 #include <inet/ip_if.h>
67 68 #include <inet/ip_multi.h>
68 69 #include <inet/ip_ndp.h>
69 70 #include <inet/proto_set.h>
70 71 #include <inet/mib2.h>
71 72 #include <inet/optcom.h>
72 73 #include <inet/snmpcom.h>
73 74 #include <inet/kstatcom.h>
74 75 #include <inet/ipclassifier.h>
75 76 #include <sys/squeue_impl.h>
76 77 #include <inet/ipnet.h>
77 78 #include <sys/ethernet.h>
78 79
79 80 #include <sys/tsol/label.h>
80 81 #include <sys/tsol/tnet.h>
81 82 #include <rpc/pmap_prot.h>
82 83
83 84 #include <inet/udp_impl.h>
84 85
85 86 /*
86 87 * Synchronization notes:
87 88 *
88 89 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
89 90 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
90 91 * protects the contents of the udp_t. uf_lock protects the address and the
91 92 * fanout information.
92 93 * The lock order is conn_lock -> uf_lock.
93 94 *
94 95 * The fanout lock uf_lock:
95 96 * When a UDP endpoint is bound to a local port, it is inserted into
96 97 * a bind hash list. The list consists of an array of udp_fanout_t buckets.
97 98 * The size of the array is controlled by the udp_bind_fanout_size variable.
98 99 * This variable can be changed in /etc/system if the default value is
99 100 * not large enough. Each bind hash bucket is protected by a per bucket
100 101 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
101 102 * structure and a few other fields in the udp_t. A UDP endpoint is removed
102 103 * from the bind hash list only when it is being unbound or being closed.
103 104 * The per bucket lock also protects a UDP endpoint's state changes.
104 105 *
105 106 * Plumbing notes:
106 107 * UDP is always a device driver. For compatibility with mibopen() code
107 108 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
108 109 * dummy module.
109 110 *
110 111 * The above implies that we don't support any intermediate module to
111 112 * reside in between /dev/ip and udp -- in fact, we never supported such
112 113 * scenario in the past as the inter-layer communication semantics have
113 114 * always been private.
114 115 */
115 116
116 117 /* For /etc/system control */
117 118 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
118 119
119 120 static void udp_addr_req(queue_t *q, mblk_t *mp);
120 121 static void udp_tpi_bind(queue_t *q, mblk_t *mp);
121 122 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
122 123 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
123 124 static int udp_build_hdr_template(conn_t *, const in6_addr_t *,
124 125 const in6_addr_t *, in_port_t, uint32_t);
125 126 static void udp_capability_req(queue_t *q, mblk_t *mp);
126 127 static int udp_tpi_close(queue_t *q, int flags);
127 128 static void udp_close_free(conn_t *);
128 129 static void udp_tpi_connect(queue_t *q, mblk_t *mp);
129 130 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp);
130 131 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
131 132 int sys_error);
132 133 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
133 134 t_scalar_t tlierr, int sys_error);
134 135 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
135 136 cred_t *cr);
136 137 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
137 138 char *value, caddr_t cp, cred_t *cr);
138 139 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
139 140 char *value, caddr_t cp, cred_t *cr);
140 141 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
141 142 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
142 143 ip_recv_attr_t *ira);
143 144 static void udp_info_req(queue_t *q, mblk_t *mp);
144 145 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
145 146 static void udp_lrput(queue_t *, mblk_t *);
146 147 static void udp_lwput(queue_t *, mblk_t *);
147 148 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
148 149 cred_t *credp, boolean_t isv6);
149 150 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
150 151 cred_t *credp);
151 152 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
152 153 cred_t *credp);
153 154 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
154 155 int udp_opt_set(conn_t *connp, uint_t optset_context,
155 156 int level, int name, uint_t inlen,
156 157 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
157 158 void *thisdg_attrs, cred_t *cr);
158 159 int udp_opt_get(conn_t *connp, int level, int name,
159 160 uchar_t *ptr);
160 161 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
161 162 pid_t pid);
162 163 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
163 164 pid_t pid, ip_xmit_attr_t *ixa);
164 165 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
165 166 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
166 167 ip_xmit_attr_t *ixa);
167 168 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
168 169 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
169 170 int *);
170 171 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
171 172 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
172 173 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
173 174 static void udp_ud_err_connected(conn_t *, t_scalar_t);
174 175 static void udp_tpi_unbind(queue_t *q, mblk_t *mp);
175 176 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
176 177 boolean_t random);
177 178 static void udp_wput_other(queue_t *q, mblk_t *mp);
178 179 static void udp_wput_iocdata(queue_t *q, mblk_t *mp);
179 180 static void udp_wput_fallback(queue_t *q, mblk_t *mp);
180 181 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size);
181 182
182 183 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns);
183 184 static void udp_stack_fini(netstackid_t stackid, void *arg);
184 185
185 186 /* Common routines for TPI and socket module */
186 187 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
187 188
188 189 /* Common routine for TPI and socket module */
189 190 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *);
190 191 static void udp_do_close(conn_t *);
191 192 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
192 193 boolean_t);
193 194 static int udp_do_unbind(conn_t *);
194 195
195 196 int udp_getsockname(sock_lower_handle_t,
196 197 struct sockaddr *, socklen_t *, cred_t *);
197 198 int udp_getpeername(sock_lower_handle_t,
198 199 struct sockaddr *, socklen_t *, cred_t *);
199 200 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
200 201 cred_t *, pid_t);
201 202
202 203 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
203 204
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *	in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 *
 * cp		connection whose local address/port and netstack id are
 *		reported to the hook
 * is_outgoing	passed through unchanged to cl_inet_connect2
 * faddrp	pointer to the foreign address; for an IPv4 connection only
 *		its V4 part is handed to the hook
 * fport	foreign port
 * err		lvalue that receives the hook's return value (it is reset
 *		to 0 first)
 *
 * The expansion is a brace-enclosed compound statement, so it must be used
 * as a statement, never as an expression.  Every argument is parenthesized
 * in the expansion so that arbitrary expressions can be passed safely.
 */
#define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((cp)->conn_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET,		\
		    (uint8_t *)&((cp)->conn_laddr_v4),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)&(V4_PART_OF_V6(*(faddrp))),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET6,		\
		    (uint8_t *)&((cp)->conn_laddr_v6),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
241 242
242 243 static struct module_info udp_mod_info = {
243 244 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
244 245 };
245 246
246 247 /*
247 248 * Entry points for UDP as a device.
248 249 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
249 250 */
250 251 static struct qinit udp_rinitv4 = {
251 252 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
252 253 };
253 254
254 255 static struct qinit udp_rinitv6 = {
255 256 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
256 257 };
257 258
258 259 static struct qinit udp_winit = {
259 260 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
260 261 };
261 262
262 263 /* UDP entry point during fallback */
263 264 struct qinit udp_fallback_sock_winit = {
264 265 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
265 266 };
266 267
267 268 /*
268 269 * UDP needs to handle I_LINK and I_PLINK since ifconfig
269 270 * likes to use it as a place to hang the various streams.
270 271 */
271 272 static struct qinit udp_lrinit = {
272 273 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
273 274 };
274 275
275 276 static struct qinit udp_lwinit = {
276 277 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
277 278 };
278 279
279 280 /* For AF_INET aka /dev/udp */
280 281 struct streamtab udpinfov4 = {
281 282 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
282 283 };
283 284
284 285 /* For AF_INET6 aka /dev/udp6 */
285 286 struct streamtab udpinfov6 = {
286 287 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
287 288 };
288 289
289 290 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
290 291
291 292 /* Default structure copied into T_INFO_ACK messages */
292 293 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
293 294 T_INFO_ACK,
294 295 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */
295 296 T_INVALID, /* ETSU_size. udp does not support expedited data. */
296 297 T_INVALID, /* CDATA_size. udp does not support connect data. */
297 298 T_INVALID, /* DDATA_size. udp does not support disconnect data. */
298 299 sizeof (sin_t), /* ADDR_size. */
299 300 0, /* OPT_size - not initialized here */
300 301 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */
301 302 T_CLTS, /* SERV_type. udp supports connection-less. */
302 303 TS_UNBND, /* CURRENT_state. This is set from udp_state. */
303 304 (XPG4_1|SENDZERO) /* PROVIDER_flag */
304 305 };
305 306
306 307 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
307 308
308 309 static struct T_info_ack udp_g_t_info_ack_ipv6 = {
309 310 T_INFO_ACK,
310 311 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */
311 312 T_INVALID, /* ETSU_size. udp does not support expedited data. */
312 313 T_INVALID, /* CDATA_size. udp does not support connect data. */
313 314 T_INVALID, /* DDATA_size. udp does not support disconnect data. */
314 315 sizeof (sin6_t), /* ADDR_size. */
315 316 0, /* OPT_size - not initialized here */
316 317 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */
317 318 T_CLTS, /* SERV_type. udp supports connection-less. */
318 319 TS_UNBND, /* CURRENT_state. This is set from udp_state. */
319 320 (XPG4_1|SENDZERO) /* PROVIDER_flag */
320 321 };
321 322
322 323 /*
323 324 * UDP tunables related declarations. Definitions are in udp_tunables.c
324 325 */
325 326 extern mod_prop_info_t udp_propinfo_tbl[];
326 327 extern int udp_propinfo_count;
327 328
328 329 /* Setable in /etc/system */
329 330 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
330 331 uint32_t udp_random_anon_port = 1;
331 332
332 333 /*
333 334 * Hook functions to enable cluster networking.
334 335 * On non-clustered systems these vectors must always be NULL
335 336 */
336 337
337 338 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
338 339 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
339 340 void *args) = NULL;
340 341 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
341 342 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
342 343 void *args) = NULL;
343 344
344 345 typedef union T_primitives *t_primp_t;
345 346
346 347 /*
347 348 * Return the next anonymous port in the privileged port range for
348 349 * bind checking.
349 350 *
350 351 * Trusted Extension (TX) notes: TX allows administrator to mark or
351 352 * reserve ports as Multilevel ports (MLP). MLP has special function
352 353 * on TX systems. Once a port is made MLP, it's not available as
353 354 * ordinary port. This creates "holes" in the port name space. It
354 355 * may be necessary to skip the "holes" find a suitable anon port.
355 356 */
356 357 static in_port_t
357 358 udp_get_next_priv_port(udp_t *udp)
358 359 {
359 360 static in_port_t next_priv_port = IPPORT_RESERVED - 1;
360 361 in_port_t nextport;
361 362 boolean_t restart = B_FALSE;
362 363 udp_stack_t *us = udp->udp_us;
363 364
364 365 retry:
365 366 if (next_priv_port < us->us_min_anonpriv_port ||
366 367 next_priv_port >= IPPORT_RESERVED) {
367 368 next_priv_port = IPPORT_RESERVED - 1;
368 369 if (restart)
369 370 return (0);
370 371 restart = B_TRUE;
371 372 }
372 373
373 374 if (is_system_labeled() &&
374 375 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
375 376 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
376 377 next_priv_port = nextport;
377 378 goto retry;
378 379 }
379 380
380 381 return (next_priv_port--);
381 382 }
382 383
383 384 /*
384 385 * Hash list removal routine for udp_t structures.
385 386 */
386 387 static void
387 388 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
388 389 {
389 390 udp_t *udpnext;
390 391 kmutex_t *lockp;
391 392 udp_stack_t *us = udp->udp_us;
392 393 conn_t *connp = udp->udp_connp;
393 394
394 395 if (udp->udp_ptpbhn == NULL)
395 396 return;
396 397
397 398 /*
398 399 * Extract the lock pointer in case there are concurrent
399 400 * hash_remove's for this instance.
400 401 */
401 402 ASSERT(connp->conn_lport != 0);
402 403 if (!caller_holds_lock) {
403 404 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
404 405 us->us_bind_fanout_size)].uf_lock;
405 406 ASSERT(lockp != NULL);
406 407 mutex_enter(lockp);
407 408 }
408 409 if (udp->udp_ptpbhn != NULL) {
409 410 udpnext = udp->udp_bind_hash;
410 411 if (udpnext != NULL) {
411 412 udpnext->udp_ptpbhn = udp->udp_ptpbhn;
412 413 udp->udp_bind_hash = NULL;
413 414 }
414 415 *udp->udp_ptpbhn = udpnext;
415 416 udp->udp_ptpbhn = NULL;
416 417 }
417 418 if (!caller_holds_lock) {
418 419 mutex_exit(lockp);
419 420 }
420 421 }
421 422
422 423 static void
423 424 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
424 425 {
425 426 conn_t *connp = udp->udp_connp;
426 427 udp_t **udpp;
427 428 udp_t *udpnext;
428 429 conn_t *connext;
429 430
430 431 ASSERT(MUTEX_HELD(&uf->uf_lock));
431 432 ASSERT(udp->udp_ptpbhn == NULL);
432 433 udpp = &uf->uf_udp;
433 434 udpnext = udpp[0];
434 435 if (udpnext != NULL) {
435 436 /*
436 437 * If the new udp bound to the INADDR_ANY address
437 438 * and the first one in the list is not bound to
438 439 * INADDR_ANY we skip all entries until we find the
439 440 * first one bound to INADDR_ANY.
440 441 * This makes sure that applications binding to a
441 442 * specific address get preference over those binding to
442 443 * INADDR_ANY.
443 444 */
444 445 connext = udpnext->udp_connp;
445 446 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
446 447 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
447 448 while ((udpnext = udpp[0]) != NULL &&
448 449 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
449 450 udpp = &(udpnext->udp_bind_hash);
450 451 }
451 452 if (udpnext != NULL)
452 453 udpnext->udp_ptpbhn = &udp->udp_bind_hash;
453 454 } else {
454 455 udpnext->udp_ptpbhn = &udp->udp_bind_hash;
455 456 }
456 457 }
457 458 udp->udp_bind_hash = udpnext;
458 459 udp->udp_ptpbhn = udpp;
459 460 udpp[0] = udp;
460 461 }
461 462
462 463 /*
463 464 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
464 465 * passed to udp_wput.
465 466 * It associates a port number and local address with the stream.
466 467 * It calls IP to verify the local IP address, and calls IP to insert
467 468 * the conn_t in the fanout table.
468 469 * If everything is ok it then sends the T_BIND_ACK back up.
469 470 *
470 471 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
471 472 * without setting SO_REUSEADDR. This is needed so that they
472 473 * can be viewed as two independent transport protocols.
473 474 * However, anonymouns ports are allocated from the same range to avoid
474 475 * duplicating the us->us_next_port_to_try.
475 476 */
476 477 static void
477 478 udp_tpi_bind(queue_t *q, mblk_t *mp)
478 479 {
479 480 sin_t *sin;
480 481 sin6_t *sin6;
481 482 mblk_t *mp1;
482 483 struct T_bind_req *tbr;
483 484 conn_t *connp;
484 485 udp_t *udp;
485 486 int error;
486 487 struct sockaddr *sa;
487 488 cred_t *cr;
488 489
489 490 /*
490 491 * All Solaris components should pass a db_credp
491 492 * for this TPI message, hence we ASSERT.
492 493 * But in case there is some other M_PROTO that looks
493 494 * like a TPI message sent by some other kernel
494 495 * component, we check and return an error.
495 496 */
496 497 cr = msg_getcred(mp, NULL);
497 498 ASSERT(cr != NULL);
498 499 if (cr == NULL) {
499 500 udp_err_ack(q, mp, TSYSERR, EINVAL);
500 501 return;
501 502 }
502 503
503 504 connp = Q_TO_CONN(q);
504 505 udp = connp->conn_udp;
505 506 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
506 507 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
507 508 "udp_bind: bad req, len %u",
508 509 (uint_t)(mp->b_wptr - mp->b_rptr));
509 510 udp_err_ack(q, mp, TPROTO, 0);
510 511 return;
511 512 }
512 513 if (udp->udp_state != TS_UNBND) {
513 514 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
514 515 "udp_bind: bad state, %u", udp->udp_state);
515 516 udp_err_ack(q, mp, TOUTSTATE, 0);
516 517 return;
517 518 }
518 519 /*
519 520 * Reallocate the message to make sure we have enough room for an
520 521 * address.
521 522 */
522 523 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
523 524 if (mp1 == NULL) {
524 525 udp_err_ack(q, mp, TSYSERR, ENOMEM);
525 526 return;
526 527 }
527 528
528 529 mp = mp1;
529 530
530 531 /* Reset the message type in preparation for shipping it back. */
531 532 DB_TYPE(mp) = M_PCPROTO;
532 533
533 534 tbr = (struct T_bind_req *)mp->b_rptr;
534 535 switch (tbr->ADDR_length) {
535 536 case 0: /* Request for a generic port */
536 537 tbr->ADDR_offset = sizeof (struct T_bind_req);
537 538 if (connp->conn_family == AF_INET) {
538 539 tbr->ADDR_length = sizeof (sin_t);
539 540 sin = (sin_t *)&tbr[1];
540 541 *sin = sin_null;
541 542 sin->sin_family = AF_INET;
542 543 mp->b_wptr = (uchar_t *)&sin[1];
543 544 sa = (struct sockaddr *)sin;
544 545 } else {
545 546 ASSERT(connp->conn_family == AF_INET6);
546 547 tbr->ADDR_length = sizeof (sin6_t);
547 548 sin6 = (sin6_t *)&tbr[1];
548 549 *sin6 = sin6_null;
549 550 sin6->sin6_family = AF_INET6;
550 551 mp->b_wptr = (uchar_t *)&sin6[1];
551 552 sa = (struct sockaddr *)sin6;
552 553 }
553 554 break;
554 555
555 556 case sizeof (sin_t): /* Complete IPv4 address */
556 557 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
557 558 sizeof (sin_t));
558 559 if (sa == NULL || !OK_32PTR((char *)sa)) {
559 560 udp_err_ack(q, mp, TSYSERR, EINVAL);
560 561 return;
561 562 }
562 563 if (connp->conn_family != AF_INET ||
563 564 sa->sa_family != AF_INET) {
564 565 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
565 566 return;
566 567 }
567 568 break;
568 569
569 570 case sizeof (sin6_t): /* complete IPv6 address */
570 571 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
571 572 sizeof (sin6_t));
572 573 if (sa == NULL || !OK_32PTR((char *)sa)) {
573 574 udp_err_ack(q, mp, TSYSERR, EINVAL);
574 575 return;
575 576 }
576 577 if (connp->conn_family != AF_INET6 ||
577 578 sa->sa_family != AF_INET6) {
578 579 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
579 580 return;
580 581 }
581 582 break;
582 583
583 584 default: /* Invalid request */
584 585 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
585 586 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
586 587 udp_err_ack(q, mp, TBADADDR, 0);
587 588 return;
588 589 }
589 590
590 591 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
591 592 tbr->PRIM_type != O_T_BIND_REQ);
592 593
593 594 if (error != 0) {
594 595 if (error > 0) {
595 596 udp_err_ack(q, mp, TSYSERR, error);
596 597 } else {
597 598 udp_err_ack(q, mp, -error, 0);
598 599 }
599 600 } else {
600 601 tbr->PRIM_type = T_BIND_ACK;
601 602 qreply(q, mp);
602 603 }
603 604 }
604 605
605 606 /*
606 607 * This routine handles each T_CONN_REQ message passed to udp. It
607 608 * associates a default destination address with the stream.
608 609 *
609 610 * After various error checks are completed, udp_connect() lays
610 611 * the target address and port into the composite header template.
611 612 * Then we ask IP for information, including a source address if we didn't
612 613 * already have one. Finally we send up the T_OK_ACK reply message.
613 614 */
614 615 static void
615 616 udp_tpi_connect(queue_t *q, mblk_t *mp)
616 617 {
617 618 conn_t *connp = Q_TO_CONN(q);
618 619 int error;
619 620 socklen_t len;
620 621 struct sockaddr *sa;
621 622 struct T_conn_req *tcr;
622 623 cred_t *cr;
623 624 pid_t pid;
624 625 /*
625 626 * All Solaris components should pass a db_credp
626 627 * for this TPI message, hence we ASSERT.
627 628 * But in case there is some other M_PROTO that looks
628 629 * like a TPI message sent by some other kernel
629 630 * component, we check and return an error.
630 631 */
631 632 cr = msg_getcred(mp, &pid);
632 633 ASSERT(cr != NULL);
633 634 if (cr == NULL) {
634 635 udp_err_ack(q, mp, TSYSERR, EINVAL);
635 636 return;
636 637 }
637 638
638 639 tcr = (struct T_conn_req *)mp->b_rptr;
639 640
640 641 /* A bit of sanity checking */
641 642 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
642 643 udp_err_ack(q, mp, TPROTO, 0);
643 644 return;
644 645 }
645 646
646 647 if (tcr->OPT_length != 0) {
647 648 udp_err_ack(q, mp, TBADOPT, 0);
648 649 return;
649 650 }
650 651
651 652 /*
652 653 * Determine packet type based on type of address passed in
653 654 * the request should contain an IPv4 or IPv6 address.
654 655 * Make sure that address family matches the type of
655 656 * family of the address passed down.
656 657 */
657 658 len = tcr->DEST_length;
658 659 switch (tcr->DEST_length) {
659 660 default:
660 661 udp_err_ack(q, mp, TBADADDR, 0);
661 662 return;
662 663
663 664 case sizeof (sin_t):
664 665 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
665 666 sizeof (sin_t));
666 667 break;
667 668
668 669 case sizeof (sin6_t):
669 670 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
670 671 sizeof (sin6_t));
671 672 break;
672 673 }
673 674
674 675 error = proto_verify_ip_addr(connp->conn_family, sa, len);
675 676 if (error != 0) {
676 677 udp_err_ack(q, mp, TSYSERR, error);
677 678 return;
678 679 }
679 680
680 681 error = udp_do_connect(connp, sa, len, cr, pid);
681 682 if (error != 0) {
682 683 if (error < 0)
683 684 udp_err_ack(q, mp, -error, 0);
684 685 else
685 686 udp_err_ack(q, mp, TSYSERR, error);
686 687 } else {
687 688 mblk_t *mp1;
688 689 /*
689 690 * We have to send a connection confirmation to
690 691 * keep TLI happy.
691 692 */
692 693 if (connp->conn_family == AF_INET) {
693 694 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
694 695 sizeof (sin_t), NULL, 0);
695 696 } else {
696 697 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
697 698 sizeof (sin6_t), NULL, 0);
698 699 }
699 700 if (mp1 == NULL) {
700 701 udp_err_ack(q, mp, TSYSERR, ENOMEM);
701 702 return;
702 703 }
703 704
704 705 /*
705 706 * Send ok_ack for T_CONN_REQ
706 707 */
707 708 mp = mi_tpi_ok_ack_alloc(mp);
708 709 if (mp == NULL) {
709 710 /* Unable to reuse the T_CONN_REQ for the ack. */
710 711 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
711 712 return;
712 713 }
713 714
714 715 putnext(connp->conn_rq, mp);
715 716 putnext(connp->conn_rq, mp1);
716 717 }
717 718 }
718 719
719 720 static int
720 721 udp_tpi_close(queue_t *q, int flags)
721 722 {
722 723 conn_t *connp;
723 724
724 725 if (flags & SO_FALLBACK) {
725 726 /*
726 727 * stream is being closed while in fallback
727 728 * simply free the resources that were allocated
728 729 */
729 730 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
730 731 qprocsoff(q);
731 732 goto done;
732 733 }
733 734
734 735 connp = Q_TO_CONN(q);
735 736 udp_do_close(connp);
736 737 done:
737 738 q->q_ptr = WR(q)->q_ptr = NULL;
738 739 return (0);
739 740 }
740 741
741 742 static void
742 743 udp_close_free(conn_t *connp)
743 744 {
744 745 udp_t *udp = connp->conn_udp;
745 746
746 747 /* If there are any options associated with the stream, free them. */
747 748 if (udp->udp_recv_ipp.ipp_fields != 0)
748 749 ip_pkt_free(&udp->udp_recv_ipp);
749 750
750 751 /*
751 752 * Clear any fields which the kmem_cache constructor clears.
752 753 * Only udp_connp needs to be preserved.
753 754 * TBD: We should make this more efficient to avoid clearing
754 755 * everything.
755 756 */
756 757 ASSERT(udp->udp_connp == connp);
757 758 bzero(udp, sizeof (udp_t));
758 759 udp->udp_connp = connp;
759 760 }
760 761
761 762 static int
762 763 udp_do_disconnect(conn_t *connp)
763 764 {
764 765 udp_t *udp;
765 766 udp_fanout_t *udpf;
766 767 udp_stack_t *us;
767 768 int error;
768 769
769 770 udp = connp->conn_udp;
770 771 us = udp->udp_us;
771 772 mutex_enter(&connp->conn_lock);
772 773 if (udp->udp_state != TS_DATA_XFER) {
773 774 mutex_exit(&connp->conn_lock);
774 775 return (-TOUTSTATE);
775 776 }
776 777 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
777 778 us->us_bind_fanout_size)];
778 779 mutex_enter(&udpf->uf_lock);
779 780 if (connp->conn_mcbc_bind)
780 781 connp->conn_saddr_v6 = ipv6_all_zeros;
781 782 else
782 783 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
783 784 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
784 785 connp->conn_faddr_v6 = ipv6_all_zeros;
785 786 connp->conn_fport = 0;
786 787 udp->udp_state = TS_IDLE;
787 788 mutex_exit(&udpf->uf_lock);
788 789
789 790 /* Remove any remnants of mapped address binding */
790 791 if (connp->conn_family == AF_INET6)
791 792 connp->conn_ipversion = IPV6_VERSION;
792 793
793 794 connp->conn_v6lastdst = ipv6_all_zeros;
794 795 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
795 796 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
796 797 mutex_exit(&connp->conn_lock);
797 798 if (error != 0)
798 799 return (error);
799 800
800 801 /*
801 802 * Tell IP to remove the full binding and revert
802 803 * to the local address binding.
803 804 */
804 805 return (ip_laddr_fanout_insert(connp));
805 806 }
806 807
807 808 static void
808 809 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
809 810 {
810 811 conn_t *connp = Q_TO_CONN(q);
811 812 int error;
812 813
813 814 /*
814 815 * Allocate the largest primitive we need to send back
815 816 * T_error_ack is > than T_ok_ack
816 817 */
817 818 mp = reallocb(mp, sizeof (struct T_error_ack), 1);
818 819 if (mp == NULL) {
819 820 /* Unable to reuse the T_DISCON_REQ for the ack. */
820 821 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
821 822 return;
822 823 }
823 824
824 825 error = udp_do_disconnect(connp);
825 826
826 827 if (error != 0) {
827 828 if (error < 0) {
828 829 udp_err_ack(q, mp, -error, 0);
829 830 } else {
830 831 udp_err_ack(q, mp, TSYSERR, error);
831 832 }
832 833 } else {
833 834 mp = mi_tpi_ok_ack_alloc(mp);
834 835 ASSERT(mp != NULL);
835 836 qreply(q, mp);
836 837 }
837 838 }
838 839
839 840 int
840 841 udp_disconnect(conn_t *connp)
841 842 {
842 843 int error;
843 844
844 845 connp->conn_dgram_errind = B_FALSE;
845 846 error = udp_do_disconnect(connp);
846 847 if (error < 0)
847 848 error = proto_tlitosyserr(-error);
848 849
849 850 return (error);
850 851 }
851 852
852 853 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
853 854 static void
854 855 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
855 856 {
856 857 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
857 858 qreply(q, mp);
858 859 }
859 860
860 861 /* Shorthand to generate and send TPI error acks to our client */
861 862 static void
862 863 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
863 864 t_scalar_t t_error, int sys_error)
864 865 {
865 866 struct T_error_ack *teackp;
866 867
867 868 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
868 869 M_PCPROTO, T_ERROR_ACK)) != NULL) {
869 870 teackp = (struct T_error_ack *)mp->b_rptr;
870 871 teackp->ERROR_prim = primitive;
871 872 teackp->TLI_error = t_error;
872 873 teackp->UNIX_error = sys_error;
873 874 qreply(q, mp);
874 875 }
875 876 }
876 877
877 878 /* At minimum we need 4 bytes of UDP header */
878 879 #define ICMP_MIN_UDP_HDR 4
879 880
880 881 /*
881 882 * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
882 883 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
883 884 * Assumes that IP has pulled up everything up to and including the ICMP header.
884 885 */
885 886 /* ARGSUSED2 */
886 887 static void
887 888 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
888 889 {
889 890 conn_t *connp = (conn_t *)arg1;
890 891 icmph_t *icmph;
891 892 ipha_t *ipha;
892 893 int iph_hdr_length;
893 894 udpha_t *udpha;
894 895 sin_t sin;
895 896 sin6_t sin6;
896 897 mblk_t *mp1;
897 898 int error = 0;
898 899 udp_t *udp = connp->conn_udp;
899 900
900 901 ipha = (ipha_t *)mp->b_rptr;
901 902
902 903 ASSERT(OK_32PTR(mp->b_rptr));
903 904
904 905 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
905 906 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
906 907 udp_icmp_error_ipv6(connp, mp, ira);
907 908 return;
908 909 }
909 910 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
910 911
911 912 /* Skip past the outer IP and ICMP headers */
912 913 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
913 914 iph_hdr_length = ira->ira_ip_hdr_length;
914 915 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
915 916 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */
916 917
917 918 /* Skip past the inner IP and find the ULP header */
918 919 iph_hdr_length = IPH_HDR_LENGTH(ipha);
919 920 udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
920 921
921 922 switch (icmph->icmph_type) {
922 923 case ICMP_DEST_UNREACHABLE:
923 924 switch (icmph->icmph_code) {
924 925 case ICMP_FRAGMENTATION_NEEDED: {
925 926 ipha_t *ipha;
926 927 ip_xmit_attr_t *ixa;
927 928 /*
928 929 * IP has already adjusted the path MTU.
929 930 * But we need to adjust DF for IPv4.
930 931 */
931 932 if (connp->conn_ipversion != IPV4_VERSION)
932 933 break;
933 934
934 935 ixa = conn_get_ixa(connp, B_FALSE);
935 936 if (ixa == NULL || ixa->ixa_ire == NULL) {
936 937 /*
937 938 * Some other thread holds conn_ixa. We will
938 939 * redo this on the next ICMP too big.
939 940 */
940 941 if (ixa != NULL)
941 942 ixa_refrele(ixa);
942 943 break;
943 944 }
944 945 (void) ip_get_pmtu(ixa);
945 946
946 947 mutex_enter(&connp->conn_lock);
947 948 ipha = (ipha_t *)connp->conn_ht_iphc;
948 949 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
949 950 ipha->ipha_fragment_offset_and_flags |=
950 951 IPH_DF_HTONS;
951 952 } else {
952 953 ipha->ipha_fragment_offset_and_flags &=
953 954 ~IPH_DF_HTONS;
954 955 }
955 956 mutex_exit(&connp->conn_lock);
956 957 ixa_refrele(ixa);
957 958 break;
958 959 }
959 960 case ICMP_PORT_UNREACHABLE:
960 961 case ICMP_PROTOCOL_UNREACHABLE:
961 962 error = ECONNREFUSED;
962 963 break;
963 964 default:
964 965 /* Transient errors */
965 966 break;
966 967 }
967 968 break;
968 969 default:
969 970 /* Transient errors */
970 971 break;
971 972 }
972 973 if (error == 0) {
973 974 freemsg(mp);
974 975 return;
975 976 }
976 977
977 978 /*
978 979 * Deliver T_UDERROR_IND when the application has asked for it.
979 980 * The socket layer enables this automatically when connected.
980 981 */
981 982 if (!connp->conn_dgram_errind) {
982 983 freemsg(mp);
983 984 return;
984 985 }
985 986
986 987 switch (connp->conn_family) {
987 988 case AF_INET:
988 989 sin = sin_null;
989 990 sin.sin_family = AF_INET;
990 991 sin.sin_addr.s_addr = ipha->ipha_dst;
991 992 sin.sin_port = udpha->uha_dst_port;
992 993 if (IPCL_IS_NONSTR(connp)) {
993 994 mutex_enter(&connp->conn_lock);
994 995 if (udp->udp_state == TS_DATA_XFER) {
995 996 if (sin.sin_port == connp->conn_fport &&
996 997 sin.sin_addr.s_addr ==
997 998 connp->conn_faddr_v4) {
998 999 mutex_exit(&connp->conn_lock);
999 1000 (*connp->conn_upcalls->su_set_error)
1000 1001 (connp->conn_upper_handle, error);
1001 1002 goto done;
1002 1003 }
1003 1004 } else {
1004 1005 udp->udp_delayed_error = error;
1005 1006 *((sin_t *)&udp->udp_delayed_addr) = sin;
1006 1007 }
1007 1008 mutex_exit(&connp->conn_lock);
1008 1009 } else {
1009 1010 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1010 1011 NULL, 0, error);
1011 1012 if (mp1 != NULL)
1012 1013 putnext(connp->conn_rq, mp1);
1013 1014 }
1014 1015 break;
1015 1016 case AF_INET6:
1016 1017 sin6 = sin6_null;
1017 1018 sin6.sin6_family = AF_INET6;
1018 1019 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1019 1020 sin6.sin6_port = udpha->uha_dst_port;
1020 1021 if (IPCL_IS_NONSTR(connp)) {
1021 1022 mutex_enter(&connp->conn_lock);
1022 1023 if (udp->udp_state == TS_DATA_XFER) {
1023 1024 if (sin6.sin6_port == connp->conn_fport &&
1024 1025 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1025 1026 &connp->conn_faddr_v6)) {
1026 1027 mutex_exit(&connp->conn_lock);
1027 1028 (*connp->conn_upcalls->su_set_error)
1028 1029 (connp->conn_upper_handle, error);
1029 1030 goto done;
1030 1031 }
1031 1032 } else {
1032 1033 udp->udp_delayed_error = error;
1033 1034 *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1034 1035 }
1035 1036 mutex_exit(&connp->conn_lock);
1036 1037 } else {
1037 1038 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1038 1039 NULL, 0, error);
1039 1040 if (mp1 != NULL)
1040 1041 putnext(connp->conn_rq, mp1);
1041 1042 }
1042 1043 break;
1043 1044 }
1044 1045 done:
1045 1046 freemsg(mp);
1046 1047 }
1047 1048
1048 1049 /*
1049 1050 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1050 1051 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1051 1052 * Assumes that IP has pulled up all the extension headers as well as the
1052 1053 * ICMPv6 header.
1053 1054 */
1054 1055 static void
1055 1056 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1056 1057 {
1057 1058 icmp6_t *icmp6;
1058 1059 ip6_t *ip6h, *outer_ip6h;
1059 1060 uint16_t iph_hdr_length;
1060 1061 uint8_t *nexthdrp;
1061 1062 udpha_t *udpha;
1062 1063 sin6_t sin6;
1063 1064 mblk_t *mp1;
1064 1065 int error = 0;
1065 1066 udp_t *udp = connp->conn_udp;
1066 1067 udp_stack_t *us = udp->udp_us;
1067 1068
1068 1069 outer_ip6h = (ip6_t *)mp->b_rptr;
1069 1070 #ifdef DEBUG
1070 1071 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1071 1072 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1072 1073 else
1073 1074 iph_hdr_length = IPV6_HDR_LEN;
1074 1075 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1075 1076 #endif
1076 1077 /* Skip past the outer IP and ICMP headers */
1077 1078 iph_hdr_length = ira->ira_ip_hdr_length;
1078 1079 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1079 1080
1080 1081 /* Skip past the inner IP and find the ULP header */
1081 1082 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */
1082 1083 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1083 1084 freemsg(mp);
1084 1085 return;
1085 1086 }
1086 1087 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1087 1088
1088 1089 switch (icmp6->icmp6_type) {
1089 1090 case ICMP6_DST_UNREACH:
1090 1091 switch (icmp6->icmp6_code) {
1091 1092 case ICMP6_DST_UNREACH_NOPORT:
1092 1093 error = ECONNREFUSED;
1093 1094 break;
1094 1095 case ICMP6_DST_UNREACH_ADMIN:
1095 1096 case ICMP6_DST_UNREACH_NOROUTE:
1096 1097 case ICMP6_DST_UNREACH_BEYONDSCOPE:
1097 1098 case ICMP6_DST_UNREACH_ADDR:
1098 1099 /* Transient errors */
1099 1100 break;
1100 1101 default:
1101 1102 break;
1102 1103 }
1103 1104 break;
1104 1105 case ICMP6_PACKET_TOO_BIG: {
1105 1106 struct T_unitdata_ind *tudi;
1106 1107 struct T_opthdr *toh;
1107 1108 size_t udi_size;
1108 1109 mblk_t *newmp;
1109 1110 t_scalar_t opt_length = sizeof (struct T_opthdr) +
1110 1111 sizeof (struct ip6_mtuinfo);
1111 1112 sin6_t *sin6;
1112 1113 struct ip6_mtuinfo *mtuinfo;
1113 1114
1114 1115 /*
1115 1116 * If the application has requested to receive path mtu
1116 1117 * information, send up an empty message containing an
1117 1118 * IPV6_PATHMTU ancillary data item.
1118 1119 */
1119 1120 if (!connp->conn_ipv6_recvpathmtu)
1120 1121 break;
1121 1122
1122 1123 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1123 1124 opt_length;
1124 1125 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1125 1126 UDPS_BUMP_MIB(us, udpInErrors);
1126 1127 break;
1127 1128 }
1128 1129
1129 1130 /*
1130 1131 * newmp->b_cont is left to NULL on purpose. This is an
1131 1132 * empty message containing only ancillary data.
1132 1133 */
1133 1134 newmp->b_datap->db_type = M_PROTO;
1134 1135 tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1135 1136 newmp->b_wptr = (uchar_t *)tudi + udi_size;
1136 1137 tudi->PRIM_type = T_UNITDATA_IND;
1137 1138 tudi->SRC_length = sizeof (sin6_t);
1138 1139 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1139 1140 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1140 1141 tudi->OPT_length = opt_length;
1141 1142
1142 1143 sin6 = (sin6_t *)&tudi[1];
1143 1144 bzero(sin6, sizeof (sin6_t));
1144 1145 sin6->sin6_family = AF_INET6;
1145 1146 sin6->sin6_addr = connp->conn_faddr_v6;
1146 1147
1147 1148 toh = (struct T_opthdr *)&sin6[1];
1148 1149 toh->level = IPPROTO_IPV6;
1149 1150 toh->name = IPV6_PATHMTU;
1150 1151 toh->len = opt_length;
1151 1152 toh->status = 0;
1152 1153
1153 1154 mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1154 1155 bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1155 1156 mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1156 1157 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1157 1158 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1158 1159 /*
1159 1160 * We've consumed everything we need from the original
1160 1161 * message. Free it, then send our empty message.
1161 1162 */
1162 1163 freemsg(mp);
1163 1164 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1164 1165 return;
1165 1166 }
1166 1167 case ICMP6_TIME_EXCEEDED:
1167 1168 /* Transient errors */
1168 1169 break;
1169 1170 case ICMP6_PARAM_PROB:
1170 1171 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1171 1172 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1172 1173 (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1173 1174 (uchar_t *)nexthdrp) {
1174 1175 error = ECONNREFUSED;
1175 1176 break;
1176 1177 }
1177 1178 break;
1178 1179 }
1179 1180 if (error == 0) {
1180 1181 freemsg(mp);
1181 1182 return;
1182 1183 }
1183 1184
1184 1185 /*
1185 1186 * Deliver T_UDERROR_IND when the application has asked for it.
1186 1187 * The socket layer enables this automatically when connected.
1187 1188 */
1188 1189 if (!connp->conn_dgram_errind) {
1189 1190 freemsg(mp);
1190 1191 return;
1191 1192 }
1192 1193
1193 1194 sin6 = sin6_null;
1194 1195 sin6.sin6_family = AF_INET6;
1195 1196 sin6.sin6_addr = ip6h->ip6_dst;
1196 1197 sin6.sin6_port = udpha->uha_dst_port;
1197 1198 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1198 1199
1199 1200 if (IPCL_IS_NONSTR(connp)) {
1200 1201 mutex_enter(&connp->conn_lock);
1201 1202 if (udp->udp_state == TS_DATA_XFER) {
1202 1203 if (sin6.sin6_port == connp->conn_fport &&
1203 1204 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1204 1205 &connp->conn_faddr_v6)) {
1205 1206 mutex_exit(&connp->conn_lock);
1206 1207 (*connp->conn_upcalls->su_set_error)
1207 1208 (connp->conn_upper_handle, error);
1208 1209 goto done;
1209 1210 }
1210 1211 } else {
1211 1212 udp->udp_delayed_error = error;
1212 1213 *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1213 1214 }
1214 1215 mutex_exit(&connp->conn_lock);
1215 1216 } else {
1216 1217 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1217 1218 NULL, 0, error);
1218 1219 if (mp1 != NULL)
1219 1220 putnext(connp->conn_rq, mp1);
1220 1221 }
1221 1222 done:
1222 1223 freemsg(mp);
1223 1224 }
1224 1225
1225 1226 /*
1226 1227 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput.
1227 1228 * The local address is filled in if endpoint is bound. The remote address
1228 1229 * is filled in if remote address has been precified ("connected endpoint")
1229 1230 * (The concept of connected CLTS sockets is alien to published TPI
1230 1231 * but we support it anyway).
1231 1232 */
1232 1233 static void
1233 1234 udp_addr_req(queue_t *q, mblk_t *mp)
1234 1235 {
1235 1236 struct sockaddr *sa;
1236 1237 mblk_t *ackmp;
1237 1238 struct T_addr_ack *taa;
1238 1239 udp_t *udp = Q_TO_UDP(q);
1239 1240 conn_t *connp = udp->udp_connp;
1240 1241 uint_t addrlen;
1241 1242
1242 1243 /* Make it large enough for worst case */
1243 1244 ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1244 1245 2 * sizeof (sin6_t), 1);
1245 1246 if (ackmp == NULL) {
1246 1247 udp_err_ack(q, mp, TSYSERR, ENOMEM);
1247 1248 return;
1248 1249 }
1249 1250 taa = (struct T_addr_ack *)ackmp->b_rptr;
1250 1251
1251 1252 bzero(taa, sizeof (struct T_addr_ack));
1252 1253 ackmp->b_wptr = (uchar_t *)&taa[1];
1253 1254
1254 1255 taa->PRIM_type = T_ADDR_ACK;
1255 1256 ackmp->b_datap->db_type = M_PCPROTO;
1256 1257
1257 1258 if (connp->conn_family == AF_INET)
1258 1259 addrlen = sizeof (sin_t);
1259 1260 else
1260 1261 addrlen = sizeof (sin6_t);
1261 1262
1262 1263 mutex_enter(&connp->conn_lock);
1263 1264 /*
1264 1265 * Note: Following code assumes 32 bit alignment of basic
1265 1266 * data structures like sin_t and struct T_addr_ack.
1266 1267 */
1267 1268 if (udp->udp_state != TS_UNBND) {
1268 1269 /*
1269 1270 * Fill in local address first
1270 1271 */
1271 1272 taa->LOCADDR_offset = sizeof (*taa);
1272 1273 taa->LOCADDR_length = addrlen;
1273 1274 sa = (struct sockaddr *)&taa[1];
1274 1275 (void) conn_getsockname(connp, sa, &addrlen);
1275 1276 ackmp->b_wptr += addrlen;
1276 1277 }
1277 1278 if (udp->udp_state == TS_DATA_XFER) {
1278 1279 /*
1279 1280 * connected, fill remote address too
1280 1281 */
1281 1282 taa->REMADDR_length = addrlen;
1282 1283 /* assumed 32-bit alignment */
1283 1284 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1284 1285 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1285 1286 (void) conn_getpeername(connp, sa, &addrlen);
1286 1287 ackmp->b_wptr += addrlen;
1287 1288 }
1288 1289 mutex_exit(&connp->conn_lock);
1289 1290 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1290 1291 qreply(q, ackmp);
1291 1292 }
1292 1293
1293 1294 static void
1294 1295 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1295 1296 {
1296 1297 conn_t *connp = udp->udp_connp;
1297 1298
1298 1299 if (connp->conn_family == AF_INET) {
1299 1300 *tap = udp_g_t_info_ack_ipv4;
1300 1301 } else {
1301 1302 *tap = udp_g_t_info_ack_ipv6;
1302 1303 }
1303 1304 tap->CURRENT_state = udp->udp_state;
1304 1305 tap->OPT_size = udp_max_optsize;
1305 1306 }
1306 1307
1307 1308 static void
1308 1309 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1309 1310 t_uscalar_t cap_bits1)
1310 1311 {
1311 1312 tcap->CAP_bits1 = 0;
1312 1313
1313 1314 if (cap_bits1 & TC1_INFO) {
1314 1315 udp_copy_info(&tcap->INFO_ack, udp);
1315 1316 tcap->CAP_bits1 |= TC1_INFO;
1316 1317 }
1317 1318 }
1318 1319
1319 1320 /*
1320 1321 * This routine responds to T_CAPABILITY_REQ messages. It is called by
1321 1322 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from
1322 1323 * udp_g_t_info_ack. The current state of the stream is copied from
1323 1324 * udp_state.
1324 1325 */
1325 1326 static void
1326 1327 udp_capability_req(queue_t *q, mblk_t *mp)
1327 1328 {
1328 1329 t_uscalar_t cap_bits1;
1329 1330 struct T_capability_ack *tcap;
1330 1331 udp_t *udp = Q_TO_UDP(q);
1331 1332
1332 1333 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1333 1334
1334 1335 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1335 1336 mp->b_datap->db_type, T_CAPABILITY_ACK);
1336 1337 if (!mp)
1337 1338 return;
1338 1339
1339 1340 tcap = (struct T_capability_ack *)mp->b_rptr;
1340 1341 udp_do_capability_ack(udp, tcap, cap_bits1);
1341 1342
1342 1343 qreply(q, mp);
1343 1344 }
1344 1345
1345 1346 /*
1346 1347 * This routine responds to T_INFO_REQ messages. It is called by udp_wput.
1347 1348 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1348 1349 * The current state of the stream is copied from udp_state.
1349 1350 */
1350 1351 static void
1351 1352 udp_info_req(queue_t *q, mblk_t *mp)
1352 1353 {
1353 1354 udp_t *udp = Q_TO_UDP(q);
1354 1355
1355 1356 /* Create a T_INFO_ACK message. */
1356 1357 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1357 1358 T_INFO_ACK);
1358 1359 if (!mp)
1359 1360 return;
1360 1361 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1361 1362 qreply(q, mp);
1362 1363 }
1363 1364
1364 1365 /* For /dev/udp aka AF_INET open */
1365 1366 static int
1366 1367 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1367 1368 {
1368 1369 return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1369 1370 }
1370 1371
1371 1372 /* For /dev/udp6 aka AF_INET6 open */
1372 1373 static int
1373 1374 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1374 1375 {
1375 1376 return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1376 1377 }
1377 1378
1378 1379 /*
1379 1380 * This is the open routine for udp. It allocates a udp_t structure for
1380 1381 * the stream and, on the first open of the module, creates an ND table.
1381 1382 */
1382 1383 static int
1383 1384 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1384 1385 boolean_t isv6)
1385 1386 {
1386 1387 udp_t *udp;
1387 1388 conn_t *connp;
1388 1389 dev_t conn_dev;
1389 1390 vmem_t *minor_arena;
1390 1391 int err;
1391 1392
1392 1393 /* If the stream is already open, return immediately. */
1393 1394 if (q->q_ptr != NULL)
1394 1395 return (0);
1395 1396
1396 1397 if (sflag == MODOPEN)
1397 1398 return (EINVAL);
1398 1399
1399 1400 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1400 1401 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1401 1402 minor_arena = ip_minor_arena_la;
1402 1403 } else {
1403 1404 /*
1404 1405 * Either minor numbers in the large arena were exhausted
1405 1406 * or a non socket application is doing the open.
1406 1407 * Try to allocate from the small arena.
1407 1408 */
1408 1409 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1409 1410 return (EBUSY);
1410 1411
1411 1412 minor_arena = ip_minor_arena_sa;
1412 1413 }
1413 1414
1414 1415 if (flag & SO_FALLBACK) {
1415 1416 /*
1416 1417 * Non streams socket needs a stream to fallback to
1417 1418 */
1418 1419 RD(q)->q_ptr = (void *)conn_dev;
1419 1420 WR(q)->q_qinfo = &udp_fallback_sock_winit;
1420 1421 WR(q)->q_ptr = (void *)minor_arena;
1421 1422 qprocson(q);
1422 1423 return (0);
1423 1424 }
1424 1425
1425 1426 connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1426 1427 if (connp == NULL) {
1427 1428 inet_minor_free(minor_arena, conn_dev);
1428 1429 return (err);
1429 1430 }
1430 1431 udp = connp->conn_udp;
1431 1432
1432 1433 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1433 1434 connp->conn_dev = conn_dev;
1434 1435 connp->conn_minor_arena = minor_arena;
1435 1436
1436 1437 /*
1437 1438 * Initialize the udp_t structure for this stream.
1438 1439 */
1439 1440 q->q_ptr = connp;
1440 1441 WR(q)->q_ptr = connp;
1441 1442 connp->conn_rq = q;
1442 1443 connp->conn_wq = WR(q);
1443 1444
1444 1445 /*
1445 1446 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1446 1447 * need to lock anything.
1447 1448 */
1448 1449 ASSERT(connp->conn_proto == IPPROTO_UDP);
1449 1450 ASSERT(connp->conn_udp == udp);
1450 1451 ASSERT(udp->udp_connp == connp);
1451 1452
1452 1453 if (flag & SO_SOCKSTR) {
1453 1454 udp->udp_issocket = B_TRUE;
1454 1455 }
1455 1456
1456 1457 WR(q)->q_hiwat = connp->conn_sndbuf;
1457 1458 WR(q)->q_lowat = connp->conn_sndlowat;
1458 1459
1459 1460 qprocson(q);
1460 1461
1461 1462 /* Set the Stream head write offset and high watermark. */
1462 1463 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1463 1464 (void) proto_set_rx_hiwat(q, connp,
1464 1465 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1465 1466
1466 1467 mutex_enter(&connp->conn_lock);
1467 1468 connp->conn_state_flags &= ~CONN_INCIPIENT;
1468 1469 mutex_exit(&connp->conn_lock);
1469 1470 return (0);
1470 1471 }
1471 1472
1472 1473 /*
1473 1474 * Which UDP options OK to set through T_UNITDATA_REQ...
1474 1475 */
1475 1476 /* ARGSUSED */
1476 1477 static boolean_t
1477 1478 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1478 1479 {
1479 1480 return (B_TRUE);
1480 1481 }
1481 1482
1482 1483 /*
1483 1484 * This routine gets default values of certain options whose default
1484 1485 * values are maintained by protcol specific code
1485 1486 */
1486 1487 int
1487 1488 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1488 1489 {
1489 1490 udp_t *udp = Q_TO_UDP(q);
1490 1491 udp_stack_t *us = udp->udp_us;
1491 1492 int *i1 = (int *)ptr;
1492 1493
1493 1494 switch (level) {
1494 1495 case IPPROTO_IP:
1495 1496 switch (name) {
1496 1497 case IP_MULTICAST_TTL:
1497 1498 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1498 1499 return (sizeof (uchar_t));
1499 1500 case IP_MULTICAST_LOOP:
1500 1501 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1501 1502 return (sizeof (uchar_t));
1502 1503 }
1503 1504 break;
1504 1505 case IPPROTO_IPV6:
1505 1506 switch (name) {
1506 1507 case IPV6_MULTICAST_HOPS:
1507 1508 *i1 = IP_DEFAULT_MULTICAST_TTL;
1508 1509 return (sizeof (int));
1509 1510 case IPV6_MULTICAST_LOOP:
1510 1511 *i1 = IP_DEFAULT_MULTICAST_LOOP;
1511 1512 return (sizeof (int));
1512 1513 case IPV6_UNICAST_HOPS:
1513 1514 *i1 = us->us_ipv6_hoplimit;
1514 1515 return (sizeof (int));
1515 1516 }
1516 1517 break;
1517 1518 }
1518 1519 return (-1);
1519 1520 }
1520 1521
1521 1522 /*
1522 1523 * This routine retrieves the current status of socket options.
1523 1524 * It returns the size of the option retrieved, or -1.
1524 1525 */
1525 1526 int
1526 1527 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1527 1528 uchar_t *ptr)
1528 1529 {
1529 1530 int *i1 = (int *)ptr;
1530 1531 udp_t *udp = connp->conn_udp;
1531 1532 int len;
1532 1533 conn_opt_arg_t coas;
1533 1534 int retval;
1534 1535
1535 1536 coas.coa_connp = connp;
1536 1537 coas.coa_ixa = connp->conn_ixa;
1537 1538 coas.coa_ipp = &connp->conn_xmit_ipp;
1538 1539 coas.coa_ancillary = B_FALSE;
1539 1540 coas.coa_changed = 0;
1540 1541
1541 1542 /*
1542 1543 * We assume that the optcom framework has checked for the set
1543 1544 * of levels and names that are supported, hence we don't worry
1544 1545 * about rejecting based on that.
1545 1546 * First check for UDP specific handling, then pass to common routine.
1546 1547 */
1547 1548 switch (level) {
1548 1549 case IPPROTO_IP:
1549 1550 /*
1550 1551 * Only allow IPv4 option processing on IPv4 sockets.
1551 1552 */
1552 1553 if (connp->conn_family != AF_INET)
1553 1554 return (-1);
1554 1555
1555 1556 switch (name) {
1556 1557 case IP_OPTIONS:
1557 1558 case T_IP_OPTIONS:
1558 1559 mutex_enter(&connp->conn_lock);
1559 1560 if (!(udp->udp_recv_ipp.ipp_fields &
1560 1561 IPPF_IPV4_OPTIONS)) {
1561 1562 mutex_exit(&connp->conn_lock);
1562 1563 return (0);
1563 1564 }
1564 1565
1565 1566 len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1566 1567 ASSERT(len != 0);
1567 1568 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1568 1569 mutex_exit(&connp->conn_lock);
1569 1570 return (len);
1570 1571 }
1571 1572 break;
1572 1573 case IPPROTO_UDP:
1573 1574 switch (name) {
1574 1575 case UDP_NAT_T_ENDPOINT:
1575 1576 mutex_enter(&connp->conn_lock);
1576 1577 *i1 = udp->udp_nat_t_endpoint;
1577 1578 mutex_exit(&connp->conn_lock);
1578 1579 return (sizeof (int));
1579 1580 case UDP_RCVHDR:
1580 1581 mutex_enter(&connp->conn_lock);
1581 1582 *i1 = udp->udp_rcvhdr ? 1 : 0;
1582 1583 mutex_exit(&connp->conn_lock);
1583 1584 return (sizeof (int));
1584 1585 }
1585 1586 }
1586 1587 mutex_enter(&connp->conn_lock);
1587 1588 retval = conn_opt_get(&coas, level, name, ptr);
1588 1589 mutex_exit(&connp->conn_lock);
1589 1590 return (retval);
1590 1591 }
1591 1592
1592 1593 /*
1593 1594 * This routine retrieves the current status of socket options.
1594 1595 * It returns the size of the option retrieved, or -1.
1595 1596 */
1596 1597 int
1597 1598 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1598 1599 {
1599 1600 conn_t *connp = Q_TO_CONN(q);
1600 1601 int err;
1601 1602
1602 1603 err = udp_opt_get(connp, level, name, ptr);
1603 1604 return (err);
1604 1605 }
1605 1606
1606 1607 /*
1607 1608 * This routine sets socket options.
1608 1609 */
1609 1610 int
1610 1611 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1611 1612 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1612 1613 {
1613 1614 conn_t *connp = coa->coa_connp;
1614 1615 ip_xmit_attr_t *ixa = coa->coa_ixa;
1615 1616 udp_t *udp = connp->conn_udp;
1616 1617 udp_stack_t *us = udp->udp_us;
1617 1618 int *i1 = (int *)invalp;
1618 1619 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1619 1620 int error;
1620 1621
1621 1622 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1622 1623 /*
1623 1624 * First do UDP specific sanity checks and handle UDP specific
1624 1625 * options. Note that some IPPROTO_UDP options are handled
1625 1626 * by conn_opt_set.
1626 1627 */
1627 1628 switch (level) {
1628 1629 case SOL_SOCKET:
1629 1630 switch (name) {
1630 1631 case SO_SNDBUF:
1631 1632 if (*i1 > us->us_max_buf) {
1632 1633 return (ENOBUFS);
1633 1634 }
1634 1635 break;
1635 1636 case SO_RCVBUF:
1636 1637 if (*i1 > us->us_max_buf) {
1637 1638 return (ENOBUFS);
1638 1639 }
1639 1640 break;
1640 1641
1641 1642 case SCM_UCRED: {
1642 1643 struct ucred_s *ucr;
1643 1644 cred_t *newcr;
1644 1645 ts_label_t *tsl;
1645 1646
1646 1647 /*
1647 1648 * Only sockets that have proper privileges and are
1648 1649 * bound to MLPs will have any other value here, so
1649 1650 * this implicitly tests for privilege to set label.
1650 1651 */
1651 1652 if (connp->conn_mlp_type == mlptSingle)
1652 1653 break;
1653 1654
1654 1655 ucr = (struct ucred_s *)invalp;
1655 1656 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1656 1657 ucr->uc_labeloff < sizeof (*ucr) ||
1657 1658 ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1658 1659 return (EINVAL);
1659 1660 if (!checkonly) {
1660 1661 /*
1661 1662 * Set ixa_tsl to the new label.
1662 1663 * We assume that crgetzoneid doesn't change
1663 1664 * as part of the SCM_UCRED.
1664 1665 */
1665 1666 ASSERT(cr != NULL);
1666 1667 if ((tsl = crgetlabel(cr)) == NULL)
1667 1668 return (EINVAL);
1668 1669 newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1669 1670 tsl->tsl_doi, KM_NOSLEEP);
1670 1671 if (newcr == NULL)
1671 1672 return (ENOSR);
1672 1673 ASSERT(newcr->cr_label != NULL);
1673 1674 /*
1674 1675 * Move the hold on the cr_label to ixa_tsl by
1675 1676 * setting cr_label to NULL. Then release newcr.
1676 1677 */
1677 1678 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1678 1679 ixa->ixa_flags |= IXAF_UCRED_TSL;
1679 1680 newcr->cr_label = NULL;
1680 1681 crfree(newcr);
1681 1682 coa->coa_changed |= COA_HEADER_CHANGED;
1682 1683 coa->coa_changed |= COA_WROFF_CHANGED;
1683 1684 }
1684 1685 /* Fully handled this option. */
1685 1686 return (0);
1686 1687 }
1687 1688 }
1688 1689 break;
1689 1690 case IPPROTO_UDP:
1690 1691 switch (name) {
1691 1692 case UDP_NAT_T_ENDPOINT:
1692 1693 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1693 1694 return (error);
1694 1695 }
1695 1696
1696 1697 /*
1697 1698 * Use conn_family instead so we can avoid ambiguitites
1698 1699 * with AF_INET6 sockets that may switch from IPv4
1699 1700 * to IPv6.
1700 1701 */
1701 1702 if (connp->conn_family != AF_INET) {
1702 1703 return (EAFNOSUPPORT);
1703 1704 }
1704 1705
1705 1706 if (!checkonly) {
1706 1707 mutex_enter(&connp->conn_lock);
1707 1708 udp->udp_nat_t_endpoint = onoff;
1708 1709 mutex_exit(&connp->conn_lock);
1709 1710 coa->coa_changed |= COA_HEADER_CHANGED;
1710 1711 coa->coa_changed |= COA_WROFF_CHANGED;
1711 1712 }
1712 1713 /* Fully handled this option. */
1713 1714 return (0);
1714 1715 case UDP_RCVHDR:
1715 1716 mutex_enter(&connp->conn_lock);
1716 1717 udp->udp_rcvhdr = onoff;
1717 1718 mutex_exit(&connp->conn_lock);
1718 1719 return (0);
1719 1720 }
1720 1721 break;
1721 1722 }
1722 1723 error = conn_opt_set(coa, level, name, inlen, invalp,
1723 1724 checkonly, cr);
1724 1725 return (error);
1725 1726 }
1726 1727
1727 1728 /*
1728 1729 * This routine sets socket options.
1729 1730 */
1730 1731 int
1731 1732 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1732 1733 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1733 1734 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1734 1735 {
1735 1736 udp_t *udp = connp->conn_udp;
1736 1737 int err;
1737 1738 conn_opt_arg_t coas, *coa;
1738 1739 boolean_t checkonly;
1739 1740 udp_stack_t *us = udp->udp_us;
1740 1741
1741 1742 switch (optset_context) {
1742 1743 case SETFN_OPTCOM_CHECKONLY:
1743 1744 checkonly = B_TRUE;
1744 1745 /*
1745 1746 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1746 1747 * inlen != 0 implies value supplied and
1747 1748 * we have to "pretend" to set it.
1748 1749 * inlen == 0 implies that there is no
1749 1750 * value part in T_CHECK request and just validation
1750 1751 * done elsewhere should be enough, we just return here.
1751 1752 */
1752 1753 if (inlen == 0) {
1753 1754 *outlenp = 0;
1754 1755 return (0);
1755 1756 }
1756 1757 break;
1757 1758 case SETFN_OPTCOM_NEGOTIATE:
1758 1759 checkonly = B_FALSE;
1759 1760 break;
1760 1761 case SETFN_UD_NEGOTIATE:
1761 1762 case SETFN_CONN_NEGOTIATE:
1762 1763 checkonly = B_FALSE;
1763 1764 /*
1764 1765 * Negotiating local and "association-related" options
1765 1766 * through T_UNITDATA_REQ.
1766 1767 *
1767 1768 * Following routine can filter out ones we do not
1768 1769 * want to be "set" this way.
1769 1770 */
1770 1771 if (!udp_opt_allow_udr_set(level, name)) {
1771 1772 *outlenp = 0;
1772 1773 return (EINVAL);
1773 1774 }
1774 1775 break;
1775 1776 default:
1776 1777 /*
1777 1778 * We should never get here
1778 1779 */
1779 1780 *outlenp = 0;
1780 1781 return (EINVAL);
1781 1782 }
1782 1783
1783 1784 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1784 1785 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1785 1786
1786 1787 if (thisdg_attrs != NULL) {
1787 1788 /* Options from T_UNITDATA_REQ */
1788 1789 coa = (conn_opt_arg_t *)thisdg_attrs;
1789 1790 ASSERT(coa->coa_connp == connp);
1790 1791 ASSERT(coa->coa_ixa != NULL);
1791 1792 ASSERT(coa->coa_ipp != NULL);
1792 1793 ASSERT(coa->coa_ancillary);
1793 1794 } else {
1794 1795 coa = &coas;
1795 1796 coas.coa_connp = connp;
1796 1797 /* Get a reference on conn_ixa to prevent concurrent mods */
1797 1798 coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1798 1799 if (coas.coa_ixa == NULL) {
1799 1800 *outlenp = 0;
1800 1801 return (ENOMEM);
1801 1802 }
1802 1803 coas.coa_ipp = &connp->conn_xmit_ipp;
1803 1804 coas.coa_ancillary = B_FALSE;
1804 1805 coas.coa_changed = 0;
1805 1806 }
1806 1807
1807 1808 err = udp_do_opt_set(coa, level, name, inlen, invalp,
1808 1809 cr, checkonly);
1809 1810 if (err != 0) {
1810 1811 errout:
1811 1812 if (!coa->coa_ancillary)
1812 1813 ixa_refrele(coa->coa_ixa);
1813 1814 *outlenp = 0;
1814 1815 return (err);
1815 1816 }
1816 1817 /* Handle DHCPINIT here outside of lock */
1817 1818 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1818 1819 uint_t ifindex;
1819 1820 ill_t *ill;
1820 1821
1821 1822 ifindex = *(uint_t *)invalp;
1822 1823 if (ifindex == 0) {
1823 1824 ill = NULL;
1824 1825 } else {
1825 1826 ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1826 1827 coa->coa_ixa->ixa_ipst);
1827 1828 if (ill == NULL) {
1828 1829 err = ENXIO;
1829 1830 goto errout;
1830 1831 }
1831 1832
1832 1833 mutex_enter(&ill->ill_lock);
1833 1834 if (ill->ill_state_flags & ILL_CONDEMNED) {
1834 1835 mutex_exit(&ill->ill_lock);
1835 1836 ill_refrele(ill);
1836 1837 err = ENXIO;
1837 1838 goto errout;
1838 1839 }
1839 1840 if (IS_VNI(ill)) {
1840 1841 mutex_exit(&ill->ill_lock);
1841 1842 ill_refrele(ill);
1842 1843 err = EINVAL;
1843 1844 goto errout;
1844 1845 }
1845 1846 }
1846 1847 mutex_enter(&connp->conn_lock);
1847 1848
1848 1849 if (connp->conn_dhcpinit_ill != NULL) {
1849 1850 /*
1850 1851 * We've locked the conn so conn_cleanup_ill()
1851 1852 * cannot clear conn_dhcpinit_ill -- so it's
1852 1853 * safe to access the ill.
1853 1854 */
1854 1855 ill_t *oill = connp->conn_dhcpinit_ill;
1855 1856
1856 1857 ASSERT(oill->ill_dhcpinit != 0);
1857 1858 atomic_dec_32(&oill->ill_dhcpinit);
1858 1859 ill_set_inputfn(connp->conn_dhcpinit_ill);
1859 1860 connp->conn_dhcpinit_ill = NULL;
1860 1861 }
1861 1862
1862 1863 if (ill != NULL) {
1863 1864 connp->conn_dhcpinit_ill = ill;
1864 1865 atomic_inc_32(&ill->ill_dhcpinit);
1865 1866 ill_set_inputfn(ill);
1866 1867 mutex_exit(&connp->conn_lock);
1867 1868 mutex_exit(&ill->ill_lock);
1868 1869 ill_refrele(ill);
1869 1870 } else {
1870 1871 mutex_exit(&connp->conn_lock);
1871 1872 }
1872 1873 }
1873 1874
1874 1875 /*
1875 1876 * Common case of OK return with outval same as inval.
1876 1877 */
1877 1878 if (invalp != outvalp) {
1878 1879 /* don't trust bcopy for identical src/dst */
1879 1880 (void) bcopy(invalp, outvalp, inlen);
1880 1881 }
1881 1882 *outlenp = inlen;
1882 1883
1883 1884 /*
1884 1885 * If this was not ancillary data, then we rebuild the headers,
1885 1886 * update the IRE/NCE, and IPsec as needed.
1886 1887 * Since the label depends on the destination we go through
1887 1888 * ip_set_destination first.
1888 1889 */
1889 1890 if (coa->coa_ancillary) {
1890 1891 return (0);
1891 1892 }
1892 1893
1893 1894 if (coa->coa_changed & COA_ROUTE_CHANGED) {
1894 1895 in6_addr_t saddr, faddr, nexthop;
1895 1896 in_port_t fport;
1896 1897
1897 1898 /*
1898 1899 * We clear lastdst to make sure we pick up the change
1899 1900 * next time sending.
1900 1901 * If we are connected we re-cache the information.
1901 1902 * We ignore errors to preserve BSD behavior.
1902 1903 * Note that we don't redo IPsec policy lookup here
1903 1904 * since the final destination (or source) didn't change.
1904 1905 */
1905 1906 mutex_enter(&connp->conn_lock);
1906 1907 connp->conn_v6lastdst = ipv6_all_zeros;
1907 1908
1908 1909 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
1909 1910 &connp->conn_faddr_v6, &nexthop);
1910 1911 saddr = connp->conn_saddr_v6;
1911 1912 faddr = connp->conn_faddr_v6;
1912 1913 fport = connp->conn_fport;
1913 1914 mutex_exit(&connp->conn_lock);
1914 1915
1915 1916 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
1916 1917 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
1917 1918 (void) ip_attr_connect(connp, coa->coa_ixa,
1918 1919 &saddr, &faddr, &nexthop, fport, NULL, NULL,
1919 1920 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
1920 1921 }
1921 1922 }
1922 1923
1923 1924 ixa_refrele(coa->coa_ixa);
1924 1925
1925 1926 if (coa->coa_changed & COA_HEADER_CHANGED) {
1926 1927 /*
1927 1928 * Rebuild the header template if we are connected.
1928 1929 * Otherwise clear conn_v6lastdst so we rebuild the header
1929 1930 * in the data path.
1930 1931 */
1931 1932 mutex_enter(&connp->conn_lock);
1932 1933 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1933 1934 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1934 1935 err = udp_build_hdr_template(connp,
1935 1936 &connp->conn_saddr_v6, &connp->conn_faddr_v6,
1936 1937 connp->conn_fport, connp->conn_flowinfo);
1937 1938 if (err != 0) {
1938 1939 mutex_exit(&connp->conn_lock);
1939 1940 return (err);
1940 1941 }
1941 1942 } else {
1942 1943 connp->conn_v6lastdst = ipv6_all_zeros;
1943 1944 }
1944 1945 mutex_exit(&connp->conn_lock);
1945 1946 }
1946 1947 if (coa->coa_changed & COA_RCVBUF_CHANGED) {
1947 1948 (void) proto_set_rx_hiwat(connp->conn_rq, connp,
1948 1949 connp->conn_rcvbuf);
1949 1950 }
1950 1951 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1951 1952 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1952 1953 }
1953 1954 if (coa->coa_changed & COA_WROFF_CHANGED) {
1954 1955 /* Increase wroff if needed */
1955 1956 uint_t wroff;
1956 1957
1957 1958 mutex_enter(&connp->conn_lock);
1958 1959 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
1959 1960 if (udp->udp_nat_t_endpoint)
1960 1961 wroff += sizeof (uint32_t);
1961 1962 if (wroff > connp->conn_wroff) {
1962 1963 connp->conn_wroff = wroff;
1963 1964 mutex_exit(&connp->conn_lock);
1964 1965 (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
1965 1966 } else {
1966 1967 mutex_exit(&connp->conn_lock);
1967 1968 }
1968 1969 }
1969 1970 return (err);
1970 1971 }
1971 1972
1972 1973 /* This routine sets socket options. */
1973 1974 int
1974 1975 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
1975 1976 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
1976 1977 void *thisdg_attrs, cred_t *cr)
1977 1978 {
1978 1979 conn_t *connp = Q_TO_CONN(q);
1979 1980 int error;
1980 1981
1981 1982 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
1982 1983 outlenp, outvalp, thisdg_attrs, cr);
1983 1984 return (error);
1984 1985 }
1985 1986
1986 1987 /*
1987 1988 * Setup IP and UDP headers.
1988 1989 * Returns NULL on allocation failure, in which case data_mp is freed.
1989 1990 */
1990 1991 mblk_t *
1991 1992 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
1992 1993 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
1993 1994 uint32_t flowinfo, mblk_t *data_mp, int *errorp)
1994 1995 {
1995 1996 mblk_t *mp;
1996 1997 udpha_t *udpha;
1997 1998 udp_stack_t *us = connp->conn_netstack->netstack_udp;
1998 1999 uint_t data_len;
1999 2000 uint32_t cksum;
2000 2001 udp_t *udp = connp->conn_udp;
2001 2002 boolean_t insert_spi = udp->udp_nat_t_endpoint;
2002 2003 uint_t ulp_hdr_len;
2003 2004
2004 2005 data_len = msgdsize(data_mp);
2005 2006 ulp_hdr_len = UDPH_SIZE;
2006 2007 if (insert_spi)
2007 2008 ulp_hdr_len += sizeof (uint32_t);
2008 2009
2009 2010 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2010 2011 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2011 2012 if (mp == NULL) {
2012 2013 ASSERT(*errorp != 0);
2013 2014 return (NULL);
2014 2015 }
2015 2016
2016 2017 data_len += ulp_hdr_len;
2017 2018 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2018 2019
2019 2020 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2020 2021 udpha->uha_src_port = connp->conn_lport;
2021 2022 udpha->uha_dst_port = dstport;
2022 2023 udpha->uha_checksum = 0;
2023 2024 udpha->uha_length = htons(data_len);
2024 2025
2025 2026 /*
2026 2027 * If there was a routing option/header then conn_prepend_hdr
2027 2028 * has massaged it and placed the pseudo-header checksum difference
2028 2029 * in the cksum argument.
2029 2030 *
2030 2031 * Setup header length and prepare for ULP checksum done in IP.
2031 2032 *
2032 2033 * We make it easy for IP to include our pseudo header
2033 2034 * by putting our length in uha_checksum.
2034 2035 * The IP source, destination, and length have already been set by
2035 2036 * conn_prepend_hdr.
2036 2037 */
2037 2038 cksum += data_len;
2038 2039 cksum = (cksum >> 16) + (cksum & 0xFFFF);
2039 2040 ASSERT(cksum < 0x10000);
2040 2041
2041 2042 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2042 2043 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2043 2044
2044 2045 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2045 2046
2046 2047 /* IP does the checksum if uha_checksum is non-zero */
2047 2048 if (us->us_do_checksum) {
2048 2049 if (cksum == 0)
2049 2050 udpha->uha_checksum = 0xffff;
2050 2051 else
2051 2052 udpha->uha_checksum = htons(cksum);
2052 2053 } else {
2053 2054 udpha->uha_checksum = 0;
2054 2055 }
2055 2056 } else {
2056 2057 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2057 2058
2058 2059 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2059 2060 if (cksum == 0)
2060 2061 udpha->uha_checksum = 0xffff;
2061 2062 else
2062 2063 udpha->uha_checksum = htons(cksum);
2063 2064 }
2064 2065
2065 2066 /* Insert all-0s SPI now. */
2066 2067 if (insert_spi)
2067 2068 *((uint32_t *)(udpha + 1)) = 0;
2068 2069
2069 2070 return (mp);
2070 2071 }
2071 2072
2072 2073 static int
2073 2074 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2074 2075 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2075 2076 {
2076 2077 udpha_t *udpha;
2077 2078 int error;
2078 2079
2079 2080 ASSERT(MUTEX_HELD(&connp->conn_lock));
2080 2081 /*
2081 2082 * We clear lastdst to make sure we don't use the lastdst path
2082 2083 * next time sending since we might not have set v6dst yet.
2083 2084 */
2084 2085 connp->conn_v6lastdst = ipv6_all_zeros;
2085 2086
2086 2087 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2087 2088 flowinfo);
2088 2089 if (error != 0)
2089 2090 return (error);
2090 2091
2091 2092 /*
2092 2093 * Any routing header/option has been massaged. The checksum difference
2093 2094 * is stored in conn_sum.
2094 2095 */
2095 2096 udpha = (udpha_t *)connp->conn_ht_ulp;
2096 2097 udpha->uha_src_port = connp->conn_lport;
2097 2098 udpha->uha_dst_port = dstport;
2098 2099 udpha->uha_checksum = 0;
2099 2100 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */
2100 2101 return (0);
2101 2102 }
2102 2103
2103 2104 static mblk_t *
2104 2105 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2105 2106 {
2106 2107 ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2107 2108 if (IPCL_IS_NONSTR(udp->udp_connp)) {
2108 2109 /*
2109 2110 * fallback has started but messages have not been moved yet
2110 2111 */
2111 2112 if (udp->udp_fallback_queue_head == NULL) {
2112 2113 ASSERT(udp->udp_fallback_queue_tail == NULL);
2113 2114 udp->udp_fallback_queue_head = mp;
2114 2115 udp->udp_fallback_queue_tail = mp;
2115 2116 } else {
2116 2117 ASSERT(udp->udp_fallback_queue_tail != NULL);
2117 2118 udp->udp_fallback_queue_tail->b_next = mp;
2118 2119 udp->udp_fallback_queue_tail = mp;
2119 2120 }
2120 2121 return (NULL);
2121 2122 } else {
2122 2123 /*
2123 2124 * Fallback completed, let the caller putnext() the mblk.
2124 2125 */
2125 2126 return (mp);
2126 2127 }
2127 2128 }
2128 2129
2129 2130 /*
2130 2131 * Deliver data to ULP. In case we have a socket, and it's falling back to
2131 2132 * TPI, then we'll queue the mp for later processing.
2132 2133 */
2133 2134 static void
2134 2135 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2135 2136 {
2136 2137 if (IPCL_IS_NONSTR(connp)) {
2137 2138 udp_t *udp = connp->conn_udp;
2138 2139 int error;
2139 2140
2140 2141 ASSERT(len == msgdsize(mp));
2141 2142 if ((*connp->conn_upcalls->su_recv)
2142 2143 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2143 2144 mutex_enter(&udp->udp_recv_lock);
2144 2145 if (error == ENOSPC) {
2145 2146 /*
2146 2147 * let's confirm while holding the lock
2147 2148 */
2148 2149 if ((*connp->conn_upcalls->su_recv)
2149 2150 (connp->conn_upper_handle, NULL, 0, 0,
2150 2151 &error, NULL) < 0) {
2151 2152 ASSERT(error == ENOSPC);
2152 2153 if (error == ENOSPC) {
2153 2154 connp->conn_flow_cntrld =
2154 2155 B_TRUE;
2155 2156 }
2156 2157 }
2157 2158 mutex_exit(&udp->udp_recv_lock);
2158 2159 } else {
2159 2160 ASSERT(error == EOPNOTSUPP);
2160 2161 mp = udp_queue_fallback(udp, mp);
2161 2162 mutex_exit(&udp->udp_recv_lock);
2162 2163 if (mp != NULL)
2163 2164 putnext(connp->conn_rq, mp);
2164 2165 }
2165 2166 }
2166 2167 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2167 2168 } else {
2168 2169 if (is_system_labeled()) {
2169 2170 ASSERT(ira->ira_cred != NULL);
2170 2171 /*
2171 2172 * Attach ira_cred for protocols above UDP such as RPC.
2172 2173 * NOPID leaves db_cpid unchanged.
2173 2174 */
2174 2175 mblk_setcred(mp, ira->ira_cred, NOPID);
2175 2176 }
2176 2177
2177 2178 putnext(connp->conn_rq, mp);
2178 2179 }
2179 2180 }
2180 2181
2181 2182 /*
2182 2183 * This is the inbound data path.
2183 2184 * IP has already pulled up the IP plus UDP headers and verified alignment
2184 2185 * etc.
2185 2186 */
2186 2187 /* ARGSUSED2 */
2187 2188 static void
2188 2189 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2189 2190 {
2190 2191 conn_t *connp = (conn_t *)arg1;
2191 2192 struct T_unitdata_ind *tudi;
2192 2193 uchar_t *rptr; /* Pointer to IP header */
2193 2194 int hdr_length; /* Length of IP+UDP headers */
2194 2195 int udi_size; /* Size of T_unitdata_ind */
2195 2196 int pkt_len;
2196 2197 udp_t *udp;
2197 2198 udpha_t *udpha;
2198 2199 ip_pkt_t ipps;
2199 2200 ip6_t *ip6h;
2200 2201 mblk_t *mp1;
2201 2202 uint32_t udp_ipv4_options_len;
2202 2203 crb_t recv_ancillary;
2203 2204 udp_stack_t *us;
2204 2205
2205 2206 ASSERT(connp->conn_flags & IPCL_UDPCONN);
2206 2207
2207 2208 udp = connp->conn_udp;
2208 2209 us = udp->udp_us;
2209 2210 rptr = mp->b_rptr;
2210 2211
2211 2212 ASSERT(DB_TYPE(mp) == M_DATA);
2212 2213 ASSERT(OK_32PTR(rptr));
2213 2214 ASSERT(ira->ira_pktlen == msgdsize(mp));
2214 2215 pkt_len = ira->ira_pktlen;
2215 2216
2216 2217 /*
2217 2218 * Get a snapshot of these and allow other threads to change
2218 2219 * them after that. We need the same recv_ancillary when determining
2219 2220 * the size as when adding the ancillary data items.
2220 2221 */
2221 2222 mutex_enter(&connp->conn_lock);
2222 2223 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2223 2224 recv_ancillary = connp->conn_recv_ancillary;
2224 2225 mutex_exit(&connp->conn_lock);
2225 2226
2226 2227 hdr_length = ira->ira_ip_hdr_length;
2227 2228
2228 2229 /*
2229 2230 * IP inspected the UDP header thus all of it must be in the mblk.
2230 2231 * UDP length check is performed for IPv6 packets and IPv4 packets
2231 2232 * to check if the size of the packet as specified
2232 2233 * by the UDP header is the same as the length derived from the IP
2233 2234 * header.
2234 2235 */
2235 2236 udpha = (udpha_t *)(rptr + hdr_length);
2236 2237 if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2237 2238 goto tossit;
2238 2239
2239 2240 hdr_length += UDPH_SIZE;
2240 2241 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */
2241 2242
2242 2243 /* Initialize regardless of IP version */
2243 2244 ipps.ipp_fields = 0;
2244 2245
2245 2246 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2246 2247 udp_ipv4_options_len > 0) &&
2247 2248 connp->conn_family == AF_INET) {
2248 2249 int err;
2249 2250
2250 2251 /*
2251 2252 * Record/update udp_recv_ipp with the lock
2252 2253 * held. Not needed for AF_INET6 sockets
2253 2254 * since they don't support a getsockopt of IP_OPTIONS.
2254 2255 */
2255 2256 mutex_enter(&connp->conn_lock);
2256 2257 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2257 2258 B_TRUE);
2258 2259 if (err != 0) {
2259 2260 /* Allocation failed. Drop packet */
2260 2261 mutex_exit(&connp->conn_lock);
2261 2262 freemsg(mp);
2262 2263 UDPS_BUMP_MIB(us, udpInErrors);
2263 2264 return;
2264 2265 }
2265 2266 mutex_exit(&connp->conn_lock);
2266 2267 }
2267 2268
2268 2269 if (recv_ancillary.crb_all != 0) {
2269 2270 /*
2270 2271 * Record packet information in the ip_pkt_t
2271 2272 */
2272 2273 if (ira->ira_flags & IRAF_IS_IPV4) {
2273 2274 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2274 2275 ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2275 2276 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2276 2277 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2277 2278
2278 2279 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2279 2280 } else {
2280 2281 uint8_t nexthdrp;
2281 2282
2282 2283 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2283 2284 /*
2284 2285 * IPv6 packets can only be received by applications
2285 2286 * that are prepared to receive IPv6 addresses.
2286 2287 * The IP fanout must ensure this.
2287 2288 */
2288 2289 ASSERT(connp->conn_family == AF_INET6);
2289 2290
2290 2291 ip6h = (ip6_t *)rptr;
2291 2292
2292 2293 /* We don't care about the length, but need the ipp */
2293 2294 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2294 2295 &nexthdrp);
2295 2296 ASSERT(hdr_length == ira->ira_ip_hdr_length);
2296 2297 /* Restore */
2297 2298 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2298 2299 ASSERT(nexthdrp == IPPROTO_UDP);
2299 2300 }
2300 2301 }
2301 2302
2302 2303 /*
2303 2304 * This is the inbound data path. Packets are passed upstream as
2304 2305 * T_UNITDATA_IND messages.
2305 2306 */
2306 2307 if (connp->conn_family == AF_INET) {
2307 2308 sin_t *sin;
2308 2309
2309 2310 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2310 2311
2311 2312 /*
2312 2313 * Normally only send up the source address.
2313 2314 * If any ancillary data items are wanted we add those.
2314 2315 */
2315 2316 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2316 2317 if (recv_ancillary.crb_all != 0) {
2317 2318 udi_size += conn_recvancillary_size(connp,
2318 2319 recv_ancillary, ira, mp, &ipps);
2319 2320 }
2320 2321
2321 2322 /* Allocate a message block for the T_UNITDATA_IND structure. */
2322 2323 mp1 = allocb(udi_size, BPRI_MED);
2323 2324 if (mp1 == NULL) {
2324 2325 freemsg(mp);
2325 2326 UDPS_BUMP_MIB(us, udpInErrors);
2326 2327 return;
2327 2328 }
2328 2329 mp1->b_cont = mp;
2329 2330 mp1->b_datap->db_type = M_PROTO;
2330 2331 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2331 2332 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2332 2333 tudi->PRIM_type = T_UNITDATA_IND;
2333 2334 tudi->SRC_length = sizeof (sin_t);
2334 2335 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2335 2336 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2336 2337 sizeof (sin_t);
2337 2338 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2338 2339 tudi->OPT_length = udi_size;
2339 2340 sin = (sin_t *)&tudi[1];
2340 2341 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2341 2342 sin->sin_port = udpha->uha_src_port;
2342 2343 sin->sin_family = connp->conn_family;
2343 2344 *(uint32_t *)&sin->sin_zero[0] = 0;
2344 2345 *(uint32_t *)&sin->sin_zero[4] = 0;
2345 2346
2346 2347 /*
2347 2348 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
2348 2349 * IP_RECVTTL has been set.
2349 2350 */
2350 2351 if (udi_size != 0) {
2351 2352 conn_recvancillary_add(connp, recv_ancillary, ira,
2352 2353 &ipps, (uchar_t *)&sin[1], udi_size);
2353 2354 }
2354 2355 } else {
2355 2356 sin6_t *sin6;
2356 2357
2357 2358 /*
2358 2359 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2359 2360 *
2360 2361 * Normally we only send up the address. If receiving of any
2361 2362 * optional receive side information is enabled, we also send
2362 2363 * that up as options.
2363 2364 */
2364 2365 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2365 2366
2366 2367 if (recv_ancillary.crb_all != 0) {
2367 2368 udi_size += conn_recvancillary_size(connp,
2368 2369 recv_ancillary, ira, mp, &ipps);
2369 2370 }
2370 2371
2371 2372 mp1 = allocb(udi_size, BPRI_MED);
2372 2373 if (mp1 == NULL) {
2373 2374 freemsg(mp);
2374 2375 UDPS_BUMP_MIB(us, udpInErrors);
2375 2376 return;
2376 2377 }
2377 2378 mp1->b_cont = mp;
2378 2379 mp1->b_datap->db_type = M_PROTO;
2379 2380 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2380 2381 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2381 2382 tudi->PRIM_type = T_UNITDATA_IND;
2382 2383 tudi->SRC_length = sizeof (sin6_t);
2383 2384 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2384 2385 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2385 2386 sizeof (sin6_t);
2386 2387 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2387 2388 tudi->OPT_length = udi_size;
2388 2389 sin6 = (sin6_t *)&tudi[1];
2389 2390 if (ira->ira_flags & IRAF_IS_IPV4) {
2390 2391 in6_addr_t v6dst;
2391 2392
2392 2393 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2393 2394 &sin6->sin6_addr);
2394 2395 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2395 2396 &v6dst);
2396 2397 sin6->sin6_flowinfo = 0;
2397 2398 sin6->sin6_scope_id = 0;
2398 2399 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2399 2400 IPCL_ZONEID(connp), us->us_netstack);
2400 2401 } else {
2401 2402 ip6h = (ip6_t *)rptr;
2402 2403
2403 2404 sin6->sin6_addr = ip6h->ip6_src;
2404 2405 /* No sin6_flowinfo per API */
2405 2406 sin6->sin6_flowinfo = 0;
2406 2407 /* For link-scope pass up scope id */
2407 2408 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2408 2409 sin6->sin6_scope_id = ira->ira_ruifindex;
2409 2410 else
2410 2411 sin6->sin6_scope_id = 0;
2411 2412 sin6->__sin6_src_id = ip_srcid_find_addr(
2412 2413 &ip6h->ip6_dst, IPCL_ZONEID(connp),
2413 2414 us->us_netstack);
2414 2415 }
2415 2416 sin6->sin6_port = udpha->uha_src_port;
2416 2417 sin6->sin6_family = connp->conn_family;
2417 2418
2418 2419 if (udi_size != 0) {
2419 2420 conn_recvancillary_add(connp, recv_ancillary, ira,
2420 2421 &ipps, (uchar_t *)&sin6[1], udi_size);
2421 2422 }
2422 2423 }
2423 2424
2424 2425 /*
2425 2426 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2426 2427 * loopback traffic).
2427 2428 */
2428 2429 DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
2429 2430 void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);
2430 2431
2431 2432 /* Walk past the headers unless IP_RECVHDR was set. */
2432 2433 if (!udp->udp_rcvhdr) {
2433 2434 mp->b_rptr = rptr + hdr_length;
2434 2435 pkt_len -= hdr_length;
2435 2436 }
2436 2437
2437 2438 UDPS_BUMP_MIB(us, udpHCInDatagrams);
2438 2439 udp_ulp_recv(connp, mp1, pkt_len, ira);
2439 2440 return;
2440 2441
2441 2442 tossit:
2442 2443 freemsg(mp);
2443 2444 UDPS_BUMP_MIB(us, udpInErrors);
2444 2445 }
2445 2446
2446 2447 /*
2447 2448 * This routine creates a T_UDERROR_IND message and passes it upstream.
2448 2449 * The address and options are copied from the T_UNITDATA_REQ message
2449 2450 * passed in mp. This message is freed.
2450 2451 */
2451 2452 static void
2452 2453 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2453 2454 {
2454 2455 struct T_unitdata_req *tudr;
2455 2456 mblk_t *mp1;
2456 2457 uchar_t *destaddr;
2457 2458 t_scalar_t destlen;
2458 2459 uchar_t *optaddr;
2459 2460 t_scalar_t optlen;
2460 2461
2461 2462 if ((mp->b_wptr < mp->b_rptr) ||
2462 2463 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2463 2464 goto done;
2464 2465 }
2465 2466 tudr = (struct T_unitdata_req *)mp->b_rptr;
2466 2467 destaddr = mp->b_rptr + tudr->DEST_offset;
2467 2468 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2468 2469 destaddr + tudr->DEST_length < mp->b_rptr ||
2469 2470 destaddr + tudr->DEST_length > mp->b_wptr) {
2470 2471 goto done;
2471 2472 }
2472 2473 optaddr = mp->b_rptr + tudr->OPT_offset;
2473 2474 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2474 2475 optaddr + tudr->OPT_length < mp->b_rptr ||
2475 2476 optaddr + tudr->OPT_length > mp->b_wptr) {
2476 2477 goto done;
2477 2478 }
2478 2479 destlen = tudr->DEST_length;
2479 2480 optlen = tudr->OPT_length;
2480 2481
2481 2482 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2482 2483 (char *)optaddr, optlen, err);
2483 2484 if (mp1 != NULL)
2484 2485 qreply(q, mp1);
2485 2486
2486 2487 done:
2487 2488 freemsg(mp);
2488 2489 }
2489 2490
2490 2491 /*
2491 2492 * This routine removes a port number association from a stream. It
2492 2493 * is called by udp_wput to handle T_UNBIND_REQ messages.
2493 2494 */
2494 2495 static void
2495 2496 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2496 2497 {
2497 2498 conn_t *connp = Q_TO_CONN(q);
2498 2499 int error;
2499 2500
2500 2501 error = udp_do_unbind(connp);
2501 2502 if (error) {
2502 2503 if (error < 0)
2503 2504 udp_err_ack(q, mp, -error, 0);
2504 2505 else
2505 2506 udp_err_ack(q, mp, TSYSERR, error);
2506 2507 return;
2507 2508 }
2508 2509
2509 2510 mp = mi_tpi_ok_ack_alloc(mp);
2510 2511 ASSERT(mp != NULL);
2511 2512 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2512 2513 qreply(q, mp);
2513 2514 }
2514 2515
2515 2516 /*
2516 2517 * Don't let port fall into the privileged range.
2517 2518 * Since the extra privileged ports can be arbitrary we also
2518 2519 * ensure that we exclude those from consideration.
2519 2520 * us->us_epriv_ports is not sorted thus we loop over it until
2520 2521 * there are no changes.
2521 2522 */
2522 2523 static in_port_t
2523 2524 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2524 2525 {
2525 2526 int i, bump;
2526 2527 in_port_t nextport;
2527 2528 boolean_t restart = B_FALSE;
2528 2529 udp_stack_t *us = udp->udp_us;
2529 2530
2530 2531 if (random && udp_random_anon_port != 0) {
2531 2532 (void) random_get_pseudo_bytes((uint8_t *)&port,
2532 2533 sizeof (in_port_t));
2533 2534 /*
2534 2535 * Unless changed by a sys admin, the smallest anon port
2535 2536 * is 32768 and the largest anon port is 65535. It is
2536 2537 * very likely (50%) for the random port to be smaller
2537 2538 * than the smallest anon port. When that happens,
2538 2539 * add port % (anon port range) to the smallest anon
2539 2540 * port to get the random port. It should fall into the
2540 2541 * valid anon port range.
2541 2542 */
2542 2543 if ((port < us->us_smallest_anon_port) ||
2543 2544 (port > us->us_largest_anon_port)) {
2544 2545 if (us->us_smallest_anon_port ==
2545 2546 us->us_largest_anon_port) {
2546 2547 bump = 0;
2547 2548 } else {
2548 2549 bump = port % (us->us_largest_anon_port -
2549 2550 us->us_smallest_anon_port);
2550 2551 }
2551 2552
2552 2553 port = us->us_smallest_anon_port + bump;
2553 2554 }
2554 2555 }
2555 2556
2556 2557 retry:
2557 2558 if (port < us->us_smallest_anon_port)
2558 2559 port = us->us_smallest_anon_port;
2559 2560
2560 2561 if (port > us->us_largest_anon_port) {
2561 2562 port = us->us_smallest_anon_port;
2562 2563 if (restart)
2563 2564 return (0);
2564 2565 restart = B_TRUE;
2565 2566 }
2566 2567
2567 2568 if (port < us->us_smallest_nonpriv_port)
2568 2569 port = us->us_smallest_nonpriv_port;
2569 2570
2570 2571 for (i = 0; i < us->us_num_epriv_ports; i++) {
2571 2572 if (port == us->us_epriv_ports[i]) {
2572 2573 port++;
2573 2574 /*
2574 2575 * Make sure that the port is in the
2575 2576 * valid range.
2576 2577 */
2577 2578 goto retry;
2578 2579 }
2579 2580 }
2580 2581
2581 2582 if (is_system_labeled() &&
2582 2583 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2583 2584 port, IPPROTO_UDP, B_TRUE)) != 0) {
2584 2585 port = nextport;
2585 2586 goto retry;
2586 2587 }
2587 2588
2588 2589 return (port);
2589 2590 }
2590 2591
2591 2592 /*
2592 2593 * Handle T_UNITDATA_REQ with options, for both IPv4 and IPv6.
2593 2594 * Either tudr_mp or msg is set. If tudr_mp is set we take ancillary data
2594 2595 * from the TPI options, otherwise we take it from msg_control.
2595 2596 * If both sin and sin6 are NULL it is a connected socket; we use conn_faddr.
2596 2597 * Always consumes mp; never consumes tudr_mp.
2597 2598 */
2598 2599 static int
2599 2600 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2600 2601 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2601 2602 {
2602 2603 udp_t *udp = connp->conn_udp;
2603 2604 udp_stack_t *us = udp->udp_us;
2604 2605 int error;
2605 2606 ip_xmit_attr_t *ixa;
2606 2607 ip_pkt_t *ipp;
2607 2608 in6_addr_t v6src;
2608 2609 in6_addr_t v6dst;
2609 2610 in6_addr_t v6nexthop;
2610 2611 in_port_t dstport;
2611 2612 uint32_t flowinfo;
2612 2613 uint_t srcid;
2613 2614 int is_absreq_failure = 0;
2614 2615 conn_opt_arg_t coas, *coa;
2615 2616
2616 2617 ASSERT(tudr_mp != NULL || msg != NULL);
2617 2618
2618 2619 /*
2619 2620 * Get ixa before checking state to handle a disconnect race.
2620 2621 *
2621 2622 * We need an exclusive copy of conn_ixa since the ancillary data
2622 2623 * options might modify it. That copy has no pointers hence we
2623 2624 * need to set them up once we've parsed the ancillary data.
2624 2625 */
2625 2626 ixa = conn_get_ixa_exclusive(connp);
2626 2627 if (ixa == NULL) {
2627 2628 UDPS_BUMP_MIB(us, udpOutErrors);
2628 2629 freemsg(mp);
2629 2630 return (ENOMEM);
2630 2631 }
2631 2632 ASSERT(cr != NULL);
2632 2633 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2633 2634 ixa->ixa_cred = cr;
2634 2635 ixa->ixa_cpid = pid;
2635 2636 if (is_system_labeled()) {
2636 2637 /* We need to restart with a label based on the cred */
2637 2638 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2638 2639 }
2639 2640
2640 2641 /* In case previous destination was multicast or multirt */
2641 2642 ip_attr_newdst(ixa);
2642 2643
2643 2644 /* Get a copy of conn_xmit_ipp since the options might change it */
2644 2645 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2645 2646 if (ipp == NULL) {
2646 2647 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2647 2648 ixa->ixa_cred = connp->conn_cred; /* Restore */
2648 2649 ixa->ixa_cpid = connp->conn_cpid;
2649 2650 ixa_refrele(ixa);
2650 2651 UDPS_BUMP_MIB(us, udpOutErrors);
2651 2652 freemsg(mp);
2652 2653 return (ENOMEM);
2653 2654 }
2654 2655 mutex_enter(&connp->conn_lock);
2655 2656 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2656 2657 mutex_exit(&connp->conn_lock);
2657 2658 if (error != 0) {
2658 2659 UDPS_BUMP_MIB(us, udpOutErrors);
2659 2660 freemsg(mp);
2660 2661 goto done;
2661 2662 }
2662 2663
2663 2664 /*
2664 2665 * Parse the options and update ixa and ipp as a result.
2665 2666 * Note that ixa_tsl can be updated if SCM_UCRED.
2666 2667 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2667 2668 */
2668 2669
2669 2670 coa = &coas;
2670 2671 coa->coa_connp = connp;
2671 2672 coa->coa_ixa = ixa;
2672 2673 coa->coa_ipp = ipp;
2673 2674 coa->coa_ancillary = B_TRUE;
2674 2675 coa->coa_changed = 0;
2675 2676
2676 2677 if (msg != NULL) {
2677 2678 error = process_auxiliary_options(connp, msg->msg_control,
2678 2679 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2679 2680 } else {
2680 2681 struct T_unitdata_req *tudr;
2681 2682
2682 2683 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2683 2684 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2684 2685 error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2685 2686 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2686 2687 coa, &is_absreq_failure);
2687 2688 }
2688 2689 if (error != 0) {
2689 2690 /*
2690 2691 * Note: No special action needed in this
2691 2692 * module for "is_absreq_failure"
2692 2693 */
2693 2694 freemsg(mp);
2694 2695 UDPS_BUMP_MIB(us, udpOutErrors);
2695 2696 goto done;
2696 2697 }
2697 2698 ASSERT(is_absreq_failure == 0);
2698 2699
2699 2700 mutex_enter(&connp->conn_lock);
2700 2701 /*
2701 2702 * If laddr is unspecified then we look at sin6_src_id.
2702 2703 * We will give precedence to a source address set with IPV6_PKTINFO
2703 2704 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2704 2705 * want ip_attr_connect to select a source (since it can fail) when
2705 2706 * IPV6_PKTINFO is specified.
2706 2707 * If this doesn't result in a source address then we get a source
|
↓ open down ↓ |
2673 lines elided |
↑ open up ↑ |
2707 2708 * from ip_attr_connect() below.
2708 2709 */
2709 2710 v6src = connp->conn_saddr_v6;
2710 2711 if (sin != NULL) {
2711 2712 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2712 2713 dstport = sin->sin_port;
2713 2714 flowinfo = 0;
2714 2715 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2715 2716 ixa->ixa_flags |= IXAF_IS_IPV4;
2716 2717 } else if (sin6 != NULL) {
2718 + boolean_t v4mapped;
2719 +
2717 2720 v6dst = sin6->sin6_addr;
2718 2721 dstport = sin6->sin6_port;
2719 2722 flowinfo = sin6->sin6_flowinfo;
2720 2723 srcid = sin6->__sin6_src_id;
2721 2724 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2722 2725 ixa->ixa_scopeid = sin6->sin6_scope_id;
2723 2726 ixa->ixa_flags |= IXAF_SCOPEID_SET;
2724 2727 } else {
2725 2728 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2726 2729 }
2727 - if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2728 - ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2729 - connp->conn_netstack);
2730 - }
2731 - if (IN6_IS_ADDR_V4MAPPED(&v6dst))
2730 + v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
2731 + if (v4mapped)
2732 2732 ixa->ixa_flags |= IXAF_IS_IPV4;
2733 2733 else
2734 2734 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2735 + if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2736 + if (ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2737 + v4mapped, connp->conn_netstack)) {
2738 + /* Mismatch - v4mapped/v6 specified by srcid. */
2739 + mutex_exit(&connp->conn_lock);
2740 + error = EADDRNOTAVAIL;
2741 + goto failed; /* Does freemsg() and mib. */
2742 + }
2743 + }
2735 2744 } else {
2736 2745 /* Connected case */
2737 2746 v6dst = connp->conn_faddr_v6;
2738 2747 dstport = connp->conn_fport;
2739 2748 flowinfo = connp->conn_flowinfo;
2740 2749 }
2741 2750 mutex_exit(&connp->conn_lock);
2742 2751
2743 2752 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2744 2753 if (ipp->ipp_fields & IPPF_ADDR) {
2745 2754 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2746 2755 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2747 2756 v6src = ipp->ipp_addr;
2748 2757 } else {
2749 2758 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2750 2759 v6src = ipp->ipp_addr;
2751 2760 }
2752 2761 }
2753 2762
2754 2763 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
2755 2764 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2756 2765 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2757 2766
2758 2767 switch (error) {
2759 2768 case 0:
2760 2769 break;
2761 2770 case EADDRNOTAVAIL:
2762 2771 /*
2763 2772 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2764 2773 * Don't have the application see that errno
2765 2774 */
2766 2775 error = ENETUNREACH;
2767 2776 goto failed;
2768 2777 case ENETDOWN:
2769 2778 /*
2770 2779 * Have !ipif_addr_ready address; drop packet silently
2771 2780 * until we can get applications to not send until we
2772 2781 * are ready.
2773 2782 */
2774 2783 error = 0;
2775 2784 goto failed;
2776 2785 case EHOSTUNREACH:
2777 2786 case ENETUNREACH:
2778 2787 if (ixa->ixa_ire != NULL) {
2779 2788 /*
2780 2789 * Let conn_ip_output/ire_send_noroute return
2781 2790 * the error and send any local ICMP error.
2782 2791 */
2783 2792 error = 0;
2784 2793 break;
2785 2794 }
2786 2795 /* FALLTHRU */
2787 2796 default:
2788 2797 failed:
2789 2798 freemsg(mp);
2790 2799 UDPS_BUMP_MIB(us, udpOutErrors);
2791 2800 goto done;
2792 2801 }
2793 2802
2794 2803 /*
2795 2804 * We might be going to a different destination than last time,
2796 2805 * thus check that TX allows the communication and compute any
2797 2806 * needed label.
2798 2807 *
2799 2808 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2800 2809 * don't have to worry about concurrent threads.
2801 2810 */
2802 2811 if (is_system_labeled()) {
2803 2812 /* Using UDP MLP requires SCM_UCRED from user */
2804 2813 if (connp->conn_mlp_type != mlptSingle &&
2805 2814 !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2806 2815 UDPS_BUMP_MIB(us, udpOutErrors);
2807 2816 error = ECONNREFUSED;
2808 2817 freemsg(mp);
2809 2818 goto done;
2810 2819 }
2811 2820 /*
2812 2821 * Check whether Trusted Solaris policy allows communication
2813 2822 * with this host, and pretend that the destination is
2814 2823 * unreachable if not.
2815 2824 * Compute any needed label and place it in ipp_label_v4/v6.
2816 2825 *
2817 2826 * Later conn_build_hdr_template/conn_prepend_hdr takes
2818 2827 * ipp_label_v4/v6 to form the packet.
2819 2828 *
2820 2829 * Tsol note: We have ipp structure local to this thread so
2821 2830 * no locking is needed.
2822 2831 */
2823 2832 error = conn_update_label(connp, ixa, &v6dst, ipp);
2824 2833 if (error != 0) {
2825 2834 freemsg(mp);
2826 2835 UDPS_BUMP_MIB(us, udpOutErrors);
2827 2836 goto done;
2828 2837 }
2829 2838 }
2830 2839 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2831 2840 flowinfo, mp, &error);
2832 2841 if (mp == NULL) {
2833 2842 ASSERT(error != 0);
2834 2843 UDPS_BUMP_MIB(us, udpOutErrors);
2835 2844 goto done;
2836 2845 }
2837 2846 if (ixa->ixa_pktlen > IP_MAXPACKET) {
2838 2847 error = EMSGSIZE;
2839 2848 UDPS_BUMP_MIB(us, udpOutErrors);
2840 2849 freemsg(mp);
2841 2850 goto done;
2842 2851 }
2843 2852 /* We're done. Pass the packet to ip. */
2844 2853 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2845 2854
2846 2855 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2847 2856 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2848 2857 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2849 2858
2850 2859 error = conn_ip_output(mp, ixa);
2851 2860 /* No udpOutErrors if an error since IP increases its error counter */
2852 2861 switch (error) {
2853 2862 case 0:
2854 2863 break;
2855 2864 case EWOULDBLOCK:
2856 2865 (void) ixa_check_drain_insert(connp, ixa);
2857 2866 error = 0;
2858 2867 break;
2859 2868 case EADDRNOTAVAIL:
2860 2869 /*
2861 2870 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2862 2871 * Don't have the application see that errno
2863 2872 */
2864 2873 error = ENETUNREACH;
2865 2874 /* FALLTHRU */
2866 2875 default:
2867 2876 mutex_enter(&connp->conn_lock);
2868 2877 /*
2869 2878 * Clear the source and v6lastdst so we call ip_attr_connect
2870 2879 * for the next packet and try to pick a better source.
2871 2880 */
2872 2881 if (connp->conn_mcbc_bind)
2873 2882 connp->conn_saddr_v6 = ipv6_all_zeros;
2874 2883 else
2875 2884 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
2876 2885 connp->conn_v6lastdst = ipv6_all_zeros;
2877 2886 mutex_exit(&connp->conn_lock);
2878 2887 break;
2879 2888 }
2880 2889 done:
2881 2890 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2882 2891 ixa->ixa_cred = connp->conn_cred; /* Restore */
2883 2892 ixa->ixa_cpid = connp->conn_cpid;
2884 2893 ixa_refrele(ixa);
2885 2894 ip_pkt_free(ipp);
2886 2895 kmem_free(ipp, sizeof (*ipp));
2887 2896 return (error);
2888 2897 }
2889 2898
2890 2899 /*
2891 2900 * Handle sending an M_DATA for a connected socket.
2892 2901 * Handles both IPv4 and IPv6.
2893 2902 */
2894 2903 static int
2895 2904 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
2896 2905 {
2897 2906 udp_t *udp = connp->conn_udp;
2898 2907 udp_stack_t *us = udp->udp_us;
2899 2908 int error;
2900 2909 ip_xmit_attr_t *ixa;
2901 2910
2902 2911 /*
2903 2912 * If no other thread is using conn_ixa this just gets a reference to
2904 2913 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
2905 2914 */
2906 2915 ixa = conn_get_ixa(connp, B_FALSE);
2907 2916 if (ixa == NULL) {
2908 2917 UDPS_BUMP_MIB(us, udpOutErrors);
2909 2918 freemsg(mp);
2910 2919 return (ENOMEM);
2911 2920 }
2912 2921
2913 2922 ASSERT(cr != NULL);
2914 2923 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2915 2924 ixa->ixa_cred = cr;
2916 2925 ixa->ixa_cpid = pid;
2917 2926
2918 2927 mutex_enter(&connp->conn_lock);
2919 2928 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
2920 2929 connp->conn_fport, connp->conn_flowinfo, &error);
2921 2930
2922 2931 if (mp == NULL) {
2923 2932 ASSERT(error != 0);
2924 2933 mutex_exit(&connp->conn_lock);
2925 2934 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2926 2935 ixa->ixa_cred = connp->conn_cred; /* Restore */
2927 2936 ixa->ixa_cpid = connp->conn_cpid;
2928 2937 ixa_refrele(ixa);
2929 2938 UDPS_BUMP_MIB(us, udpOutErrors);
2930 2939 freemsg(mp);
2931 2940 return (error);
2932 2941 }
2933 2942
2934 2943 /*
2935 2944 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
2936 2945 * safe copy, then we need to fill in any pointers in it.
2937 2946 */
2938 2947 if (ixa->ixa_ire == NULL) {
2939 2948 in6_addr_t faddr, saddr;
2940 2949 in6_addr_t nexthop;
2941 2950 in_port_t fport;
2942 2951
2943 2952 saddr = connp->conn_saddr_v6;
2944 2953 faddr = connp->conn_faddr_v6;
2945 2954 fport = connp->conn_fport;
2946 2955 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
2947 2956 mutex_exit(&connp->conn_lock);
2948 2957
2949 2958 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
2950 2959 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
2951 2960 IPDF_IPSEC);
2952 2961 switch (error) {
2953 2962 case 0:
2954 2963 break;
2955 2964 case EADDRNOTAVAIL:
2956 2965 /*
2957 2966 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2958 2967 * Don't have the application see that errno
2959 2968 */
2960 2969 error = ENETUNREACH;
2961 2970 goto failed;
2962 2971 case ENETDOWN:
2963 2972 /*
2964 2973 * Have !ipif_addr_ready address; drop packet silently
2965 2974 * until we can get applications to not send until we
2966 2975 * are ready.
2967 2976 */
2968 2977 error = 0;
2969 2978 goto failed;
2970 2979 case EHOSTUNREACH:
2971 2980 case ENETUNREACH:
2972 2981 if (ixa->ixa_ire != NULL) {
2973 2982 /*
2974 2983 * Let conn_ip_output/ire_send_noroute return
2975 2984 * the error and send any local ICMP error.
2976 2985 */
2977 2986 error = 0;
2978 2987 break;
2979 2988 }
2980 2989 /* FALLTHRU */
2981 2990 default:
2982 2991 failed:
2983 2992 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2984 2993 ixa->ixa_cred = connp->conn_cred; /* Restore */
2985 2994 ixa->ixa_cpid = connp->conn_cpid;
2986 2995 ixa_refrele(ixa);
2987 2996 freemsg(mp);
2988 2997 UDPS_BUMP_MIB(us, udpOutErrors);
2989 2998 return (error);
2990 2999 }
2991 3000 } else {
2992 3001 /* Done with conn_t */
2993 3002 mutex_exit(&connp->conn_lock);
2994 3003 }
2995 3004 ASSERT(ixa->ixa_ire != NULL);
2996 3005
2997 3006 /* We're done. Pass the packet to ip. */
2998 3007 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2999 3008
3000 3009 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3001 3010 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3002 3011 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3003 3012
3004 3013 error = conn_ip_output(mp, ixa);
3005 3014 /* No udpOutErrors if an error since IP increases its error counter */
3006 3015 switch (error) {
3007 3016 case 0:
3008 3017 break;
3009 3018 case EWOULDBLOCK:
3010 3019 (void) ixa_check_drain_insert(connp, ixa);
3011 3020 error = 0;
3012 3021 break;
3013 3022 case EADDRNOTAVAIL:
3014 3023 /*
3015 3024 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3016 3025 * Don't have the application see that errno
3017 3026 */
3018 3027 error = ENETUNREACH;
3019 3028 break;
3020 3029 }
3021 3030 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3022 3031 ixa->ixa_cred = connp->conn_cred; /* Restore */
3023 3032 ixa->ixa_cpid = connp->conn_cpid;
3024 3033 ixa_refrele(ixa);
3025 3034 return (error);
3026 3035 }
3027 3036
3028 3037 /*
3029 3038 * Handle sending an M_DATA to the last destination.
3030 3039 * Handles both IPv4 and IPv6.
3031 3040 *
3032 3041 * NOTE: The caller must hold conn_lock and we drop it here.
3033 3042 */
3034 3043 static int
3035 3044 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3036 3045 ip_xmit_attr_t *ixa)
3037 3046 {
3038 3047 udp_t *udp = connp->conn_udp;
3039 3048 udp_stack_t *us = udp->udp_us;
3040 3049 int error;
3041 3050
3042 3051 ASSERT(MUTEX_HELD(&connp->conn_lock));
3043 3052 ASSERT(ixa != NULL);
3044 3053
3045 3054 ASSERT(cr != NULL);
3046 3055 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3047 3056 ixa->ixa_cred = cr;
3048 3057 ixa->ixa_cpid = pid;
3049 3058
3050 3059 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3051 3060 connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3052 3061
3053 3062 if (mp == NULL) {
3054 3063 ASSERT(error != 0);
3055 3064 mutex_exit(&connp->conn_lock);
3056 3065 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3057 3066 ixa->ixa_cred = connp->conn_cred; /* Restore */
3058 3067 ixa->ixa_cpid = connp->conn_cpid;
3059 3068 ixa_refrele(ixa);
3060 3069 UDPS_BUMP_MIB(us, udpOutErrors);
3061 3070 freemsg(mp);
3062 3071 return (error);
3063 3072 }
3064 3073
3065 3074 /*
3066 3075 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3067 3076 * safe copy, then we need to fill in any pointers in it.
3068 3077 */
3069 3078 if (ixa->ixa_ire == NULL) {
3070 3079 in6_addr_t lastdst, lastsrc;
3071 3080 in6_addr_t nexthop;
3072 3081 in_port_t lastport;
3073 3082
3074 3083 lastsrc = connp->conn_v6lastsrc;
3075 3084 lastdst = connp->conn_v6lastdst;
3076 3085 lastport = connp->conn_lastdstport;
3077 3086 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3078 3087 mutex_exit(&connp->conn_lock);
3079 3088
3080 3089 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3081 3090 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3082 3091 IPDF_VERIFY_DST | IPDF_IPSEC);
3083 3092 switch (error) {
3084 3093 case 0:
3085 3094 break;
3086 3095 case EADDRNOTAVAIL:
3087 3096 /*
3088 3097 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3089 3098 * Don't have the application see that errno
3090 3099 */
3091 3100 error = ENETUNREACH;
3092 3101 goto failed;
3093 3102 case ENETDOWN:
3094 3103 /*
3095 3104 * Have !ipif_addr_ready address; drop packet silently
3096 3105 * until we can get applications to not send until we
3097 3106 * are ready.
3098 3107 */
3099 3108 error = 0;
3100 3109 goto failed;
3101 3110 case EHOSTUNREACH:
3102 3111 case ENETUNREACH:
3103 3112 if (ixa->ixa_ire != NULL) {
3104 3113 /*
3105 3114 * Let conn_ip_output/ire_send_noroute return
3106 3115 * the error and send any local ICMP error.
3107 3116 */
3108 3117 error = 0;
3109 3118 break;
3110 3119 }
3111 3120 /* FALLTHRU */
3112 3121 default:
3113 3122 failed:
3114 3123 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3115 3124 ixa->ixa_cred = connp->conn_cred; /* Restore */
3116 3125 ixa->ixa_cpid = connp->conn_cpid;
3117 3126 ixa_refrele(ixa);
3118 3127 freemsg(mp);
3119 3128 UDPS_BUMP_MIB(us, udpOutErrors);
3120 3129 return (error);
3121 3130 }
3122 3131 } else {
3123 3132 /* Done with conn_t */
3124 3133 mutex_exit(&connp->conn_lock);
3125 3134 }
3126 3135
3127 3136 /* We're done. Pass the packet to ip. */
3128 3137 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3129 3138
3130 3139 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3131 3140 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3132 3141 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3133 3142
3134 3143 error = conn_ip_output(mp, ixa);
3135 3144 /* No udpOutErrors if an error since IP increases its error counter */
3136 3145 switch (error) {
3137 3146 case 0:
3138 3147 break;
3139 3148 case EWOULDBLOCK:
3140 3149 (void) ixa_check_drain_insert(connp, ixa);
3141 3150 error = 0;
3142 3151 break;
3143 3152 case EADDRNOTAVAIL:
3144 3153 /*
3145 3154 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3146 3155 * Don't have the application see that errno
3147 3156 */
3148 3157 error = ENETUNREACH;
3149 3158 /* FALLTHRU */
3150 3159 default:
3151 3160 mutex_enter(&connp->conn_lock);
3152 3161 /*
3153 3162 * Clear the source and v6lastdst so we call ip_attr_connect
3154 3163 * for the next packet and try to pick a better source.
3155 3164 */
3156 3165 if (connp->conn_mcbc_bind)
3157 3166 connp->conn_saddr_v6 = ipv6_all_zeros;
3158 3167 else
3159 3168 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3160 3169 connp->conn_v6lastdst = ipv6_all_zeros;
3161 3170 mutex_exit(&connp->conn_lock);
3162 3171 break;
3163 3172 }
3164 3173 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3165 3174 ixa->ixa_cred = connp->conn_cred; /* Restore */
3166 3175 ixa->ixa_cpid = connp->conn_cpid;
3167 3176 ixa_refrele(ixa);
3168 3177 return (error);
3169 3178 }
3170 3179
3171 3180
3172 3181 /*
3173 3182 * Prepend the header template and then fill in the source and
3174 3183 * flowinfo. The caller needs to handle the destination address since
3175 3184 * it's setting is different if rthdr or source route.
3176 3185 *
3177 3186 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3178 3187 * When it returns NULL it sets errorp.
3179 3188 */
3180 3189 static mblk_t *
3181 3190 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3182 3191 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3183 3192 {
3184 3193 udp_t *udp = connp->conn_udp;
3185 3194 udp_stack_t *us = udp->udp_us;
3186 3195 boolean_t insert_spi = udp->udp_nat_t_endpoint;
3187 3196 uint_t pktlen;
3188 3197 uint_t alloclen;
3189 3198 uint_t copylen;
3190 3199 uint8_t *iph;
3191 3200 uint_t ip_hdr_length;
3192 3201 udpha_t *udpha;
3193 3202 uint32_t cksum;
3194 3203 ip_pkt_t *ipp;
3195 3204
3196 3205 ASSERT(MUTEX_HELD(&connp->conn_lock));
3197 3206
3198 3207 /*
3199 3208 * Copy the header template and leave space for an SPI
3200 3209 */
3201 3210 copylen = connp->conn_ht_iphc_len;
3202 3211 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3203 3212 pktlen = alloclen + msgdsize(mp);
3204 3213 if (pktlen > IP_MAXPACKET) {
3205 3214 freemsg(mp);
3206 3215 *errorp = EMSGSIZE;
3207 3216 return (NULL);
3208 3217 }
3209 3218 ixa->ixa_pktlen = pktlen;
3210 3219
3211 3220 /* check/fix buffer config, setup pointers into it */
3212 3221 iph = mp->b_rptr - alloclen;
3213 3222 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3214 3223 mblk_t *mp1;
3215 3224
3216 3225 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3217 3226 if (mp1 == NULL) {
3218 3227 freemsg(mp);
3219 3228 *errorp = ENOMEM;
3220 3229 return (NULL);
3221 3230 }
3222 3231 mp1->b_wptr = DB_LIM(mp1);
3223 3232 mp1->b_cont = mp;
3224 3233 mp = mp1;
3225 3234 iph = (mp->b_wptr - alloclen);
3226 3235 }
3227 3236 mp->b_rptr = iph;
3228 3237 bcopy(connp->conn_ht_iphc, iph, copylen);
3229 3238 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3230 3239
3231 3240 ixa->ixa_ip_hdr_length = ip_hdr_length;
3232 3241 udpha = (udpha_t *)(iph + ip_hdr_length);
3233 3242
3234 3243 /*
3235 3244 * Setup header length and prepare for ULP checksum done in IP.
3236 3245 * udp_build_hdr_template has already massaged any routing header
3237 3246 * and placed the result in conn_sum.
3238 3247 *
3239 3248 * We make it easy for IP to include our pseudo header
3240 3249 * by putting our length in uha_checksum.
3241 3250 */
3242 3251 cksum = pktlen - ip_hdr_length;
3243 3252 udpha->uha_length = htons(cksum);
3244 3253
3245 3254 cksum += connp->conn_sum;
3246 3255 cksum = (cksum >> 16) + (cksum & 0xFFFF);
3247 3256 ASSERT(cksum < 0x10000);
3248 3257
3249 3258 ipp = &connp->conn_xmit_ipp;
3250 3259 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3251 3260 ipha_t *ipha = (ipha_t *)iph;
3252 3261
3253 3262 ipha->ipha_length = htons((uint16_t)pktlen);
3254 3263
3255 3264 /* IP does the checksum if uha_checksum is non-zero */
3256 3265 if (us->us_do_checksum)
3257 3266 udpha->uha_checksum = htons(cksum);
3258 3267
3259 3268 /* if IP_PKTINFO specified an addres it wins over bind() */
3260 3269 if ((ipp->ipp_fields & IPPF_ADDR) &&
3261 3270 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3262 3271 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3263 3272 ipha->ipha_src = ipp->ipp_addr_v4;
3264 3273 } else {
3265 3274 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3266 3275 }
3267 3276 } else {
3268 3277 ip6_t *ip6h = (ip6_t *)iph;
3269 3278
3270 3279 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3271 3280 udpha->uha_checksum = htons(cksum);
3272 3281
3273 3282 /* if IP_PKTINFO specified an addres it wins over bind() */
3274 3283 if ((ipp->ipp_fields & IPPF_ADDR) &&
3275 3284 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3276 3285 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3277 3286 ip6h->ip6_src = ipp->ipp_addr;
3278 3287 } else {
3279 3288 ip6h->ip6_src = *v6src;
3280 3289 }
3281 3290 ip6h->ip6_vcf =
3282 3291 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3283 3292 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3284 3293 if (ipp->ipp_fields & IPPF_TCLASS) {
3285 3294 /* Overrides the class part of flowinfo */
3286 3295 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3287 3296 ipp->ipp_tclass);
3288 3297 }
3289 3298 }
3290 3299
3291 3300 /* Insert all-0s SPI now. */
3292 3301 if (insert_spi)
3293 3302 *((uint32_t *)(udpha + 1)) = 0;
3294 3303
3295 3304 udpha->uha_dst_port = dstport;
3296 3305 return (mp);
3297 3306 }
3298 3307
3299 3308 /*
3300 3309 * Send a T_UDERR_IND in response to an M_DATA
3301 3310 */
3302 3311 static void
3303 3312 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3304 3313 {
3305 3314 struct sockaddr_storage ss;
3306 3315 sin_t *sin;
3307 3316 sin6_t *sin6;
3308 3317 struct sockaddr *addr;
3309 3318 socklen_t addrlen;
3310 3319 mblk_t *mp1;
3311 3320
3312 3321 mutex_enter(&connp->conn_lock);
3313 3322 /* Initialize addr and addrlen as if they're passed in */
3314 3323 if (connp->conn_family == AF_INET) {
3315 3324 sin = (sin_t *)&ss;
3316 3325 *sin = sin_null;
3317 3326 sin->sin_family = AF_INET;
3318 3327 sin->sin_port = connp->conn_fport;
3319 3328 sin->sin_addr.s_addr = connp->conn_faddr_v4;
3320 3329 addr = (struct sockaddr *)sin;
3321 3330 addrlen = sizeof (*sin);
3322 3331 } else {
3323 3332 sin6 = (sin6_t *)&ss;
3324 3333 *sin6 = sin6_null;
3325 3334 sin6->sin6_family = AF_INET6;
3326 3335 sin6->sin6_port = connp->conn_fport;
3327 3336 sin6->sin6_flowinfo = connp->conn_flowinfo;
3328 3337 sin6->sin6_addr = connp->conn_faddr_v6;
3329 3338 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3330 3339 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3331 3340 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3332 3341 } else {
3333 3342 sin6->sin6_scope_id = 0;
3334 3343 }
3335 3344 sin6->__sin6_src_id = 0;
3336 3345 addr = (struct sockaddr *)sin6;
3337 3346 addrlen = sizeof (*sin6);
3338 3347 }
3339 3348 mutex_exit(&connp->conn_lock);
3340 3349
3341 3350 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3342 3351 if (mp1 != NULL)
3343 3352 putnext(connp->conn_rq, mp1);
3344 3353 }
3345 3354
3346 3355 /*
3347 3356 * This routine handles all messages passed downstream. It either
3348 3357 * consumes the message or passes it downstream; it never queues a
3349 3358 * a message.
3350 3359 *
3351 3360 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode
3352 3361 * is valid when we are directly beneath the stream head, and thus sockfs
3353 3362 * is able to bypass STREAMS and directly call us, passing along the sockaddr
3354 3363 * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3355 3364 * connected endpoints.
3356 3365 */
3357 3366 void
3358 3367 udp_wput(queue_t *q, mblk_t *mp)
3359 3368 {
3360 3369 sin6_t *sin6;
3361 3370 sin_t *sin = NULL;
3362 3371 uint_t srcid;
3363 3372 conn_t *connp = Q_TO_CONN(q);
3364 3373 udp_t *udp = connp->conn_udp;
3365 3374 int error = 0;
3366 3375 struct sockaddr *addr = NULL;
3367 3376 socklen_t addrlen;
3368 3377 udp_stack_t *us = udp->udp_us;
3369 3378 struct T_unitdata_req *tudr;
3370 3379 mblk_t *data_mp;
3371 3380 ushort_t ipversion;
3372 3381 cred_t *cr;
3373 3382 pid_t pid;
3374 3383
3375 3384 /*
3376 3385 * We directly handle several cases here: T_UNITDATA_REQ message
3377 3386 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3378 3387 * socket.
3379 3388 */
3380 3389 switch (DB_TYPE(mp)) {
3381 3390 case M_DATA:
3382 3391 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3383 3392 /* Not connected; address is required */
3384 3393 UDPS_BUMP_MIB(us, udpOutErrors);
3385 3394 UDP_DBGSTAT(us, udp_data_notconn);
3386 3395 UDP_STAT(us, udp_out_err_notconn);
3387 3396 freemsg(mp);
3388 3397 return;
3389 3398 }
3390 3399 /*
3391 3400 * All Solaris components should pass a db_credp
3392 3401 * for this message, hence we ASSERT.
3393 3402 * On production kernels we return an error to be robust against
3394 3403 * random streams modules sitting on top of us.
3395 3404 */
3396 3405 cr = msg_getcred(mp, &pid);
3397 3406 ASSERT(cr != NULL);
3398 3407 if (cr == NULL) {
3399 3408 UDPS_BUMP_MIB(us, udpOutErrors);
3400 3409 freemsg(mp);
3401 3410 return;
3402 3411 }
3403 3412 ASSERT(udp->udp_issocket);
3404 3413 UDP_DBGSTAT(us, udp_data_conn);
3405 3414 error = udp_output_connected(connp, mp, cr, pid);
3406 3415 if (error != 0) {
3407 3416 UDP_STAT(us, udp_out_err_output);
3408 3417 if (connp->conn_rq != NULL)
3409 3418 udp_ud_err_connected(connp, (t_scalar_t)error);
3410 3419 #ifdef DEBUG
3411 3420 printf("udp_output_connected returned %d\n", error);
3412 3421 #endif
3413 3422 }
3414 3423 return;
3415 3424
3416 3425 case M_PROTO:
3417 3426 case M_PCPROTO:
3418 3427 tudr = (struct T_unitdata_req *)mp->b_rptr;
3419 3428 if (MBLKL(mp) < sizeof (*tudr) ||
3420 3429 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3421 3430 udp_wput_other(q, mp);
3422 3431 return;
3423 3432 }
3424 3433 break;
3425 3434
3426 3435 default:
3427 3436 udp_wput_other(q, mp);
3428 3437 return;
3429 3438 }
3430 3439
3431 3440 /* Handle valid T_UNITDATA_REQ here */
3432 3441 data_mp = mp->b_cont;
3433 3442 if (data_mp == NULL) {
3434 3443 error = EPROTO;
3435 3444 goto ud_error2;
3436 3445 }
3437 3446 mp->b_cont = NULL;
3438 3447
3439 3448 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3440 3449 error = EADDRNOTAVAIL;
3441 3450 goto ud_error2;
3442 3451 }
3443 3452
3444 3453 /*
3445 3454 * All Solaris components should pass a db_credp
3446 3455 * for this TPI message, hence we should ASSERT.
3447 3456 * However, RPC (svc_clts_ksend) does this odd thing where it
3448 3457 * passes the options from a T_UNITDATA_IND unchanged in a
3449 3458 * T_UNITDATA_REQ. While that is the right thing to do for
3450 3459 * some options, SCM_UCRED being the key one, this also makes it
3451 3460 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3452 3461 */
3453 3462 cr = msg_getcred(mp, &pid);
3454 3463 if (cr == NULL) {
3455 3464 cr = connp->conn_cred;
3456 3465 pid = connp->conn_cpid;
3457 3466 }
3458 3467
3459 3468 /*
3460 3469 * If a port has not been bound to the stream, fail.
3461 3470 * This is not a problem when sockfs is directly
3462 3471 * above us, because it will ensure that the socket
3463 3472 * is first bound before allowing data to be sent.
3464 3473 */
3465 3474 if (udp->udp_state == TS_UNBND) {
3466 3475 error = EPROTO;
3467 3476 goto ud_error2;
3468 3477 }
3469 3478 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3470 3479 addrlen = tudr->DEST_length;
3471 3480
3472 3481 switch (connp->conn_family) {
3473 3482 case AF_INET6:
3474 3483 sin6 = (sin6_t *)addr;
3475 3484 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3476 3485 (sin6->sin6_family != AF_INET6)) {
3477 3486 error = EADDRNOTAVAIL;
3478 3487 goto ud_error2;
3479 3488 }
3480 3489
3481 3490 srcid = sin6->__sin6_src_id;
3482 3491 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3483 3492 /*
3484 3493 * Destination is a non-IPv4-compatible IPv6 address.
3485 3494 * Send out an IPv6 format packet.
3486 3495 */
3487 3496
3488 3497 /*
3489 3498 * If the local address is a mapped address return
3490 3499 * an error.
3491 3500 * It would be possible to send an IPv6 packet but the
3492 3501 * response would never make it back to the application
3493 3502 * since it is bound to a mapped address.
3494 3503 */
3495 3504 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3496 3505 error = EADDRNOTAVAIL;
3497 3506 goto ud_error2;
3498 3507 }
3499 3508
3500 3509 UDP_DBGSTAT(us, udp_out_ipv6);
3501 3510
3502 3511 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3503 3512 sin6->sin6_addr = ipv6_loopback;
3504 3513 ipversion = IPV6_VERSION;
3505 3514 } else {
3506 3515 if (connp->conn_ipv6_v6only) {
3507 3516 error = EADDRNOTAVAIL;
3508 3517 goto ud_error2;
3509 3518 }
3510 3519
3511 3520 /*
3512 3521 * If the local address is not zero or a mapped address
3513 3522 * return an error. It would be possible to send an
3514 3523 * IPv4 packet but the response would never make it
3515 3524 * back to the application since it is bound to a
3516 3525 * non-mapped address.
3517 3526 */
3518 3527 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3519 3528 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3520 3529 error = EADDRNOTAVAIL;
3521 3530 goto ud_error2;
3522 3531 }
3523 3532 UDP_DBGSTAT(us, udp_out_mapped);
3524 3533
3525 3534 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3526 3535 V4_PART_OF_V6(sin6->sin6_addr) =
3527 3536 htonl(INADDR_LOOPBACK);
3528 3537 }
3529 3538 ipversion = IPV4_VERSION;
3530 3539 }
3531 3540
3532 3541 if (tudr->OPT_length != 0) {
3533 3542 /*
3534 3543 * If we are connected then the destination needs to be
3535 3544 * the same as the connected one.
3536 3545 */
3537 3546 if (udp->udp_state == TS_DATA_XFER &&
3538 3547 !conn_same_as_last_v6(connp, sin6)) {
3539 3548 error = EISCONN;
3540 3549 goto ud_error2;
3541 3550 }
3542 3551 UDP_STAT(us, udp_out_opt);
3543 3552 error = udp_output_ancillary(connp, NULL, sin6,
3544 3553 data_mp, mp, NULL, cr, pid);
3545 3554 } else {
3546 3555 ip_xmit_attr_t *ixa;
3547 3556
3548 3557 /*
3549 3558 * We have to allocate an ip_xmit_attr_t before we grab
3550 3559 * conn_lock and we need to hold conn_lock once we've
3551 3560 * checked conn_same_as_last_v6 to handle concurrent
3552 3561 * send* calls on a socket.
3553 3562 */
3554 3563 ixa = conn_get_ixa(connp, B_FALSE);
3555 3564 if (ixa == NULL) {
3556 3565 error = ENOMEM;
3557 3566 goto ud_error2;
3558 3567 }
3559 3568 mutex_enter(&connp->conn_lock);
3560 3569
3561 3570 if (conn_same_as_last_v6(connp, sin6) &&
3562 3571 connp->conn_lastsrcid == srcid &&
3563 3572 ipsec_outbound_policy_current(ixa)) {
3564 3573 UDP_DBGSTAT(us, udp_out_lastdst);
3565 3574 /* udp_output_lastdst drops conn_lock */
3566 3575 error = udp_output_lastdst(connp, data_mp, cr,
3567 3576 pid, ixa);
3568 3577 } else {
3569 3578 UDP_DBGSTAT(us, udp_out_diffdst);
3570 3579 /* udp_output_newdst drops conn_lock */
3571 3580 error = udp_output_newdst(connp, data_mp, NULL,
3572 3581 sin6, ipversion, cr, pid, ixa);
3573 3582 }
3574 3583 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3575 3584 }
3576 3585 if (error == 0) {
3577 3586 freeb(mp);
3578 3587 return;
3579 3588 }
3580 3589 break;
3581 3590
3582 3591 case AF_INET:
3583 3592 sin = (sin_t *)addr;
3584 3593 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3585 3594 (sin->sin_family != AF_INET)) {
3586 3595 error = EADDRNOTAVAIL;
3587 3596 goto ud_error2;
3588 3597 }
3589 3598 UDP_DBGSTAT(us, udp_out_ipv4);
3590 3599 if (sin->sin_addr.s_addr == INADDR_ANY)
3591 3600 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3592 3601 ipversion = IPV4_VERSION;
3593 3602
3594 3603 srcid = 0;
3595 3604 if (tudr->OPT_length != 0) {
3596 3605 /*
3597 3606 * If we are connected then the destination needs to be
3598 3607 * the same as the connected one.
3599 3608 */
3600 3609 if (udp->udp_state == TS_DATA_XFER &&
3601 3610 !conn_same_as_last_v4(connp, sin)) {
3602 3611 error = EISCONN;
3603 3612 goto ud_error2;
3604 3613 }
3605 3614 UDP_STAT(us, udp_out_opt);
3606 3615 error = udp_output_ancillary(connp, sin, NULL,
3607 3616 data_mp, mp, NULL, cr, pid);
3608 3617 } else {
3609 3618 ip_xmit_attr_t *ixa;
3610 3619
3611 3620 /*
3612 3621 * We have to allocate an ip_xmit_attr_t before we grab
3613 3622 * conn_lock and we need to hold conn_lock once we've
3614 3623 * checked conn_same_as_last_v4 to handle concurrent
3615 3624 * send* calls on a socket.
3616 3625 */
3617 3626 ixa = conn_get_ixa(connp, B_FALSE);
3618 3627 if (ixa == NULL) {
3619 3628 error = ENOMEM;
3620 3629 goto ud_error2;
3621 3630 }
3622 3631 mutex_enter(&connp->conn_lock);
3623 3632
3624 3633 if (conn_same_as_last_v4(connp, sin) &&
3625 3634 ipsec_outbound_policy_current(ixa)) {
3626 3635 UDP_DBGSTAT(us, udp_out_lastdst);
3627 3636 /* udp_output_lastdst drops conn_lock */
3628 3637 error = udp_output_lastdst(connp, data_mp, cr,
3629 3638 pid, ixa);
3630 3639 } else {
3631 3640 UDP_DBGSTAT(us, udp_out_diffdst);
3632 3641 /* udp_output_newdst drops conn_lock */
3633 3642 error = udp_output_newdst(connp, data_mp, sin,
3634 3643 NULL, ipversion, cr, pid, ixa);
3635 3644 }
3636 3645 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3637 3646 }
3638 3647 if (error == 0) {
3639 3648 freeb(mp);
3640 3649 return;
3641 3650 }
3642 3651 break;
3643 3652 }
3644 3653 UDP_STAT(us, udp_out_err_output);
3645 3654 ASSERT(mp != NULL);
3646 3655 /* mp is freed by the following routine */
3647 3656 udp_ud_err(q, mp, (t_scalar_t)error);
3648 3657 return;
3649 3658
3650 3659 ud_error2:
3651 3660 UDPS_BUMP_MIB(us, udpOutErrors);
3652 3661 freemsg(data_mp);
3653 3662 UDP_STAT(us, udp_out_err_output);
3654 3663 ASSERT(mp != NULL);
3655 3664 /* mp is freed by the following routine */
3656 3665 udp_ud_err(q, mp, (t_scalar_t)error);
3657 3666 }
3658 3667
3659 3668 /*
3660 3669 * Handle the case of the IP address, port, flow label being different
3661 3670 * for both IPv4 and IPv6.
3662 3671 *
3663 3672 * NOTE: The caller must hold conn_lock and we drop it here.
3664 3673 */
3665 3674 static int
3666 3675 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3667 3676 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3668 3677 {
3669 3678 uint_t srcid;
3670 3679 uint32_t flowinfo;
3671 3680 udp_t *udp = connp->conn_udp;
3672 3681 int error = 0;
3673 3682 ip_xmit_attr_t *oldixa;
3674 3683 udp_stack_t *us = udp->udp_us;
3675 3684 in6_addr_t v6src;
3676 3685 in6_addr_t v6dst;
3677 3686 in6_addr_t v6nexthop;
3678 3687 in_port_t dstport;
3679 3688
3680 3689 ASSERT(MUTEX_HELD(&connp->conn_lock));
3681 3690 ASSERT(ixa != NULL);
3682 3691 /*
3683 3692 * We hold conn_lock across all uses and modifications of
3684 3693 * conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3685 3694 * stay consistent.
3686 3695 */
3687 3696
3688 3697 ASSERT(cr != NULL);
3689 3698 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3690 3699 ixa->ixa_cred = cr;
3691 3700 ixa->ixa_cpid = pid;
3692 3701 if (is_system_labeled()) {
3693 3702 /* We need to restart with a label based on the cred */
3694 3703 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3695 3704 }
3696 3705
3697 3706 /*
3698 3707 * If we are connected then the destination needs to be the
3699 3708 * same as the connected one, which is not the case here since we
3700 3709 * checked for that above.
3701 3710 */
3702 3711 if (udp->udp_state == TS_DATA_XFER) {
3703 3712 mutex_exit(&connp->conn_lock);
3704 3713 error = EISCONN;
3705 3714 goto ud_error;
3706 3715 }
3707 3716
3708 3717 /* In case previous destination was multicast or multirt */
3709 3718 ip_attr_newdst(ixa);
3710 3719
3711 3720 /*
3712 3721 * If laddr is unspecified then we look at sin6_src_id.
3713 3722 * We will give precedence to a source address set with IPV6_PKTINFO
3714 3723 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
|
↓ open down ↓ |
970 lines elided |
↑ open up ↑ |
3715 3724 * want ip_attr_connect to select a source (since it can fail) when
3716 3725 * IPV6_PKTINFO is specified.
3717 3726 * If this doesn't result in a source address then we get a source
3718 3727 * from ip_attr_connect() below.
3719 3728 */
3720 3729 v6src = connp->conn_saddr_v6;
3721 3730 if (sin != NULL) {
3722 3731 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3723 3732 dstport = sin->sin_port;
3724 3733 flowinfo = 0;
3734 + /* No ip_srcid_find_id() lookup for a sin_t destination; still set srcid (to 0). */
3725 3735 srcid = 0;
3726 3736 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3727 - if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) {
3728 - ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3729 - connp->conn_netstack);
3730 - }
3731 3737 ixa->ixa_flags |= IXAF_IS_IPV4;
3732 3738 } else {
3739 + boolean_t v4mapped;
3740 +
3733 3741 v6dst = sin6->sin6_addr;
3734 3742 dstport = sin6->sin6_port;
3735 3743 flowinfo = sin6->sin6_flowinfo;
3736 3744 srcid = sin6->__sin6_src_id;
3737 3745 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3738 3746 ixa->ixa_scopeid = sin6->sin6_scope_id;
3739 3747 ixa->ixa_flags |= IXAF_SCOPEID_SET;
3740 3748 } else {
3741 3749 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3742 3750 }
3743 - if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3744 - ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3745 - connp->conn_netstack);
3746 - }
3747 - if (IN6_IS_ADDR_V4MAPPED(&v6dst))
3751 + v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
3752 + if (v4mapped)
3748 3753 ixa->ixa_flags |= IXAF_IS_IPV4;
3749 3754 else
3750 3755 ixa->ixa_flags &= ~IXAF_IS_IPV4;
3756 + if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3757 + if (ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3758 + v4mapped, connp->conn_netstack)) {
3759 + /* Mismatched v4mapped/v6 specified by srcid. */
3760 + mutex_exit(&connp->conn_lock);
3761 + error = EADDRNOTAVAIL;
3762 + goto ud_error;
3763 + }
3764 + }
3751 3765 }
3752 3766 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3753 3767 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3754 3768 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3755 3769
3756 3770 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3757 3771 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3758 3772 v6src = ipp->ipp_addr;
3759 3773 } else {
3760 3774 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3761 3775 v6src = ipp->ipp_addr;
3762 3776 }
3763 3777 }
3764 3778
3765 3779 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3766 3780 mutex_exit(&connp->conn_lock);
3767 3781
3768 3782 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3769 3783 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3770 3784 switch (error) {
3771 3785 case 0:
3772 3786 break;
3773 3787 case EADDRNOTAVAIL:
3774 3788 /*
3775 3789 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3776 3790 * Don't let the application see that errno.
3777 3791 */
3778 3792 error = ENETUNREACH;
3779 3793 goto failed;
3780 3794 case ENETDOWN:
3781 3795 /*
3782 3796 * The address is !ipif_addr_ready; drop the packet silently
3783 3797 * until we can get applications to hold off sending until we
3784 3798 * are ready.
3785 3799 */
3786 3800 error = 0;
3787 3801 goto failed;
3788 3802 case EHOSTUNREACH:
3789 3803 case ENETUNREACH:
3790 3804 if (ixa->ixa_ire != NULL) {
3791 3805 /*
3792 3806 * Let conn_ip_output/ire_send_noroute return
3793 3807 * the error and send any local ICMP error.
3794 3808 */
3795 3809 error = 0;
3796 3810 break;
3797 3811 }
3798 3812 /* FALLTHRU */
3799 3813 failed:
3800 3814 default:
3801 3815 goto ud_error;
3802 3816 }
3803 3817
3804 3818
3805 3819 /*
3806 3820 * Cluster note: we let the cluster hook know that we are sending to a
3807 3821 * new address and/or port.
3808 3822 */
3809 3823 if (cl_inet_connect2 != NULL) {
3810 3824 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3811 3825 if (error != 0) {
3812 3826 error = EHOSTUNREACH;
3813 3827 goto ud_error;
3814 3828 }
3815 3829 }
3816 3830
3817 3831 mutex_enter(&connp->conn_lock);
3818 3832 /*
3819 3833 * While we dropped the lock some other thread might have connected
3820 3834 * this socket. If so we bail out with EISCONN to ensure that the
3821 3835 * connecting thread is the one that updates conn_ixa, conn_ht_*
3822 3836 * and conn_*last*.
3823 3837 */
3824 3838 if (udp->udp_state == TS_DATA_XFER) {
3825 3839 mutex_exit(&connp->conn_lock);
3826 3840 error = EISCONN;
3827 3841 goto ud_error;
3828 3842 }
3829 3843
3830 3844 /*
3831 3845 * We need to rebuild the headers if
3832 3846 * - we are labeling packets (could be different for different
3833 3847 * destinations)
3834 3848 * - we have a source route (or routing header) since we need to
3835 3849 * massage that to get the pseudo-header checksum
3836 3850 * - the IP version is different than the last time
3837 3851 * - a socket option with COA_HEADER_CHANGED has been set which
3838 3852 * set conn_v6lastdst to zero.
3839 3853 *
3840 3854 * Otherwise the prepend function will just update the src, dst,
3841 3855 * dstport, and flow label.
3842 3856 */
3843 3857 if (is_system_labeled()) {
3844 3858 /* TX MLP requires SCM_UCRED and we don't have that here */
3845 3859 if (connp->conn_mlp_type != mlptSingle) {
3846 3860 mutex_exit(&connp->conn_lock);
3847 3861 error = ECONNREFUSED;
3848 3862 goto ud_error;
3849 3863 }
3850 3864 /*
3851 3865 * Check whether Trusted Solaris policy allows communication
3852 3866 * with this host, and pretend that the destination is
3853 3867 * unreachable if not.
3854 3868 * Compute any needed label and place it in ipp_label_v4/v6.
3855 3869 *
3856 3870 * Later conn_build_hdr_template/conn_prepend_hdr takes
3857 3871 * ipp_label_v4/v6 to form the packet.
3858 3872 *
3859 3873 * Tsol note: Since we hold conn_lock we know no other
3860 3874 * thread manipulates conn_xmit_ipp.
3861 3875 */
3862 3876 error = conn_update_label(connp, ixa, &v6dst,
3863 3877 &connp->conn_xmit_ipp);
3864 3878 if (error != 0) {
3865 3879 mutex_exit(&connp->conn_lock);
3866 3880 goto ud_error;
3867 3881 }
3868 3882 /* Rebuild the header template */
3869 3883 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
3870 3884 flowinfo);
3871 3885 if (error != 0) {
3872 3886 mutex_exit(&connp->conn_lock);
3873 3887 goto ud_error;
3874 3888 }
3875 3889 } else if ((connp->conn_xmit_ipp.ipp_fields &
3876 3890 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
3877 3891 ipversion != connp->conn_lastipversion ||
3878 3892 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
3879 3893 /* Rebuild the header template */
3880 3894 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
3881 3895 flowinfo);
3882 3896 if (error != 0) {
3883 3897 mutex_exit(&connp->conn_lock);
3884 3898 goto ud_error;
3885 3899 }
3886 3900 } else {
3887 3901 /* Simply update the destination address if no source route */
3888 3902 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3889 3903 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc;
3890 3904
3891 3905 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
3892 3906 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
3893 3907 ipha->ipha_fragment_offset_and_flags |=
3894 3908 IPH_DF_HTONS;
3895 3909 } else {
3896 3910 ipha->ipha_fragment_offset_and_flags &=
3897 3911 ~IPH_DF_HTONS;
3898 3912 }
3899 3913 } else {
3900 3914 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
3901 3915 ip6h->ip6_dst = v6dst;
3902 3916 }
3903 3917 }
3904 3918
3905 3919 /*
3906 3920 * Remember the dst/dstport etc which corresponds to the built header
3907 3921 * template and conn_ixa.
3908 3922 */
3909 3923 oldixa = conn_replace_ixa(connp, ixa);
3910 3924 connp->conn_v6lastdst = v6dst;
3911 3925 connp->conn_lastipversion = ipversion;
3912 3926 connp->conn_lastdstport = dstport;
3913 3927 connp->conn_lastflowinfo = flowinfo;
3914 3928 connp->conn_lastscopeid = ixa->ixa_scopeid;
3915 3929 connp->conn_lastsrcid = srcid;
3916 3930 /* Also remember a source to use together with lastdst */
3917 3931 connp->conn_v6lastsrc = v6src;
3918 3932
3919 3933 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
3920 3934 dstport, flowinfo, &error);
3921 3935
3922 3936 /* Done with conn_t */
3923 3937 mutex_exit(&connp->conn_lock);
3924 3938 ixa_refrele(oldixa);
3925 3939
3926 3940 if (data_mp == NULL) {
3927 3941 ASSERT(error != 0);
3928 3942 goto ud_error;
3929 3943 }
3930 3944
3931 3945 /* We're done. Pass the packet to ip. */
3932 3946 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3933 3947
3934 3948 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3935 3949 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
3936 3950 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
3937 3951
3938 3952 error = conn_ip_output(data_mp, ixa);
3939 3953 /* No udpOutErrors if an error since IP increases its error counter */
3940 3954 switch (error) {
3941 3955 case 0:
3942 3956 break;
3943 3957 case EWOULDBLOCK:
3944 3958 (void) ixa_check_drain_insert(connp, ixa);
3945 3959 error = 0;
3946 3960 break;
3947 3961 case EADDRNOTAVAIL:
3948 3962 /*
3949 3963 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3950 3964 * Don't let the application see that errno.
3951 3965 */
3952 3966 error = ENETUNREACH;
3953 3967 /* FALLTHRU */
3954 3968 default:
3955 3969 mutex_enter(&connp->conn_lock);
3956 3970 /*
3957 3971 * Clear the source and v6lastdst so we call ip_attr_connect
3958 3972 * for the next packet and try to pick a better source.
3959 3973 */
3960 3974 if (connp->conn_mcbc_bind)
3961 3975 connp->conn_saddr_v6 = ipv6_all_zeros;
3962 3976 else
3963 3977 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3964 3978 connp->conn_v6lastdst = ipv6_all_zeros;
3965 3979 mutex_exit(&connp->conn_lock);
3966 3980 break;
3967 3981 }
3968 3982 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3969 3983 ixa->ixa_cred = connp->conn_cred; /* Restore */
3970 3984 ixa->ixa_cpid = connp->conn_cpid;
3971 3985 ixa_refrele(ixa);
3972 3986 return (error);
3973 3987
3974 3988 ud_error:
3975 3989 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3976 3990 ixa->ixa_cred = connp->conn_cred; /* Restore */
3977 3991 ixa->ixa_cpid = connp->conn_cpid;
3978 3992 ixa_refrele(ixa);
3979 3993
3980 3994 freemsg(data_mp);
3981 3995 UDPS_BUMP_MIB(us, udpOutErrors);
3982 3996 UDP_STAT(us, udp_out_err_output);
3983 3997 return (error);
3984 3998 }
3985 3999
3986 4000 /* ARGSUSED */
3987 4001 static void
3988 4002 udp_wput_fallback(queue_t *wq, mblk_t *mp)
3989 4003 {
3990 4004 #ifdef DEBUG
3991 4005 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
3992 4006 #endif
3993 4007 freemsg(mp);
3994 4008 }
3995 4009
3996 4010
3997 4011 /*
3998 4012 * Handle special out-of-band ioctl requests (see PSARC/2008/265).
3999 4013 */
4000 4014 static void
4001 4015 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4002 4016 {
4003 4017 void *data;
4004 4018 mblk_t *datamp = mp->b_cont;
4005 4019 conn_t *connp = Q_TO_CONN(q);
4006 4020 udp_t *udp = connp->conn_udp;
4007 4021 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4008 4022
4009 4023 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4010 4024 cmdp->cb_error = EPROTO;
4011 4025 qreply(q, mp);
4012 4026 return;
4013 4027 }
4014 4028 data = datamp->b_rptr;
4015 4029
4016 4030 mutex_enter(&connp->conn_lock);
4017 4031 switch (cmdp->cb_cmd) {
4018 4032 case TI_GETPEERNAME:
4019 4033 if (udp->udp_state != TS_DATA_XFER)
4020 4034 cmdp->cb_error = ENOTCONN;
4021 4035 else
4022 4036 cmdp->cb_error = conn_getpeername(connp, data,
4023 4037 &cmdp->cb_len);
4024 4038 break;
4025 4039 case TI_GETMYNAME:
4026 4040 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4027 4041 break;
4028 4042 default:
4029 4043 cmdp->cb_error = EINVAL;
4030 4044 break;
4031 4045 }
4032 4046 mutex_exit(&connp->conn_lock);
4033 4047
4034 4048 qreply(q, mp);
4035 4049 }
4036 4050
4037 4051 static void
4038 4052 udp_use_pure_tpi(udp_t *udp)
4039 4053 {
4040 4054 conn_t *connp = udp->udp_connp;
4041 4055
4042 4056 mutex_enter(&connp->conn_lock);
4043 4057 udp->udp_issocket = B_FALSE;
4044 4058 mutex_exit(&connp->conn_lock);
4045 4059 UDP_STAT(udp->udp_us, udp_sock_fallback);
4046 4060 }
4047 4061
4048 4062 static void
4049 4063 udp_wput_other(queue_t *q, mblk_t *mp)
4050 4064 {
4051 4065 uchar_t *rptr = mp->b_rptr;
4052 4066 struct iocblk *iocp;
4053 4067 conn_t *connp = Q_TO_CONN(q);
4054 4068 udp_t *udp = connp->conn_udp;
4055 4069 cred_t *cr;
4056 4070
4057 4071 switch (mp->b_datap->db_type) {
4058 4072 case M_CMD:
4059 4073 udp_wput_cmdblk(q, mp);
4060 4074 return;
4061 4075
4062 4076 case M_PROTO:
4063 4077 case M_PCPROTO:
4064 4078 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4065 4079 /*
4066 4080 * If the message does not contain a PRIM_type,
4067 4081 * throw it away.
4068 4082 */
4069 4083 freemsg(mp);
4070 4084 return;
4071 4085 }
4072 4086 switch (((t_primp_t)rptr)->type) {
4073 4087 case T_ADDR_REQ:
4074 4088 udp_addr_req(q, mp);
4075 4089 return;
4076 4090 case O_T_BIND_REQ:
4077 4091 case T_BIND_REQ:
4078 4092 udp_tpi_bind(q, mp);
4079 4093 return;
4080 4094 case T_CONN_REQ:
4081 4095 udp_tpi_connect(q, mp);
4082 4096 return;
4083 4097 case T_CAPABILITY_REQ:
4084 4098 udp_capability_req(q, mp);
4085 4099 return;
4086 4100 case T_INFO_REQ:
4087 4101 udp_info_req(q, mp);
4088 4102 return;
4089 4103 case T_UNITDATA_REQ:
4090 4104 /*
4091 4105 * If a T_UNITDATA_REQ gets here, the address must
4092 4106 * be bad. Valid T_UNITDATA_REQs are handled
4093 4107 * in udp_wput.
4094 4108 */
4095 4109 udp_ud_err(q, mp, EADDRNOTAVAIL);
4096 4110 return;
4097 4111 case T_UNBIND_REQ:
4098 4112 udp_tpi_unbind(q, mp);
4099 4113 return;
4100 4114 case T_SVR4_OPTMGMT_REQ:
4101 4115 /*
4102 4116 * All Solaris components should pass a db_credp
4103 4117 * for this TPI message, hence we ASSERT.
4104 4118 * But in case there is some other M_PROTO that looks
4105 4119 * like a TPI message sent by some other kernel
4106 4120 * component, we check and return an error.
4107 4121 */
4108 4122 cr = msg_getcred(mp, NULL);
4109 4123 ASSERT(cr != NULL);
4110 4124 if (cr == NULL) {
4111 4125 udp_err_ack(q, mp, TSYSERR, EINVAL);
4112 4126 return;
4113 4127 }
4114 4128 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4115 4129 cr)) {
4116 4130 svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4117 4131 }
4118 4132 return;
4119 4133
4120 4134 case T_OPTMGMT_REQ:
4121 4135 /*
4122 4136 * All Solaris components should pass a db_credp
4123 4137 * for this TPI message, hence we ASSERT.
4124 4138 * But in case there is some other M_PROTO that looks
4125 4139 * like a TPI message sent by some other kernel
4126 4140 * component, we check and return an error.
4127 4141 */
4128 4142 cr = msg_getcred(mp, NULL);
4129 4143 ASSERT(cr != NULL);
4130 4144 if (cr == NULL) {
4131 4145 udp_err_ack(q, mp, TSYSERR, EINVAL);
4132 4146 return;
4133 4147 }
4134 4148 tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4135 4149 return;
4136 4150
4137 4151 case T_DISCON_REQ:
4138 4152 udp_tpi_disconnect(q, mp);
4139 4153 return;
4140 4154
4141 4155 /* The following TPI message is not supported by udp. */
4142 4156 case O_T_CONN_RES:
4143 4157 case T_CONN_RES:
4144 4158 udp_err_ack(q, mp, TNOTSUPPORT, 0);
4145 4159 return;
4146 4160
4147 4161 /* The following 3 TPI requests are illegal for udp. */
4148 4162 case T_DATA_REQ:
4149 4163 case T_EXDATA_REQ:
4150 4164 case T_ORDREL_REQ:
4151 4165 udp_err_ack(q, mp, TNOTSUPPORT, 0);
4152 4166 return;
4153 4167 default:
4154 4168 break;
4155 4169 }
4156 4170 break;
4157 4171 case M_FLUSH:
4158 4172 if (*rptr & FLUSHW)
4159 4173 flushq(q, FLUSHDATA);
4160 4174 break;
4161 4175 case M_IOCTL:
4162 4176 iocp = (struct iocblk *)mp->b_rptr;
4163 4177 switch (iocp->ioc_cmd) {
4164 4178 case TI_GETPEERNAME:
4165 4179 if (udp->udp_state != TS_DATA_XFER) {
4166 4180 /*
4167 4181 * If a default destination address has not
4168 4182 * been associated with the stream, then we
4169 4183 * don't know the peer's name.
4170 4184 */
4171 4185 iocp->ioc_error = ENOTCONN;
4172 4186 iocp->ioc_count = 0;
4173 4187 mp->b_datap->db_type = M_IOCACK;
4174 4188 qreply(q, mp);
4175 4189 return;
4176 4190 }
4177 4191 /* FALLTHRU */
4178 4192 case TI_GETMYNAME:
4179 4193 /*
4180 4194 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4181 4195 * need to copyin the user's strbuf structure.
4182 4196 * Processing will continue in the M_IOCDATA case
4183 4197 * below.
4184 4198 */
4185 4199 mi_copyin(q, mp, NULL,
4186 4200 SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4187 4201 return;
4188 4202 case _SIOCSOCKFALLBACK:
4189 4203 /*
4190 4204 * Either sockmod is about to be popped and the
4191 4205 * socket would now be treated as a plain stream,
4192 4206 * or a module is about to be pushed so we have
4193 4207 * to follow pure TPI semantics.
4194 4208 */
4195 4209 if (!udp->udp_issocket) {
4196 4210 DB_TYPE(mp) = M_IOCNAK;
4197 4211 iocp->ioc_error = EINVAL;
4198 4212 } else {
4199 4213 udp_use_pure_tpi(udp);
4200 4214
4201 4215 DB_TYPE(mp) = M_IOCACK;
4202 4216 iocp->ioc_error = 0;
4203 4217 }
4204 4218 iocp->ioc_count = 0;
4205 4219 iocp->ioc_rval = 0;
4206 4220 qreply(q, mp);
4207 4221 return;
4208 4222 default:
4209 4223 break;
4210 4224 }
4211 4225 break;
4212 4226 case M_IOCDATA:
4213 4227 udp_wput_iocdata(q, mp);
4214 4228 return;
4215 4229 default:
4216 4230 /* Unrecognized messages are passed through without change. */
4217 4231 break;
4218 4232 }
4219 4233 ip_wput_nondata(q, mp);
4220 4234 }
4221 4235
4222 4236 /*
4223 4237 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4224 4238 * messages.
4225 4239 */
4226 4240 static void
4227 4241 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4228 4242 {
4229 4243 mblk_t *mp1;
4230 4244 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
4231 4245 STRUCT_HANDLE(strbuf, sb);
4232 4246 uint_t addrlen;
4233 4247 conn_t *connp = Q_TO_CONN(q);
4234 4248 udp_t *udp = connp->conn_udp;
4235 4249
4236 4250 /* Make sure it is one of ours. */
4237 4251 switch (iocp->ioc_cmd) {
4238 4252 case TI_GETMYNAME:
4239 4253 case TI_GETPEERNAME:
4240 4254 break;
4241 4255 default:
4242 4256 ip_wput_nondata(q, mp);
4243 4257 return;
4244 4258 }
4245 4259
4246 4260 switch (mi_copy_state(q, mp, &mp1)) {
4247 4261 case -1:
4248 4262 return;
4249 4263 case MI_COPY_CASE(MI_COPY_IN, 1):
4250 4264 break;
4251 4265 case MI_COPY_CASE(MI_COPY_OUT, 1):
4252 4266 /*
4253 4267 * The address has been copied out, so now
4254 4268 * copyout the strbuf.
4255 4269 */
4256 4270 mi_copyout(q, mp);
4257 4271 return;
4258 4272 case MI_COPY_CASE(MI_COPY_OUT, 2):
4259 4273 /*
4260 4274 * The address and strbuf have been copied out.
4261 4275 * We're done, so just acknowledge the original
4262 4276 * M_IOCTL.
4263 4277 */
4264 4278 mi_copy_done(q, mp, 0);
4265 4279 return;
4266 4280 default:
4267 4281 /*
4268 4282 * Something strange has happened, so acknowledge
4269 4283 * the original M_IOCTL with an EPROTO error.
4270 4284 */
4271 4285 mi_copy_done(q, mp, EPROTO);
4272 4286 return;
4273 4287 }
4274 4288
4275 4289 /*
4276 4290 * Now we have the strbuf structure for TI_GETMYNAME
4277 4291 * and TI_GETPEERNAME. Next we copyout the requested
4278 4292 * address and then we'll copyout the strbuf.
4279 4293 */
4280 4294 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4281 4295
4282 4296 if (connp->conn_family == AF_INET)
4283 4297 addrlen = sizeof (sin_t);
4284 4298 else
4285 4299 addrlen = sizeof (sin6_t);
4286 4300
4287 4301 if (STRUCT_FGET(sb, maxlen) < addrlen) {
4288 4302 mi_copy_done(q, mp, EINVAL);
4289 4303 return;
4290 4304 }
4291 4305
4292 4306 switch (iocp->ioc_cmd) {
4293 4307 case TI_GETMYNAME:
4294 4308 break;
4295 4309 case TI_GETPEERNAME:
4296 4310 if (udp->udp_state != TS_DATA_XFER) {
4297 4311 mi_copy_done(q, mp, ENOTCONN);
4298 4312 return;
4299 4313 }
4300 4314 break;
4301 4315 }
4302 4316 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4303 4317 if (!mp1)
4304 4318 return;
4305 4319
4306 4320 STRUCT_FSET(sb, len, addrlen);
4307 4321 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4308 4322 case TI_GETMYNAME:
4309 4323 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4310 4324 &addrlen);
4311 4325 break;
4312 4326 case TI_GETPEERNAME:
4313 4327 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4314 4328 &addrlen);
4315 4329 break;
4316 4330 }
4317 4331 mp1->b_wptr += addrlen;
4318 4332 /* Copy out the address */
4319 4333 mi_copyout(q, mp);
4320 4334 }
4321 4335
4322 4336 void
4323 4337 udp_ddi_g_init(void)
4324 4338 {
4325 4339 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4326 4340 udp_opt_obj.odb_opt_arr_cnt);
4327 4341
4328 4342 /*
4329 4343 * We want to be informed each time a stack is created or
4330 4344 * destroyed in the kernel, so we can maintain the
4331 4345 * set of udp_stack_t's.
4332 4346 */
4333 4347 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4334 4348 }
4335 4349
4336 4350 void
4337 4351 udp_ddi_g_destroy(void)
4338 4352 {
4339 4353 netstack_unregister(NS_UDP);
4340 4354 }
4341 4355
4342 4356 #define INET_NAME "ip"
4343 4357
4344 4358 /*
4345 4359 * Initialize the UDP stack instance.
4346 4360 */
4347 4361 static void *
4348 4362 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4349 4363 {
4350 4364 udp_stack_t *us;
4351 4365 int i;
4352 4366 int error = 0;
4353 4367 major_t major;
4354 4368 size_t arrsz;
4355 4369
4356 4370 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4357 4371 us->us_netstack = ns;
4358 4372
4359 4373 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4360 4374 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4361 4375 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4362 4376 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4363 4377
4364 4378 /*
4365 4379 * The smallest anonymous port in the privileged port range in which
4366 4380 * UDP looks for a free port. Used for the UDP_ANONPRIVBIND option.
4367 4381 */
4368 4382 us->us_min_anonpriv_port = 512;
4369 4383
4370 4384 us->us_bind_fanout_size = udp_bind_fanout_size;
4371 4385
4372 4386 /* Roundup variable that might have been modified in /etc/system */
4373 4387 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
4374 4388 /* Not a power of two. Round up to nearest power of two */
4375 4389 for (i = 0; i < 31; i++) {
4376 4390 if (us->us_bind_fanout_size < (1 << i))
4377 4391 break;
4378 4392 }
4379 4393 us->us_bind_fanout_size = 1 << i;
4380 4394 }
4381 4395 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4382 4396 sizeof (udp_fanout_t), KM_SLEEP);
4383 4397 for (i = 0; i < us->us_bind_fanout_size; i++) {
4384 4398 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4385 4399 NULL);
4386 4400 }
4387 4401
4388 4402 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4389 4403 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4390 4404 KM_SLEEP);
4391 4405 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4392 4406
4393 4407 /* Allocate the per netstack stats */
4394 4408 mutex_enter(&cpu_lock);
4395 4409 us->us_sc_cnt = MAX(ncpus, boot_ncpus);
4396 4410 mutex_exit(&cpu_lock);
4397 4411 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *),
4398 4412 KM_SLEEP);
4399 4413 for (i = 0; i < us->us_sc_cnt; i++) {
4400 4414 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4401 4415 KM_SLEEP);
4402 4416 }
4403 4417
4404 4418 us->us_kstat = udp_kstat2_init(stackid);
4405 4419 us->us_mibkp = udp_kstat_init(stackid);
4406 4420
4407 4421 major = mod_name_to_major(INET_NAME);
4408 4422 error = ldi_ident_from_major(major, &us->us_ldi_ident);
4409 4423 ASSERT(error == 0);
4410 4424 return (us);
4411 4425 }
4412 4426
4413 4427 /*
4414 4428 * Free the UDP stack instance.
4415 4429 */
4416 4430 static void
4417 4431 udp_stack_fini(netstackid_t stackid, void *arg)
4418 4432 {
4419 4433 udp_stack_t *us = (udp_stack_t *)arg;
4420 4434 int i;
4421 4435
4422 4436 for (i = 0; i < us->us_bind_fanout_size; i++) {
4423 4437 mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4424 4438 }
4425 4439
4426 4440 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4427 4441 sizeof (udp_fanout_t));
4428 4442
4429 4443 us->us_bind_fanout = NULL;
4430 4444
4431 4445 for (i = 0; i < us->us_sc_cnt; i++)
4432 4446 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4433 4447 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4434 4448
4435 4449 kmem_free(us->us_propinfo_tbl,
4436 4450 udp_propinfo_count * sizeof (mod_prop_info_t));
4437 4451 us->us_propinfo_tbl = NULL;
4438 4452
4439 4453 udp_kstat_fini(stackid, us->us_mibkp);
4440 4454 us->us_mibkp = NULL;
4441 4455
4442 4456 udp_kstat2_fini(stackid, us->us_kstat);
4443 4457 us->us_kstat = NULL;
4444 4458
4445 4459 mutex_destroy(&us->us_epriv_port_lock);
4446 4460 ldi_ident_release(us->us_ldi_ident);
4447 4461 kmem_free(us, sizeof (*us));
4448 4462 }
4449 4463
4450 4464 static size_t
4451 4465 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4452 4466 {
4453 4467 udp_stack_t *us = udp->udp_us;
4454 4468
4455 4469 /* We add a bit of extra buffering */
4456 4470 size += size >> 1;
4457 4471 if (size > us->us_max_buf)
4458 4472 size = us->us_max_buf;
4459 4473
4460 4474 udp->udp_rcv_hiwat = size;
4461 4475 return (size);
4462 4476 }
4463 4477
4464 4478 /*
4465 4479 * For the lower queue so that UDP can be a dummy mux.
4466 4480 * Nobody should be sending
4467 4481 * packets up this stream.
4468 4482 */
4469 4483 static void
4470 4484 udp_lrput(queue_t *q, mblk_t *mp)
4471 4485 {
4472 4486 switch (mp->b_datap->db_type) {
4473 4487 case M_FLUSH:
4474 4488 /* Turn around */
4475 4489 if (*mp->b_rptr & FLUSHW) {
4476 4490 *mp->b_rptr &= ~FLUSHR;
4477 4491 qreply(q, mp);
4478 4492 return;
4479 4493 }
4480 4494 break;
4481 4495 }
4482 4496 freemsg(mp);
4483 4497 }
4484 4498
4485 4499 /*
4486 4500 * For the lower queue so that UDP can be a dummy mux.
4487 4501 * Nobody should be sending packets down this stream.
4488 4502 */
4489 4503 /* ARGSUSED */
4490 4504 void
4491 4505 udp_lwput(queue_t *q, mblk_t *mp)
4492 4506 {
4493 4507 freemsg(mp);
4494 4508 }
4495 4509
4496 4510 /*
4497 4511 * When a CPU is added, we need to allocate the per CPU stats struct.
4498 4512 */
4499 4513 void
4500 4514 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4501 4515 {
4502 4516 int i;
4503 4517
4504 4518 if (cpu_seqid < us->us_sc_cnt)
4505 4519 return;
4506 4520 for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4507 4521 ASSERT(us->us_sc[i] == NULL);
4508 4522 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4509 4523 KM_SLEEP);
4510 4524 }
4511 4525 membar_producer();
4512 4526 us->us_sc_cnt = cpu_seqid + 1;
4513 4527 }
4514 4528
4515 4529 /*
4516 4530 * Below routines for UDP socket module.
4517 4531 */
4518 4532
4519 4533 static conn_t *
4520 4534 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4521 4535 {
4522 4536 udp_t *udp;
4523 4537 conn_t *connp;
4524 4538 zoneid_t zoneid;
4525 4539 netstack_t *ns;
4526 4540 udp_stack_t *us;
4527 4541 int len;
4528 4542
4529 4543 ASSERT(errorp != NULL);
4530 4544
4531 4545 if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4532 4546 return (NULL);
4533 4547
4534 4548 ns = netstack_find_by_cred(credp);
4535 4549 ASSERT(ns != NULL);
4536 4550 us = ns->netstack_udp;
4537 4551 ASSERT(us != NULL);
4538 4552
4539 4553 /*
4540 4554 * For exclusive stacks we set the zoneid to zero
4541 4555 * to make UDP operate as if in the global zone.
4542 4556 */
4543 4557 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4544 4558 zoneid = GLOBAL_ZONEID;
4545 4559 else
4546 4560 zoneid = crgetzoneid(credp);
4547 4561
4548 4562 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4549 4563
4550 4564 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4551 4565 if (connp == NULL) {
4552 4566 netstack_rele(ns);
4553 4567 *errorp = ENOMEM;
4554 4568 return (NULL);
4555 4569 }
4556 4570 udp = connp->conn_udp;
4557 4571
4558 4572 /*
4559 4573 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4560 4574 * done by netstack_find_by_cred()
4561 4575 */
4562 4576 netstack_rele(ns);
4563 4577
4564 4578 /*
4565 4579 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4566 4580 * need to lock anything.
4567 4581 */
4568 4582 ASSERT(connp->conn_proto == IPPROTO_UDP);
4569 4583 ASSERT(connp->conn_udp == udp);
4570 4584 ASSERT(udp->udp_connp == connp);
4571 4585
4572 4586 /* Set the initial state of the stream and the privilege status. */
4573 4587 udp->udp_state = TS_UNBND;
4574 4588 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4575 4589 if (isv6) {
4576 4590 connp->conn_family = AF_INET6;
4577 4591 connp->conn_ipversion = IPV6_VERSION;
4578 4592 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4579 4593 connp->conn_default_ttl = us->us_ipv6_hoplimit;
4580 4594 len = sizeof (ip6_t) + UDPH_SIZE;
4581 4595 } else {
4582 4596 connp->conn_family = AF_INET;
4583 4597 connp->conn_ipversion = IPV4_VERSION;
4584 4598 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4585 4599 connp->conn_default_ttl = us->us_ipv4_ttl;
4586 4600 len = sizeof (ipha_t) + UDPH_SIZE;
4587 4601 }
4588 4602
4589 4603 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4590 4604 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4591 4605
4592 4606 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4593 4607 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4594 4608 /* conn_allzones cannot be set this early, hence no IPCL_ZONEID */
4595 4609 connp->conn_ixa->ixa_zoneid = zoneid;
4596 4610
4597 4611 connp->conn_zoneid = zoneid;
4598 4612
4599 4613 /*
4600 4614 * If the caller has the process-wide flag set, then default to MAC
4601 4615 * exempt mode. This allows read-down to unlabeled hosts.
4602 4616 */
4603 4617 if (getpflags(NET_MAC_AWARE, credp) != 0)
4604 4618 connp->conn_mac_mode = CONN_MAC_AWARE;
4605 4619
4606 4620 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4607 4621
4608 4622 udp->udp_us = us;
4609 4623
4610 4624 connp->conn_rcvbuf = us->us_recv_hiwat;
4611 4625 connp->conn_sndbuf = us->us_xmit_hiwat;
4612 4626 connp->conn_sndlowat = us->us_xmit_lowat;
4613 4627 connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4614 4628
4615 4629 connp->conn_wroff = len + us->us_wroff_extra;
4616 4630 connp->conn_so_type = SOCK_DGRAM;
4617 4631
4618 4632 connp->conn_recv = udp_input;
4619 4633 connp->conn_recvicmp = udp_icmp_input;
4620 4634 crhold(credp);
4621 4635 connp->conn_cred = credp;
4622 4636 connp->conn_cpid = curproc->p_pid;
4623 4637 connp->conn_open_time = ddi_get_lbolt64();
4624 4638 /* Cache things in ixa without an extra refhold */
4625 4639 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4626 4640 connp->conn_ixa->ixa_cred = connp->conn_cred;
4627 4641 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4628 4642 if (is_system_labeled())
4629 4643 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4630 4644
4631 4645 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4632 4646
4633 4647 if (us->us_pmtu_discovery)
4634 4648 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4635 4649
4636 4650 return (connp);
4637 4651 }
4638 4652
4639 4653 sock_lower_handle_t
4640 4654 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4641 4655 uint_t *smodep, int *errorp, int flags, cred_t *credp)
4642 4656 {
4643 4657 udp_t *udp = NULL;
4644 4658 udp_stack_t *us;
4645 4659 conn_t *connp;
4646 4660 boolean_t isv6;
4647 4661
4648 4662 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4649 4663 (proto != 0 && proto != IPPROTO_UDP)) {
4650 4664 *errorp = EPROTONOSUPPORT;
4651 4665 return (NULL);
4652 4666 }
4653 4667
4654 4668 if (family == AF_INET6)
4655 4669 isv6 = B_TRUE;
4656 4670 else
4657 4671 isv6 = B_FALSE;
4658 4672
4659 4673 connp = udp_do_open(credp, isv6, flags, errorp);
4660 4674 if (connp == NULL)
4661 4675 return (NULL);
4662 4676
4663 4677 udp = connp->conn_udp;
4664 4678 ASSERT(udp != NULL);
4665 4679 us = udp->udp_us;
4666 4680 ASSERT(us != NULL);
4667 4681
4668 4682 udp->udp_issocket = B_TRUE;
4669 4683 connp->conn_flags |= IPCL_NONSTR;
4670 4684
4671 4685 /*
4672 4686 * Set flow control
4673 4687 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4674 4688 * need to lock anything.
4675 4689 */
4676 4690 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4677 4691 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4678 4692
4679 4693 connp->conn_flow_cntrld = B_FALSE;
4680 4694
4681 4695 mutex_enter(&connp->conn_lock);
4682 4696 connp->conn_state_flags &= ~CONN_INCIPIENT;
4683 4697 mutex_exit(&connp->conn_lock);
4684 4698
4685 4699 *errorp = 0;
4686 4700 *smodep = SM_ATOMIC;
4687 4701 *sock_downcalls = &sock_udp_downcalls;
4688 4702 return ((sock_lower_handle_t)connp);
4689 4703 }
4690 4704
4691 4705 /* ARGSUSED3 */
4692 4706 void
4693 4707 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4694 4708 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4695 4709 {
4696 4710 conn_t *connp = (conn_t *)proto_handle;
4697 4711 struct sock_proto_props sopp;
4698 4712
4699 4713 /* All Solaris components should pass a cred for this operation. */
4700 4714 ASSERT(cr != NULL);
4701 4715
4702 4716 connp->conn_upcalls = sock_upcalls;
4703 4717 connp->conn_upper_handle = sock_handle;
4704 4718
4705 4719 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4706 4720 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4707 4721 sopp.sopp_wroff = connp->conn_wroff;
4708 4722 sopp.sopp_maxblk = INFPSZ;
4709 4723 sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4710 4724 sopp.sopp_rxlowat = connp->conn_rcvlowat;
4711 4725 sopp.sopp_maxaddrlen = sizeof (sin6_t);
4712 4726 sopp.sopp_maxpsz =
4713 4727 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4714 4728 UDP_MAXPACKET_IPV6;
4715 4729 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4716 4730 udp_mod_info.mi_minpsz;
4717 4731
4718 4732 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4719 4733 &sopp);
4720 4734 }
4721 4735
4722 4736 static void
4723 4737 udp_do_close(conn_t *connp)
4724 4738 {
4725 4739 udp_t *udp;
4726 4740
4727 4741 ASSERT(connp != NULL && IPCL_IS_UDP(connp));
4728 4742 udp = connp->conn_udp;
4729 4743
4730 4744 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
4731 4745 /*
4732 4746 * Running in cluster mode - register unbind information
4733 4747 */
4734 4748 if (connp->conn_ipversion == IPV4_VERSION) {
4735 4749 (*cl_inet_unbind)(
4736 4750 connp->conn_netstack->netstack_stackid,
4737 4751 IPPROTO_UDP, AF_INET,
4738 4752 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
4739 4753 (in_port_t)connp->conn_lport, NULL);
4740 4754 } else {
4741 4755 (*cl_inet_unbind)(
4742 4756 connp->conn_netstack->netstack_stackid,
4743 4757 IPPROTO_UDP, AF_INET6,
4744 4758 (uint8_t *)&(connp->conn_laddr_v6),
4745 4759 (in_port_t)connp->conn_lport, NULL);
4746 4760 }
4747 4761 }
4748 4762
4749 4763 udp_bind_hash_remove(udp, B_FALSE);
4750 4764
4751 4765 ip_quiesce_conn(connp);
4752 4766
4753 4767 if (!IPCL_IS_NONSTR(connp)) {
4754 4768 ASSERT(connp->conn_wq != NULL);
4755 4769 ASSERT(connp->conn_rq != NULL);
4756 4770 qprocsoff(connp->conn_rq);
4757 4771 }
4758 4772
4759 4773 udp_close_free(connp);
4760 4774
4761 4775 /*
4762 4776 * Now we are truly single threaded on this stream, and can
4763 4777 * delete the things hanging off the connp, and finally the connp.
4764 4778 * We removed this connp from the fanout list, it cannot be
4765 4779 * accessed thru the fanouts, and we already waited for the
4766 4780 * conn_ref to drop to 0. We are already in close, so
4767 4781 * there cannot be any other thread from the top. qprocsoff
4768 4782 * has completed, and service has completed or won't run in
4769 4783 * future.
4770 4784 */
4771 4785 ASSERT(connp->conn_ref == 1);
4772 4786
4773 4787 if (!IPCL_IS_NONSTR(connp)) {
4774 4788 inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
4775 4789 } else {
4776 4790 ip_free_helper_stream(connp);
4777 4791 }
4778 4792
4779 4793 connp->conn_ref--;
4780 4794 ipcl_conn_destroy(connp);
4781 4795 }
4782 4796
4783 4797 /* ARGSUSED1 */
4784 4798 int
4785 4799 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4786 4800 {
4787 4801 conn_t *connp = (conn_t *)proto_handle;
4788 4802
4789 4803 /* All Solaris components should pass a cred for this operation. */
4790 4804 ASSERT(cr != NULL);
4791 4805
4792 4806 udp_do_close(connp);
4793 4807 return (0);
4794 4808 }
4795 4809
/*
 * Bind the UDP endpoint connp to the local address and port given in sa.
 *
 * sa, len	Address to bind to.  len selects the flavor: sizeof (sin_t)
 *		for IPv4 or sizeof (sin6_t) for IPv6; any other length fails
 *		with -TBADADDR.  On success the port actually bound is
 *		written back into sa in network byte order.
 * cr		Credentials used for the privileged-port check and, on
 *		labeled systems, for the multilevel port (MLP) checks.
 * bind_to_req_port_only
 *		When B_TRUE and a nonzero port was requested, an in-use port
 *		fails the bind with -TADDRBUSY instead of starting a search
 *		for another port.
 *
 * Returns 0 on success.  Failures are reported either as positive errno
 * values (EINVAL, EAFNOSUPPORT, EADDRNOTAVAIL, or whatever
 * udp_build_hdr_template()/ip_laddr_fanout_insert() return) or as negated
 * TLI error codes (-TBADADDR, -TACCES, -TNOADDR, -TOUTSTATE, -TADDRBUSY).
 * The callers in this file, udp_bind() and udp_implicit_bind(), convert the
 * negative values with proto_tlitosyserr().
 */
4796 4810 static int
4797 4811 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
4798 4812 boolean_t bind_to_req_port_only)
4799 4813 {
4800 4814 sin_t *sin;
4801 4815 sin6_t *sin6;
4802 4816 udp_t *udp = connp->conn_udp;
4803 4817 int error = 0;
4804 4818 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
4805 4819 in_port_t port; /* Host byte order */
4806 4820 in_port_t requested_port; /* Host byte order */
4807 4821 int count;
4808 4822 ipaddr_t v4src; /* Set if AF_INET */
4809 4823 in6_addr_t v6src;
4810 4824 int loopmax;
4811 4825 udp_fanout_t *udpf;
4812 4826 in_port_t lport; /* Network byte order */
4813 4827 uint_t scopeid = 0;
4814 4828 zoneid_t zoneid = IPCL_ZONEID(connp);
4815 4829 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
4816 4830 boolean_t is_inaddr_any;
4817 4831 mlp_type_t addrtype, mlptype;
4818 4832 udp_stack_t *us = udp->udp_us;
4819 4833
/*
 * The address length selects the sockaddr flavor.  Each case checks the
 * address against the endpoint's family, records the address in v6src
 * (IPv4 addresses in v4-mapped form) and classifies any non-wildcard
 * address via ip_laddr_verify_v4()/ip_laddr_verify_v6().
 */
4820 4834 switch (len) {
4821 4835 case sizeof (sin_t): /* Complete IPv4 address */
4822 4836 sin = (sin_t *)sa;
4823 4837
4824 4838 if (sin == NULL || !OK_32PTR((char *)sin))
4825 4839 return (EINVAL);
4826 4840
4827 4841 if (connp->conn_family != AF_INET ||
4828 4842 sin->sin_family != AF_INET) {
4829 4843 return (EAFNOSUPPORT);
4830 4844 }
4831 4845 v4src = sin->sin_addr.s_addr;
4832 4846 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
4833 4847 if (v4src != INADDR_ANY) {
4834 4848 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
4835 4849 B_TRUE);
4836 4850 }
4837 4851 port = ntohs(sin->sin_port);
4838 4852 break;
4839 4853
4840 4854 case sizeof (sin6_t): /* complete IPv6 address */
4841 4855 sin6 = (sin6_t *)sa;
4842 4856
4843 4857 if (sin6 == NULL || !OK_32PTR((char *)sin6))
4844 4858 return (EINVAL);
4845 4859
4846 4860 if (connp->conn_family != AF_INET6 ||
4847 4861 sin6->sin6_family != AF_INET6) {
4848 4862 return (EAFNOSUPPORT);
4849 4863 }
4850 4864 v6src = sin6->sin6_addr;
4851 4865 if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
/* A v4-mapped address is refused on a v6-only socket. */
4852 4866 if (connp->conn_ipv6_v6only)
4853 4867 return (EADDRNOTAVAIL);
4854 4868
4855 4869 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
4856 4870 if (v4src != INADDR_ANY) {
4857 4871 laddr_type = ip_laddr_verify_v4(v4src,
4858 4872 zoneid, ipst, B_FALSE);
4859 4873 }
4860 4874 } else {
4861 4875 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
/* The scope id is only taken from sa for link-scope addresses. */
4862 4876 if (IN6_IS_ADDR_LINKSCOPE(&v6src))
4863 4877 scopeid = sin6->sin6_scope_id;
4864 4878 laddr_type = ip_laddr_verify_v6(&v6src,
4865 4879 zoneid, ipst, B_TRUE, scopeid);
4866 4880 }
4867 4881 }
4868 4882 port = ntohs(sin6->sin6_port);
4869 4883 break;
4870 4884
4871 4885 default: /* Invalid request */
4872 4886 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
4873 4887 "udp_bind: bad ADDR_length length %u", len);
4874 4888 return (-TBADADDR);
4875 4889 }
4876 4890
4877 4891 /* Is the local address a valid unicast, multicast, or broadcast? */
4878 4892 if (laddr_type == IPVL_BAD)
4879 4893 return (EADDRNOTAVAIL);
4880 4894
4881 4895 requested_port = port;
4882 4896
/*
 * bind_to_req_port_only is only honored when a specific (nonzero) port
 * was requested; a request for port 0 always allows a port search.
 */
4883 4897 if (requested_port == 0 || !bind_to_req_port_only)
4884 4898 bind_to_req_port_only = B_FALSE;
4885 4899 else /* T_BIND_REQ and requested_port != 0 */
4886 4900 bind_to_req_port_only = B_TRUE;
4887 4901
4888 4902 if (requested_port == 0) {
4889 4903 /*
4890 4904 * If the application passed in zero for the port number, it
4891 4905 * doesn't care which port number we bind to. Get one in the
4892 4906 * valid range.
4893 4907 */
4894 4908 if (connp->conn_anon_priv_bind) {
4895 4909 port = udp_get_next_priv_port(udp);
4896 4910 } else {
4897 4911 port = udp_update_next_port(udp,
4898 4912 us->us_next_port_to_try, B_TRUE);
4899 4913 }
4900 4914 } else {
4901 4915 /*
4902 4916 * If the port is in the well-known privileged range,
4903 4917 * make sure the caller was privileged.
4904 4918 */
4905 4919 int i;
4906 4920 boolean_t priv = B_FALSE;
4907 4921
/*
 * A port counts as privileged when it is below us_smallest_nonpriv_port
 * or is listed in the us_epriv_ports[] table.
 */
4908 4922 if (port < us->us_smallest_nonpriv_port) {
4909 4923 priv = B_TRUE;
4910 4924 } else {
4911 4925 for (i = 0; i < us->us_num_epriv_ports; i++) {
4912 4926 if (port == us->us_epriv_ports[i]) {
4913 4927 priv = B_TRUE;
4914 4928 break;
4915 4929 }
4916 4930 }
4917 4931 }
4918 4932
4919 4933 if (priv) {
4920 4934 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
4921 4935 return (-TACCES);
4922 4936 }
4923 4937 }
4924 4938
/* Port 0 at this point means no anonymous port could be obtained. */
4925 4939 if (port == 0)
4926 4940 return (-TNOADDR);
4927 4941
4928 4942 /*
4929 4943 * The state must be TS_UNBND. TPI mandates that users must send
4930 4944 * TPI primitives only 1 at a time and wait for the response before
4931 4945 * sending the next primitive.
4932 4946 */
4933 4947 mutex_enter(&connp->conn_lock);
4934 4948 if (udp->udp_state != TS_UNBND) {
4935 4949 mutex_exit(&connp->conn_lock);
4936 4950 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
4937 4951 "udp_bind: bad state, %u", udp->udp_state);
4938 4952 return (-TOUTSTATE);
4939 4953 }
4940 4954 /*
4941 4955 * Copy the source address into our udp structure. This address
4942 4956 * may still be zero; if so, IP will fill in the correct address
4943 4957 * each time an outbound packet is passed to it. Since the udp is
4944 4958 * not yet in the bind hash list, we don't grab the uf_lock to
4945 4959 * change conn_ipversion
4946 4960 */
4947 4961 if (connp->conn_family == AF_INET) {
4948 4962 ASSERT(sin != NULL);
4949 4963 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
4950 4964 } else {
4951 4965 if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
4952 4966 /*
4953 4967 * no need to hold the uf_lock to set the conn_ipversion
4954 4968 * since we are not yet in the fanout list
4955 4969 */
4956 4970 connp->conn_ipversion = IPV4_VERSION;
4957 4971 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4958 4972 } else {
4959 4973 connp->conn_ipversion = IPV6_VERSION;
4960 4974 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4961 4975 }
4962 4976 }
4963 4977
4964 4978 /*
4965 4979 * If conn_reuseaddr is not set, then we have to make sure that
4966 4980 * the IP address and port number the application requested
4967 4981 * (or we selected for the application) is not being used by
4968 4982 * another stream. If another stream is already using the
4969 4983 * requested IP address and port, the behavior depends on
4970 4984 * "bind_to_req_port_only". If set the bind fails; otherwise we
4971 4985 * search for an unused port to bind to the stream.
4972 4986 *
4973 4987 * As per the BSD semantics, as modified by the Deering multicast
4974 4988 * changes, if udp_reuseaddr is set, then we allow multiple binds
4975 4989 * to the same port independent of the local IP address.
4976 4990 *
4977 4991 * This is slightly different than in SunOS 4.X which did not
4978 4992 * support IP multicast. Note that the change implemented by the
4979 4993 * Deering multicast code affects all binds - not only binding
4980 4994 * to IP multicast addresses.
4981 4995 *
4982 4996 * Note that when binding to port zero we ignore SO_REUSEADDR in
4983 4997 * order to guarantee a unique port.
4984 4998 */
4985 4999
/* loopmax bounds the number of candidate ports the search below tries. */
4986 5000 count = 0;
4987 5001 if (connp->conn_anon_priv_bind) {
4988 5002 /*
4989 5003 * loopmax = (IPPORT_RESERVED-1) -
4990 5004 * us->us_min_anonpriv_port + 1
4991 5005 */
4992 5006 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
4993 5007 } else {
4994 5008 loopmax = us->us_largest_anon_port -
4995 5009 us->us_smallest_anon_port + 1;
4996 5010 }
4997 5011
4998 5012 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
4999 5013
/*
 * Port search.  Each pass examines one candidate port.  Every break out
 * of this outer loop happens with udpf->uf_lock still held, so the
 * endpoint can be inserted into that hash bucket below; the lock is
 * dropped before another candidate is tried or an error is returned.
 */
5000 5014 for (;;) {
5001 5015 udp_t *udp1;
5002 5016 boolean_t found_exclbind = B_FALSE;
5003 5017 conn_t *connp1;
5004 5018
5005 5019 /*
5006 5020 * Walk through the list of udp streams bound to
5007 5021 * requested port with the same IP address.
5008 5022 */
5009 5023 lport = htons(port);
5010 5024 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5011 5025 us->us_bind_fanout_size)];
5012 5026 mutex_enter(&udpf->uf_lock);
5013 5027 for (udp1 = udpf->uf_udp; udp1 != NULL;
5014 5028 udp1 = udp1->udp_bind_hash) {
5015 5029 connp1 = udp1->udp_connp;
5016 5030
5017 5031 if (lport != connp1->conn_lport)
5018 5032 continue;
5019 5033
5020 5034 /*
5021 5035 * On a labeled system, we must treat bindings to ports
5022 5036 * on shared IP addresses by sockets with MAC exemption
5023 5037 * privilege as being in all zones, as there's
5024 5038 * otherwise no way to identify the right receiver.
5025 5039 */
5026 5040 if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5027 5041 continue;
5028 5042
5029 5043 /*
5030 5044 * If UDP_EXCLBIND is set for either the bound or
5031 5045 * binding endpoint, the semantics of bind
5032 5046 * is changed according to the following chart.
5033 5047 *
5034 5048 * spec = specified address (v4 or v6)
5035 5049 * unspec = unspecified address (v4 or v6)
5036 5050 * A = specified addresses are different for endpoints
5037 5051 *
5038 5052 * bound bind to allowed?
5039 5053 * -------------------------------------
5040 5054 * unspec unspec no
5041 5055 * unspec spec no
5042 5056 * spec unspec no
5043 5057 * spec spec yes if A
5044 5058 *
5045 5059 * For labeled systems, SO_MAC_EXEMPT behaves the same
5046 5060 * as UDP_EXCLBIND, except that zoneid is ignored.
5047 5061 */
5048 5062 if (connp1->conn_exclbind || connp->conn_exclbind ||
5049 5063 IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5050 5064 if (V6_OR_V4_INADDR_ANY(
5051 5065 connp1->conn_bound_addr_v6) ||
5052 5066 is_inaddr_any ||
5053 5067 IN6_ARE_ADDR_EQUAL(
5054 5068 &connp1->conn_bound_addr_v6,
5055 5069 &v6src)) {
5056 5070 found_exclbind = B_TRUE;
5057 5071 break;
5058 5072 }
5059 5073 continue;
5060 5074 }
5061 5075
5062 5076 /*
5063 5077 * Check ipversion to allow IPv4 and IPv6 sockets to
5064 5078 * have disjoint port number spaces.
5065 5079 */
5066 5080 if (connp->conn_ipversion != connp1->conn_ipversion) {
5067 5081
5068 5082 /*
5069 5083 * On the first time through the loop, if the
5070 5084 * user intentionally specified a
5071 5085 * particular port number, then ignore any
5072 5086 * bindings of the other protocol that may
5073 5087 * conflict. This allows the user to bind IPv6
5074 5088 * alone and get both v4 and v6, or bind both
5075 5089 * and get each separately. On subsequent
5076 5090 * times through the loop, we're checking a
5077 5091 * port that we chose (not the user) and thus
5078 5092 * we do not allow casual duplicate bindings.
5079 5093 */
5080 5094 if (count == 0 && requested_port != 0)
5081 5095 continue;
5082 5096 }
5083 5097
5084 5098 /*
5085 5099 * No difference depending on SO_REUSEADDR.
5086 5100 *
5087 5101 * If existing port is bound to a
5088 5102 * non-wildcard IP address and
5089 5103 * the requesting stream is bound to
5090 5104 * a distinct IP address
5091 5105 * (non-wildcard, also), keep going.
5092 5106 */
5093 5107 if (!is_inaddr_any &&
5094 5108 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5095 5109 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5096 5110 &v6src)) {
5097 5111 continue;
5098 5112 }
5099 5113 break;
5100 5114 }
5101 5115
/*
 * With conn_reuseaddr set and an explicitly requested port, take the
 * port even if the walk above found it in use, unless the conflict was
 * an exclusive-bind one.
 */
5102 5116 if (!found_exclbind &&
5103 5117 (connp->conn_reuseaddr && requested_port != 0)) {
5104 5118 break;
5105 5119 }
5106 5120
5107 5121 if (udp1 == NULL) {
5108 5122 /*
5109 5123 * No other stream has this IP address
5110 5124 * and port number. We can use it.
5111 5125 */
5112 5126 break;
5113 5127 }
5114 5128 mutex_exit(&udpf->uf_lock);
5115 5129 if (bind_to_req_port_only) {
5116 5130 /*
5117 5131 * We get here only when requested port
5118 5132 * is bound (and only first of the for()
5119 5133 * loop iteration).
5120 5134 *
5121 5135 * The semantics of this bind request
5122 5136 * require it to fail so we return from
5123 5137 * the routine (and exit the loop).
5124 5138 *
5125 5139 */
5126 5140 mutex_exit(&connp->conn_lock);
5127 5141 return (-TADDRBUSY);
5128 5142 }
5129 5143
/* The candidate is taken; pick the next port to try. */
5130 5144 if (connp->conn_anon_priv_bind) {
5131 5145 port = udp_get_next_priv_port(udp);
5132 5146 } else {
5133 5147 if ((count == 0) && (requested_port != 0)) {
5134 5148 /*
5135 5149 * If the application wants us to find
5136 5150 * a port, get one to start with. Set
5137 5151 * requested_port to 0, so that we will
5138 5152 * update us->us_next_port_to_try below.
5139 5153 */
5140 5154 port = udp_update_next_port(udp,
5141 5155 us->us_next_port_to_try, B_TRUE);
5142 5156 requested_port = 0;
5143 5157 } else {
5144 5158 port = udp_update_next_port(udp, port + 1,
5145 5159 B_FALSE);
5146 5160 }
5147 5161 }
5148 5162
5149 5163 if (port == 0 || ++count >= loopmax) {
5150 5164 /*
5151 5165 * We've tried every possible port number and
5152 5166 * there are none available, so send an error
5153 5167 * to the user.
5154 5168 */
5155 5169 mutex_exit(&connp->conn_lock);
5156 5170 return (-TNOADDR);
5157 5171 }
5158 5172 }
5159 5173
5160 5174 /*
5161 5175 * Copy the source address into our udp structure. This address
5162 5176 * may still be zero; if so, ip_attr_connect will fill in the correct
5163 5177 * address when a packet is about to be sent.
5164 5178 * If we are binding to a broadcast or multicast address then
5165 5179 * we just set the conn_bound_addr since we don't want to use
5166 5180 * that as the source address when sending.
5167 5181 */
5168 5182 connp->conn_bound_addr_v6 = v6src;
5169 5183 connp->conn_laddr_v6 = v6src;
5170 5184 if (scopeid != 0) {
5171 5185 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5172 5186 connp->conn_ixa->ixa_scopeid = scopeid;
5173 5187 connp->conn_incoming_ifindex = scopeid;
5174 5188 } else {
5175 5189 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5176 5190 connp->conn_incoming_ifindex = connp->conn_bound_if;
5177 5191 }
5178 5192
5179 5193 switch (laddr_type) {
5180 5194 case IPVL_UNICAST_UP:
5181 5195 case IPVL_UNICAST_DOWN:
5182 5196 connp->conn_saddr_v6 = v6src;
5183 5197 connp->conn_mcbc_bind = B_FALSE;
5184 5198 break;
5185 5199 case IPVL_MCAST:
5186 5200 case IPVL_BCAST:
5187 5201 /* ip_set_destination will pick a source address later */
5188 5202 connp->conn_saddr_v6 = ipv6_all_zeros;
5189 5203 connp->conn_mcbc_bind = B_TRUE;
5190 5204 break;
5191 5205 }
5192 5206
5193 5207 /* Any errors after this point should use late_error */
5194 5208 connp->conn_lport = lport;
5195 5209
5196 5210 /*
5197 5211 * Now reset the next anonymous port if the application requested
5198 5212 * an anonymous port, or we handed out the next anonymous port.
5199 5213 */
5200 5214 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5201 5215 us->us_next_port_to_try = port + 1;
5202 5216 }
5203 5217
5204 5218 /* Initialize the T_BIND_ACK. */
5205 5219 if (connp->conn_family == AF_INET) {
5206 5220 sin->sin_port = connp->conn_lport;
5207 5221 } else {
5208 5222 sin6->sin6_port = connp->conn_lport;
5209 5223 }
/*
 * Publish the binding: mark the endpoint idle and insert it into the
 * hash bucket whose lock has been held since the search loop.
 */
5210 5224 udp->udp_state = TS_IDLE;
5211 5225 udp_bind_hash_insert(udpf, udp);
5212 5226 mutex_exit(&udpf->uf_lock);
5213 5227 mutex_exit(&connp->conn_lock);
5214 5228
5215 5229 if (cl_inet_bind) {
5216 5230 /*
5217 5231 * Running in cluster mode - register bind information
5218 5232 */
5219 5233 if (connp->conn_ipversion == IPV4_VERSION) {
5220 5234 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5221 5235 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5222 5236 (in_port_t)connp->conn_lport, NULL);
5223 5237 } else {
5224 5238 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5225 5239 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5226 5240 (in_port_t)connp->conn_lport, NULL);
5227 5241 }
5228 5242 }
5229 5243
/*
 * Labeled systems only: classify the address and port for multilevel
 * port (MLP) purposes and reject the bind when the caller lacks the
 * required setup or privilege.  Every failure in this section unlocks
 * conn_lock and unwinds through late_error.
 */
5230 5244 mutex_enter(&connp->conn_lock);
5231 5245 connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5232 5246 if (is_system_labeled() && (!connp->conn_anon_port ||
5233 5247 connp->conn_anon_mlp)) {
5234 5248 uint16_t mlpport;
5235 5249 zone_t *zone;
5236 5250
5237 5251 zone = crgetzone(cr);
5238 5252 connp->conn_mlp_type =
5239 5253 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5240 5254 mlptSingle;
5241 5255 addrtype = tsol_mlp_addr_type(
5242 5256 connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5243 5257 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5244 5258 if (addrtype == mlptSingle) {
5245 5259 error = -TNOADDR;
5246 5260 mutex_exit(&connp->conn_lock);
5247 5261 goto late_error;
5248 5262 }
/* For an anonymous port the MLP type is looked up using PMAPPORT. */
5249 5263 mlpport = connp->conn_anon_port ? PMAPPORT : port;
5250 5264 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5251 5265 addrtype);
5252 5266
5253 5267 /*
5254 5268 * It is a coding error to attempt to bind an MLP port
5255 5269 * without first setting SOL_SOCKET/SCM_UCRED.
5256 5270 */
5257 5271 if (mlptype != mlptSingle &&
5258 5272 connp->conn_mlp_type == mlptSingle) {
5259 5273 error = EINVAL;
5260 5274 mutex_exit(&connp->conn_lock);
5261 5275 goto late_error;
5262 5276 }
5263 5277
5264 5278 /*
5265 5279 * It is an access violation to attempt to bind an MLP port
5266 5280 * without NET_BINDMLP privilege.
5267 5281 */
5268 5282 if (mlptype != mlptSingle &&
5269 5283 secpolicy_net_bindmlp(cr) != 0) {
5270 5284 if (connp->conn_debug) {
5271 5285 (void) strlog(UDP_MOD_ID, 0, 1,
5272 5286 SL_ERROR|SL_TRACE,
5273 5287 "udp_bind: no priv for multilevel port %d",
5274 5288 mlpport);
5275 5289 }
5276 5290 error = -TACCES;
5277 5291 mutex_exit(&connp->conn_lock);
5278 5292 goto late_error;
5279 5293 }
5280 5294
5281 5295 /*
5282 5296 * If we're specifically binding a shared IP address and the
5283 5297 * port is MLP on shared addresses, then check to see if this
5284 5298 * zone actually owns the MLP. Reject if not.
5285 5299 */
5286 5300 if (mlptype == mlptShared && addrtype == mlptShared) {
5287 5301 /*
5288 5302 * No need to handle exclusive-stack zones since
5289 5303 * ALL_ZONES only applies to the shared stack.
5290 5304 */
5291 5305 zoneid_t mlpzone;
5292 5306
5293 5307 mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5294 5308 htons(mlpport));
5295 5309 if (connp->conn_zoneid != mlpzone) {
5296 5310 if (connp->conn_debug) {
5297 5311 (void) strlog(UDP_MOD_ID, 0, 1,
5298 5312 SL_ERROR|SL_TRACE,
5299 5313 "udp_bind: attempt to bind port "
5300 5314 "%d on shared addr in zone %d "
5301 5315 "(should be %d)",
5302 5316 mlpport, connp->conn_zoneid,
5303 5317 mlpzone);
5304 5318 }
5305 5319 error = -TACCES;
5306 5320 mutex_exit(&connp->conn_lock);
5307 5321 goto late_error;
5308 5322 }
5309 5323 }
5310 5324 if (connp->conn_anon_port) {
5311 5325 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5312 5326 port, B_TRUE);
5313 5327 if (error != 0) {
5314 5328 if (connp->conn_debug) {
5315 5329 (void) strlog(UDP_MOD_ID, 0, 1,
5316 5330 SL_ERROR|SL_TRACE,
5317 5331 "udp_bind: cannot establish anon "
5318 5332 "MLP for port %d", port);
5319 5333 }
5320 5334 error = -TACCES;
5321 5335 mutex_exit(&connp->conn_lock);
5322 5336 goto late_error;
5323 5337 }
5324 5338 }
5325 5339 connp->conn_mlp_type = mlptype;
5326 5340 }
5327 5341
5328 5342 /*
5329 5343 * We create an initial header template here to make a subsequent
5330 5344 * sendto have a starting point. Since conn_last_dst is zero the
5331 5345 * first sendto will always follow the 'dst changed' code path.
5332 5346 * Note that we defer massaging options and the related checksum
5333 5347 * adjustment until we have a destination address.
5334 5348 */
5335 5349 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5336 5350 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5337 5351 if (error != 0) {
5338 5352 mutex_exit(&connp->conn_lock);
5339 5353 goto late_error;
5340 5354 }
5341 5355 /* Just in case */
5342 5356 connp->conn_faddr_v6 = ipv6_all_zeros;
5343 5357 connp->conn_fport = 0;
5344 5358 connp->conn_v6lastdst = ipv6_all_zeros;
5345 5359 mutex_exit(&connp->conn_lock);
5346 5360
5347 5361 error = ip_laddr_fanout_insert(connp);
5348 5362 if (error != 0)
5349 5363 goto late_error;
5350 5364
5351 5365 /* Bind succeeded */
5352 5366 return (0);
5353 5367
/*
 * Unwind path, entered with conn_lock NOT held: clears the addresses and
 * port recorded above, removes the endpoint from the bind hash, returns
 * it to TS_UNBND and rebuilds the header template before returning error.
 *
 * NOTE(review): the cl_inet_bind() registration made above is not undone
 * here, and conn_mcbc_bind keeps whatever value was set before the
 * failure (udp_do_unbind() resets it) -- confirm both are intended.
 */
5354 5368 late_error:
5355 5369 /* We had already picked the port number, and then the bind failed */
5356 5370 mutex_enter(&connp->conn_lock);
5357 5371 udpf = &us->us_bind_fanout[
5358 5372 UDP_BIND_HASH(connp->conn_lport,
5359 5373 us->us_bind_fanout_size)];
5360 5374 mutex_enter(&udpf->uf_lock);
5361 5375 connp->conn_saddr_v6 = ipv6_all_zeros;
5362 5376 connp->conn_bound_addr_v6 = ipv6_all_zeros;
5363 5377 connp->conn_laddr_v6 = ipv6_all_zeros;
5364 5378 if (scopeid != 0) {
5365 5379 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5366 5380 connp->conn_incoming_ifindex = connp->conn_bound_if;
5367 5381 }
5368 5382 udp->udp_state = TS_UNBND;
5369 5383 udp_bind_hash_remove(udp, B_TRUE);
5370 5384 connp->conn_lport = 0;
5371 5385 mutex_exit(&udpf->uf_lock);
5372 5386 connp->conn_anon_port = B_FALSE;
5373 5387 connp->conn_mlp_type = mlptSingle;
5374 5388
5375 5389 connp->conn_v6lastdst = ipv6_all_zeros;
5376 5390
5377 5391 /* Restore the header that was built above - different source address */
/* The original error is what gets reported; this result is ignored. */
5378 5392 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5379 5393 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5380 5394 mutex_exit(&connp->conn_lock);
5381 5395 return (error);
5382 5396 }
5383 5397
5384 5398 int
5385 5399 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5386 5400 socklen_t len, cred_t *cr)
5387 5401 {
5388 5402 int error;
5389 5403 conn_t *connp;
5390 5404
5391 5405 /* All Solaris components should pass a cred for this operation. */
5392 5406 ASSERT(cr != NULL);
5393 5407
5394 5408 connp = (conn_t *)proto_handle;
5395 5409
5396 5410 if (sa == NULL)
5397 5411 error = udp_do_unbind(connp);
5398 5412 else
5399 5413 error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5400 5414
5401 5415 if (error < 0) {
5402 5416 if (error == -TOUTSTATE)
5403 5417 error = EINVAL;
5404 5418 else
5405 5419 error = proto_tlitosyserr(-error);
5406 5420 }
5407 5421
5408 5422 return (error);
5409 5423 }
5410 5424
5411 5425 static int
5412 5426 udp_implicit_bind(conn_t *connp, cred_t *cr)
5413 5427 {
5414 5428 sin6_t sin6addr;
5415 5429 sin_t *sin;
5416 5430 sin6_t *sin6;
5417 5431 socklen_t len;
5418 5432 int error;
5419 5433
5420 5434 /* All Solaris components should pass a cred for this operation. */
5421 5435 ASSERT(cr != NULL);
5422 5436
5423 5437 if (connp->conn_family == AF_INET) {
5424 5438 len = sizeof (struct sockaddr_in);
5425 5439 sin = (sin_t *)&sin6addr;
5426 5440 *sin = sin_null;
5427 5441 sin->sin_family = AF_INET;
5428 5442 sin->sin_addr.s_addr = INADDR_ANY;
5429 5443 } else {
5430 5444 ASSERT(connp->conn_family == AF_INET6);
5431 5445 len = sizeof (sin6_t);
5432 5446 sin6 = (sin6_t *)&sin6addr;
5433 5447 *sin6 = sin6_null;
5434 5448 sin6->sin6_family = AF_INET6;
5435 5449 V6_SET_ZERO(sin6->sin6_addr);
5436 5450 }
5437 5451
5438 5452 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5439 5453 cr, B_FALSE);
5440 5454 return ((error < 0) ? proto_tlitosyserr(-error) : error);
5441 5455 }
5442 5456
5443 5457 /*
5444 5458 * This routine removes a port number association from a stream. It
5445 5459 * is called by udp_unbind and udp_tpi_unbind.
5446 5460 */
5447 5461 static int
5448 5462 udp_do_unbind(conn_t *connp)
5449 5463 {
5450 5464 udp_t *udp = connp->conn_udp;
5451 5465 udp_fanout_t *udpf;
5452 5466 udp_stack_t *us = udp->udp_us;
5453 5467
5454 5468 if (cl_inet_unbind != NULL) {
5455 5469 /*
5456 5470 * Running in cluster mode - register unbind information
5457 5471 */
5458 5472 if (connp->conn_ipversion == IPV4_VERSION) {
5459 5473 (*cl_inet_unbind)(
5460 5474 connp->conn_netstack->netstack_stackid,
5461 5475 IPPROTO_UDP, AF_INET,
5462 5476 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5463 5477 (in_port_t)connp->conn_lport, NULL);
5464 5478 } else {
5465 5479 (*cl_inet_unbind)(
5466 5480 connp->conn_netstack->netstack_stackid,
5467 5481 IPPROTO_UDP, AF_INET6,
5468 5482 (uint8_t *)&(connp->conn_laddr_v6),
5469 5483 (in_port_t)connp->conn_lport, NULL);
5470 5484 }
5471 5485 }
5472 5486
5473 5487 mutex_enter(&connp->conn_lock);
5474 5488 /* If a bind has not been done, we can't unbind. */
5475 5489 if (udp->udp_state == TS_UNBND) {
5476 5490 mutex_exit(&connp->conn_lock);
5477 5491 return (-TOUTSTATE);
5478 5492 }
5479 5493 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5480 5494 us->us_bind_fanout_size)];
5481 5495 mutex_enter(&udpf->uf_lock);
5482 5496 udp_bind_hash_remove(udp, B_TRUE);
5483 5497 connp->conn_saddr_v6 = ipv6_all_zeros;
5484 5498 connp->conn_bound_addr_v6 = ipv6_all_zeros;
5485 5499 connp->conn_laddr_v6 = ipv6_all_zeros;
5486 5500 connp->conn_mcbc_bind = B_FALSE;
5487 5501 connp->conn_lport = 0;
5488 5502 /* In case we were also connected */
5489 5503 connp->conn_faddr_v6 = ipv6_all_zeros;
5490 5504 connp->conn_fport = 0;
5491 5505 mutex_exit(&udpf->uf_lock);
5492 5506
5493 5507 connp->conn_v6lastdst = ipv6_all_zeros;
5494 5508 udp->udp_state = TS_UNBND;
5495 5509
5496 5510 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5497 5511 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5498 5512 mutex_exit(&connp->conn_lock);
5499 5513
5500 5514 ip_unbind(connp);
5501 5515
5502 5516 return (0);
5503 5517 }
5504 5518
5505 5519 /*
5506 5520 * It associates a default destination address with the stream.
 *
 * sa/len give the destination as a sin_t or a sin6_t; any other length
 * returns EINVAL. cr and pid are recorded in the conn_t and in the
 * transmit attributes as the identity of the connect caller.
 *
 * Returns 0 on success. Failures are reported either as a positive errno
 * (EINVAL, EADDRNOTAVAIL, ENOMEM) or as a negated TLI error (-TBADADDR,
 * -TOUTSTATE); errors from conn_connect(), ipcl_conn_insert() and
 * udp_build_hdr_template() are passed through unchanged.
 * Failures taken through the connect_failed label put the endpoint back
 * in TS_IDLE with the foreign address and port cleared.
5507 5521 */
5508 5522 static int
5509 5523 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5510 5524 cred_t *cr, pid_t pid)
5511 5525 {
5512 5526 sin6_t *sin6;
5513 5527 sin_t *sin;
5514 5528 in6_addr_t v6dst;
5515 5529 ipaddr_t v4dst;
5516 5530 uint16_t dstport;
5517 5531 uint32_t flowinfo;
5518 5532 udp_fanout_t *udpf;
|
↓ open down ↓ |
1758 lines elided |
↑ open up ↑ |
5519 5533 udp_t *udp, *udp1;
5520 5534 ushort_t ipversion;
5521 5535 udp_stack_t *us;
5522 5536 int error;
5523 5537 conn_t *connp1;
5524 5538 ip_xmit_attr_t *ixa;
5525 5539 ip_xmit_attr_t *oldixa;
5526 5540 uint_t scopeid = 0;
5527 5541 uint_t srcid = 0;
5528 5542 in6_addr_t v6src = connp->conn_saddr_v6;
/* v4mapped is only assigned and read in the sizeof (sin6_t) case below. */
5543 + boolean_t v4mapped;
5529 5544
5530 5545 udp = connp->conn_udp;
5531 5546 us = udp->udp_us;
5532 5547
5533 5548 /*
5534 5549 * Address has been verified by the caller
5535 5550 */
5536 5551 switch (len) {
5537 5552 default:
5538 5553 /*
5539 5554 * Should never happen
5540 5555 */
5541 5556 return (EINVAL);
5542 5557
5543 5558 case sizeof (sin_t):
5544 5559 sin = (sin_t *)sa;
5545 5560 v4dst = sin->sin_addr.s_addr;
5546 5561 dstport = sin->sin_port;
|
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
5547 5562 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5548 5563 ASSERT(connp->conn_ipversion == IPV4_VERSION);
5549 5564 ipversion = IPV4_VERSION;
5550 5565 break;
5551 5566
5552 5567 case sizeof (sin6_t):
5553 5568 sin6 = (sin6_t *)sa;
5554 5569 v6dst = sin6->sin6_addr;
5555 5570 dstport = sin6->sin6_port;
5556 5571 srcid = sin6->__sin6_src_id;
5572 + v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
/*
 * The source id is only looked up when the caller supplied one and the
 * conn_t has no source address yet (v6src starts as conn_saddr_v6).
 * Presumably ip_srcid_find_id() fills in v6src; in the new code a
 * nonzero return is treated as a v4mapped/v6 mismatch.
 */
5557 5573 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5558 - ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5559 - connp->conn_netstack);
5574 + if (ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5575 + v4mapped, connp->conn_netstack)) {
5576 + /* Mismatch v4mapped/v6 specified by srcid. */
5577 + return (EADDRNOTAVAIL);
5578 + }
5560 5579 }
5561 - if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
5580 + if (v4mapped) {
5562 5581 if (connp->conn_ipv6_v6only)
5563 5582 return (EADDRNOTAVAIL);
5564 5583
5565 5584 /*
5566 5585 * Destination address is mapped IPv6 address.
5567 5586 * Source bound address should be unspecified or
5568 5587 * IPv6 mapped address as well.
5569 5588 */
5570 5589 if (!IN6_IS_ADDR_UNSPECIFIED(
5571 5590 &connp->conn_bound_addr_v6) &&
5572 5591 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5573 5592 return (EADDRNOTAVAIL);
5574 5593 }
5575 5594 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5576 5595 ipversion = IPV4_VERSION;
5577 5596 flowinfo = 0;
5578 5597 } else {
5579 5598 ipversion = IPV6_VERSION;
5580 5599 flowinfo = sin6->sin6_flowinfo;
5581 5600 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5582 5601 scopeid = sin6->sin6_scope_id;
5583 5602 }
5584 5603 break;
5585 5604 }
5586 5605
5587 5606 if (dstport == 0)
5588 5607 return (-TBADADDR);
5589 5608
5590 5609 /*
5591 5610 * If there is a different thread using conn_ixa then we get a new
5592 5611 * copy and cut the old one loose from conn_ixa. Otherwise we use
5593 5612 * conn_ixa and prevent any other thread from using/changing it.
5594 5613 * Once connect() is done other threads can use conn_ixa since the
5595 5614 * refcnt will be back at one.
5596 5615 * We defer updating conn_ixa until later to handle any concurrent
5597 5616 * conn_ixa_cleanup thread.
5598 5617 */
5599 5618 ixa = conn_get_ixa(connp, B_FALSE);
5600 5619 if (ixa == NULL)
5601 5620 return (ENOMEM);
5602 5621
5603 5622 mutex_enter(&connp->conn_lock);
5604 5623 /*
5605 5624 * This udp_t must have bound to a port already before doing a connect.
5606 5625 * Reject if a connect is in progress (we drop conn_lock during
5607 5626 * udp_do_connect).
5608 5627 */
5609 5628 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5610 5629 mutex_exit(&connp->conn_lock);
5611 5630 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5612 5631 "udp_connect: bad state, %u", udp->udp_state);
5613 5632 ixa_refrele(ixa);
5614 5633 return (-TOUTSTATE);
5615 5634 }
5616 5635 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5617 5636
5618 5637 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5619 5638 us->us_bind_fanout_size)];
5620 5639
5621 5640 mutex_enter(&udpf->uf_lock);
5622 5641 if (udp->udp_state == TS_DATA_XFER) {
5623 5642 /* Already connected - clear out state */
5624 5643 if (connp->conn_mcbc_bind)
5625 5644 connp->conn_saddr_v6 = ipv6_all_zeros;
5626 5645 else
5627 5646 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5628 5647 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5629 5648 connp->conn_faddr_v6 = ipv6_all_zeros;
5630 5649 connp->conn_fport = 0;
5631 5650 udp->udp_state = TS_IDLE;
5632 5651 }
5633 5652
5634 5653 connp->conn_fport = dstport;
5635 5654 connp->conn_ipversion = ipversion;
5636 5655 if (ipversion == IPV4_VERSION) {
5637 5656 /*
5638 5657 * Interpret a zero destination to mean loopback.
5639 5658 * Update the T_CONN_REQ (sin/sin6) since it is used to
5640 5659 * generate the T_CONN_CON.
5641 5660 */
5642 5661 if (v4dst == INADDR_ANY) {
5643 5662 v4dst = htonl(INADDR_LOOPBACK);
5644 5663 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5645 5664 if (connp->conn_family == AF_INET) {
5646 5665 sin->sin_addr.s_addr = v4dst;
5647 5666 } else {
5648 5667 sin6->sin6_addr = v6dst;
5649 5668 }
5650 5669 }
5651 5670 connp->conn_faddr_v6 = v6dst;
5652 5671 connp->conn_flowinfo = 0;
5653 5672 } else {
5654 5673 ASSERT(connp->conn_ipversion == IPV6_VERSION);
5655 5674 /*
5656 5675 * Interpret a zero destination to mean loopback.
5657 5676 * Update the T_CONN_REQ (sin/sin6) since it is used to
5658 5677 * generate the T_CONN_CON.
5659 5678 */
5660 5679 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
5661 5680 v6dst = ipv6_loopback;
5662 5681 sin6->sin6_addr = v6dst;
5663 5682 }
5664 5683 connp->conn_faddr_v6 = v6dst;
5665 5684 connp->conn_flowinfo = flowinfo;
5666 5685 }
5667 5686 mutex_exit(&udpf->uf_lock);
5668 5687
5669 5688 /*
5670 5689 * We update our cred/cpid based on the caller of connect
5671 5690 */
5672 5691 if (connp->conn_cred != cr) {
5673 5692 crhold(cr);
5674 5693 crfree(connp->conn_cred);
5675 5694 connp->conn_cred = cr;
5676 5695 }
5677 5696 connp->conn_cpid = pid;
5678 5697 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
5679 5698 ixa->ixa_cred = cr;
5680 5699 ixa->ixa_cpid = pid;
5681 5700 if (is_system_labeled()) {
5682 5701 /* We need to restart with a label based on the cred */
5683 5702 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
5684 5703 }
5685 5704
5686 5705 if (scopeid != 0) {
5687 5706 ixa->ixa_flags |= IXAF_SCOPEID_SET;
5688 5707 ixa->ixa_scopeid = scopeid;
5689 5708 connp->conn_incoming_ifindex = scopeid;
5690 5709 } else {
5691 5710 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5692 5711 connp->conn_incoming_ifindex = connp->conn_bound_if;
5693 5712 }
5694 5713 /*
5695 5714 * conn_connect will drop conn_lock and reacquire it.
5696 5715 * To prevent a send* from messing with this udp_t while the lock
5697 5716 * is dropped we set udp_state and clear conn_v6lastdst.
5698 5717 * That will make all send* fail with EISCONN.
5699 5718 */
5700 5719 connp->conn_v6lastdst = ipv6_all_zeros;
5701 5720 udp->udp_state = TS_WCON_CREQ;
5702 5721
5703 5722 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
5704 5723 mutex_exit(&connp->conn_lock);
5705 5724 if (error != 0)
5706 5725 goto connect_failed;
5707 5726
5708 5727 /*
5709 5728 * The addresses have been verified. Time to insert in
5710 5729 * the correct fanout list.
5711 5730 */
5712 5731 error = ipcl_conn_insert(connp);
5713 5732 if (error != 0)
5714 5733 goto connect_failed;
5715 5734
5716 5735 mutex_enter(&connp->conn_lock);
5717 5736 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5718 5737 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5719 5738 if (error != 0) {
5720 5739 mutex_exit(&connp->conn_lock);
5721 5740 goto connect_failed;
5722 5741 }
5723 5742
5724 5743 udp->udp_state = TS_DATA_XFER;
5725 5744 /* Record this as the "last" send even though we haven't sent any */
5726 5745 connp->conn_v6lastdst = connp->conn_faddr_v6;
5727 5746 connp->conn_lastipversion = connp->conn_ipversion;
5728 5747 connp->conn_lastdstport = connp->conn_fport;
5729 5748 connp->conn_lastflowinfo = connp->conn_flowinfo;
5730 5749 connp->conn_lastscopeid = scopeid;
5731 5750 connp->conn_lastsrcid = srcid;
5732 5751 /* Also remember a source to use together with lastdst */
5733 5752 connp->conn_v6lastsrc = v6src;
5734 5753
5735 5754 oldixa = conn_replace_ixa(connp, ixa);
5736 5755 mutex_exit(&connp->conn_lock);
5737 5756 ixa_refrele(oldixa);
5738 5757
5739 5758 /*
5740 5759 * We've picked a source address above. Now we can
5741 5760 * verify that the src/port/dst/port is unique for all
5742 5761 * connections in TS_DATA_XFER, skipping ourselves.
5743 5762 */
5744 5763 mutex_enter(&udpf->uf_lock);
5745 5764 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
5746 5765 if (udp1->udp_state != TS_DATA_XFER)
5747 5766 continue;
5748 5767
5749 5768 if (udp1 == udp)
5750 5769 continue;
5751 5770
5752 5771 connp1 = udp1->udp_connp;
5753 5772 if (connp->conn_lport != connp1->conn_lport ||
5754 5773 connp->conn_ipversion != connp1->conn_ipversion ||
5755 5774 dstport != connp1->conn_fport ||
5756 5775 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
5757 5776 &connp1->conn_laddr_v6) ||
5758 5777 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
5759 5778 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
5760 5779 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
5761 5780 continue;
5762 5781 mutex_exit(&udpf->uf_lock);
5763 5782 error = -TBADADDR;
5764 5783 goto connect_failed;
5765 5784 }
5766 5785 if (cl_inet_connect2 != NULL) {
5767 5786 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
5768 5787 if (error != 0) {
5769 5788 mutex_exit(&udpf->uf_lock);
5770 5789 error = -TBADADDR;
5771 5790 goto connect_failed;
5772 5791 }
5773 5792 }
5774 5793 mutex_exit(&udpf->uf_lock);
5775 5794
5776 5795 ixa_refrele(ixa);
5777 5796 return (0);
5778 5797
5779 5798 connect_failed:
/*
 * Every goto to this label comes after the ixa == NULL check above and
 * ixa is not reassigned, so the NULL test below is purely defensive.
 */
5780 5799 if (ixa != NULL)
5781 5800 ixa_refrele(ixa);
5782 5801 mutex_enter(&connp->conn_lock);
5783 5802 mutex_enter(&udpf->uf_lock);
5784 5803 udp->udp_state = TS_IDLE;
5785 5804 connp->conn_faddr_v6 = ipv6_all_zeros;
5786 5805 connp->conn_fport = 0;
5787 5806 /* In case the source address was set above */
5788 5807 if (connp->conn_mcbc_bind)
5789 5808 connp->conn_saddr_v6 = ipv6_all_zeros;
5790 5809 else
5791 5810 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5792 5811 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5793 5812 mutex_exit(&udpf->uf_lock);
5794 5813
5795 5814 connp->conn_v6lastdst = ipv6_all_zeros;
5796 5815 connp->conn_flowinfo = 0;
5797 5816
5798 5817 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5799 5818 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5800 5819 mutex_exit(&connp->conn_lock);
5801 5820 return (error);
5802 5821 }
5803 5822
/*
 * Socket downcall for connect.
 *
 * A NULL sa requests a disconnect, which is only accepted while the
 * endpoint is in TS_DATA_XFER (EINVAL otherwise). For a non-NULL sa the
 * address is checked with proto_verify_ip_addr(), an unbound endpoint is
 * implicitly bound, and udp_do_connect() does the actual work. If the
 * connect fails after this call did the implicit bind, the bind is undone
 * with udp_do_unbind().
 *
 * On success *id is set to 0 and the su_connected upcall is made.
 * Negative (TLI) errors from udp_do_connect() are converted with
 * proto_tlitosyserr() before being returned.
 */
5804 5823 static int
5805 5824 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5806 5825 socklen_t len, sock_connid_t *id, cred_t *cr)
5807 5826 {
5808 5827 conn_t *connp = (conn_t *)proto_handle;
5809 5828 udp_t *udp = connp->conn_udp;
5810 5829 int error;
5811 5830 boolean_t did_bind = B_FALSE;
5812 5831 pid_t pid = curproc->p_pid;
5813 5832
5814 5833 /* All Solaris components should pass a cred for this operation. */
5815 5834 ASSERT(cr != NULL);
5816 5835
5817 5836 if (sa == NULL) {
5818 5837 /*
5819 5838 * Disconnect
5820 5839 * Make sure we are connected
5821 5840 */
5822 5841 if (udp->udp_state != TS_DATA_XFER)
5823 5842 return (EINVAL);
5824 5843
5825 5844 error = udp_disconnect(connp);
5826 5845 return (error);
5827 5846 }
5828 5847
5829 5848 error = proto_verify_ip_addr(connp->conn_family, sa, len);
5830 5849 if (error != 0)
5831 5850 goto done;
5832 5851
5833 5852 /* do an implicit bind if necessary */
5834 5853 if (udp->udp_state == TS_UNBND) {
5835 5854 error = udp_implicit_bind(connp, cr);
5836 5855 /*
5837 5856 * We could be racing with an actual bind, in which case
5838 5857 * we would see EPROTO. We cross our fingers and try
5839 5858 * to connect.
5840 5859 */
5841 5860 if (!(error == 0 || error == EPROTO))
5842 5861 goto done;
5843 5862 did_bind = B_TRUE;
5844 5863 }
5845 5864 /*
5846 5865 * set SO_DGRAM_ERRIND
5847 5866 */
/* Set before the connect attempt; it is not cleared again if that fails. */
5848 5867 connp->conn_dgram_errind = B_TRUE;
5849 5868
5850 5869 error = udp_do_connect(connp, sa, len, cr, pid);
5851 5870
5852 5871 if (error != 0 && did_bind) {
5853 5872 int unbind_err;
5854 5873
5855 5874 unbind_err = udp_do_unbind(connp);
5856 5875 ASSERT(unbind_err == 0);
5857 5876 }
5858 5877
5859 5878 if (error == 0) {
5860 5879 *id = 0;
5861 5880 (*connp->conn_upcalls->su_connected)
5862 5881 (connp->conn_upper_handle, 0, NULL, -1);
5863 5882 } else if (error < 0) {
5864 5883 error = proto_tlitosyserr(-error);
5865 5884 }
5866 5885
5867 5886 done:
5868 5887 if (error != 0 && udp->udp_state == TS_DATA_XFER) {
5869 5888 /*
5870 5889 * No need to hold locks to set state
5871 5890 * after connect failure socket state is undefined
5872 5891 * We set the state only to imitate old sockfs behavior
5873 5892 */
5874 5893 udp->udp_state = TS_IDLE;
5875 5894 }
5876 5895 return (error);
5877 5896 }
5878 5897
/*
 * Socket downcall that sends the M_DATA message mp.
 *
 * An unbound endpoint is implicitly bound first. If msg->msg_name is NULL
 * the endpoint must be connected (TS_DATA_XFER), otherwise EDESTADDRREQ is
 * returned; a msg_name supplied on a connected endpoint yields EISCONN.
 * For an unconnected send the address is verified and then handled per
 * conn_family (AF_INET6 or AF_INET, anything else is EINVAL). A zero
 * destination is rewritten to loopback in the buffer msg_name points to.
 * Messages with ancillary data (msg_controllen != 0) are sent through
 * udp_output_ancillary(); otherwise either udp_output_lastdst() or
 * udp_output_newdst() is used.
 *
 * Once one of the output routines has been called, its error is returned
 * unless us_sendto_ignerr is set, in which case 0 is returned.
 */
5879 5898 int
5880 5899 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
5881 5900 cred_t *cr)
5882 5901 {
5883 5902 sin6_t *sin6;
5884 5903 sin_t *sin = NULL;
5885 5904 uint_t srcid;
5886 5905 conn_t *connp = (conn_t *)proto_handle;
5887 5906 udp_t *udp = connp->conn_udp;
5888 5907 int error = 0;
5889 5908 udp_stack_t *us = udp->udp_us;
5890 5909 ushort_t ipversion;
5891 5910 pid_t pid = curproc->p_pid;
5892 5911 ip_xmit_attr_t *ixa;
5893 5912
5894 5913 ASSERT(DB_TYPE(mp) == M_DATA);
5895 5914
5896 5915 /* All Solaris components should pass a cred for this operation. */
5897 5916 ASSERT(cr != NULL);
5898 5917
5899 5918 /* do an implicit bind if necessary */
5900 5919 if (udp->udp_state == TS_UNBND) {
5901 5920 error = udp_implicit_bind(connp, cr);
5902 5921 /*
5903 5922 * We could be racing with an actual bind, in which case
5904 5923 * we would see EPROTO. We cross our fingers and try
5905 5924 * to send.
5906 5925 */
5907 5926 if (!(error == 0 || error == EPROTO)) {
5908 5927 freemsg(mp);
5909 5928 return (error);
5910 5929 }
5911 5930 }
5912 5931
5913 5932 /* Connected? */
5914 5933 if (msg->msg_name == NULL) {
5915 5934 if (udp->udp_state != TS_DATA_XFER) {
/*
 * NOTE(review): unlike the implicit-bind failure above, this and the
 * later early error returns in this function do not freemsg(mp) --
 * confirm who owns mp on those paths.
 */
5916 5935 UDPS_BUMP_MIB(us, udpOutErrors);
5917 5936 return (EDESTADDRREQ);
5918 5937 }
5919 5938 if (msg->msg_controllen != 0) {
5920 5939 error = udp_output_ancillary(connp, NULL, NULL, mp,
5921 5940 NULL, msg, cr, pid);
5922 5941 } else {
5923 5942 error = udp_output_connected(connp, mp, cr, pid);
5924 5943 }
5925 5944 if (us->us_sendto_ignerr)
5926 5945 return (0);
5927 5946 else
5928 5947 return (error);
5929 5948 }
5930 5949 if (udp->udp_state == TS_DATA_XFER) {
5931 5950 UDPS_BUMP_MIB(us, udpOutErrors);
5932 5951 return (EISCONN);
5933 5952 }
5934 5953 error = proto_verify_ip_addr(connp->conn_family,
5935 5954 (struct sockaddr *)msg->msg_name, msg->msg_namelen);
5936 5955 if (error != 0) {
5937 5956 UDPS_BUMP_MIB(us, udpOutErrors);
5938 5957 return (error);
5939 5958 }
5940 5959 switch (connp->conn_family) {
5941 5960 case AF_INET6:
5942 5961 sin6 = (sin6_t *)msg->msg_name;
5943 5962
5944 5963 srcid = sin6->__sin6_src_id;
5945 5964
5946 5965 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
5947 5966 /*
5948 5967 * Destination is a non-IPv4-compatible IPv6 address.
5949 5968 * Send out an IPv6 format packet.
5950 5969 */
5951 5970
5952 5971 /*
5953 5972 * If the local address is a mapped address return
5954 5973 * an error.
5955 5974 * It would be possible to send an IPv6 packet but the
5956 5975 * response would never make it back to the application
5957 5976 * since it is bound to a mapped address.
5958 5977 */
5959 5978 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
5960 5979 UDPS_BUMP_MIB(us, udpOutErrors);
5961 5980 return (EADDRNOTAVAIL);
5962 5981 }
5963 5982 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
5964 5983 sin6->sin6_addr = ipv6_loopback;
5965 5984 ipversion = IPV6_VERSION;
5966 5985 } else {
5967 5986 if (connp->conn_ipv6_v6only) {
5968 5987 UDPS_BUMP_MIB(us, udpOutErrors);
5969 5988 return (EADDRNOTAVAIL);
5970 5989 }
5971 5990
5972 5991 /*
5973 5992 * If the local address is not zero or a mapped address
5974 5993 * return an error. It would be possible to send an
5975 5994 * IPv4 packet but the response would never make it
5976 5995 * back to the application since it is bound to a
5977 5996 * non-mapped address.
5978 5997 */
5979 5998 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
5980 5999 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
5981 6000 UDPS_BUMP_MIB(us, udpOutErrors);
5982 6001 return (EADDRNOTAVAIL);
5983 6002 }
5984 6003
5985 6004 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
5986 6005 V4_PART_OF_V6(sin6->sin6_addr) =
5987 6006 htonl(INADDR_LOOPBACK);
5988 6007 }
5989 6008 ipversion = IPV4_VERSION;
5990 6009 }
5991 6010
5992 6011 /*
5993 6012 * We have to allocate an ip_xmit_attr_t before we grab
5994 6013 * conn_lock and we need to hold conn_lock once we've checked
5995 6014 * conn_same_as_last_v6 to handle concurrent send* calls on a
5996 6015 * socket.
5997 6016 */
5998 6017 if (msg->msg_controllen == 0) {
5999 6018 ixa = conn_get_ixa(connp, B_FALSE);
6000 6019 if (ixa == NULL) {
6001 6020 UDPS_BUMP_MIB(us, udpOutErrors);
6002 6021 return (ENOMEM);
6003 6022 }
6004 6023 } else {
6005 6024 ixa = NULL;
6006 6025 }
6007 6026 mutex_enter(&connp->conn_lock);
/*
 * A pending delayed error is always consumed here; it is only reported
 * to this caller when the destination matches the recorded address.
 */
6008 6027 if (udp->udp_delayed_error != 0) {
6009 6028 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6010 6029
6011 6030 error = udp->udp_delayed_error;
6012 6031 udp->udp_delayed_error = 0;
6013 6032
6014 6033 /* Compare IP address, port, and family */
6015 6034
6016 6035 if (sin6->sin6_port == sin2->sin6_port &&
6017 6036 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6018 6037 &sin2->sin6_addr) &&
6019 6038 sin6->sin6_family == sin2->sin6_family) {
6020 6039 mutex_exit(&connp->conn_lock);
6021 6040 UDPS_BUMP_MIB(us, udpOutErrors);
6022 6041 if (ixa != NULL)
6023 6042 ixa_refrele(ixa);
6024 6043 return (error);
6025 6044 }
6026 6045 }
6027 6046
6028 6047 if (msg->msg_controllen != 0) {
6029 6048 mutex_exit(&connp->conn_lock);
6030 6049 ASSERT(ixa == NULL);
6031 6050 error = udp_output_ancillary(connp, NULL, sin6, mp,
6032 6051 NULL, msg, cr, pid);
6033 6052 } else if (conn_same_as_last_v6(connp, sin6) &&
6034 6053 connp->conn_lastsrcid == srcid &&
6035 6054 ipsec_outbound_policy_current(ixa)) {
6036 6055 /* udp_output_lastdst drops conn_lock */
6037 6056 error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6038 6057 } else {
6039 6058 /* udp_output_newdst drops conn_lock */
6040 6059 error = udp_output_newdst(connp, mp, NULL, sin6,
6041 6060 ipversion, cr, pid, ixa);
6042 6061 }
6043 6062 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6044 6063 if (us->us_sendto_ignerr)
6045 6064 return (0);
6046 6065 else
6047 6066 return (error);
6048 6067 case AF_INET:
6049 6068 sin = (sin_t *)msg->msg_name;
6050 6069
6051 6070 ipversion = IPV4_VERSION;
6052 6071
6053 6072 if (sin->sin_addr.s_addr == INADDR_ANY)
6054 6073 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6055 6074
6056 6075 /*
6057 6076 * We have to allocate an ip_xmit_attr_t before we grab
6058 6077 * conn_lock and we need to hold conn_lock once we've checked
6059 6078 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6060 6079 */
6061 6080 if (msg->msg_controllen == 0) {
6062 6081 ixa = conn_get_ixa(connp, B_FALSE);
6063 6082 if (ixa == NULL) {
6064 6083 UDPS_BUMP_MIB(us, udpOutErrors);
6065 6084 return (ENOMEM);
6066 6085 }
6067 6086 } else {
6068 6087 ixa = NULL;
6069 6088 }
6070 6089 mutex_enter(&connp->conn_lock);
6071 6090 if (udp->udp_delayed_error != 0) {
6072 6091 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
6073 6092
6074 6093 error = udp->udp_delayed_error;
6075 6094 udp->udp_delayed_error = 0;
6076 6095
6077 6096 /* Compare IP address and port */
6078 6097
6079 6098 if (sin->sin_port == sin2->sin_port &&
6080 6099 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6081 6100 mutex_exit(&connp->conn_lock);
6082 6101 UDPS_BUMP_MIB(us, udpOutErrors);
6083 6102 if (ixa != NULL)
6084 6103 ixa_refrele(ixa);
6085 6104 return (error);
6086 6105 }
6087 6106 }
6088 6107 if (msg->msg_controllen != 0) {
6089 6108 mutex_exit(&connp->conn_lock);
6090 6109 ASSERT(ixa == NULL);
6091 6110 error = udp_output_ancillary(connp, sin, NULL, mp,
6092 6111 NULL, msg, cr, pid);
6093 6112 } else if (conn_same_as_last_v4(connp, sin) &&
6094 6113 ipsec_outbound_policy_current(ixa)) {
6095 6114 /* udp_output_lastdst drops conn_lock */
6096 6115 error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6097 6116 } else {
6098 6117 /* udp_output_newdst drops conn_lock */
6099 6118 error = udp_output_newdst(connp, mp, sin, NULL,
6100 6119 ipversion, cr, pid, ixa);
6101 6120 }
6102 6121 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6103 6122 if (us->us_sendto_ignerr)
6104 6123 return (0);
6105 6124 else
6106 6125 return (error);
6107 6126 default:
6108 6127 return (EINVAL);
6109 6128 }
6110 6129 }
6111 6130
/*
 * Socket downcall that switches this endpoint over to the stream q.
 *
 * Points both sides of the queue pair at the conn_t, sends an M_SETOPTS
 * message (write offset and high-water mark) up the read side, frees the
 * helper stream, and, when issocket is false, calls udp_use_pure_tpi().
 * It then gathers the capability ack, the local and peer addresses and
 * the SO_DGRAM_ERRIND/SO_DONTROUTE flags and hands them to quiesced_cb.
 * Any message returned by the callback is put at the head of the
 * fallback queue, the whole queue is sent up RD(q), and finally
 * IPCL_NONSTR is cleared from conn_flags.
 *
 * Always returns 0.
 */
6112 6131 int
6113 6132 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6114 6133 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
6115 6134 sock_quiesce_arg_t *arg)
6116 6135 {
6117 6136 conn_t *connp = (conn_t *)proto_handle;
6118 6137 udp_t *udp;
6119 6138 struct T_capability_ack tca;
6120 6139 struct sockaddr_in6 laddr, faddr;
6121 6140 socklen_t laddrlen, faddrlen;
6122 6141 short opts;
6123 6142 struct stroptions *stropt;
6124 6143 mblk_t *mp, *stropt_mp;
6125 6144 int error;
6126 6145
6127 6146 udp = connp->conn_udp;
6128 6147
6129 6148 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6130 6149
6131 6150 /*
6132 6151 * setup the fallback stream that was allocated
6133 6152 */
6134 6153 connp->conn_dev = (dev_t)RD(q)->q_ptr;
6135 6154 connp->conn_minor_arena = WR(q)->q_ptr;
6136 6155
6137 6156 RD(q)->q_ptr = WR(q)->q_ptr = connp;
6138 6157
6139 6158 WR(q)->q_qinfo = &udp_winit;
6140 6159
6141 6160 connp->conn_rq = RD(q);
6142 6161 connp->conn_wq = WR(q);
6143 6162
6144 6163 /* Notify stream head about options before sending up data */
6145 6164 stropt_mp->b_datap->db_type = M_SETOPTS;
6146 6165 stropt_mp->b_wptr += sizeof (*stropt);
6147 6166 stropt = (struct stroptions *)stropt_mp->b_rptr;
6148 6167 stropt->so_flags = SO_WROFF | SO_HIWAT;
6149 6168 stropt->so_wroff = connp->conn_wroff;
6150 6169 stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6151 6170 putnext(RD(q), stropt_mp);
6152 6171
6153 6172 /*
6154 6173 * Free the helper stream
6155 6174 */
6156 6175 ip_free_helper_stream(connp);
6157 6176
6158 6177 if (!issocket)
6159 6178 udp_use_pure_tpi(udp);
6160 6179
6161 6180 /*
6162 6181 * Collect the information needed to sync with the sonode
6163 6182 */
6164 6183 udp_do_capability_ack(udp, &tca, TC1_INFO);
6165 6184
6166 6185 laddrlen = faddrlen = sizeof (sin6_t);
6167 6186 (void) udp_getsockname((sock_lower_handle_t)connp,
6168 6187 (struct sockaddr *)&laddr, &laddrlen, CRED());
6169 6188 error = udp_getpeername((sock_lower_handle_t)connp,
6170 6189 (struct sockaddr *)&faddr, &faddrlen, CRED());
/* A failed getpeername is reported to the callback as a zero-length peer. */
6171 6190 if (error != 0)
6172 6191 faddrlen = 0;
6173 6192
6174 6193 opts = 0;
6175 6194 if (connp->conn_dgram_errind)
6176 6195 opts |= SO_DGRAM_ERRIND;
6177 6196 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6178 6197 opts |= SO_DONTROUTE;
6179 6198
6180 6199 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
6181 6200 (struct sockaddr *)&laddr, laddrlen,
6182 6201 (struct sockaddr *)&faddr, faddrlen, opts);
6183 6202
6184 6203 mutex_enter(&udp->udp_recv_lock);
6185 6204 /*
6186 6205 * Attempts to send data up during fallback will result in it being
6187 6206 * queued in udp_t. First push up the datagrams obtained from the
6188 6207 * socket, then any packets queued in udp_t.
6189 6208 */
6190 6209 if (mp != NULL) {
6191 6210 mp->b_next = udp->udp_fallback_queue_head;
6192 6211 udp->udp_fallback_queue_head = mp;
6193 6212 }
/* udp_recv_lock is dropped around each putnext() and retaken afterwards. */
6194 6213 while (udp->udp_fallback_queue_head != NULL) {
6195 6214 mp = udp->udp_fallback_queue_head;
6196 6215 udp->udp_fallback_queue_head = mp->b_next;
6197 6216 mutex_exit(&udp->udp_recv_lock);
6198 6217 mp->b_next = NULL;
6199 6218 putnext(RD(q), mp);
6200 6219 mutex_enter(&udp->udp_recv_lock);
6201 6220 }
6202 6221 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6203 6222 /*
6204 6223 * No longer a streams less socket
6205 6224 */
6206 6225 mutex_enter(&connp->conn_lock);
6207 6226 connp->conn_flags &= ~IPCL_NONSTR;
6208 6227 mutex_exit(&connp->conn_lock);
6209 6228
6210 6229 mutex_exit(&udp->udp_recv_lock);
6211 6230
6212 6231 ASSERT(connp->conn_ref >= 1);
6213 6232
6214 6233 return (0);
6215 6234 }
6216 6235
/*
 * Socket downcall for getpeername. With conn_lock held, returns ENOTCONN
 * unless the endpoint is in TS_DATA_XFER; otherwise returns the result of
 * conn_getpeername(), which is given sa and salenp. cr is only asserted
 * to be non-NULL.
 */
6217 6236 /* ARGSUSED3 */
6218 6237 int
6219 6238 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6220 6239 socklen_t *salenp, cred_t *cr)
6221 6240 {
6222 6241 conn_t *connp = (conn_t *)proto_handle;
6223 6242 udp_t *udp = connp->conn_udp;
6224 6243 int error;
6225 6244
6226 6245 /* All Solaris components should pass a cred for this operation. */
6227 6246 ASSERT(cr != NULL);
6228 6247
6229 6248 mutex_enter(&connp->conn_lock);
6230 6249 if (udp->udp_state != TS_DATA_XFER)
6231 6250 error = ENOTCONN;
6232 6251 else
6233 6252 error = conn_getpeername(connp, sa, salenp);
6234 6253 mutex_exit(&connp->conn_lock);
6235 6254 return (error);
6236 6255 }
6237 6256
/*
 * Socket downcall for getsockname. Calls conn_getsockname() with
 * conn_lock held and returns its result. cr is only asserted to be
 * non-NULL.
 */
6238 6257 /* ARGSUSED3 */
6239 6258 int
6240 6259 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6241 6260 socklen_t *salenp, cred_t *cr)
6242 6261 {
6243 6262 conn_t *connp = (conn_t *)proto_handle;
6244 6263 int error;
6245 6264
6246 6265 /* All Solaris components should pass a cred for this operation. */
6247 6266 ASSERT(cr != NULL);
6248 6267
6249 6268 mutex_enter(&connp->conn_lock);
6250 6269 error = conn_getsockname(connp, sa, salenp);
6251 6270 mutex_exit(&connp->conn_lock);
6252 6271 return (error);
6253 6272 }
6254 6273
/*
 * Socket downcall for getsockopt.
 *
 * The request is validated with proto_opt_check(), which also supplies
 * max_optbuf_len. The option value is fetched by udp_opt_get() into a
 * temporary buffer of that size, and at most *optlen bytes of it are
 * copied to optvalp; the number of bytes copied is stored in *optlen.
 *
 * Returns 0 on success, EINVAL if udp_opt_get() returns -1, or the
 * proto_opt_check() error (negative TLI values are converted with
 * proto_tlitosyserr()).
 */
6255 6274 int
6256 6275 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6257 6276 void *optvalp, socklen_t *optlen, cred_t *cr)
6258 6277 {
6259 6278 conn_t *connp = (conn_t *)proto_handle;
6260 6279 int error;
6261 6280 t_uscalar_t max_optbuf_len;
6262 6281 void *optvalp_buf;
6263 6282 int len;
6264 6283
6265 6284 /* All Solaris components should pass a cred for this operation. */
6266 6285 ASSERT(cr != NULL);
6267 6286
6268 6287 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6269 6288 udp_opt_obj.odb_opt_des_arr,
6270 6289 udp_opt_obj.odb_opt_arr_cnt,
6271 6290 B_FALSE, B_TRUE, cr);
6272 6291 if (error != 0) {
6273 6292 if (error < 0)
6274 6293 error = proto_tlitosyserr(-error);
6275 6294 return (error);
6276 6295 }
6277 6296
6278 6297 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6279 6298 len = udp_opt_get(connp, level, option_name, optvalp_buf);
6280 6299 if (len == -1) {
6281 6300 kmem_free(optvalp_buf, max_optbuf_len);
6282 6301 return (EINVAL);
6283 6302 }
6284 6303
6285 6304 /*
6286 6305 * update optlen and copy option value
6287 6306 */
6288 6307 t_uscalar_t size = MIN(len, *optlen);
6289 6308
6290 6309 bcopy(optvalp_buf, optvalp, size);
6291 6310 bcopy(&size, optlen, sizeof (size));
6292 6311
6293 6312 kmem_free(optvalp_buf, max_optbuf_len);
6294 6313 return (0);
6295 6314 }
6296 6315
/*
 * Socket downcall for setsockopt. Validates the request with
 * proto_opt_check() and then applies it through udp_opt_set() in
 * SETFN_OPTCOM_NEGOTIATE mode, returning udp_opt_set()'s result.
 * A proto_opt_check() failure is returned instead, with negative TLI
 * values converted by proto_tlitosyserr().
 */
6297 6316 int
6298 6317 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6299 6318 const void *optvalp, socklen_t optlen, cred_t *cr)
6300 6319 {
6301 6320 conn_t *connp = (conn_t *)proto_handle;
6302 6321 int error;
6303 6322
6304 6323 /* All Solaris components should pass a cred for this operation. */
6305 6324 ASSERT(cr != NULL);
6306 6325
6307 6326 error = proto_opt_check(level, option_name, optlen, NULL,
6308 6327 udp_opt_obj.odb_opt_des_arr,
6309 6328 udp_opt_obj.odb_opt_arr_cnt,
6310 6329 B_TRUE, B_FALSE, cr);
6311 6330
6312 6331 if (error != 0) {
6313 6332 if (error < 0)
6314 6333 error = proto_tlitosyserr(-error);
6315 6334 return (error);
6316 6335 }
6317 6336
/*
 * optvalp is passed as both the input and the output value buffer, with
 * its const qualifier cast away. &optlen is the address of this
 * function's by-value parameter, so any length written through it is
 * not visible to the caller.
 */
6318 6337 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6319 6338 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6320 6339 NULL, cr);
6321 6340
6322 6341 ASSERT(error >= 0);
6323 6342
6324 6343 return (error);
6325 6344 }
6326 6345
/*
 * Socket downcall that clears the flow-control flag: sets
 * conn_flow_cntrld to B_FALSE while holding udp_recv_lock.
 */
6327 6346 void
6328 6347 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6329 6348 {
6330 6349 conn_t *connp = (conn_t *)proto_handle;
6331 6350 udp_t *udp = connp->conn_udp;
6332 6351
6333 6352 mutex_enter(&udp->udp_recv_lock);
6334 6353 connp->conn_flow_cntrld = B_FALSE;
6335 6354 mutex_exit(&udp->udp_recv_lock);
6336 6355 }
6337 6356
/*
 * Socket downcall for shutdown. Makes the su_opctl upcall with
 * SOCK_OPCTL_SHUT_SEND unless how is SHUT_RD, and with
 * SOCK_OPCTL_SHUT_RECV unless how is SHUT_WR. Always returns 0.
 */
6338 6357 /* ARGSUSED2 */
6339 6358 int
6340 6359 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6341 6360 {
6342 6361 conn_t *connp = (conn_t *)proto_handle;
6343 6362
6344 6363 /* All Solaris components should pass a cred for this operation. */
6345 6364 ASSERT(cr != NULL);
6346 6365
6347 6366 /* shut down the send side */
6348 6367 if (how != SHUT_RD)
6349 6368 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6350 6369 SOCK_OPCTL_SHUT_SEND, 0);
6351 6370 /* shut down the recv side */
6352 6371 if (how != SHUT_WR)
6353 6372 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6354 6373 SOCK_OPCTL_SHUT_RECV, 0);
6355 6374 return (0);
6356 6375 }
6357 6376
/*
 * Socket downcall for ioctl. Creates the IP helper stream if the conn_t
 * does not have one yet, then forwards the command to it with
 * ldi_ioctl(). _SIOCSOCKFALLBACK, TI_GETPEERNAME and TI_GETMYNAME are
 * rejected with EINVAL instead of being forwarded.
 *
 * Returns the ldi_ioctl() result, EINVAL for the rejected commands, or
 * the error from ip_create_helper_stream().
 */
6358 6377 int
6359 6378 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6360 6379 int mode, int32_t *rvalp, cred_t *cr)
6361 6380 {
6362 6381 conn_t *connp = (conn_t *)proto_handle;
6363 6382 int error;
6364 6383
6365 6384 /* All Solaris components should pass a cred for this operation. */
6366 6385 ASSERT(cr != NULL);
6367 6386
6368 6387 /*
6369 6388 * If we don't have a helper stream then create one.
6370 6389 * ip_create_helper_stream takes care of locking the conn_t,
6371 6390 * so this check for NULL is just a performance optimization.
6372 6391 */
6373 6392 if (connp->conn_helper_info == NULL) {
6374 6393 udp_stack_t *us = connp->conn_udp->udp_us;
6375 6394
6376 6395 ASSERT(us->us_ldi_ident != NULL);
6377 6396
6378 6397 /*
6379 6398 * Create a helper stream for non-STREAMS socket.
6380 6399 */
6381 6400 error = ip_create_helper_stream(connp, us->us_ldi_ident);
6382 6401 if (error != 0) {
/*
 * NOTE(review): the message below names tcp_ioctl although this is
 * udp_ioctl -- looks like a copy/paste slip; confirm before changing.
 */
6383 6402 ip0dbg(("tcp_ioctl: create of IP helper stream "
6384 6403 "failed %d\n", error));
6385 6404 return (error);
6386 6405 }
6387 6406 }
6388 6407
6389 6408 switch (cmd) {
6390 6409 case _SIOCSOCKFALLBACK:
6391 6410 case TI_GETPEERNAME:
6392 6411 case TI_GETMYNAME:
6393 6412 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6394 6413 cmd));
6395 6414 error = EINVAL;
6396 6415 break;
6397 6416 default:
6398 6417 /*
6399 6418 * Pass on to IP using helper stream
6400 6419 */
6401 6420 error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6402 6421 cmd, arg, mode, cr, rvalp);
6403 6422 break;
6404 6423 }
6405 6424 return (error);
6406 6425 }
6407 6426
/*
 * Socket downcall for accept. Ignores all arguments and always returns
 * EOPNOTSUPP.
 */
6408 6427 /* ARGSUSED */
6409 6428 int
6410 6429 udp_accept(sock_lower_handle_t lproto_handle,
6411 6430 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6412 6431 cred_t *cr)
6413 6432 {
6414 6433 return (EOPNOTSUPP);
6415 6434 }
6416 6435
/*
 * Socket downcall for listen. Ignores all arguments and always returns
 * EOPNOTSUPP.
 */
6417 6436 /* ARGSUSED */
6418 6437 int
6419 6438 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6420 6439 {
6421 6440 return (EOPNOTSUPP);
6422 6441 }
6423 6442
/*
 * Downcall vector for UDP sockets. Each entry is labelled with the
 * sock_downcalls_t member it fills; the sd_send_uio, sd_recv_uio and
 * sd_poll entries are NULL. Note that udp_clr_flowctrl occupies the
 * slot labelled sd_setflowctrl.
 */
6424 6443 sock_downcalls_t sock_udp_downcalls = {
6425 6444 udp_activate, /* sd_activate */
6426 6445 udp_accept, /* sd_accept */
6427 6446 udp_bind, /* sd_bind */
6428 6447 udp_listen, /* sd_listen */
6429 6448 udp_connect, /* sd_connect */
6430 6449 udp_getpeername, /* sd_getpeername */
6431 6450 udp_getsockname, /* sd_getsockname */
6432 6451 udp_getsockopt, /* sd_getsockopt */
6433 6452 udp_setsockopt, /* sd_setsockopt */
6434 6453 udp_send, /* sd_send */
6435 6454 NULL, /* sd_send_uio */
6436 6455 NULL, /* sd_recv_uio */
6437 6456 NULL, /* sd_poll */
6438 6457 udp_shutdown, /* sd_shutdown */
6439 6458 udp_clr_flowctrl, /* sd_setflowctrl */
6440 6459 udp_ioctl, /* sd_ioctl */
6441 6460 udp_close /* sd_close */
6442 6461 };
|
↓ open down ↓ |
871 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX