Print this page
OS-4699 lxbrand netty complains about SO_LINGER (really IP_TOS)
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4018 lxbrand support TCP SO_REUSEPORT
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/ip/conn_opt.c
+++ new/usr/src/uts/common/inet/ip/conn_opt.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 + * Copyright 2016 Joyent, Inc.
24 25 */
25 26 /* Copyright (c) 1990 Mentat Inc. */
26 27
27 28 #include <sys/types.h>
28 29 #include <sys/stream.h>
29 30 #include <sys/strsun.h>
30 31 #define _SUN_TPI_VERSION 2
31 32 #include <sys/tihdr.h>
32 33 #include <sys/xti_inet.h>
33 34 #include <sys/ucred.h>
34 35 #include <sys/zone.h>
35 36 #include <sys/ddi.h>
36 37 #include <sys/sunddi.h>
37 38 #include <sys/cmn_err.h>
38 39 #include <sys/debug.h>
39 40 #include <sys/atomic.h>
40 41 #include <sys/policy.h>
41 42
42 43 #include <sys/systm.h>
43 44 #include <sys/param.h>
44 45 #include <sys/kmem.h>
45 46 #include <sys/sdt.h>
46 47 #include <sys/socket.h>
47 48 #include <sys/ethernet.h>
48 49 #include <sys/mac.h>
49 50 #include <net/if.h>
50 51 #include <net/if_types.h>
51 52 #include <net/if_arp.h>
52 53 #include <net/route.h>
53 54 #include <sys/sockio.h>
54 55 #include <netinet/in.h>
55 56 #include <net/if_dl.h>
56 57
57 58 #include <inet/common.h>
58 59 #include <inet/mi.h>
59 60 #include <inet/mib2.h>
60 61 #include <inet/nd.h>
61 62 #include <inet/arp.h>
62 63 #include <inet/snmpcom.h>
63 64 #include <inet/kstatcom.h>
64 65
65 66 #include <netinet/igmp_var.h>
66 67 #include <netinet/ip6.h>
67 68 #include <netinet/icmp6.h>
68 69 #include <netinet/sctp.h>
69 70
70 71 #include <inet/ip.h>
71 72 #include <inet/ip_impl.h>
72 73 #include <inet/ip6.h>
73 74 #include <inet/ip6_asp.h>
74 75 #include <inet/tcp.h>
75 76 #include <inet/ip_multi.h>
76 77 #include <inet/ip_if.h>
77 78 #include <inet/ip_ire.h>
78 79 #include <inet/ip_ftable.h>
79 80 #include <inet/ip_rts.h>
80 81 #include <inet/optcom.h>
81 82 #include <inet/ip_ndp.h>
82 83 #include <inet/ip_listutils.h>
83 84 #include <netinet/igmp.h>
84 85 #include <netinet/ip_mroute.h>
85 86 #include <netinet/udp.h>
86 87 #include <inet/ipp_common.h>
87 88
88 89 #include <net/pfkeyv2.h>
89 90 #include <inet/sadb.h>
90 91 #include <inet/ipsec_impl.h>
91 92 #include <inet/ipdrop.h>
92 93 #include <inet/ip_netinfo.h>
93 94
94 95 #include <inet/ipclassifier.h>
95 96 #include <inet/sctp_ip.h>
96 97 #include <inet/sctp/sctp_impl.h>
97 98 #include <inet/udp_impl.h>
98 99 #include <sys/sunddi.h>
99 100
100 101 #include <sys/tsol/label.h>
101 102 #include <sys/tsol/tnet.h>
102 103
103 104 /*
104 105 * Return how many bytes are needed for the different ancillary data items
105 106 */
106 107 uint_t
107 108 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
108 109 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
109 110 {
110 111 uint_t ancil_size;
111 112 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
112 113
113 114 /*
114 115 * If IP_RECVDSTADDR is set we include the destination IP
115 116 * address as an option. With IP_RECVOPTS we include all
116 117 * the IP options.
117 118 */
118 119 ancil_size = 0;
119 120 if (recv_ancillary.crb_recvdstaddr &&
120 121 (ira->ira_flags & IRAF_IS_IPV4)) {
121 122 ancil_size += sizeof (struct T_opthdr) +
122 123 sizeof (struct in_addr);
123 124 IP_STAT(ipst, conn_in_recvdstaddr);
124 125 }
125 126
126 127 /*
127 128 * ip_recvpktinfo is used for both AF_INET and AF_INET6, but the
128 129 * ancillary data items are different
129 130 */
130 131 if (recv_ancillary.crb_ip_recvpktinfo &&
131 132 connp->conn_family == AF_INET) {
132 133 ancil_size += sizeof (struct T_opthdr) +
133 134 sizeof (struct in_pktinfo);
134 135 IP_STAT(ipst, conn_in_recvpktinfo);
135 136 }
136 137
137 138 if ((recv_ancillary.crb_recvopts) &&
138 139 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
139 140 ancil_size += sizeof (struct T_opthdr) +
140 141 ipp->ipp_ipv4_options_len;
141 142 IP_STAT(ipst, conn_in_recvopts);
142 143 }
143 144
144 145 if (recv_ancillary.crb_recvslla) {
145 146 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
146 147 ill_t *ill;
147 148
148 149 /* Make sure ira_l2src is setup if not already */
149 150 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
150 151 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
151 152 ipst);
152 153 if (ill != NULL) {
153 154 ip_setl2src(mp, ira, ill);
154 155 ill_refrele(ill);
155 156 }
156 157 }
157 158 ancil_size += sizeof (struct T_opthdr) +
158 159 sizeof (struct sockaddr_dl);
159 160 IP_STAT(ipst, conn_in_recvslla);
160 161 }
161 162
162 163 if (recv_ancillary.crb_recvif) {
163 164 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
164 165 IP_STAT(ipst, conn_in_recvif);
165 166 }
166 167
167 168 /*
168 169 * ip_recvpktinfo is used for both AF_INET and AF_INET6, but the
169 170 * ancillary data items are different
170 171 */
171 172 if (recv_ancillary.crb_ip_recvpktinfo &&
172 173 connp->conn_family == AF_INET6) {
173 174 ancil_size += sizeof (struct T_opthdr) +
174 175 sizeof (struct in6_pktinfo);
175 176 IP_STAT(ipst, conn_in_recvpktinfo);
176 177 }
177 178
178 179 if (recv_ancillary.crb_ipv6_recvhoplimit) {
179 180 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
180 181 IP_STAT(ipst, conn_in_recvhoplimit);
181 182 }
182 183
183 184 if (recv_ancillary.crb_ipv6_recvtclass) {
184 185 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
185 186 IP_STAT(ipst, conn_in_recvtclass);
186 187 }
187 188
188 189 if (recv_ancillary.crb_ipv6_recvhopopts &&
189 190 (ipp->ipp_fields & IPPF_HOPOPTS)) {
190 191 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
191 192 IP_STAT(ipst, conn_in_recvhopopts);
192 193 }
193 194 /*
194 195 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
195 196 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
196 197 * options that appear before a routing header).
197 198 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
198 199 */
199 200 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
200 201 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
201 202 (recv_ancillary.crb_ipv6_recvdstopts &&
202 203 recv_ancillary.crb_ipv6_recvrthdr)) {
203 204 ancil_size += sizeof (struct T_opthdr) +
204 205 ipp->ipp_rthdrdstoptslen;
205 206 IP_STAT(ipst, conn_in_recvrthdrdstopts);
206 207 }
207 208 }
208 209 if ((recv_ancillary.crb_ipv6_recvrthdr) &&
209 210 (ipp->ipp_fields & IPPF_RTHDR)) {
210 211 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
211 212 IP_STAT(ipst, conn_in_recvrthdr);
212 213 }
213 214 if ((recv_ancillary.crb_ipv6_recvdstopts ||
214 215 recv_ancillary.crb_old_ipv6_recvdstopts) &&
215 216 (ipp->ipp_fields & IPPF_DSTOPTS)) {
216 217 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
217 218 IP_STAT(ipst, conn_in_recvdstopts);
218 219 }
219 220 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
220 221 ancil_size += sizeof (struct T_opthdr) +
221 222 ucredminsize(ira->ira_cred);
222 223 IP_STAT(ipst, conn_in_recvucred);
223 224 }
224 225
225 226 /*
226 227 * If SO_TIMESTAMP is set allocate the appropriate sized
227 228 * buffer. Since gethrestime() expects a pointer-aligned
228 229 * argument, we allocate space necessary for extra
229 230 * alignment (even though it might not be used).
230 231 */
231 232 if (recv_ancillary.crb_timestamp) {
232 233 ancil_size += sizeof (struct T_opthdr) +
233 234 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
234 235 IP_STAT(ipst, conn_in_timestamp);
235 236 }
236 237
237 238 /*
238 239 * If IP_RECVTTL is set allocate the appropriate sized buffer
239 240 */
240 241 if (recv_ancillary.crb_recvttl &&
241 242 (ira->ira_flags & IRAF_IS_IPV4)) {
242 243 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
243 244 IP_STAT(ipst, conn_in_recvttl);
244 245 }
245 246
246 247 return (ancil_size);
247 248 }
248 249
249 250 /*
250 251 * Lay down the ancillary data items at "ancil_buf".
251 252 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
252 253 * large buffer - ancil_size.
253 254 */
254 255 void
255 256 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
256 257 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
257 258 {
258 259 /*
259 260 * Copy in destination address before options to avoid
260 261 * any padding issues.
261 262 */
262 263 if (recv_ancillary.crb_recvdstaddr &&
263 264 (ira->ira_flags & IRAF_IS_IPV4)) {
264 265 struct T_opthdr *toh;
265 266 ipaddr_t *dstptr;
266 267
267 268 toh = (struct T_opthdr *)ancil_buf;
268 269 toh->level = IPPROTO_IP;
269 270 toh->name = IP_RECVDSTADDR;
270 271 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
271 272 toh->status = 0;
272 273 ancil_buf += sizeof (struct T_opthdr);
273 274 dstptr = (ipaddr_t *)ancil_buf;
274 275 *dstptr = ipp->ipp_addr_v4;
275 276 ancil_buf += sizeof (ipaddr_t);
276 277 ancil_size -= toh->len;
277 278 }
278 279
279 280 /*
280 281 * ip_recvpktinfo is used for both AF_INET and AF_INET6, but the
281 282 * ancillary data items are different
282 283 */
283 284 if (recv_ancillary.crb_ip_recvpktinfo &&
284 285 connp->conn_family == AF_INET) {
285 286 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
286 287 struct T_opthdr *toh;
287 288 struct in_pktinfo *pktinfop;
288 289 ill_t *ill;
289 290 ipif_t *ipif;
290 291
291 292 toh = (struct T_opthdr *)ancil_buf;
292 293 toh->level = IPPROTO_IP;
293 294 toh->name = IP_PKTINFO;
294 295 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
295 296 toh->status = 0;
296 297 ancil_buf += sizeof (struct T_opthdr);
297 298 pktinfop = (struct in_pktinfo *)ancil_buf;
298 299
299 300 pktinfop->ipi_ifindex = ira->ira_ruifindex;
300 301 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
301 302
302 303 /* Find a good address to report */
303 304 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
304 305 if (ill != NULL) {
305 306 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
306 307 if (ipif != NULL) {
307 308 pktinfop->ipi_spec_dst.s_addr =
308 309 ipif->ipif_lcl_addr;
309 310 ipif_refrele(ipif);
310 311 }
311 312 ill_refrele(ill);
312 313 }
313 314 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
314 315 ancil_buf += sizeof (struct in_pktinfo);
315 316 ancil_size -= toh->len;
316 317 }
317 318
318 319 if ((recv_ancillary.crb_recvopts) &&
319 320 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
320 321 struct T_opthdr *toh;
321 322
322 323 toh = (struct T_opthdr *)ancil_buf;
323 324 toh->level = IPPROTO_IP;
324 325 toh->name = IP_RECVOPTS;
325 326 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
326 327 toh->status = 0;
327 328 ancil_buf += sizeof (struct T_opthdr);
328 329 bcopy(ipp->ipp_ipv4_options, ancil_buf,
329 330 ipp->ipp_ipv4_options_len);
330 331 ancil_buf += ipp->ipp_ipv4_options_len;
331 332 ancil_size -= toh->len;
332 333 }
333 334
334 335 if (recv_ancillary.crb_recvslla) {
335 336 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
336 337 struct T_opthdr *toh;
337 338 struct sockaddr_dl *dstptr;
338 339 ill_t *ill;
339 340 int alen = 0;
340 341
341 342 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
342 343 if (ill != NULL)
343 344 alen = ill->ill_phys_addr_length;
344 345
345 346 /*
346 347 * For loopback multicast and broadcast the packet arrives
347 348 * with ira_ruifindex being the physical interface, but
348 349 * ira_l2src is all zero since ip_postfrag_loopback doesn't
349 350 * know our l2src. We don't report the address in that case.
350 351 */
351 352 if (ira->ira_flags & IRAF_LOOPBACK)
352 353 alen = 0;
353 354
354 355 toh = (struct T_opthdr *)ancil_buf;
355 356 toh->level = IPPROTO_IP;
356 357 toh->name = IP_RECVSLLA;
357 358 toh->len = sizeof (struct T_opthdr) +
358 359 sizeof (struct sockaddr_dl);
359 360 toh->status = 0;
360 361 ancil_buf += sizeof (struct T_opthdr);
361 362 dstptr = (struct sockaddr_dl *)ancil_buf;
362 363 dstptr->sdl_family = AF_LINK;
363 364 dstptr->sdl_index = ira->ira_ruifindex;
364 365 if (ill != NULL)
365 366 dstptr->sdl_type = ill->ill_type;
366 367 else
367 368 dstptr->sdl_type = 0;
368 369 dstptr->sdl_nlen = 0;
369 370 dstptr->sdl_alen = alen;
370 371 dstptr->sdl_slen = 0;
371 372 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
372 373 ancil_buf += sizeof (struct sockaddr_dl);
373 374 ancil_size -= toh->len;
374 375 if (ill != NULL)
375 376 ill_refrele(ill);
376 377 }
377 378
378 379 if (recv_ancillary.crb_recvif) {
379 380 struct T_opthdr *toh;
380 381 uint_t *dstptr;
381 382
382 383 toh = (struct T_opthdr *)ancil_buf;
383 384 toh->level = IPPROTO_IP;
384 385 toh->name = IP_RECVIF;
385 386 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
386 387 toh->status = 0;
387 388 ancil_buf += sizeof (struct T_opthdr);
388 389 dstptr = (uint_t *)ancil_buf;
389 390 *dstptr = ira->ira_ruifindex;
390 391 ancil_buf += sizeof (uint_t);
391 392 ancil_size -= toh->len;
392 393 }
393 394
394 395 /*
395 396 * ip_recvpktinfo is used for both AF_INET and AF_INET6, but the
396 397 * ancillary data items are different
397 398 */
398 399 if (recv_ancillary.crb_ip_recvpktinfo &&
399 400 connp->conn_family == AF_INET6) {
400 401 struct T_opthdr *toh;
401 402 struct in6_pktinfo *pkti;
402 403
403 404 toh = (struct T_opthdr *)ancil_buf;
404 405 toh->level = IPPROTO_IPV6;
405 406 toh->name = IPV6_PKTINFO;
406 407 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
407 408 toh->status = 0;
408 409 ancil_buf += sizeof (struct T_opthdr);
409 410 pkti = (struct in6_pktinfo *)ancil_buf;
410 411 if (ira->ira_flags & IRAF_IS_IPV4) {
411 412 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
412 413 &pkti->ipi6_addr);
413 414 } else {
414 415 pkti->ipi6_addr = ipp->ipp_addr;
415 416 }
416 417 pkti->ipi6_ifindex = ira->ira_ruifindex;
417 418
418 419 ancil_buf += sizeof (*pkti);
419 420 ancil_size -= toh->len;
420 421 }
421 422 if (recv_ancillary.crb_ipv6_recvhoplimit) {
422 423 struct T_opthdr *toh;
423 424
424 425 toh = (struct T_opthdr *)ancil_buf;
425 426 toh->level = IPPROTO_IPV6;
426 427 toh->name = IPV6_HOPLIMIT;
427 428 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
428 429 toh->status = 0;
429 430 ancil_buf += sizeof (struct T_opthdr);
430 431 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
431 432 ancil_buf += sizeof (uint_t);
432 433 ancil_size -= toh->len;
433 434 }
434 435 if (recv_ancillary.crb_ipv6_recvtclass) {
435 436 struct T_opthdr *toh;
436 437
437 438 toh = (struct T_opthdr *)ancil_buf;
438 439 toh->level = IPPROTO_IPV6;
439 440 toh->name = IPV6_TCLASS;
440 441 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
441 442 toh->status = 0;
442 443 ancil_buf += sizeof (struct T_opthdr);
443 444
444 445 if (ira->ira_flags & IRAF_IS_IPV4)
445 446 *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
446 447 else
447 448 *(uint_t *)ancil_buf = ipp->ipp_tclass;
448 449 ancil_buf += sizeof (uint_t);
449 450 ancil_size -= toh->len;
450 451 }
451 452 if (recv_ancillary.crb_ipv6_recvhopopts &&
452 453 (ipp->ipp_fields & IPPF_HOPOPTS)) {
453 454 struct T_opthdr *toh;
454 455
455 456 toh = (struct T_opthdr *)ancil_buf;
456 457 toh->level = IPPROTO_IPV6;
457 458 toh->name = IPV6_HOPOPTS;
458 459 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
459 460 toh->status = 0;
460 461 ancil_buf += sizeof (struct T_opthdr);
461 462 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
462 463 ancil_buf += ipp->ipp_hopoptslen;
463 464 ancil_size -= toh->len;
464 465 }
465 466 /*
466 467 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
467 468 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
468 469 * options that appear before a routing header).
469 470 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
470 471 */
471 472 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
472 473 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
473 474 (recv_ancillary.crb_ipv6_recvdstopts &&
474 475 recv_ancillary.crb_ipv6_recvrthdr)) {
475 476 struct T_opthdr *toh;
476 477
477 478 toh = (struct T_opthdr *)ancil_buf;
478 479 toh->level = IPPROTO_IPV6;
479 480 toh->name = IPV6_DSTOPTS;
480 481 toh->len = sizeof (struct T_opthdr) +
481 482 ipp->ipp_rthdrdstoptslen;
482 483 toh->status = 0;
483 484 ancil_buf += sizeof (struct T_opthdr);
484 485 bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
485 486 ipp->ipp_rthdrdstoptslen);
486 487 ancil_buf += ipp->ipp_rthdrdstoptslen;
487 488 ancil_size -= toh->len;
488 489 }
489 490 }
490 491 if (recv_ancillary.crb_ipv6_recvrthdr &&
491 492 (ipp->ipp_fields & IPPF_RTHDR)) {
492 493 struct T_opthdr *toh;
493 494
494 495 toh = (struct T_opthdr *)ancil_buf;
495 496 toh->level = IPPROTO_IPV6;
496 497 toh->name = IPV6_RTHDR;
497 498 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
498 499 toh->status = 0;
499 500 ancil_buf += sizeof (struct T_opthdr);
500 501 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
501 502 ancil_buf += ipp->ipp_rthdrlen;
502 503 ancil_size -= toh->len;
503 504 }
504 505 if ((recv_ancillary.crb_ipv6_recvdstopts ||
505 506 recv_ancillary.crb_old_ipv6_recvdstopts) &&
506 507 (ipp->ipp_fields & IPPF_DSTOPTS)) {
507 508 struct T_opthdr *toh;
508 509
509 510 toh = (struct T_opthdr *)ancil_buf;
510 511 toh->level = IPPROTO_IPV6;
511 512 toh->name = IPV6_DSTOPTS;
512 513 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
513 514 toh->status = 0;
514 515 ancil_buf += sizeof (struct T_opthdr);
515 516 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
516 517 ancil_buf += ipp->ipp_dstoptslen;
517 518 ancil_size -= toh->len;
518 519 }
519 520
520 521 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
521 522 struct T_opthdr *toh;
522 523 cred_t *rcr = connp->conn_cred;
523 524
524 525 toh = (struct T_opthdr *)ancil_buf;
525 526 toh->level = SOL_SOCKET;
526 527 toh->name = SCM_UCRED;
527 528 toh->len = sizeof (struct T_opthdr) +
528 529 ucredminsize(ira->ira_cred);
529 530 toh->status = 0;
530 531 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
531 532 ancil_buf += toh->len;
532 533 ancil_size -= toh->len;
533 534 }
534 535 if (recv_ancillary.crb_timestamp) {
535 536 struct T_opthdr *toh;
536 537
537 538 toh = (struct T_opthdr *)ancil_buf;
538 539 toh->level = SOL_SOCKET;
539 540 toh->name = SCM_TIMESTAMP;
540 541 toh->len = sizeof (struct T_opthdr) +
541 542 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
542 543 toh->status = 0;
543 544 ancil_buf += sizeof (struct T_opthdr);
544 545 /* Align for gethrestime() */
545 546 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
546 547 sizeof (intptr_t));
547 548 gethrestime((timestruc_t *)ancil_buf);
548 549 ancil_buf = (uchar_t *)toh + toh->len;
549 550 ancil_size -= toh->len;
550 551 }
551 552
552 553 /*
553 554 * CAUTION:
554 555 * Due to alignment issues
555 556 * Processing of IP_RECVTTL option
556 557 * should always be the last. Adding
557 558 * any option processing after this will
558 559 * cause alignment panic.
559 560 */
560 561 if (recv_ancillary.crb_recvttl &&
561 562 (ira->ira_flags & IRAF_IS_IPV4)) {
562 563 struct T_opthdr *toh;
563 564 uint8_t *dstptr;
564 565
565 566 toh = (struct T_opthdr *)ancil_buf;
566 567 toh->level = IPPROTO_IP;
567 568 toh->name = IP_RECVTTL;
568 569 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
569 570 toh->status = 0;
570 571 ancil_buf += sizeof (struct T_opthdr);
571 572 dstptr = (uint8_t *)ancil_buf;
572 573 *dstptr = ipp->ipp_hoplimit;
573 574 ancil_buf += sizeof (uint8_t);
574 575 ancil_size -= toh->len;
575 576 }
576 577
577 578 /* Consumed all of allocated space */
578 579 ASSERT(ancil_size == 0);
579 580
580 581 }
581 582
582 583 /*
583 584 * This routine retrieves the current status of socket options.
584 585 * It returns the size of the option retrieved, or -1.
585 586 */
586 587 int
587 588 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
588 589 uchar_t *ptr)
589 590 {
590 591 int *i1 = (int *)ptr;
591 592 conn_t *connp = coa->coa_connp;
592 593 ip_xmit_attr_t *ixa = coa->coa_ixa;
593 594 ip_pkt_t *ipp = coa->coa_ipp;
594 595 ip_stack_t *ipst = ixa->ixa_ipst;
595 596 uint_t len;
596 597
597 598 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
598 599
599 600 switch (level) {
600 601 case SOL_SOCKET:
601 602 switch (name) {
602 603 case SO_DEBUG:
603 604 *i1 = connp->conn_debug ? SO_DEBUG : 0;
604 605 break; /* goto sizeof (int) option return */
605 606 case SO_KEEPALIVE:
606 607 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
607 608 break;
608 609 case SO_LINGER: {
609 610 struct linger *lgr = (struct linger *)ptr;
610 611
611 612 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
|
↓ open down ↓ |
578 lines elided |
↑ open up ↑ |
612 613 lgr->l_linger = connp->conn_lingertime;
613 614 }
614 615 return (sizeof (struct linger));
615 616
616 617 case SO_OOBINLINE:
617 618 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
618 619 break;
619 620 case SO_REUSEADDR:
620 621 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
621 622 break; /* goto sizeof (int) option return */
623 + case SO_REUSEPORT:
624 + *i1 = connp->conn_reuseport;
625 + break; /* goto sizeof (int) option return */
622 626 case SO_TYPE:
623 627 *i1 = connp->conn_so_type;
624 628 break; /* goto sizeof (int) option return */
625 629 case SO_DONTROUTE:
626 630 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
627 631 SO_DONTROUTE : 0;
628 632 break; /* goto sizeof (int) option return */
629 633 case SO_USELOOPBACK:
630 634 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
631 635 break; /* goto sizeof (int) option return */
632 636 case SO_BROADCAST:
633 637 *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
634 638 break; /* goto sizeof (int) option return */
635 639
636 640 case SO_SNDBUF:
637 641 *i1 = connp->conn_sndbuf;
638 642 break; /* goto sizeof (int) option return */
639 643 case SO_RCVBUF:
640 644 *i1 = connp->conn_rcvbuf;
641 645 break; /* goto sizeof (int) option return */
642 646 case SO_RCVTIMEO:
643 647 case SO_SNDTIMEO:
644 648 /*
645 649 * Pass these two options in order for third-party
646 650 * protocol usage. Here just return directly.
647 651 */
648 652 *i1 = 0;
649 653 break;
650 654 case SO_DGRAM_ERRIND:
651 655 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
652 656 break; /* goto sizeof (int) option return */
653 657 case SO_RECVUCRED:
654 658 *i1 = connp->conn_recv_ancillary.crb_recvucred;
655 659 break; /* goto sizeof (int) option return */
656 660 case SO_TIMESTAMP:
657 661 *i1 = connp->conn_recv_ancillary.crb_timestamp;
658 662 break; /* goto sizeof (int) option return */
659 663 case SO_VRRP:
660 664 *i1 = connp->conn_isvrrp;
661 665 break; /* goto sizeof (int) option return */
662 666 case SO_ANON_MLP:
663 667 *i1 = connp->conn_anon_mlp;
664 668 break; /* goto sizeof (int) option return */
665 669 case SO_MAC_EXEMPT:
666 670 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
667 671 break; /* goto sizeof (int) option return */
668 672 case SO_MAC_IMPLICIT:
669 673 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
670 674 break; /* goto sizeof (int) option return */
671 675 case SO_ALLZONES:
672 676 *i1 = connp->conn_allzones;
673 677 break; /* goto sizeof (int) option return */
674 678 case SO_EXCLBIND:
675 679 *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
676 680 break;
677 681 case SO_PROTOTYPE:
678 682 *i1 = connp->conn_proto;
679 683 break;
680 684
681 685 case SO_DOMAIN:
682 686 *i1 = connp->conn_family;
683 687 break;
684 688 default:
685 689 return (-1);
686 690 }
687 691 break;
688 692 case IPPROTO_IP:
689 693 if (connp->conn_family != AF_INET)
690 694 return (-1);
691 695 switch (name) {
692 696 case IP_OPTIONS:
693 697 case T_IP_OPTIONS:
694 698 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
695 699 return (0);
696 700
697 701 len = ipp->ipp_ipv4_options_len;
698 702 if (len > 0) {
699 703 bcopy(ipp->ipp_ipv4_options, ptr, len);
700 704 }
701 705 return (len);
702 706
703 707 case IP_PKTINFO: {
704 708 /*
705 709 * This also handles IP_RECVPKTINFO.
706 710 * IP_PKTINFO and IP_RECVPKTINFO have same value.
707 711 * Differentiation is based on the size of the
708 712 * argument passed in.
709 713 */
710 714 struct in_pktinfo *pktinfo;
711 715
712 716 #ifdef notdef
713 717 /* optcom doesn't provide a length with "get" */
714 718 if (inlen == sizeof (int)) {
715 719 /* This is IP_RECVPKTINFO option. */
716 720 *i1 = connp->conn_recv_ancillary.
717 721 crb_ip_recvpktinfo;
718 722 return (sizeof (int));
719 723 }
720 724 #endif
721 725 /* XXX assumes that caller has room for max size! */
722 726
723 727 pktinfo = (struct in_pktinfo *)ptr;
724 728 pktinfo->ipi_ifindex = ixa->ixa_ifindex;
725 729 if (ipp->ipp_fields & IPPF_ADDR)
726 730 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
727 731 else
728 732 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
729 733 return (sizeof (struct in_pktinfo));
730 734 }
731 735 case IP_DONTFRAG:
732 736 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
733 737 return (sizeof (int));
734 738 case IP_TOS:
735 739 case T_IP_TOS:
736 740 *i1 = (int)ipp->ipp_type_of_service;
737 741 break; /* goto sizeof (int) option return */
738 742 case IP_TTL:
739 743 *i1 = (int)ipp->ipp_unicast_hops;
740 744 break; /* goto sizeof (int) option return */
741 745 case IP_DHCPINIT_IF:
742 746 return (-1);
743 747 case IP_NEXTHOP:
744 748 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
745 749 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
746 750 return (sizeof (ipaddr_t));
747 751 } else {
748 752 return (0);
749 753 }
750 754
751 755 case IP_MULTICAST_IF:
752 756 /* 0 address if not set */
753 757 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
754 758 return (sizeof (ipaddr_t));
755 759 case IP_MULTICAST_TTL:
756 760 *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
757 761 return (sizeof (uchar_t));
758 762 case IP_MULTICAST_LOOP:
759 763 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
760 764 return (sizeof (uint8_t));
761 765 case IP_RECVOPTS:
762 766 *i1 = connp->conn_recv_ancillary.crb_recvopts;
763 767 break; /* goto sizeof (int) option return */
764 768 case IP_RECVDSTADDR:
765 769 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
766 770 break; /* goto sizeof (int) option return */
767 771 case IP_RECVIF:
768 772 *i1 = connp->conn_recv_ancillary.crb_recvif;
769 773 break; /* goto sizeof (int) option return */
770 774 case IP_RECVSLLA:
771 775 *i1 = connp->conn_recv_ancillary.crb_recvslla;
772 776 break; /* goto sizeof (int) option return */
773 777 case IP_RECVTTL:
774 778 *i1 = connp->conn_recv_ancillary.crb_recvttl;
775 779 break; /* goto sizeof (int) option return */
776 780 case IP_ADD_MEMBERSHIP:
777 781 case IP_DROP_MEMBERSHIP:
778 782 case MCAST_JOIN_GROUP:
779 783 case MCAST_LEAVE_GROUP:
780 784 case IP_BLOCK_SOURCE:
781 785 case IP_UNBLOCK_SOURCE:
782 786 case IP_ADD_SOURCE_MEMBERSHIP:
783 787 case IP_DROP_SOURCE_MEMBERSHIP:
784 788 case MCAST_BLOCK_SOURCE:
785 789 case MCAST_UNBLOCK_SOURCE:
786 790 case MCAST_JOIN_SOURCE_GROUP:
787 791 case MCAST_LEAVE_SOURCE_GROUP:
788 792 case MRT_INIT:
789 793 case MRT_DONE:
790 794 case MRT_ADD_VIF:
791 795 case MRT_DEL_VIF:
792 796 case MRT_ADD_MFC:
793 797 case MRT_DEL_MFC:
794 798 /* cannot "get" the value for these */
795 799 return (-1);
796 800 case MRT_VERSION:
797 801 case MRT_ASSERT:
798 802 (void) ip_mrouter_get(name, connp, ptr);
799 803 return (sizeof (int));
800 804 case IP_SEC_OPT:
801 805 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
802 806 IPSEC_AF_V4));
803 807 case IP_BOUND_IF:
804 808 /* Zero if not set */
805 809 *i1 = connp->conn_bound_if;
806 810 break; /* goto sizeof (int) option return */
807 811 case IP_UNSPEC_SRC:
808 812 *i1 = connp->conn_unspec_src;
809 813 break; /* goto sizeof (int) option return */
810 814 case IP_BROADCAST_TTL:
811 815 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
812 816 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
813 817 else
814 818 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
815 819 return (sizeof (uchar_t));
816 820 default:
817 821 return (-1);
818 822 }
819 823 break;
820 824 case IPPROTO_IPV6:
821 825 if (connp->conn_family != AF_INET6)
822 826 return (-1);
823 827 switch (name) {
824 828 case IPV6_UNICAST_HOPS:
825 829 *i1 = (int)ipp->ipp_unicast_hops;
826 830 break; /* goto sizeof (int) option return */
827 831 case IPV6_MULTICAST_IF:
828 832 /* 0 index if not set */
829 833 *i1 = ixa->ixa_multicast_ifindex;
830 834 break; /* goto sizeof (int) option return */
831 835 case IPV6_MULTICAST_HOPS:
832 836 *i1 = ixa->ixa_multicast_ttl;
833 837 break; /* goto sizeof (int) option return */
834 838 case IPV6_MULTICAST_LOOP:
835 839 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
836 840 break; /* goto sizeof (int) option return */
837 841 case IPV6_JOIN_GROUP:
838 842 case IPV6_LEAVE_GROUP:
839 843 case MCAST_JOIN_GROUP:
840 844 case MCAST_LEAVE_GROUP:
841 845 case MCAST_BLOCK_SOURCE:
842 846 case MCAST_UNBLOCK_SOURCE:
843 847 case MCAST_JOIN_SOURCE_GROUP:
844 848 case MCAST_LEAVE_SOURCE_GROUP:
845 849 /* cannot "get" the value for these */
846 850 return (-1);
847 851 case IPV6_BOUND_IF:
848 852 /* Zero if not set */
849 853 *i1 = connp->conn_bound_if;
850 854 break; /* goto sizeof (int) option return */
851 855 case IPV6_UNSPEC_SRC:
852 856 *i1 = connp->conn_unspec_src;
853 857 break; /* goto sizeof (int) option return */
854 858 case IPV6_RECVPKTINFO:
855 859 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
856 860 break; /* goto sizeof (int) option return */
857 861 case IPV6_RECVTCLASS:
858 862 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
859 863 break; /* goto sizeof (int) option return */
860 864 case IPV6_RECVPATHMTU:
861 865 *i1 = connp->conn_ipv6_recvpathmtu;
862 866 break; /* goto sizeof (int) option return */
863 867 case IPV6_RECVHOPLIMIT:
864 868 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
865 869 break; /* goto sizeof (int) option return */
866 870 case IPV6_RECVHOPOPTS:
867 871 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
868 872 break; /* goto sizeof (int) option return */
869 873 case IPV6_RECVDSTOPTS:
870 874 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
871 875 break; /* goto sizeof (int) option return */
872 876 case _OLD_IPV6_RECVDSTOPTS:
873 877 *i1 =
874 878 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
875 879 break; /* goto sizeof (int) option return */
876 880 case IPV6_RECVRTHDRDSTOPTS:
877 881 *i1 = connp->conn_recv_ancillary.
878 882 crb_ipv6_recvrthdrdstopts;
879 883 break; /* goto sizeof (int) option return */
880 884 case IPV6_RECVRTHDR:
881 885 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
882 886 break; /* goto sizeof (int) option return */
883 887 case IPV6_PKTINFO: {
884 888 /* XXX assumes that caller has room for max size! */
885 889 struct in6_pktinfo *pkti;
886 890
887 891 pkti = (struct in6_pktinfo *)ptr;
888 892 pkti->ipi6_ifindex = ixa->ixa_ifindex;
889 893 if (ipp->ipp_fields & IPPF_ADDR)
890 894 pkti->ipi6_addr = ipp->ipp_addr;
891 895 else
892 896 pkti->ipi6_addr = ipv6_all_zeros;
893 897 return (sizeof (struct in6_pktinfo));
894 898 }
895 899 case IPV6_TCLASS:
896 900 *i1 = ipp->ipp_tclass;
897 901 break; /* goto sizeof (int) option return */
898 902 case IPV6_NEXTHOP: {
899 903 sin6_t *sin6 = (sin6_t *)ptr;
900 904
901 905 if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
902 906 return (0);
903 907
904 908 *sin6 = sin6_null;
905 909 sin6->sin6_family = AF_INET6;
906 910 sin6->sin6_addr = ixa->ixa_nexthop_v6;
907 911
908 912 return (sizeof (sin6_t));
909 913 }
910 914 case IPV6_HOPOPTS:
911 915 if (!(ipp->ipp_fields & IPPF_HOPOPTS))
912 916 return (0);
913 917 bcopy(ipp->ipp_hopopts, ptr,
914 918 ipp->ipp_hopoptslen);
915 919 return (ipp->ipp_hopoptslen);
916 920 case IPV6_RTHDRDSTOPTS:
917 921 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
918 922 return (0);
919 923 bcopy(ipp->ipp_rthdrdstopts, ptr,
920 924 ipp->ipp_rthdrdstoptslen);
921 925 return (ipp->ipp_rthdrdstoptslen);
922 926 case IPV6_RTHDR:
923 927 if (!(ipp->ipp_fields & IPPF_RTHDR))
924 928 return (0);
925 929 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
926 930 return (ipp->ipp_rthdrlen);
927 931 case IPV6_DSTOPTS:
928 932 if (!(ipp->ipp_fields & IPPF_DSTOPTS))
929 933 return (0);
930 934 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
931 935 return (ipp->ipp_dstoptslen);
932 936 case IPV6_PATHMTU:
933 937 return (ip_fill_mtuinfo(connp, ixa,
934 938 (struct ip6_mtuinfo *)ptr));
935 939 case IPV6_SEC_OPT:
936 940 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
937 941 IPSEC_AF_V6));
938 942 case IPV6_SRC_PREFERENCES:
939 943 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
940 944 case IPV6_DONTFRAG:
941 945 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
942 946 return (sizeof (int));
943 947 case IPV6_USE_MIN_MTU:
944 948 if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
945 949 *i1 = ixa->ixa_use_min_mtu;
946 950 else
947 951 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
948 952 break;
949 953 case IPV6_V6ONLY:
950 954 *i1 = connp->conn_ipv6_v6only;
951 955 return (sizeof (int));
952 956 default:
953 957 return (-1);
954 958 }
955 959 break;
956 960 case IPPROTO_UDP:
957 961 switch (name) {
958 962 case UDP_ANONPRIVBIND:
959 963 *i1 = connp->conn_anon_priv_bind;
960 964 break;
961 965 case UDP_EXCLBIND:
962 966 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
963 967 break;
964 968 default:
965 969 return (-1);
966 970 }
967 971 break;
968 972 case IPPROTO_TCP:
969 973 switch (name) {
970 974 case TCP_RECVDSTADDR:
971 975 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
972 976 break;
973 977 case TCP_ANONPRIVBIND:
974 978 *i1 = connp->conn_anon_priv_bind;
975 979 break;
976 980 case TCP_EXCLBIND:
977 981 *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
978 982 break;
979 983 default:
980 984 return (-1);
981 985 }
982 986 break;
983 987 default:
984 988 return (-1);
985 989 }
986 990 return (sizeof (int));
987 991 }
988 992
989 993 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
990 994 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
991 995 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
992 996 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
993 997 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
994 998 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
995 999 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
996 1000 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
997 1001 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
998 1002 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
999 1003
1000 1004 /*
1001 1005 * This routine sets the most common socket options including some
1002 1006 * that are transport/ULP specific.
1003 1007 * It returns errno or zero.
1004 1008 *
1005 1009 * For fixed length options, there is no sanity check
1006 1010 * of passed in length is done. It is assumed *_optcom_req()
1007 1011 * routines do the right thing.
1008 1012 */
1009 1013 int
1010 1014 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1011 1015 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1012 1016 {
1013 1017 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1014 1018
1015 1019 /* We have different functions for different levels */
1016 1020 switch (level) {
1017 1021 case SOL_SOCKET:
1018 1022 return (conn_opt_set_socket(coa, name, inlen, invalp,
1019 1023 checkonly, cr));
1020 1024 case IPPROTO_IP:
1021 1025 return (conn_opt_set_ip(coa, name, inlen, invalp,
1022 1026 checkonly, cr));
1023 1027 case IPPROTO_IPV6:
1024 1028 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1025 1029 checkonly, cr));
1026 1030 case IPPROTO_UDP:
1027 1031 return (conn_opt_set_udp(coa, name, inlen, invalp,
1028 1032 checkonly, cr));
1029 1033 case IPPROTO_TCP:
1030 1034 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1031 1035 checkonly, cr));
1032 1036 default:
1033 1037 return (0);
1034 1038 }
1035 1039 }
1036 1040
1037 1041 /*
1038 1042 * Handle SOL_SOCKET
1039 1043 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1040 1044 * it implement their own checks and setting of conn_proto.
1041 1045 */
1042 1046 /* ARGSUSED1 */
1043 1047 static int
1044 1048 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1045 1049 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1046 1050 {
1047 1051 conn_t *connp = coa->coa_connp;
1048 1052 ip_xmit_attr_t *ixa = coa->coa_ixa;
1049 1053 int *i1 = (int *)invalp;
1050 1054 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1051 1055
1052 1056 switch (name) {
1053 1057 case SO_ALLZONES:
1054 1058 if (IPCL_IS_BOUND(connp))
1055 1059 return (EINVAL);
1056 1060 break;
1057 1061 case SO_VRRP:
1058 1062 if (secpolicy_ip_config(cr, checkonly) != 0)
1059 1063 return (EACCES);
1060 1064 break;
1061 1065 case SO_MAC_EXEMPT:
1062 1066 if (secpolicy_net_mac_aware(cr) != 0)
1063 1067 return (EACCES);
1064 1068 if (IPCL_IS_BOUND(connp))
1065 1069 return (EINVAL);
1066 1070 break;
1067 1071 case SO_MAC_IMPLICIT:
1068 1072 if (secpolicy_net_mac_implicit(cr) != 0)
1069 1073 return (EACCES);
1070 1074 break;
1071 1075 }
1072 1076 if (checkonly)
1073 1077 return (0);
1074 1078
1075 1079 mutex_enter(&connp->conn_lock);
1076 1080 /* Here we set the actual option value */
1077 1081 switch (name) {
1078 1082 case SO_DEBUG:
1079 1083 connp->conn_debug = onoff;
1080 1084 break;
1081 1085 case SO_KEEPALIVE:
1082 1086 connp->conn_keepalive = onoff;
1083 1087 break;
1084 1088 case SO_LINGER: {
1085 1089 struct linger *lgr = (struct linger *)invalp;
1086 1090
1087 1091 if (lgr->l_onoff) {
1088 1092 connp->conn_linger = 1;
1089 1093 connp->conn_lingertime = lgr->l_linger;
1090 1094 } else {
1091 1095 connp->conn_linger = 0;
1092 1096 connp->conn_lingertime = 0;
1093 1097 }
1094 1098 break;
1095 1099 }
1096 1100 case SO_OOBINLINE:
1097 1101 connp->conn_oobinline = onoff;
1098 1102 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1099 1103 break;
1100 1104 case SO_REUSEADDR:
1101 1105 connp->conn_reuseaddr = onoff;
1102 1106 break;
1103 1107 case SO_DONTROUTE:
1104 1108 if (onoff)
1105 1109 ixa->ixa_flags |= IXAF_DONTROUTE;
1106 1110 else
1107 1111 ixa->ixa_flags &= ~IXAF_DONTROUTE;
1108 1112 coa->coa_changed |= COA_ROUTE_CHANGED;
1109 1113 break;
1110 1114 case SO_USELOOPBACK:
1111 1115 connp->conn_useloopback = onoff;
1112 1116 break;
1113 1117 case SO_BROADCAST:
1114 1118 connp->conn_broadcast = onoff;
1115 1119 break;
1116 1120 case SO_SNDBUF:
1117 1121 /* ULP has range checked the value */
1118 1122 connp->conn_sndbuf = *i1;
1119 1123 coa->coa_changed |= COA_SNDBUF_CHANGED;
1120 1124 break;
1121 1125 case SO_RCVBUF:
1122 1126 /* ULP has range checked the value */
1123 1127 connp->conn_rcvbuf = *i1;
1124 1128 coa->coa_changed |= COA_RCVBUF_CHANGED;
1125 1129 break;
1126 1130 case SO_RCVTIMEO:
1127 1131 case SO_SNDTIMEO:
1128 1132 /*
1129 1133 * Pass these two options in order for third part
1130 1134 * protocol usage.
1131 1135 */
1132 1136 break;
1133 1137 case SO_DGRAM_ERRIND:
1134 1138 connp->conn_dgram_errind = onoff;
1135 1139 break;
1136 1140 case SO_RECVUCRED:
1137 1141 connp->conn_recv_ancillary.crb_recvucred = onoff;
1138 1142 break;
1139 1143 case SO_ALLZONES:
1140 1144 connp->conn_allzones = onoff;
1141 1145 coa->coa_changed |= COA_ROUTE_CHANGED;
1142 1146 if (onoff)
1143 1147 ixa->ixa_zoneid = ALL_ZONES;
1144 1148 else
1145 1149 ixa->ixa_zoneid = connp->conn_zoneid;
1146 1150 break;
1147 1151 case SO_TIMESTAMP:
1148 1152 connp->conn_recv_ancillary.crb_timestamp = onoff;
1149 1153 break;
1150 1154 case SO_VRRP:
1151 1155 connp->conn_isvrrp = onoff;
1152 1156 break;
1153 1157 case SO_ANON_MLP:
1154 1158 connp->conn_anon_mlp = onoff;
1155 1159 break;
1156 1160 case SO_MAC_EXEMPT:
1157 1161 connp->conn_mac_mode = onoff ?
1158 1162 CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1159 1163 break;
1160 1164 case SO_MAC_IMPLICIT:
1161 1165 connp->conn_mac_mode = onoff ?
1162 1166 CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1163 1167 break;
1164 1168 case SO_EXCLBIND:
1165 1169 connp->conn_exclbind = onoff;
1166 1170 break;
1167 1171 }
1168 1172 mutex_exit(&connp->conn_lock);
1169 1173 return (0);
1170 1174 }
1171 1175
1172 1176 /* Handle IPPROTO_IP */
1173 1177 static int
1174 1178 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1175 1179 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1176 1180 {
1177 1181 conn_t *connp = coa->coa_connp;
1178 1182 ip_xmit_attr_t *ixa = coa->coa_ixa;
|
↓ open down ↓ |
547 lines elided |
↑ open up ↑ |
1179 1183 ip_pkt_t *ipp = coa->coa_ipp;
1180 1184 int *i1 = (int *)invalp;
1181 1185 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1182 1186 ipaddr_t addr = (ipaddr_t)*i1;
1183 1187 uint_t ifindex;
1184 1188 zoneid_t zoneid = IPCL_ZONEID(connp);
1185 1189 ipif_t *ipif;
1186 1190 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1187 1191 int error;
1188 1192
1189 - if (connp->conn_family != AF_INET)
1193 + if (connp->conn_family == AF_INET6 &&
1194 + connp->conn_ipversion == IPV4_VERSION) {
1195 + /*
1196 + * Allow certain IPv4 options to be set on an AF_INET6 socket
1197 + * if the connection is still IPv4.
1198 + */
1199 + switch (name) {
1200 + case IP_TOS:
1201 + case T_IP_TOS:
1202 + case IP_TTL:
1203 + case IP_DONTFRAG:
1204 + break;
1205 + default:
1206 + return (EINVAL);
1207 + }
1208 + } else if (connp->conn_family != AF_INET) {
1190 1209 return (EINVAL);
1210 + }
1191 1211
1192 1212 switch (name) {
1193 1213 case IP_TTL:
1194 1214 /* Don't allow zero */
1195 1215 if (*i1 < 1 || *i1 > 255)
1196 1216 return (EINVAL);
1197 1217 break;
1198 1218 case IP_MULTICAST_IF:
1199 1219 if (addr == INADDR_ANY) {
1200 1220 /* Clear */
1201 1221 ifindex = 0;
1202 1222 break;
1203 1223 }
1204 1224 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1205 1225 if (ipif == NULL)
1206 1226 return (EHOSTUNREACH);
1207 1227 /* not supported by the virtual network iface */
1208 1228 if (IS_VNI(ipif->ipif_ill)) {
1209 1229 ipif_refrele(ipif);
1210 1230 return (EINVAL);
1211 1231 }
1212 1232 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1213 1233 ipif_refrele(ipif);
1214 1234 break;
1215 1235 case IP_NEXTHOP: {
1216 1236 ire_t *ire;
1217 1237
1218 1238 if (addr == INADDR_ANY) {
1219 1239 /* Clear */
1220 1240 break;
1221 1241 }
1222 1242 /* Verify that the next-hop is on-link */
1223 1243 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1224 1244 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1225 1245 if (ire == NULL)
1226 1246 return (EHOSTUNREACH);
1227 1247 ire_refrele(ire);
1228 1248 break;
1229 1249 }
1230 1250 case IP_OPTIONS:
1231 1251 case T_IP_OPTIONS: {
1232 1252 uint_t newlen;
1233 1253
1234 1254 if (ipp->ipp_fields & IPPF_LABEL_V4)
1235 1255 newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1236 1256 else
1237 1257 newlen = inlen;
1238 1258 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1239 1259 return (EINVAL);
1240 1260 }
1241 1261 break;
1242 1262 }
1243 1263 case IP_PKTINFO: {
1244 1264 struct in_pktinfo *pktinfo;
1245 1265
1246 1266 /* Two different valid lengths */
1247 1267 if (inlen != sizeof (int) &&
1248 1268 inlen != sizeof (struct in_pktinfo))
1249 1269 return (EINVAL);
1250 1270 if (inlen == sizeof (int))
1251 1271 break;
1252 1272
1253 1273 pktinfo = (struct in_pktinfo *)invalp;
1254 1274 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1255 1275 switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1256 1276 zoneid, ipst, B_FALSE)) {
1257 1277 case IPVL_UNICAST_UP:
1258 1278 case IPVL_UNICAST_DOWN:
1259 1279 break;
1260 1280 default:
1261 1281 return (EADDRNOTAVAIL);
1262 1282 }
1263 1283 }
1264 1284 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1265 1285 B_FALSE, ipst))
1266 1286 return (ENXIO);
1267 1287 break;
1268 1288 }
1269 1289 case IP_BOUND_IF:
1270 1290 ifindex = *(uint_t *)i1;
1271 1291
1272 1292 /* Just check it is ok. */
1273 1293 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1274 1294 return (ENXIO);
1275 1295 break;
1276 1296 }
1277 1297 if (checkonly)
1278 1298 return (0);
1279 1299
1280 1300 /* Here we set the actual option value */
1281 1301 /*
1282 1302 * conn_lock protects the bitfields, and is used to
1283 1303 * set the fields atomically. Not needed for ixa settings since
1284 1304 * the caller has an exclusive copy of the ixa.
1285 1305 * We can not hold conn_lock across the multicast options though.
1286 1306 */
1287 1307 switch (name) {
1288 1308 case IP_OPTIONS:
1289 1309 case T_IP_OPTIONS:
1290 1310 /* Save options for use by IP. */
1291 1311 mutex_enter(&connp->conn_lock);
1292 1312 error = optcom_pkt_set(invalp, inlen,
1293 1313 (uchar_t **)&ipp->ipp_ipv4_options,
1294 1314 &ipp->ipp_ipv4_options_len);
1295 1315 if (error != 0) {
1296 1316 mutex_exit(&connp->conn_lock);
1297 1317 return (error);
1298 1318 }
1299 1319 if (ipp->ipp_ipv4_options_len == 0) {
1300 1320 ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1301 1321 } else {
1302 1322 ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1303 1323 }
1304 1324 mutex_exit(&connp->conn_lock);
1305 1325 coa->coa_changed |= COA_HEADER_CHANGED;
1306 1326 coa->coa_changed |= COA_WROFF_CHANGED;
1307 1327 break;
1308 1328
1309 1329 case IP_TTL:
1310 1330 mutex_enter(&connp->conn_lock);
1311 1331 ipp->ipp_unicast_hops = *i1;
1312 1332 mutex_exit(&connp->conn_lock);
1313 1333 coa->coa_changed |= COA_HEADER_CHANGED;
1314 1334 break;
1315 1335 case IP_TOS:
1316 1336 case T_IP_TOS:
1317 1337 mutex_enter(&connp->conn_lock);
1318 1338 if (*i1 == -1) {
1319 1339 ipp->ipp_type_of_service = 0;
1320 1340 } else {
1321 1341 ipp->ipp_type_of_service = *i1;
1322 1342 }
1323 1343 mutex_exit(&connp->conn_lock);
1324 1344 coa->coa_changed |= COA_HEADER_CHANGED;
1325 1345 break;
1326 1346 case IP_MULTICAST_IF:
1327 1347 ixa->ixa_multicast_ifindex = ifindex;
1328 1348 ixa->ixa_multicast_ifaddr = addr;
1329 1349 coa->coa_changed |= COA_ROUTE_CHANGED;
1330 1350 break;
1331 1351 case IP_MULTICAST_TTL:
1332 1352 ixa->ixa_multicast_ttl = *invalp;
1333 1353 /* Handled automatically by ip_output */
1334 1354 break;
1335 1355 case IP_MULTICAST_LOOP:
1336 1356 if (*invalp != 0)
1337 1357 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1338 1358 else
1339 1359 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1340 1360 /* Handled automatically by ip_output */
1341 1361 break;
1342 1362 case IP_RECVOPTS:
1343 1363 mutex_enter(&connp->conn_lock);
1344 1364 connp->conn_recv_ancillary.crb_recvopts = onoff;
1345 1365 mutex_exit(&connp->conn_lock);
1346 1366 break;
1347 1367 case IP_RECVDSTADDR:
1348 1368 mutex_enter(&connp->conn_lock);
1349 1369 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1350 1370 mutex_exit(&connp->conn_lock);
1351 1371 break;
1352 1372 case IP_RECVIF:
1353 1373 mutex_enter(&connp->conn_lock);
1354 1374 connp->conn_recv_ancillary.crb_recvif = onoff;
1355 1375 mutex_exit(&connp->conn_lock);
1356 1376 break;
1357 1377 case IP_RECVSLLA:
1358 1378 mutex_enter(&connp->conn_lock);
1359 1379 connp->conn_recv_ancillary.crb_recvslla = onoff;
1360 1380 mutex_exit(&connp->conn_lock);
1361 1381 break;
1362 1382 case IP_RECVTTL:
1363 1383 mutex_enter(&connp->conn_lock);
1364 1384 connp->conn_recv_ancillary.crb_recvttl = onoff;
1365 1385 mutex_exit(&connp->conn_lock);
1366 1386 break;
1367 1387 case IP_PKTINFO: {
1368 1388 /*
1369 1389 * This also handles IP_RECVPKTINFO.
1370 1390 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1371 1391 * Differentiation is based on the size of the
1372 1392 * argument passed in.
1373 1393 */
1374 1394 struct in_pktinfo *pktinfo;
1375 1395
1376 1396 if (inlen == sizeof (int)) {
1377 1397 /* This is IP_RECVPKTINFO option. */
1378 1398 mutex_enter(&connp->conn_lock);
1379 1399 connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1380 1400 onoff;
1381 1401 mutex_exit(&connp->conn_lock);
1382 1402 break;
1383 1403 }
1384 1404
1385 1405 /* This is IP_PKTINFO option. */
1386 1406 mutex_enter(&connp->conn_lock);
1387 1407 pktinfo = (struct in_pktinfo *)invalp;
1388 1408 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1389 1409 ipp->ipp_fields |= IPPF_ADDR;
1390 1410 IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1391 1411 &ipp->ipp_addr);
1392 1412 } else {
1393 1413 ipp->ipp_fields &= ~IPPF_ADDR;
1394 1414 ipp->ipp_addr = ipv6_all_zeros;
1395 1415 }
1396 1416 mutex_exit(&connp->conn_lock);
1397 1417 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1398 1418 coa->coa_changed |= COA_ROUTE_CHANGED;
1399 1419 coa->coa_changed |= COA_HEADER_CHANGED;
1400 1420 break;
1401 1421 }
1402 1422 case IP_DONTFRAG:
1403 1423 if (onoff) {
1404 1424 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1405 1425 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1406 1426 } else {
1407 1427 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1408 1428 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1409 1429 }
1410 1430 /* Need to redo ip_attr_connect */
1411 1431 coa->coa_changed |= COA_ROUTE_CHANGED;
1412 1432 break;
1413 1433 case IP_ADD_MEMBERSHIP:
1414 1434 case IP_DROP_MEMBERSHIP:
1415 1435 case MCAST_JOIN_GROUP:
1416 1436 case MCAST_LEAVE_GROUP:
1417 1437 return (ip_opt_set_multicast_group(connp, name,
1418 1438 invalp, B_FALSE, checkonly));
1419 1439
1420 1440 case IP_BLOCK_SOURCE:
1421 1441 case IP_UNBLOCK_SOURCE:
1422 1442 case IP_ADD_SOURCE_MEMBERSHIP:
1423 1443 case IP_DROP_SOURCE_MEMBERSHIP:
1424 1444 case MCAST_BLOCK_SOURCE:
1425 1445 case MCAST_UNBLOCK_SOURCE:
1426 1446 case MCAST_JOIN_SOURCE_GROUP:
1427 1447 case MCAST_LEAVE_SOURCE_GROUP:
1428 1448 return (ip_opt_set_multicast_sources(connp, name,
1429 1449 invalp, B_FALSE, checkonly));
1430 1450
1431 1451 case IP_SEC_OPT:
1432 1452 mutex_enter(&connp->conn_lock);
1433 1453 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1434 1454 mutex_exit(&connp->conn_lock);
1435 1455 if (error != 0) {
1436 1456 return (error);
1437 1457 }
1438 1458 /* This is an IPsec policy change - redo ip_attr_connect */
1439 1459 coa->coa_changed |= COA_ROUTE_CHANGED;
1440 1460 break;
1441 1461 case IP_NEXTHOP:
1442 1462 ixa->ixa_nexthop_v4 = addr;
1443 1463 if (addr != INADDR_ANY)
1444 1464 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1445 1465 else
1446 1466 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1447 1467 coa->coa_changed |= COA_ROUTE_CHANGED;
1448 1468 break;
1449 1469
1450 1470 case IP_BOUND_IF:
1451 1471 ixa->ixa_ifindex = ifindex; /* Send */
1452 1472 mutex_enter(&connp->conn_lock);
1453 1473 connp->conn_incoming_ifindex = ifindex; /* Receive */
1454 1474 connp->conn_bound_if = ifindex; /* getsockopt */
1455 1475 mutex_exit(&connp->conn_lock);
1456 1476 coa->coa_changed |= COA_ROUTE_CHANGED;
1457 1477 break;
1458 1478 case IP_UNSPEC_SRC:
1459 1479 mutex_enter(&connp->conn_lock);
1460 1480 connp->conn_unspec_src = onoff;
1461 1481 if (onoff)
1462 1482 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1463 1483 else
1464 1484 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1465 1485
1466 1486 mutex_exit(&connp->conn_lock);
1467 1487 break;
1468 1488 case IP_BROADCAST_TTL:
1469 1489 ixa->ixa_broadcast_ttl = *invalp;
1470 1490 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1471 1491 /* Handled automatically by ip_output */
1472 1492 break;
1473 1493 case MRT_INIT:
1474 1494 case MRT_DONE:
1475 1495 case MRT_ADD_VIF:
1476 1496 case MRT_DEL_VIF:
1477 1497 case MRT_ADD_MFC:
1478 1498 case MRT_DEL_MFC:
1479 1499 case MRT_ASSERT:
1480 1500 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1481 1501 return (error);
1482 1502 }
1483 1503 error = ip_mrouter_set((int)name, connp, checkonly,
1484 1504 (uchar_t *)invalp, inlen);
1485 1505 if (error) {
1486 1506 return (error);
1487 1507 }
1488 1508 return (0);
1489 1509
1490 1510 }
1491 1511 return (0);
1492 1512 }
1493 1513
1494 1514 /* Handle IPPROTO_IPV6 */
1495 1515 static int
1496 1516 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1497 1517 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1498 1518 {
1499 1519 conn_t *connp = coa->coa_connp;
1500 1520 ip_xmit_attr_t *ixa = coa->coa_ixa;
1501 1521 ip_pkt_t *ipp = coa->coa_ipp;
1502 1522 int *i1 = (int *)invalp;
1503 1523 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1504 1524 uint_t ifindex;
1505 1525 zoneid_t zoneid = IPCL_ZONEID(connp);
1506 1526 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1507 1527 int error;
1508 1528
1509 1529 if (connp->conn_family != AF_INET6)
1510 1530 return (EINVAL);
1511 1531
1512 1532 switch (name) {
1513 1533 case IPV6_MULTICAST_IF:
1514 1534 /*
1515 1535 * The only possible error is EINVAL.
1516 1536 * We call this option on both V4 and V6
1517 1537 * If both fail, then this call returns
1518 1538 * EINVAL. If at least one of them succeeds we
1519 1539 * return success.
1520 1540 */
1521 1541 ifindex = *(uint_t *)i1;
1522 1542
1523 1543 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1524 1544 !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1525 1545 return (EINVAL);
1526 1546 break;
1527 1547 case IPV6_UNICAST_HOPS:
1528 1548 /* Don't allow zero. -1 means to use default */
1529 1549 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1530 1550 return (EINVAL);
1531 1551 break;
1532 1552 case IPV6_MULTICAST_HOPS:
1533 1553 /* -1 means use default */
1534 1554 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1535 1555 return (EINVAL);
1536 1556 break;
1537 1557 case IPV6_MULTICAST_LOOP:
1538 1558 if (*i1 != 0 && *i1 != 1)
1539 1559 return (EINVAL);
1540 1560 break;
1541 1561 case IPV6_BOUND_IF:
1542 1562 ifindex = *(uint_t *)i1;
1543 1563
1544 1564 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1545 1565 return (ENXIO);
1546 1566 break;
1547 1567 case IPV6_PKTINFO: {
1548 1568 struct in6_pktinfo *pkti;
1549 1569 boolean_t isv6;
1550 1570
1551 1571 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1552 1572 return (EINVAL);
1553 1573 if (inlen == 0)
1554 1574 break; /* Clear values below */
1555 1575
1556 1576 /*
1557 1577 * Verify the source address and ifindex. Privileged users
1558 1578 * can use any source address.
1559 1579 */
1560 1580 pkti = (struct in6_pktinfo *)invalp;
1561 1581
1562 1582 /*
1563 1583 * For link-local addresses we use the ipi6_ifindex when
1564 1584 * we verify the local address.
1565 1585 * If net_rawaccess then any source address can be used.
1566 1586 */
1567 1587 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1568 1588 secpolicy_net_rawaccess(cr) != 0) {
1569 1589 uint_t scopeid = 0;
1570 1590 in6_addr_t *v6src = &pkti->ipi6_addr;
1571 1591 ipaddr_t v4src;
1572 1592 ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1573 1593
1574 1594 if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1575 1595 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1576 1596 if (v4src != INADDR_ANY) {
1577 1597 laddr_type = ip_laddr_verify_v4(v4src,
1578 1598 zoneid, ipst, B_FALSE);
1579 1599 }
1580 1600 } else {
1581 1601 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1582 1602 scopeid = pkti->ipi6_ifindex;
1583 1603
1584 1604 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1585 1605 ipst, B_FALSE, scopeid);
1586 1606 }
1587 1607 switch (laddr_type) {
1588 1608 case IPVL_UNICAST_UP:
1589 1609 case IPVL_UNICAST_DOWN:
1590 1610 break;
1591 1611 default:
1592 1612 return (EADDRNOTAVAIL);
1593 1613 }
1594 1614 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1595 1615 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1596 1616 /* Allow any source */
1597 1617 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1598 1618 }
1599 1619 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1600 1620 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1601 1621 ipst))
1602 1622 return (ENXIO);
1603 1623 break;
1604 1624 }
1605 1625 case IPV6_HOPLIMIT:
1606 1626 /* It is only allowed as ancillary data */
1607 1627 if (!coa->coa_ancillary)
1608 1628 return (EINVAL);
1609 1629
1610 1630 if (inlen != 0 && inlen != sizeof (int))
1611 1631 return (EINVAL);
1612 1632 if (inlen == sizeof (int)) {
1613 1633 if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1614 1634 return (EINVAL);
1615 1635 }
1616 1636 break;
1617 1637 case IPV6_TCLASS:
1618 1638 if (inlen != 0 && inlen != sizeof (int))
1619 1639 return (EINVAL);
1620 1640 if (inlen == sizeof (int)) {
1621 1641 if (*i1 > 255 || *i1 < -1)
1622 1642 return (EINVAL);
1623 1643 }
1624 1644 break;
1625 1645 case IPV6_NEXTHOP:
1626 1646 if (inlen != 0 && inlen != sizeof (sin6_t))
1627 1647 return (EINVAL);
1628 1648 if (inlen == sizeof (sin6_t)) {
1629 1649 sin6_t *sin6 = (sin6_t *)invalp;
1630 1650 ire_t *ire;
1631 1651
1632 1652 if (sin6->sin6_family != AF_INET6)
1633 1653 return (EAFNOSUPPORT);
1634 1654 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1635 1655 return (EADDRNOTAVAIL);
1636 1656
1637 1657 /* Verify that the next-hop is on-link */
1638 1658 ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1639 1659 0, 0, IRE_ONLINK, NULL, zoneid,
1640 1660 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1641 1661 if (ire == NULL)
1642 1662 return (EHOSTUNREACH);
1643 1663 ire_refrele(ire);
1644 1664 break;
1645 1665 }
1646 1666 break;
1647 1667 case IPV6_RTHDR:
1648 1668 case IPV6_DSTOPTS:
1649 1669 case IPV6_RTHDRDSTOPTS:
1650 1670 case IPV6_HOPOPTS: {
1651 1671 /* All have the length field in the same place */
1652 1672 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1653 1673 /*
1654 1674 * Sanity checks - minimum size, size a multiple of
1655 1675 * eight bytes, and matching size passed in.
1656 1676 */
1657 1677 if (inlen != 0 &&
1658 1678 inlen != (8 * (hopts->ip6h_len + 1)))
1659 1679 return (EINVAL);
1660 1680 break;
1661 1681 }
1662 1682 case IPV6_PATHMTU:
1663 1683 /* Can't be set */
1664 1684 return (EINVAL);
1665 1685
1666 1686 case IPV6_USE_MIN_MTU:
1667 1687 if (inlen != sizeof (int))
1668 1688 return (EINVAL);
1669 1689 if (*i1 < -1 || *i1 > 1)
1670 1690 return (EINVAL);
1671 1691 break;
1672 1692 case IPV6_SRC_PREFERENCES:
1673 1693 if (inlen != sizeof (uint32_t))
1674 1694 return (EINVAL);
1675 1695 break;
1676 1696 case IPV6_V6ONLY:
1677 1697 if (*i1 < 0 || *i1 > 1) {
1678 1698 return (EINVAL);
1679 1699 }
1680 1700 break;
1681 1701 }
1682 1702 if (checkonly)
1683 1703 return (0);
1684 1704
1685 1705 /* Here we set the actual option value */
1686 1706 /*
1687 1707 * conn_lock protects the bitfields, and is used to
1688 1708 * set the fields atomically. Not needed for ixa settings since
1689 1709 * the caller has an exclusive copy of the ixa.
1690 1710 * We can not hold conn_lock across the multicast options though.
1691 1711 */
1692 1712 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1693 1713 switch (name) {
1694 1714 case IPV6_MULTICAST_IF:
1695 1715 ixa->ixa_multicast_ifindex = ifindex;
1696 1716 /* Need to redo ip_attr_connect */
1697 1717 coa->coa_changed |= COA_ROUTE_CHANGED;
1698 1718 break;
1699 1719 case IPV6_UNICAST_HOPS:
1700 1720 /* -1 means use default */
1701 1721 mutex_enter(&connp->conn_lock);
1702 1722 if (*i1 == -1) {
1703 1723 ipp->ipp_unicast_hops = connp->conn_default_ttl;
1704 1724 } else {
1705 1725 ipp->ipp_unicast_hops = (uint8_t)*i1;
1706 1726 }
1707 1727 mutex_exit(&connp->conn_lock);
1708 1728 coa->coa_changed |= COA_HEADER_CHANGED;
1709 1729 break;
1710 1730 case IPV6_MULTICAST_HOPS:
1711 1731 /* -1 means use default */
1712 1732 if (*i1 == -1) {
1713 1733 ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1714 1734 } else {
1715 1735 ixa->ixa_multicast_ttl = (uint8_t)*i1;
1716 1736 }
1717 1737 /* Handled automatically by ip_output */
1718 1738 break;
1719 1739 case IPV6_MULTICAST_LOOP:
1720 1740 if (*i1 != 0)
1721 1741 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1722 1742 else
1723 1743 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1724 1744 /* Handled automatically by ip_output */
1725 1745 break;
1726 1746 case IPV6_JOIN_GROUP:
1727 1747 case IPV6_LEAVE_GROUP:
1728 1748 case MCAST_JOIN_GROUP:
1729 1749 case MCAST_LEAVE_GROUP:
1730 1750 return (ip_opt_set_multicast_group(connp, name,
1731 1751 invalp, B_TRUE, checkonly));
1732 1752
1733 1753 case MCAST_BLOCK_SOURCE:
1734 1754 case MCAST_UNBLOCK_SOURCE:
1735 1755 case MCAST_JOIN_SOURCE_GROUP:
1736 1756 case MCAST_LEAVE_SOURCE_GROUP:
1737 1757 return (ip_opt_set_multicast_sources(connp, name,
1738 1758 invalp, B_TRUE, checkonly));
1739 1759
1740 1760 case IPV6_BOUND_IF:
1741 1761 ixa->ixa_ifindex = ifindex; /* Send */
1742 1762 mutex_enter(&connp->conn_lock);
1743 1763 connp->conn_incoming_ifindex = ifindex; /* Receive */
1744 1764 connp->conn_bound_if = ifindex; /* getsockopt */
1745 1765 mutex_exit(&connp->conn_lock);
1746 1766 coa->coa_changed |= COA_ROUTE_CHANGED;
1747 1767 break;
1748 1768 case IPV6_UNSPEC_SRC:
1749 1769 mutex_enter(&connp->conn_lock);
1750 1770 connp->conn_unspec_src = onoff;
1751 1771 if (onoff)
1752 1772 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1753 1773 else
1754 1774 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1755 1775 mutex_exit(&connp->conn_lock);
1756 1776 break;
1757 1777 case IPV6_RECVPKTINFO:
1758 1778 mutex_enter(&connp->conn_lock);
1759 1779 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1760 1780 mutex_exit(&connp->conn_lock);
1761 1781 break;
1762 1782 case IPV6_RECVTCLASS:
1763 1783 mutex_enter(&connp->conn_lock);
1764 1784 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1765 1785 mutex_exit(&connp->conn_lock);
1766 1786 break;
1767 1787 case IPV6_RECVPATHMTU:
1768 1788 mutex_enter(&connp->conn_lock);
1769 1789 connp->conn_ipv6_recvpathmtu = onoff;
1770 1790 mutex_exit(&connp->conn_lock);
1771 1791 break;
1772 1792 case IPV6_RECVHOPLIMIT:
1773 1793 mutex_enter(&connp->conn_lock);
1774 1794 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1775 1795 onoff;
1776 1796 mutex_exit(&connp->conn_lock);
1777 1797 break;
1778 1798 case IPV6_RECVHOPOPTS:
1779 1799 mutex_enter(&connp->conn_lock);
1780 1800 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1781 1801 mutex_exit(&connp->conn_lock);
1782 1802 break;
1783 1803 case IPV6_RECVDSTOPTS:
1784 1804 mutex_enter(&connp->conn_lock);
1785 1805 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1786 1806 mutex_exit(&connp->conn_lock);
1787 1807 break;
1788 1808 case _OLD_IPV6_RECVDSTOPTS:
1789 1809 mutex_enter(&connp->conn_lock);
1790 1810 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1791 1811 onoff;
1792 1812 mutex_exit(&connp->conn_lock);
1793 1813 break;
1794 1814 case IPV6_RECVRTHDRDSTOPTS:
1795 1815 mutex_enter(&connp->conn_lock);
1796 1816 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1797 1817 onoff;
1798 1818 mutex_exit(&connp->conn_lock);
1799 1819 break;
1800 1820 case IPV6_RECVRTHDR:
1801 1821 mutex_enter(&connp->conn_lock);
1802 1822 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1803 1823 mutex_exit(&connp->conn_lock);
1804 1824 break;
1805 1825 case IPV6_PKTINFO:
1806 1826 mutex_enter(&connp->conn_lock);
1807 1827 if (inlen == 0) {
1808 1828 ipp->ipp_fields &= ~IPPF_ADDR;
1809 1829 ipp->ipp_addr = ipv6_all_zeros;
1810 1830 ixa->ixa_ifindex = 0;
1811 1831 } else {
1812 1832 struct in6_pktinfo *pkti;
1813 1833
1814 1834 pkti = (struct in6_pktinfo *)invalp;
1815 1835 ipp->ipp_addr = pkti->ipi6_addr;
1816 1836 if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1817 1837 ipp->ipp_fields |= IPPF_ADDR;
1818 1838 else
1819 1839 ipp->ipp_fields &= ~IPPF_ADDR;
1820 1840 ixa->ixa_ifindex = pkti->ipi6_ifindex;
1821 1841 }
1822 1842 mutex_exit(&connp->conn_lock);
1823 1843 /* Source and ifindex might have changed */
1824 1844 coa->coa_changed |= COA_HEADER_CHANGED;
1825 1845 coa->coa_changed |= COA_ROUTE_CHANGED;
1826 1846 break;
1827 1847 case IPV6_HOPLIMIT:
1828 1848 mutex_enter(&connp->conn_lock);
1829 1849 if (inlen == 0 || *i1 == -1) {
1830 1850 /* Revert to default */
1831 1851 ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1832 1852 ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1833 1853 } else {
1834 1854 ipp->ipp_hoplimit = *i1;
1835 1855 ipp->ipp_fields |= IPPF_HOPLIMIT;
1836 1856 /* Ensure that it sticks for multicast packets */
1837 1857 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1838 1858 }
1839 1859 mutex_exit(&connp->conn_lock);
1840 1860 coa->coa_changed |= COA_HEADER_CHANGED;
1841 1861 break;
1842 1862 case IPV6_TCLASS:
1843 1863 /*
1844 1864 * IPV6_TCLASS accepts -1 as use kernel default
1845 1865 * and [0, 255] as the actual traffic class.
1846 1866 */
1847 1867 mutex_enter(&connp->conn_lock);
1848 1868 if (inlen == 0 || *i1 == -1) {
1849 1869 ipp->ipp_tclass = 0;
1850 1870 ipp->ipp_fields &= ~IPPF_TCLASS;
1851 1871 } else {
1852 1872 ipp->ipp_tclass = *i1;
1853 1873 ipp->ipp_fields |= IPPF_TCLASS;
1854 1874 }
1855 1875 mutex_exit(&connp->conn_lock);
1856 1876 coa->coa_changed |= COA_HEADER_CHANGED;
1857 1877 break;
1858 1878 case IPV6_NEXTHOP:
1859 1879 if (inlen == 0) {
1860 1880 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1861 1881 } else {
1862 1882 sin6_t *sin6 = (sin6_t *)invalp;
1863 1883
1864 1884 ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1865 1885 if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1866 1886 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1867 1887 else
1868 1888 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1869 1889 }
1870 1890 coa->coa_changed |= COA_ROUTE_CHANGED;
1871 1891 break;
1872 1892 case IPV6_HOPOPTS:
1873 1893 mutex_enter(&connp->conn_lock);
1874 1894 error = optcom_pkt_set(invalp, inlen,
1875 1895 (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1876 1896 if (error != 0) {
1877 1897 mutex_exit(&connp->conn_lock);
1878 1898 return (error);
1879 1899 }
1880 1900 if (ipp->ipp_hopoptslen == 0) {
1881 1901 ipp->ipp_fields &= ~IPPF_HOPOPTS;
1882 1902 } else {
1883 1903 ipp->ipp_fields |= IPPF_HOPOPTS;
1884 1904 }
1885 1905 mutex_exit(&connp->conn_lock);
1886 1906 coa->coa_changed |= COA_HEADER_CHANGED;
1887 1907 coa->coa_changed |= COA_WROFF_CHANGED;
1888 1908 break;
1889 1909 case IPV6_RTHDRDSTOPTS:
1890 1910 mutex_enter(&connp->conn_lock);
1891 1911 error = optcom_pkt_set(invalp, inlen,
1892 1912 (uchar_t **)&ipp->ipp_rthdrdstopts,
1893 1913 &ipp->ipp_rthdrdstoptslen);
1894 1914 if (error != 0) {
1895 1915 mutex_exit(&connp->conn_lock);
1896 1916 return (error);
1897 1917 }
1898 1918 if (ipp->ipp_rthdrdstoptslen == 0) {
1899 1919 ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1900 1920 } else {
1901 1921 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1902 1922 }
1903 1923 mutex_exit(&connp->conn_lock);
1904 1924 coa->coa_changed |= COA_HEADER_CHANGED;
1905 1925 coa->coa_changed |= COA_WROFF_CHANGED;
1906 1926 break;
1907 1927 case IPV6_DSTOPTS:
1908 1928 mutex_enter(&connp->conn_lock);
1909 1929 error = optcom_pkt_set(invalp, inlen,
1910 1930 (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1911 1931 if (error != 0) {
1912 1932 mutex_exit(&connp->conn_lock);
1913 1933 return (error);
1914 1934 }
1915 1935 if (ipp->ipp_dstoptslen == 0) {
1916 1936 ipp->ipp_fields &= ~IPPF_DSTOPTS;
1917 1937 } else {
1918 1938 ipp->ipp_fields |= IPPF_DSTOPTS;
1919 1939 }
1920 1940 mutex_exit(&connp->conn_lock);
1921 1941 coa->coa_changed |= COA_HEADER_CHANGED;
1922 1942 coa->coa_changed |= COA_WROFF_CHANGED;
1923 1943 break;
1924 1944 case IPV6_RTHDR:
1925 1945 mutex_enter(&connp->conn_lock);
1926 1946 error = optcom_pkt_set(invalp, inlen,
1927 1947 (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1928 1948 if (error != 0) {
1929 1949 mutex_exit(&connp->conn_lock);
1930 1950 return (error);
1931 1951 }
1932 1952 if (ipp->ipp_rthdrlen == 0) {
1933 1953 ipp->ipp_fields &= ~IPPF_RTHDR;
1934 1954 } else {
1935 1955 ipp->ipp_fields |= IPPF_RTHDR;
1936 1956 }
1937 1957 mutex_exit(&connp->conn_lock);
1938 1958 coa->coa_changed |= COA_HEADER_CHANGED;
1939 1959 coa->coa_changed |= COA_WROFF_CHANGED;
1940 1960 break;
1941 1961
1942 1962 case IPV6_DONTFRAG:
1943 1963 if (onoff) {
1944 1964 ixa->ixa_flags |= IXAF_DONTFRAG;
1945 1965 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1946 1966 } else {
1947 1967 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1948 1968 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1949 1969 }
1950 1970 /* Need to redo ip_attr_connect */
1951 1971 coa->coa_changed |= COA_ROUTE_CHANGED;
1952 1972 break;
1953 1973
1954 1974 case IPV6_USE_MIN_MTU:
1955 1975 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1956 1976 ixa->ixa_use_min_mtu = *i1;
1957 1977 /* Need to redo ip_attr_connect */
1958 1978 coa->coa_changed |= COA_ROUTE_CHANGED;
1959 1979 break;
1960 1980
1961 1981 case IPV6_SEC_OPT:
1962 1982 mutex_enter(&connp->conn_lock);
1963 1983 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1964 1984 mutex_exit(&connp->conn_lock);
1965 1985 if (error != 0) {
1966 1986 return (error);
1967 1987 }
1968 1988 /* This is an IPsec policy change - redo ip_attr_connect */
1969 1989 coa->coa_changed |= COA_ROUTE_CHANGED;
1970 1990 break;
1971 1991 case IPV6_SRC_PREFERENCES:
1972 1992 /*
1973 1993 * This socket option only affects connected
1974 1994 * sockets that haven't already bound to a specific
1975 1995 * IPv6 address. In other words, sockets that
1976 1996 * don't call bind() with an address other than the
1977 1997 * unspecified address and that call connect().
1978 1998 * ip_set_destination_v6() passes these preferences
1979 1999 * to the ipif_select_source_v6() function.
1980 2000 */
1981 2001 mutex_enter(&connp->conn_lock);
1982 2002 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1983 2003 mutex_exit(&connp->conn_lock);
1984 2004 if (error != 0) {
1985 2005 return (error);
1986 2006 }
1987 2007 break;
1988 2008 case IPV6_V6ONLY:
1989 2009 mutex_enter(&connp->conn_lock);
1990 2010 connp->conn_ipv6_v6only = onoff;
1991 2011 mutex_exit(&connp->conn_lock);
1992 2012 break;
1993 2013 }
1994 2014 return (0);
1995 2015 }
1996 2016
1997 2017 /* Handle IPPROTO_UDP */
1998 2018 /* ARGSUSED1 */
1999 2019 static int
2000 2020 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2001 2021 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2002 2022 {
2003 2023 conn_t *connp = coa->coa_connp;
2004 2024 int *i1 = (int *)invalp;
2005 2025 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2006 2026 int error;
2007 2027
2008 2028 switch (name) {
2009 2029 case UDP_ANONPRIVBIND:
2010 2030 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2011 2031 return (error);
2012 2032 }
2013 2033 break;
2014 2034 }
2015 2035 if (checkonly)
2016 2036 return (0);
2017 2037
2018 2038 /* Here we set the actual option value */
2019 2039 mutex_enter(&connp->conn_lock);
2020 2040 switch (name) {
2021 2041 case UDP_ANONPRIVBIND:
2022 2042 connp->conn_anon_priv_bind = onoff;
2023 2043 break;
2024 2044 case UDP_EXCLBIND:
2025 2045 connp->conn_exclbind = onoff;
2026 2046 break;
2027 2047 }
2028 2048 mutex_exit(&connp->conn_lock);
2029 2049 return (0);
2030 2050 }
2031 2051
2032 2052 /* Handle IPPROTO_TCP */
2033 2053 /* ARGSUSED1 */
2034 2054 static int
2035 2055 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2036 2056 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2037 2057 {
2038 2058 conn_t *connp = coa->coa_connp;
2039 2059 int *i1 = (int *)invalp;
2040 2060 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2041 2061 int error;
2042 2062
2043 2063 switch (name) {
2044 2064 case TCP_ANONPRIVBIND:
2045 2065 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2046 2066 return (error);
2047 2067 }
2048 2068 break;
2049 2069 }
2050 2070 if (checkonly)
2051 2071 return (0);
2052 2072
2053 2073 /* Here we set the actual option value */
2054 2074 mutex_enter(&connp->conn_lock);
2055 2075 switch (name) {
2056 2076 case TCP_ANONPRIVBIND:
2057 2077 connp->conn_anon_priv_bind = onoff;
2058 2078 break;
2059 2079 case TCP_EXCLBIND:
2060 2080 connp->conn_exclbind = onoff;
2061 2081 break;
2062 2082 case TCP_RECVDSTADDR:
2063 2083 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2064 2084 break;
2065 2085 }
2066 2086 mutex_exit(&connp->conn_lock);
2067 2087 return (0);
2068 2088 }
2069 2089
2070 2090 int
2071 2091 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2072 2092 {
2073 2093 sin_t *sin;
2074 2094 sin6_t *sin6;
2075 2095
2076 2096 if (connp->conn_family == AF_INET) {
2077 2097 if (*salenp < sizeof (sin_t))
2078 2098 return (EINVAL);
2079 2099
2080 2100 *salenp = sizeof (sin_t);
2081 2101 /* Fill zeroes and then initialize non-zero fields */
2082 2102 sin = (sin_t *)sa;
2083 2103 *sin = sin_null;
2084 2104 sin->sin_family = AF_INET;
2085 2105 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2086 2106 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2087 2107 sin->sin_addr.s_addr = connp->conn_saddr_v4;
2088 2108 } else {
2089 2109 /*
2090 2110 * INADDR_ANY
2091 2111 * conn_saddr is not set, we might be bound to
2092 2112 * broadcast/multicast. Use conn_bound_addr as
2093 2113 * local address instead (that could
2094 2114 * also still be INADDR_ANY)
2095 2115 */
2096 2116 sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2097 2117 }
2098 2118 sin->sin_port = connp->conn_lport;
2099 2119 } else {
2100 2120 if (*salenp < sizeof (sin6_t))
2101 2121 return (EINVAL);
2102 2122
2103 2123 *salenp = sizeof (sin6_t);
2104 2124 /* Fill zeroes and then initialize non-zero fields */
2105 2125 sin6 = (sin6_t *)sa;
2106 2126 *sin6 = sin6_null;
2107 2127 sin6->sin6_family = AF_INET6;
2108 2128 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2109 2129 sin6->sin6_addr = connp->conn_saddr_v6;
2110 2130 } else {
2111 2131 /*
2112 2132 * conn_saddr is not set, we might be bound to
2113 2133 * broadcast/multicast. Use conn_bound_addr as
2114 2134 * local address instead (which could
2115 2135 * also still be unspecified)
2116 2136 */
2117 2137 sin6->sin6_addr = connp->conn_bound_addr_v6;
2118 2138 }
2119 2139 sin6->sin6_port = connp->conn_lport;
2120 2140 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2121 2141 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2122 2142 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2123 2143 }
2124 2144 return (0);
2125 2145 }
2126 2146
2127 2147 int
2128 2148 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2129 2149 {
2130 2150 struct sockaddr_in *sin;
2131 2151 struct sockaddr_in6 *sin6;
2132 2152
2133 2153 if (connp->conn_family == AF_INET) {
2134 2154 if (*salenp < sizeof (sin_t))
2135 2155 return (EINVAL);
2136 2156
2137 2157 *salenp = sizeof (sin_t);
2138 2158 /* initialize */
2139 2159 sin = (sin_t *)sa;
2140 2160 *sin = sin_null;
2141 2161 sin->sin_family = AF_INET;
2142 2162 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2143 2163 sin->sin_port = connp->conn_fport;
2144 2164 } else {
2145 2165 if (*salenp < sizeof (sin6_t))
2146 2166 return (EINVAL);
2147 2167
2148 2168 *salenp = sizeof (sin6_t);
2149 2169 /* initialize */
2150 2170 sin6 = (sin6_t *)sa;
2151 2171 *sin6 = sin6_null;
2152 2172 sin6->sin6_family = AF_INET6;
2153 2173 sin6->sin6_addr = connp->conn_faddr_v6;
2154 2174 sin6->sin6_port = connp->conn_fport;
2155 2175 sin6->sin6_flowinfo = connp->conn_flowinfo;
2156 2176 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2157 2177 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2158 2178 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2159 2179 }
2160 2180 return (0);
2161 2181 }
2162 2182
/*
 * Forward declarations for the file-local helpers that massage an IPv4
 * source route option / IPv6 routing header and return the resulting
 * checksum adjustment.  They are defined later in this file and are used by
 * conn_build_hdr_template() and conn_prepend_hdr().
 */
static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2165 2185
2166 2186 /*
2167 2187 * Allocate and fill in conn_ht_iphc based on the current information
2168 2188 * in the conn.
2169 2189 * Normally used when we bind() and connect().
2170 2190 * Returns failure if can't allocate memory, or if there is a problem
2171 2191 * with a routing header/option.
2172 2192 *
2173 2193 * We allocate space for the transport header (ulp_hdr_len + extra) and
2174 2194 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2175 2195 * The extra is there for transports that want some spare room for future
2176 2196 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2177 2197 * excludes the extra part.
2178 2198 *
2179 2199 * We massage an routing option/header and store the ckecksum difference
2180 2200 * in conn_sum.
2181 2201 *
2182 2202 * Caller needs to update conn_wroff if desired.
2183 2203 */
2184 2204 int
2185 2205 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2186 2206 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2187 2207 {
2188 2208 ip_xmit_attr_t *ixa = connp->conn_ixa;
2189 2209 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
2190 2210 uint_t ip_hdr_length;
2191 2211 uchar_t *hdrs;
2192 2212 uint_t hdrs_len;
2193 2213
2194 2214 ASSERT(MUTEX_HELD(&connp->conn_lock));
2195 2215
2196 2216 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2197 2217 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2198 2218 /* In case of TX label and IP options it can be too much */
2199 2219 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2200 2220 /* Preserves existing TX errno for this */
2201 2221 return (EHOSTUNREACH);
2202 2222 }
2203 2223 } else {
2204 2224 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2205 2225 }
2206 2226 ixa->ixa_ip_hdr_length = ip_hdr_length;
2207 2227 hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2208 2228 ASSERT(hdrs_len != 0);
2209 2229
2210 2230 if (hdrs_len != connp->conn_ht_iphc_allocated) {
2211 2231 /* Allocate new before we free any old */
2212 2232 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2213 2233 if (hdrs == NULL)
2214 2234 return (ENOMEM);
2215 2235
2216 2236 if (connp->conn_ht_iphc != NULL) {
2217 2237 kmem_free(connp->conn_ht_iphc,
2218 2238 connp->conn_ht_iphc_allocated);
2219 2239 }
2220 2240 connp->conn_ht_iphc = hdrs;
2221 2241 connp->conn_ht_iphc_allocated = hdrs_len;
2222 2242 } else {
2223 2243 hdrs = connp->conn_ht_iphc;
2224 2244 }
2225 2245 hdrs_len -= extra;
2226 2246 connp->conn_ht_iphc_len = hdrs_len;
2227 2247
2228 2248 connp->conn_ht_ulp = hdrs + ip_hdr_length;
2229 2249 connp->conn_ht_ulp_len = ulp_hdr_length;
2230 2250
2231 2251 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2232 2252 ipha_t *ipha = (ipha_t *)hdrs;
2233 2253
2234 2254 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2235 2255 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2236 2256 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2237 2257 ipha->ipha_length = htons(hdrs_len);
2238 2258 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2239 2259 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2240 2260 else
2241 2261 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2242 2262
2243 2263 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2244 2264 connp->conn_sum = cksum_massage_options_v4(ipha,
2245 2265 connp->conn_netstack);
2246 2266 } else {
2247 2267 connp->conn_sum = 0;
2248 2268 }
2249 2269 } else {
2250 2270 ip6_t *ip6h = (ip6_t *)hdrs;
2251 2271
2252 2272 ip6h->ip6_src = *v6src;
2253 2273 ip6h->ip6_dst = *v6dst;
2254 2274 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2255 2275 flowinfo);
2256 2276 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2257 2277
2258 2278 if (ipp->ipp_fields & IPPF_RTHDR) {
2259 2279 connp->conn_sum = cksum_massage_options_v6(ip6h,
2260 2280 ip_hdr_length, connp->conn_netstack);
2261 2281
2262 2282 /*
2263 2283 * Verify that the first hop isn't a mapped address.
2264 2284 * Routers along the path need to do this verification
2265 2285 * for subsequent hops.
2266 2286 */
2267 2287 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2268 2288 return (EADDRNOTAVAIL);
2269 2289
2270 2290 } else {
2271 2291 connp->conn_sum = 0;
2272 2292 }
2273 2293 }
2274 2294 return (0);
2275 2295 }
2276 2296
2277 2297 /*
2278 2298 * Prepend a header template to data_mp based on the ip_pkt_t
2279 2299 * and the passed in source, destination and protocol.
2280 2300 *
2281 2301 * Returns failure if can't allocate memory, in which case data_mp is freed.
2282 2302 * We allocate space for the transport header (ulp_hdr_len) and
2283 2303 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2284 2304 *
2285 2305 * We massage an routing option/header and return the ckecksum difference
2286 2306 * in *sump. This is in host byte order.
2287 2307 *
2288 2308 * Caller needs to update conn_wroff if desired.
2289 2309 */
2290 2310 mblk_t *
2291 2311 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2292 2312 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2293 2313 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2294 2314 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2295 2315 {
2296 2316 uint_t ip_hdr_length;
2297 2317 uchar_t *hdrs;
2298 2318 uint_t hdrs_len;
2299 2319 mblk_t *mp;
2300 2320
2301 2321 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2302 2322 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2303 2323 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2304 2324 } else {
2305 2325 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2306 2326 }
2307 2327 hdrs_len = ip_hdr_length + ulp_hdr_length;
2308 2328 ASSERT(hdrs_len != 0);
2309 2329
2310 2330 ixa->ixa_ip_hdr_length = ip_hdr_length;
2311 2331
2312 2332 /* Can we prepend to data_mp? */
2313 2333 if (data_mp != NULL &&
2314 2334 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2315 2335 data_mp->b_datap->db_ref == 1) {
2316 2336 hdrs = data_mp->b_rptr - hdrs_len;
2317 2337 data_mp->b_rptr = hdrs;
2318 2338 mp = data_mp;
2319 2339 } else {
2320 2340 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2321 2341 if (mp == NULL) {
2322 2342 freemsg(data_mp);
2323 2343 *errorp = ENOMEM;
2324 2344 return (NULL);
2325 2345 }
2326 2346 mp->b_wptr = mp->b_datap->db_lim;
2327 2347 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2328 2348 mp->b_cont = data_mp;
2329 2349 }
2330 2350
2331 2351 /*
2332 2352 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2333 2353 * if PKTINFO (aka IPPF_ADDR) was set.
2334 2354 */
2335 2355 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2336 2356 ipha_t *ipha = (ipha_t *)hdrs;
2337 2357
2338 2358 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2339 2359 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2340 2360 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2341 2361 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2342 2362 ipha->ipha_length = htons(hdrs_len + data_length);
2343 2363 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2344 2364 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2345 2365 else
2346 2366 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2347 2367
2348 2368 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2349 2369 *sump = cksum_massage_options_v4(ipha,
2350 2370 ixa->ixa_ipst->ips_netstack);
2351 2371 } else {
2352 2372 *sump = 0;
2353 2373 }
2354 2374 } else {
2355 2375 ip6_t *ip6h = (ip6_t *)hdrs;
2356 2376
2357 2377 ip6h->ip6_src = *v6src;
2358 2378 ip6h->ip6_dst = *v6dst;
2359 2379 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2360 2380 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2361 2381
2362 2382 if (ipp->ipp_fields & IPPF_RTHDR) {
2363 2383 *sump = cksum_massage_options_v6(ip6h,
2364 2384 ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2365 2385
2366 2386 /*
2367 2387 * Verify that the first hop isn't a mapped address.
2368 2388 * Routers along the path need to do this verification
2369 2389 * for subsequent hops.
2370 2390 */
2371 2391 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2372 2392 *errorp = EADDRNOTAVAIL;
2373 2393 freemsg(mp);
2374 2394 return (NULL);
2375 2395 }
2376 2396 } else {
2377 2397 *sump = 0;
2378 2398 }
2379 2399 }
2380 2400 return (mp);
2381 2401 }
2382 2402
2383 2403 /*
2384 2404 * Massage a source route if any putting the first hop
2385 2405 * in ipha_dst. Compute a starting value for the checksum which
2386 2406 * takes into account that the original ipha_dst should be
2387 2407 * included in the checksum but that IP will include the
2388 2408 * first hop from the source route in the tcp checksum.
2389 2409 */
2390 2410 static uint32_t
2391 2411 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2392 2412 {
2393 2413 in_addr_t dst;
2394 2414 uint32_t cksum;
2395 2415
2396 2416 /* Get last hop then diff against first hop */
2397 2417 cksum = ip_massage_options(ipha, ns);
2398 2418 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2399 2419 dst = ipha->ipha_dst;
2400 2420 cksum -= ((dst >> 16) + (dst & 0xffff));
2401 2421 if ((int)cksum < 0)
2402 2422 cksum--;
2403 2423 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2404 2424 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2405 2425 ASSERT(cksum < 0x10000);
2406 2426 return (ntohs(cksum));
2407 2427 }
2408 2428
2409 2429 static uint32_t
2410 2430 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2411 2431 {
2412 2432 uint8_t *end;
2413 2433 ip6_rthdr_t *rth;
2414 2434 uint32_t cksum;
2415 2435
2416 2436 end = (uint8_t *)ip6h + ip_hdr_len;
2417 2437 rth = ip_find_rthdr_v6(ip6h, end);
2418 2438 if (rth == NULL)
2419 2439 return (0);
2420 2440
2421 2441 cksum = ip_massage_options_v6(ip6h, rth, ns);
2422 2442 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2423 2443 ASSERT(cksum < 0x10000);
2424 2444 return (ntohs(cksum));
2425 2445 }
2426 2446
2427 2447 /*
2428 2448 * ULPs that change the destination address need to call this for each
2429 2449 * change to discard any state about a previous destination that might
2430 2450 * have been multicast or multirt.
2431 2451 */
2432 2452 void
2433 2453 ip_attr_newdst(ip_xmit_attr_t *ixa)
2434 2454 {
2435 2455 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2436 2456 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2437 2457 IXAF_NO_LOOP_ZONEID_SET);
2438 2458 }
2439 2459
2440 2460 /*
2441 2461 * Determine the nexthop which will be used.
2442 2462 * Normally this is just the destination, but if a IPv4 source route, or
2443 2463 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2444 2464 * there.
2445 2465 */
2446 2466 void
2447 2467 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2448 2468 const in6_addr_t *dst, in6_addr_t *nexthop)
2449 2469 {
2450 2470 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2451 2471 *nexthop = *dst;
2452 2472 return;
2453 2473 }
2454 2474 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2455 2475 ipaddr_t v4dst;
2456 2476 ipaddr_t v4nexthop;
2457 2477
2458 2478 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2459 2479 v4nexthop = ip_pkt_source_route_v4(ipp);
2460 2480 if (v4nexthop == INADDR_ANY)
2461 2481 v4nexthop = v4dst;
2462 2482
2463 2483 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2464 2484 } else {
2465 2485 const in6_addr_t *v6nexthop;
2466 2486
2467 2487 v6nexthop = ip_pkt_source_route_v6(ipp);
2468 2488 if (v6nexthop == NULL)
2469 2489 v6nexthop = dst;
2470 2490
2471 2491 *nexthop = *v6nexthop;
2472 2492 }
2473 2493 }
2474 2494
2475 2495 /*
2476 2496 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2477 2497 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2478 2498 * case (connected latching is done in conn_connect).
2479 2499 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2480 2500 * set, but doesn't otherwise use the conn_t.
2481 2501 *
2482 2502 * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2483 2503 * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2484 2504 *
2485 2505 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2486 2506 * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2487 2507 *
2488 2508 * Updates laddrp and uinfo if they are non-NULL.
2489 2509 *
2490 2510 * TSOL notes: The callers if ip_attr_connect must check if the destination
2491 2511 * is different than before and in that case redo conn_update_label.
2492 2512 * The callers of conn_connect do not need that since conn_connect
2493 2513 * performs the conn_update_label.
2494 2514 */
2495 2515 int
2496 2516 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2497 2517 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2498 2518 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2499 2519 iulp_t *uinfo, uint32_t flags)
2500 2520 {
2501 2521 in6_addr_t laddr = *v6src;
2502 2522 int error;
2503 2523
2504 2524 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2505 2525
2506 2526 if (connp->conn_zone_is_global)
2507 2527 flags |= IPDF_ZONE_IS_GLOBAL;
2508 2528 else
2509 2529 flags &= ~IPDF_ZONE_IS_GLOBAL;
2510 2530
2511 2531 /*
2512 2532 * Lookup the route to determine a source address and the uinfo.
2513 2533 * If the ULP has a source route option then the caller will
2514 2534 * have set v6nexthop to be the first hop.
2515 2535 */
2516 2536 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2517 2537 ipaddr_t v4dst;
2518 2538 ipaddr_t v4src, v4nexthop;
2519 2539
2520 2540 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2521 2541 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2522 2542 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2523 2543
2524 2544 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2525 2545 flags &= ~IPDF_SELECT_SRC;
2526 2546 else
2527 2547 flags |= IPDF_SELECT_SRC;
2528 2548
2529 2549 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2530 2550 uinfo, flags, connp->conn_mac_mode);
2531 2551 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2532 2552 } else {
2533 2553 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2534 2554 flags &= ~IPDF_SELECT_SRC;
2535 2555 else
2536 2556 flags |= IPDF_SELECT_SRC;
2537 2557
2538 2558 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2539 2559 uinfo, flags, connp->conn_mac_mode);
2540 2560 }
2541 2561 /* Pass out some address even if we hit a RTF_REJECT etc */
2542 2562 if (laddrp != NULL)
2543 2563 *laddrp = laddr;
2544 2564
2545 2565 if (error != 0)
2546 2566 return (error);
2547 2567
2548 2568 if (flags & IPDF_IPSEC) {
2549 2569 /*
2550 2570 * Set any IPsec policy in ixa. Routine also looks at ULP
2551 2571 * ports.
2552 2572 */
2553 2573 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2554 2574 }
2555 2575 return (0);
2556 2576 }
2557 2577
2558 2578 /*
2559 2579 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2560 2580 * Assumes that conn_faddr and conn_fport are already set. As such it is not
2561 2581 * usable for SCTP, since SCTP has multiple faddrs.
2562 2582 *
2563 2583 * Caller must hold conn_lock to provide atomic constency between the
2564 2584 * conn_t's addresses and the ixa.
2565 2585 * NOTE: this function drops and reaquires conn_lock since it can't be
2566 2586 * held across ip_attr_connect/ip_set_destination.
2567 2587 *
2568 2588 * The caller needs to handle inserting in the receive-side fanout when
2569 2589 * appropriate after conn_connect returns.
2570 2590 */
2571 2591 int
2572 2592 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2573 2593 {
2574 2594 ip_xmit_attr_t *ixa = connp->conn_ixa;
2575 2595 in6_addr_t nexthop;
2576 2596 in6_addr_t saddr, faddr;
2577 2597 in_port_t fport;
2578 2598 int error;
2579 2599
2580 2600 ASSERT(MUTEX_HELD(&connp->conn_lock));
2581 2601
2582 2602 if (connp->conn_ipversion == IPV4_VERSION)
2583 2603 ixa->ixa_flags |= IXAF_IS_IPV4;
2584 2604 else
2585 2605 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2586 2606
2587 2607 /* We do IPsec latching below - hence no caching in ip_attr_connect */
2588 2608 flags &= ~IPDF_IPSEC;
2589 2609
2590 2610 /* In case we had previously done an ip_attr_connect */
2591 2611 ip_attr_newdst(ixa);
2592 2612
2593 2613 /*
2594 2614 * Determine the nexthop and copy the addresses before dropping
2595 2615 * conn_lock.
2596 2616 */
2597 2617 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2598 2618 &connp->conn_faddr_v6, &nexthop);
2599 2619 saddr = connp->conn_saddr_v6;
2600 2620 faddr = connp->conn_faddr_v6;
2601 2621 fport = connp->conn_fport;
2602 2622
2603 2623 mutex_exit(&connp->conn_lock);
2604 2624 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2605 2625 &saddr, uinfo, flags | IPDF_VERIFY_DST);
2606 2626 mutex_enter(&connp->conn_lock);
2607 2627
2608 2628 /* Could have changed even if an error */
2609 2629 connp->conn_saddr_v6 = saddr;
2610 2630 if (error != 0)
2611 2631 return (error);
2612 2632
2613 2633 /*
2614 2634 * Check whether Trusted Solaris policy allows communication with this
2615 2635 * host, and pretend that the destination is unreachable if not.
2616 2636 * Compute any needed label and place it in ipp_label_v4/v6.
2617 2637 *
2618 2638 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2619 2639 * the packet.
2620 2640 *
2621 2641 * TSOL Note: Any concurrent threads would pick a different ixa
2622 2642 * (and ipp if they are to change the ipp) so we
2623 2643 * don't have to worry about concurrent threads.
2624 2644 */
2625 2645 if (is_system_labeled()) {
2626 2646 if (connp->conn_mlp_type != mlptSingle)
2627 2647 return (ECONNREFUSED);
2628 2648
2629 2649 /*
2630 2650 * conn_update_label will set ipp_label* which will later
2631 2651 * be used by conn_build_hdr_template.
2632 2652 */
2633 2653 error = conn_update_label(connp, ixa,
2634 2654 &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2635 2655 if (error != 0)
2636 2656 return (error);
2637 2657 }
2638 2658
2639 2659 /*
2640 2660 * Ensure that we match on the selected local address.
2641 2661 * This overrides conn_laddr in the case we had earlier bound to a
2642 2662 * multicast or broadcast address.
2643 2663 */
2644 2664 connp->conn_laddr_v6 = connp->conn_saddr_v6;
2645 2665
2646 2666 /*
2647 2667 * Allow setting new policies.
2648 2668 * The addresses/ports are already set, thus the IPsec policy calls
2649 2669 * can handle their passed-in conn's.
2650 2670 */
2651 2671 connp->conn_policy_cached = B_FALSE;
2652 2672
2653 2673 /*
2654 2674 * Cache IPsec policy in this conn. If we have per-socket policy,
2655 2675 * we'll cache that. If we don't, we'll inherit global policy.
2656 2676 *
2657 2677 * This is done before the caller inserts in the receive-side fanout.
2658 2678 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2659 2679 * for connections where we don't have a policy. This is to prevent
2660 2680 * global policy lookups in the inbound path.
2661 2681 *
2662 2682 * If we insert before we set conn_policy_cached,
2663 2683 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2664 2684 * because global policy cound be non-empty. We normally call
2665 2685 * ipsec_check_policy() for conn_policy_cached connections only if
2666 2686 * conn_in_enforce_policy is set. But in this case,
2667 2687 * conn_policy_cached can get set anytime since we made the
2668 2688 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2669 2689 * called, which will make the above assumption false. Thus, we
2670 2690 * need to insert after we set conn_policy_cached.
2671 2691 */
2672 2692 error = ipsec_conn_cache_policy(connp,
2673 2693 connp->conn_ipversion == IPV4_VERSION);
2674 2694 if (error != 0)
2675 2695 return (error);
2676 2696
2677 2697 /*
2678 2698 * We defer to do LSO check until here since now we have better idea
2679 2699 * whether IPsec is present. If the underlying ill is LSO capable,
2680 2700 * copy its capability in so the ULP can decide whether to enable LSO
2681 2701 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2682 2702 * claim LSO for IPv6.
2683 2703 *
2684 2704 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2685 2705 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2686 2706 */
2687 2707 ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2688 2708
2689 2709 ASSERT(ixa->ixa_ire != NULL);
2690 2710 if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2691 2711 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2692 2712 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2693 2713 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2694 2714 (ixa->ixa_nce != NULL) &&
2695 2715 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2696 2716 ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2697 2717 ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2698 2718 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2699 2719 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2700 2720 }
2701 2721
2702 2722 /* Check whether ZEROCOPY capability is usable for this connection. */
2703 2723 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2704 2724
2705 2725 if ((flags & IPDF_ZCOPY) &&
2706 2726 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2707 2727 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2708 2728 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2709 2729 (ixa->ixa_nce != NULL) &&
2710 2730 ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2711 2731 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2712 2732 }
2713 2733 return (0);
2714 2734 }
2715 2735
2716 2736 /*
2717 2737 * Predicates to check if the addresses match conn_last*
2718 2738 */
2719 2739
2720 2740 /*
2721 2741 * Compare the conn against an address.
2722 2742 * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2723 2743 */
2724 2744 boolean_t
2725 2745 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2726 2746 {
2727 2747 ASSERT(connp->conn_family == AF_INET);
2728 2748 return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2729 2749 sin->sin_port == connp->conn_lastdstport);
2730 2750 }
2731 2751
2732 2752 /*
2733 2753 * Compare, including for mapped addresses
2734 2754 */
2735 2755 boolean_t
2736 2756 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2737 2757 {
2738 2758 return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2739 2759 sin6->sin6_port == connp->conn_lastdstport &&
2740 2760 sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2741 2761 sin6->sin6_scope_id == connp->conn_lastscopeid);
2742 2762 }
2743 2763
2744 2764 /*
2745 2765 * Compute a label and place it in the ip_packet_t.
2746 2766 * Handles IPv4 and IPv6.
2747 2767 * The caller should have a correct ixa_tsl and ixa_zoneid and have
2748 2768 * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2749 2769 * has been called.
2750 2770 */
2751 2771 int
2752 2772 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2753 2773 const in6_addr_t *v6dst, ip_pkt_t *ipp)
2754 2774 {
2755 2775 int err;
2756 2776 ipaddr_t v4dst;
2757 2777
2758 2778 if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2759 2779 uchar_t opt_storage[IP_MAX_OPT_LENGTH];
2760 2780
2761 2781 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2762 2782
2763 2783 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2764 2784 v4dst, opt_storage, ixa->ixa_ipst);
2765 2785 if (err == 0) {
2766 2786 /* Length contained in opt_storage[IPOPT_OLEN] */
2767 2787 err = optcom_pkt_set(opt_storage,
2768 2788 opt_storage[IPOPT_OLEN],
2769 2789 (uchar_t **)&ipp->ipp_label_v4,
2770 2790 &ipp->ipp_label_len_v4);
2771 2791 }
2772 2792 if (err != 0) {
2773 2793 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2774 2794 char *, "conn(1) failed to update options(2) "
2775 2795 "on ixa(3)",
2776 2796 conn_t *, connp, char *, opt_storage,
2777 2797 ip_xmit_attr_t *, ixa);
2778 2798 }
2779 2799 if (ipp->ipp_label_len_v4 != 0)
2780 2800 ipp->ipp_fields |= IPPF_LABEL_V4;
2781 2801 else
2782 2802 ipp->ipp_fields &= ~IPPF_LABEL_V4;
2783 2803 } else {
2784 2804 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
2785 2805 uint_t optlen;
2786 2806
2787 2807 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2788 2808 v6dst, opt_storage, ixa->ixa_ipst);
2789 2809 if (err == 0) {
2790 2810 /*
2791 2811 * Note that ipp_label_v6 is just the option - not
2792 2812 * the hopopts extension header.
2793 2813 *
2794 2814 * Length contained in opt_storage[IPOPT_OLEN], but
2795 2815 * that doesn't include the two byte options header.
2796 2816 */
2797 2817 optlen = opt_storage[IPOPT_OLEN];
2798 2818 if (optlen != 0)
2799 2819 optlen += 2;
2800 2820
2801 2821 err = optcom_pkt_set(opt_storage, optlen,
2802 2822 (uchar_t **)&ipp->ipp_label_v6,
2803 2823 &ipp->ipp_label_len_v6);
2804 2824 }
2805 2825 if (err != 0) {
2806 2826 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2807 2827 char *, "conn(1) failed to update options(2) "
2808 2828 "on ixa(3)",
2809 2829 conn_t *, connp, char *, opt_storage,
2810 2830 ip_xmit_attr_t *, ixa);
2811 2831 }
2812 2832 if (ipp->ipp_label_len_v6 != 0)
2813 2833 ipp->ipp_fields |= IPPF_LABEL_V6;
2814 2834 else
2815 2835 ipp->ipp_fields &= ~IPPF_LABEL_V6;
2816 2836 }
2817 2837 return (err);
2818 2838 }
2819 2839
2820 2840 /*
2821 2841 * Inherit all options settings from the parent/listener to the eager.
2822 2842 * Returns zero on success; ENOMEM if memory allocation failed.
2823 2843 *
2824 2844 * We assume that the eager has not had any work done i.e., the conn_ixa
2825 2845 * and conn_xmit_ipp are all zero.
2826 2846 * Furthermore we assume that no other thread can access the eager (because
2827 2847 * it isn't inserted in any fanout list).
2828 2848 */
2829 2849 int
2830 2850 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2831 2851 {
2832 2852 cred_t *credp;
2833 2853 int err;
2834 2854 void *notify_cookie;
2835 2855 uint32_t xmit_hint;
2836 2856
2837 2857 econnp->conn_family = lconnp->conn_family;
2838 2858 econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2839 2859 econnp->conn_wq = lconnp->conn_wq;
2840 2860 econnp->conn_rq = lconnp->conn_rq;
2841 2861
2842 2862 /*
2843 2863 * Make a safe copy of the transmit attributes.
2844 2864 * conn_connect will later be used by the caller to setup the ire etc.
2845 2865 */
2846 2866 ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2847 2867 ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2848 2868 ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2849 2869 ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2850 2870
2851 2871 /* Preserve ixa_notify_cookie and xmit_hint */
2852 2872 notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2853 2873 xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2854 2874 ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2855 2875 econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2856 2876 econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2857 2877
2858 2878 econnp->conn_bound_if = lconnp->conn_bound_if;
2859 2879 econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2860 2880
2861 2881 /* Inherit all RECV options */
2862 2882 econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2863 2883
2864 2884 err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2865 2885 KM_NOSLEEP);
2866 2886 if (err != 0)
2867 2887 return (err);
2868 2888
2869 2889 econnp->conn_zoneid = lconnp->conn_zoneid;
2870 2890 econnp->conn_allzones = lconnp->conn_allzones;
2871 2891
2872 2892 /* This is odd. Pick a flowlabel for each connection instead? */
2873 2893 econnp->conn_flowinfo = lconnp->conn_flowinfo;
2874 2894
2875 2895 econnp->conn_default_ttl = lconnp->conn_default_ttl;
2876 2896
2877 2897 /*
2878 2898 * TSOL: tsol_input_proc() needs the eager's cred before the
2879 2899 * eager is accepted
2880 2900 */
2881 2901 ASSERT(lconnp->conn_cred != NULL);
2882 2902 econnp->conn_cred = credp = lconnp->conn_cred;
2883 2903 crhold(credp);
2884 2904 econnp->conn_cpid = lconnp->conn_cpid;
2885 2905 econnp->conn_open_time = ddi_get_lbolt64();
2886 2906
2887 2907 /*
2888 2908 * Cache things in the ixa without any refhold.
2889 2909 * Listener might not have set up ixa_cred
2890 2910 */
2891 2911 ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2892 2912 econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2893 2913 econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2894 2914 if (is_system_labeled())
2895 2915 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2896 2916
2897 2917 /*
2898 2918 * If the caller has the process-wide flag set, then default to MAC
2899 2919 * exempt mode. This allows read-down to unlabeled hosts.
2900 2920 */
2901 2921 if (getpflags(NET_MAC_AWARE, credp) != 0)
2902 2922 econnp->conn_mac_mode = CONN_MAC_AWARE;
2903 2923
2904 2924 econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2905 2925
2906 2926 /*
2907 2927 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2908 2928 * via soaccept()->soinheritoptions() which essentially applies
2909 2929 * all the listener options to the new connection. The options that we
2910 2930 * need to take care of are:
2911 2931 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2912 2932 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2913 2933 * SO_SNDBUF, SO_RCVBUF.
2914 2934 *
2915 2935 * SO_RCVBUF: conn_rcvbuf is set.
2916 2936 * SO_SNDBUF: conn_sndbuf is set.
2917 2937 */
2918 2938
2919 2939 /* Could we define a struct and use a struct copy for this? */
2920 2940 econnp->conn_sndbuf = lconnp->conn_sndbuf;
2921 2941 econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2922 2942 econnp->conn_sndlowat = lconnp->conn_sndlowat;
2923 2943 econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2924 2944 econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2925 2945 econnp->conn_oobinline = lconnp->conn_oobinline;
2926 2946 econnp->conn_debug = lconnp->conn_debug;
2927 2947 econnp->conn_keepalive = lconnp->conn_keepalive;
2928 2948 econnp->conn_linger = lconnp->conn_linger;
2929 2949 econnp->conn_lingertime = lconnp->conn_lingertime;
2930 2950
2931 2951 /* Set the IP options */
2932 2952 econnp->conn_broadcast = lconnp->conn_broadcast;
2933 2953 econnp->conn_useloopback = lconnp->conn_useloopback;
2934 2954 econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2935 2955 return (0);
2936 2956 }
|
↓ open down ↓ |
1736 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX