Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/ip/conn_opt.c
+++ new/usr/src/uts/common/inet/ip/conn_opt.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2016 Joyent, Inc.
25 25 */
26 26 /* Copyright (c) 1990 Mentat Inc. */
27 27
28 28 #include <sys/types.h>
29 29 #include <sys/stream.h>
30 30 #include <sys/strsun.h>
31 31 #define _SUN_TPI_VERSION 2
32 32 #include <sys/tihdr.h>
33 33 #include <sys/xti_inet.h>
34 34 #include <sys/ucred.h>
35 35 #include <sys/zone.h>
36 36 #include <sys/ddi.h>
37 37 #include <sys/sunddi.h>
38 38 #include <sys/cmn_err.h>
39 39 #include <sys/debug.h>
40 40 #include <sys/atomic.h>
41 41 #include <sys/policy.h>
42 42
43 43 #include <sys/systm.h>
44 44 #include <sys/param.h>
45 45 #include <sys/kmem.h>
46 46 #include <sys/sdt.h>
47 47 #include <sys/socket.h>
48 48 #include <sys/ethernet.h>
49 49 #include <sys/mac.h>
50 50 #include <net/if.h>
51 51 #include <net/if_types.h>
52 52 #include <net/if_arp.h>
53 53 #include <net/route.h>
54 54 #include <sys/sockio.h>
55 55 #include <netinet/in.h>
56 56 #include <net/if_dl.h>
57 57
58 58 #include <inet/common.h>
59 59 #include <inet/mi.h>
60 60 #include <inet/mib2.h>
61 61 #include <inet/nd.h>
62 62 #include <inet/arp.h>
63 63 #include <inet/snmpcom.h>
64 64 #include <inet/kstatcom.h>
65 65
66 66 #include <netinet/igmp_var.h>
67 67 #include <netinet/ip6.h>
68 68 #include <netinet/icmp6.h>
69 69 #include <netinet/sctp.h>
70 70
71 71 #include <inet/ip.h>
72 72 #include <inet/ip_impl.h>
73 73 #include <inet/ip6.h>
74 74 #include <inet/ip6_asp.h>
75 75 #include <inet/tcp.h>
76 76 #include <inet/ip_multi.h>
77 77 #include <inet/ip_if.h>
78 78 #include <inet/ip_ire.h>
79 79 #include <inet/ip_ftable.h>
80 80 #include <inet/ip_rts.h>
81 81 #include <inet/optcom.h>
82 82 #include <inet/ip_ndp.h>
83 83 #include <inet/ip_listutils.h>
84 84 #include <netinet/igmp.h>
85 85 #include <netinet/ip_mroute.h>
86 86 #include <netinet/udp.h>
87 87 #include <inet/ipp_common.h>
88 88
89 89 #include <net/pfkeyv2.h>
90 90 #include <inet/sadb.h>
91 91 #include <inet/ipsec_impl.h>
92 92 #include <inet/ipdrop.h>
93 93 #include <inet/ip_netinfo.h>
94 94
95 95 #include <inet/ipclassifier.h>
96 96 #include <inet/sctp_ip.h>
97 97 #include <inet/sctp/sctp_impl.h>
98 98 #include <inet/udp_impl.h>
99 99 #include <sys/sunddi.h>
100 100
101 101 #include <sys/tsol/label.h>
102 102 #include <sys/tsol/tnet.h>
103 103
104 104 /*
105 105 * Return how much size is needed for the different ancillary data items
106 106 */
107 107 uint_t
108 108 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
109 109 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
110 110 {
111 111 uint_t ancil_size;
112 112 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
113 113
114 114 /*
115 115 * If IP_RECVDSTADDR is set we include the destination IP
116 116 * address as an option. With IP_RECVOPTS we include all
117 117 * the IP options.
118 118 */
119 119 ancil_size = 0;
120 120 if (recv_ancillary.crb_recvdstaddr &&
121 121 (ira->ira_flags & IRAF_IS_IPV4)) {
122 122 ancil_size += sizeof (struct T_opthdr) +
123 123 sizeof (struct in_addr);
124 124 IP_STAT(ipst, conn_in_recvdstaddr);
125 125 }
126 126
127 127 /*
128 128 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
129 129 * are different
130 130 */
131 131 if (recv_ancillary.crb_ip_recvpktinfo &&
132 132 connp->conn_family == AF_INET) {
133 133 ancil_size += sizeof (struct T_opthdr) +
134 134 sizeof (struct in_pktinfo);
135 135 IP_STAT(ipst, conn_in_recvpktinfo);
136 136 }
137 137
138 138 if ((recv_ancillary.crb_recvopts) &&
139 139 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
140 140 ancil_size += sizeof (struct T_opthdr) +
141 141 ipp->ipp_ipv4_options_len;
142 142 IP_STAT(ipst, conn_in_recvopts);
143 143 }
144 144
145 145 if (recv_ancillary.crb_recvslla) {
146 146 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
147 147 ill_t *ill;
148 148
149 149 /* Make sure ira_l2src is setup if not already */
150 150 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
151 151 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
152 152 ipst);
153 153 if (ill != NULL) {
154 154 ip_setl2src(mp, ira, ill);
155 155 ill_refrele(ill);
156 156 }
157 157 }
158 158 ancil_size += sizeof (struct T_opthdr) +
159 159 sizeof (struct sockaddr_dl);
160 160 IP_STAT(ipst, conn_in_recvslla);
161 161 }
162 162
163 163 if (recv_ancillary.crb_recvif) {
164 164 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
165 165 IP_STAT(ipst, conn_in_recvif);
166 166 }
167 167
168 168 /*
169 169 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
170 170 * are different
171 171 */
172 172 if (recv_ancillary.crb_ip_recvpktinfo &&
173 173 connp->conn_family == AF_INET6) {
174 174 ancil_size += sizeof (struct T_opthdr) +
175 175 sizeof (struct in6_pktinfo);
176 176 IP_STAT(ipst, conn_in_recvpktinfo);
177 177 }
178 178
179 179 if (recv_ancillary.crb_ipv6_recvhoplimit) {
180 180 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
181 181 IP_STAT(ipst, conn_in_recvhoplimit);
182 182 }
183 183
184 184 if (recv_ancillary.crb_ipv6_recvtclass) {
185 185 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
186 186 IP_STAT(ipst, conn_in_recvtclass);
187 187 }
188 188
189 189 if (recv_ancillary.crb_ipv6_recvhopopts &&
190 190 (ipp->ipp_fields & IPPF_HOPOPTS)) {
191 191 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
192 192 IP_STAT(ipst, conn_in_recvhopopts);
193 193 }
194 194 /*
195 195 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
196 196 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
197 197 * options that appear before a routing header.
198 198 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
199 199 */
200 200 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
201 201 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
202 202 (recv_ancillary.crb_ipv6_recvdstopts &&
203 203 recv_ancillary.crb_ipv6_recvrthdr)) {
204 204 ancil_size += sizeof (struct T_opthdr) +
205 205 ipp->ipp_rthdrdstoptslen;
206 206 IP_STAT(ipst, conn_in_recvrthdrdstopts);
207 207 }
208 208 }
209 209 if ((recv_ancillary.crb_ipv6_recvrthdr) &&
210 210 (ipp->ipp_fields & IPPF_RTHDR)) {
211 211 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
212 212 IP_STAT(ipst, conn_in_recvrthdr);
213 213 }
214 214 if ((recv_ancillary.crb_ipv6_recvdstopts ||
215 215 recv_ancillary.crb_old_ipv6_recvdstopts) &&
216 216 (ipp->ipp_fields & IPPF_DSTOPTS)) {
217 217 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
218 218 IP_STAT(ipst, conn_in_recvdstopts);
219 219 }
220 220 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
221 221 ancil_size += sizeof (struct T_opthdr) +
222 222 ucredminsize(ira->ira_cred);
223 223 IP_STAT(ipst, conn_in_recvucred);
224 224 }
225 225
226 226 /*
227 227 * If SO_TIMESTAMP is set allocate the appropriate sized
228 228 * buffer. Since gethrestime() expects a pointer aligned
229 229 * argument, we allocate space necessary for extra
230 230 * alignment (even though it might not be used).
231 231 */
232 232 if (recv_ancillary.crb_timestamp) {
233 233 ancil_size += sizeof (struct T_opthdr) +
234 234 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
235 235 IP_STAT(ipst, conn_in_timestamp);
236 236 }
237 237
238 238 /*
239 239 * If IP_RECVTTL is set allocate the appropriate sized buffer
240 240 */
241 241 if (recv_ancillary.crb_recvttl &&
242 242 (ira->ira_flags & IRAF_IS_IPV4)) {
243 243 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
244 244 IP_STAT(ipst, conn_in_recvttl);
245 245 }
246 246
247 247 return (ancil_size);
248 248 }
249 249
250 250 /*
251 251 * Lay down the ancillary data items at "ancil_buf".
252 252 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
253 253 * large buffer - ancil_size.
254 254 */
255 255 void
256 256 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
257 257 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
258 258 {
259 259 /*
260 260 * Copy in destination address before options to avoid
261 261 * any padding issues.
262 262 */
263 263 if (recv_ancillary.crb_recvdstaddr &&
264 264 (ira->ira_flags & IRAF_IS_IPV4)) {
265 265 struct T_opthdr *toh;
266 266 ipaddr_t *dstptr;
267 267
268 268 toh = (struct T_opthdr *)ancil_buf;
269 269 toh->level = IPPROTO_IP;
270 270 toh->name = IP_RECVDSTADDR;
271 271 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
272 272 toh->status = 0;
273 273 ancil_buf += sizeof (struct T_opthdr);
274 274 dstptr = (ipaddr_t *)ancil_buf;
275 275 *dstptr = ipp->ipp_addr_v4;
276 276 ancil_buf += sizeof (ipaddr_t);
277 277 ancil_size -= toh->len;
278 278 }
279 279
280 280 /*
281 281 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
282 282 * are different
283 283 */
284 284 if (recv_ancillary.crb_ip_recvpktinfo &&
285 285 connp->conn_family == AF_INET) {
286 286 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
287 287 struct T_opthdr *toh;
288 288 struct in_pktinfo *pktinfop;
289 289 ill_t *ill;
290 290 ipif_t *ipif;
291 291
292 292 toh = (struct T_opthdr *)ancil_buf;
293 293 toh->level = IPPROTO_IP;
294 294 toh->name = IP_PKTINFO;
295 295 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
296 296 toh->status = 0;
297 297 ancil_buf += sizeof (struct T_opthdr);
298 298 pktinfop = (struct in_pktinfo *)ancil_buf;
299 299
300 300 pktinfop->ipi_ifindex = ira->ira_ruifindex;
301 301 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
302 302
303 303 /* Find a good address to report */
304 304 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
305 305 if (ill != NULL) {
306 306 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
307 307 if (ipif != NULL) {
308 308 pktinfop->ipi_spec_dst.s_addr =
309 309 ipif->ipif_lcl_addr;
310 310 ipif_refrele(ipif);
311 311 }
312 312 ill_refrele(ill);
313 313 }
314 314 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
315 315 ancil_buf += sizeof (struct in_pktinfo);
316 316 ancil_size -= toh->len;
317 317 }
318 318
319 319 if ((recv_ancillary.crb_recvopts) &&
320 320 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
321 321 struct T_opthdr *toh;
322 322
323 323 toh = (struct T_opthdr *)ancil_buf;
324 324 toh->level = IPPROTO_IP;
325 325 toh->name = IP_RECVOPTS;
326 326 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
327 327 toh->status = 0;
328 328 ancil_buf += sizeof (struct T_opthdr);
329 329 bcopy(ipp->ipp_ipv4_options, ancil_buf,
330 330 ipp->ipp_ipv4_options_len);
331 331 ancil_buf += ipp->ipp_ipv4_options_len;
332 332 ancil_size -= toh->len;
333 333 }
334 334
335 335 if (recv_ancillary.crb_recvslla) {
336 336 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
337 337 struct T_opthdr *toh;
338 338 struct sockaddr_dl *dstptr;
339 339 ill_t *ill;
340 340 int alen = 0;
341 341
342 342 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
343 343 if (ill != NULL)
344 344 alen = ill->ill_phys_addr_length;
345 345
346 346 /*
347 347 * For loopback multicast and broadcast the packet arrives
348 348 * with ira_ruifdex being the physical interface, but
349 349 * ira_l2src is all zero since ip_postfrag_loopback doesn't
350 350 * know our l2src. We don't report the address in that case.
351 351 */
352 352 if (ira->ira_flags & IRAF_LOOPBACK)
353 353 alen = 0;
354 354
355 355 toh = (struct T_opthdr *)ancil_buf;
356 356 toh->level = IPPROTO_IP;
357 357 toh->name = IP_RECVSLLA;
358 358 toh->len = sizeof (struct T_opthdr) +
359 359 sizeof (struct sockaddr_dl);
360 360 toh->status = 0;
361 361 ancil_buf += sizeof (struct T_opthdr);
362 362 dstptr = (struct sockaddr_dl *)ancil_buf;
363 363 dstptr->sdl_family = AF_LINK;
364 364 dstptr->sdl_index = ira->ira_ruifindex;
365 365 if (ill != NULL)
366 366 dstptr->sdl_type = ill->ill_type;
367 367 else
368 368 dstptr->sdl_type = 0;
369 369 dstptr->sdl_nlen = 0;
370 370 dstptr->sdl_alen = alen;
371 371 dstptr->sdl_slen = 0;
372 372 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
373 373 ancil_buf += sizeof (struct sockaddr_dl);
374 374 ancil_size -= toh->len;
375 375 if (ill != NULL)
376 376 ill_refrele(ill);
377 377 }
378 378
379 379 if (recv_ancillary.crb_recvif) {
380 380 struct T_opthdr *toh;
381 381 uint_t *dstptr;
382 382
383 383 toh = (struct T_opthdr *)ancil_buf;
384 384 toh->level = IPPROTO_IP;
385 385 toh->name = IP_RECVIF;
386 386 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
387 387 toh->status = 0;
388 388 ancil_buf += sizeof (struct T_opthdr);
389 389 dstptr = (uint_t *)ancil_buf;
390 390 *dstptr = ira->ira_ruifindex;
391 391 ancil_buf += sizeof (uint_t);
392 392 ancil_size -= toh->len;
393 393 }
394 394
395 395 /*
396 396 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
397 397 * are different
398 398 */
399 399 if (recv_ancillary.crb_ip_recvpktinfo &&
400 400 connp->conn_family == AF_INET6) {
401 401 struct T_opthdr *toh;
402 402 struct in6_pktinfo *pkti;
403 403
404 404 toh = (struct T_opthdr *)ancil_buf;
405 405 toh->level = IPPROTO_IPV6;
406 406 toh->name = IPV6_PKTINFO;
407 407 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
408 408 toh->status = 0;
409 409 ancil_buf += sizeof (struct T_opthdr);
410 410 pkti = (struct in6_pktinfo *)ancil_buf;
411 411 if (ira->ira_flags & IRAF_IS_IPV4) {
412 412 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
413 413 &pkti->ipi6_addr);
414 414 } else {
415 415 pkti->ipi6_addr = ipp->ipp_addr;
416 416 }
417 417 pkti->ipi6_ifindex = ira->ira_ruifindex;
418 418
419 419 ancil_buf += sizeof (*pkti);
420 420 ancil_size -= toh->len;
421 421 }
422 422 if (recv_ancillary.crb_ipv6_recvhoplimit) {
423 423 struct T_opthdr *toh;
424 424
425 425 toh = (struct T_opthdr *)ancil_buf;
426 426 toh->level = IPPROTO_IPV6;
427 427 toh->name = IPV6_HOPLIMIT;
428 428 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
429 429 toh->status = 0;
430 430 ancil_buf += sizeof (struct T_opthdr);
431 431 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
432 432 ancil_buf += sizeof (uint_t);
433 433 ancil_size -= toh->len;
434 434 }
435 435 if (recv_ancillary.crb_ipv6_recvtclass) {
436 436 struct T_opthdr *toh;
437 437
438 438 toh = (struct T_opthdr *)ancil_buf;
439 439 toh->level = IPPROTO_IPV6;
440 440 toh->name = IPV6_TCLASS;
441 441 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
442 442 toh->status = 0;
443 443 ancil_buf += sizeof (struct T_opthdr);
444 444
445 445 if (ira->ira_flags & IRAF_IS_IPV4)
446 446 *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
447 447 else
448 448 *(uint_t *)ancil_buf = ipp->ipp_tclass;
449 449 ancil_buf += sizeof (uint_t);
450 450 ancil_size -= toh->len;
451 451 }
452 452 if (recv_ancillary.crb_ipv6_recvhopopts &&
453 453 (ipp->ipp_fields & IPPF_HOPOPTS)) {
454 454 struct T_opthdr *toh;
455 455
456 456 toh = (struct T_opthdr *)ancil_buf;
457 457 toh->level = IPPROTO_IPV6;
458 458 toh->name = IPV6_HOPOPTS;
459 459 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
460 460 toh->status = 0;
461 461 ancil_buf += sizeof (struct T_opthdr);
462 462 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
463 463 ancil_buf += ipp->ipp_hopoptslen;
464 464 ancil_size -= toh->len;
465 465 }
466 466 /*
467 467 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
468 468 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
469 469 * options that appear before a routing header.
470 470 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
471 471 */
472 472 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
473 473 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
474 474 (recv_ancillary.crb_ipv6_recvdstopts &&
475 475 recv_ancillary.crb_ipv6_recvrthdr)) {
476 476 struct T_opthdr *toh;
477 477
478 478 toh = (struct T_opthdr *)ancil_buf;
479 479 toh->level = IPPROTO_IPV6;
480 480 toh->name = IPV6_DSTOPTS;
481 481 toh->len = sizeof (struct T_opthdr) +
482 482 ipp->ipp_rthdrdstoptslen;
483 483 toh->status = 0;
484 484 ancil_buf += sizeof (struct T_opthdr);
485 485 bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
486 486 ipp->ipp_rthdrdstoptslen);
487 487 ancil_buf += ipp->ipp_rthdrdstoptslen;
488 488 ancil_size -= toh->len;
489 489 }
490 490 }
491 491 if (recv_ancillary.crb_ipv6_recvrthdr &&
492 492 (ipp->ipp_fields & IPPF_RTHDR)) {
493 493 struct T_opthdr *toh;
494 494
495 495 toh = (struct T_opthdr *)ancil_buf;
496 496 toh->level = IPPROTO_IPV6;
497 497 toh->name = IPV6_RTHDR;
498 498 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
499 499 toh->status = 0;
500 500 ancil_buf += sizeof (struct T_opthdr);
501 501 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
502 502 ancil_buf += ipp->ipp_rthdrlen;
503 503 ancil_size -= toh->len;
504 504 }
505 505 if ((recv_ancillary.crb_ipv6_recvdstopts ||
506 506 recv_ancillary.crb_old_ipv6_recvdstopts) &&
507 507 (ipp->ipp_fields & IPPF_DSTOPTS)) {
508 508 struct T_opthdr *toh;
509 509
510 510 toh = (struct T_opthdr *)ancil_buf;
511 511 toh->level = IPPROTO_IPV6;
512 512 toh->name = IPV6_DSTOPTS;
513 513 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
514 514 toh->status = 0;
515 515 ancil_buf += sizeof (struct T_opthdr);
516 516 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
517 517 ancil_buf += ipp->ipp_dstoptslen;
518 518 ancil_size -= toh->len;
519 519 }
520 520
521 521 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
522 522 struct T_opthdr *toh;
523 523 cred_t *rcr = connp->conn_cred;
524 524
525 525 toh = (struct T_opthdr *)ancil_buf;
526 526 toh->level = SOL_SOCKET;
527 527 toh->name = SCM_UCRED;
528 528 toh->len = sizeof (struct T_opthdr) +
529 529 ucredminsize(ira->ira_cred);
530 530 toh->status = 0;
531 531 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
532 532 ancil_buf += toh->len;
533 533 ancil_size -= toh->len;
534 534 }
535 535 if (recv_ancillary.crb_timestamp) {
536 536 struct T_opthdr *toh;
537 537
538 538 toh = (struct T_opthdr *)ancil_buf;
539 539 toh->level = SOL_SOCKET;
540 540 toh->name = SCM_TIMESTAMP;
541 541 toh->len = sizeof (struct T_opthdr) +
542 542 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
543 543 toh->status = 0;
544 544 ancil_buf += sizeof (struct T_opthdr);
545 545 /* Align for gethrestime() */
546 546 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
547 547 sizeof (intptr_t));
548 548 gethrestime((timestruc_t *)ancil_buf);
549 549 ancil_buf = (uchar_t *)toh + toh->len;
550 550 ancil_size -= toh->len;
551 551 }
552 552
553 553 /*
554 554 * CAUTION:
555 555 * Due to aligment issues
556 556 * Processing of IP_RECVTTL option
557 557 * should always be the last. Adding
558 558 * any option processing after this will
559 559 * cause alignment panic.
560 560 */
561 561 if (recv_ancillary.crb_recvttl &&
562 562 (ira->ira_flags & IRAF_IS_IPV4)) {
563 563 struct T_opthdr *toh;
564 564 uint8_t *dstptr;
565 565
566 566 toh = (struct T_opthdr *)ancil_buf;
567 567 toh->level = IPPROTO_IP;
568 568 toh->name = IP_RECVTTL;
569 569 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
570 570 toh->status = 0;
571 571 ancil_buf += sizeof (struct T_opthdr);
572 572 dstptr = (uint8_t *)ancil_buf;
573 573 *dstptr = ipp->ipp_hoplimit;
574 574 ancil_buf += sizeof (uint8_t);
575 575 ancil_size -= toh->len;
576 576 }
577 577
578 578 /* Consumed all of allocated space */
579 579 ASSERT(ancil_size == 0);
580 580
581 581 }
582 582
583 583 /*
584 584 * This routine retrieves the current status of socket options.
585 585 * It returns the size of the option retrieved, or -1.
586 586 */
587 587 int
588 588 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
589 589 uchar_t *ptr)
590 590 {
591 591 int *i1 = (int *)ptr;
592 592 conn_t *connp = coa->coa_connp;
593 593 ip_xmit_attr_t *ixa = coa->coa_ixa;
594 594 ip_pkt_t *ipp = coa->coa_ipp;
595 595 ip_stack_t *ipst = ixa->ixa_ipst;
596 596 uint_t len;
597 597
598 598 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
599 599
600 600 switch (level) {
601 601 case SOL_SOCKET:
602 602 switch (name) {
603 603 case SO_DEBUG:
604 604 *i1 = connp->conn_debug ? SO_DEBUG : 0;
605 605 break; /* goto sizeof (int) option return */
606 606 case SO_KEEPALIVE:
607 607 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
608 608 break;
609 609 case SO_LINGER: {
610 610 struct linger *lgr = (struct linger *)ptr;
611 611
612 612 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
613 613 lgr->l_linger = connp->conn_lingertime;
614 614 }
615 615 return (sizeof (struct linger));
616 616
617 617 case SO_OOBINLINE:
618 618 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
619 619 break;
620 620 case SO_REUSEADDR:
621 621 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
622 622 break; /* goto sizeof (int) option return */
623 623 case SO_REUSEPORT:
624 624 *i1 = connp->conn_reuseport;
625 625 break; /* goto sizeof (int) option return */
626 626 case SO_TYPE:
627 627 *i1 = connp->conn_so_type;
628 628 break; /* goto sizeof (int) option return */
629 629 case SO_DONTROUTE:
630 630 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
631 631 SO_DONTROUTE : 0;
632 632 break; /* goto sizeof (int) option return */
633 633 case SO_USELOOPBACK:
634 634 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
635 635 break; /* goto sizeof (int) option return */
636 636 case SO_BROADCAST:
637 637 *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
638 638 break; /* goto sizeof (int) option return */
639 639
640 640 case SO_SNDBUF:
641 641 *i1 = connp->conn_sndbuf;
642 642 break; /* goto sizeof (int) option return */
643 643 case SO_RCVBUF:
644 644 *i1 = connp->conn_rcvbuf;
645 645 break; /* goto sizeof (int) option return */
646 646 case SO_RCVTIMEO:
647 647 case SO_SNDTIMEO:
648 648 /*
649 649 * Pass these two options in order for third part
650 650 * protocol usage. Here just return directly.
651 651 */
652 652 *i1 = 0;
653 653 break;
654 654 case SO_DGRAM_ERRIND:
655 655 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
656 656 break; /* goto sizeof (int) option return */
657 657 case SO_RECVUCRED:
658 658 *i1 = connp->conn_recv_ancillary.crb_recvucred;
659 659 break; /* goto sizeof (int) option return */
660 660 case SO_TIMESTAMP:
661 661 *i1 = connp->conn_recv_ancillary.crb_timestamp;
662 662 break; /* goto sizeof (int) option return */
663 663 case SO_VRRP:
664 664 *i1 = connp->conn_isvrrp;
665 665 break; /* goto sizeof (int) option return */
666 666 case SO_ANON_MLP:
667 667 *i1 = connp->conn_anon_mlp;
668 668 break; /* goto sizeof (int) option return */
669 669 case SO_MAC_EXEMPT:
670 670 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
671 671 break; /* goto sizeof (int) option return */
672 672 case SO_MAC_IMPLICIT:
673 673 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
674 674 break; /* goto sizeof (int) option return */
675 675 case SO_ALLZONES:
676 676 *i1 = connp->conn_allzones;
677 677 break; /* goto sizeof (int) option return */
678 678 case SO_EXCLBIND:
679 679 *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
680 680 break;
681 681 case SO_PROTOTYPE:
682 682 *i1 = connp->conn_proto;
683 683 break;
684 684
685 685 case SO_DOMAIN:
686 686 *i1 = connp->conn_family;
687 687 break;
688 688 default:
689 689 return (-1);
690 690 }
691 691 break;
692 692 case IPPROTO_IP:
693 693 if (connp->conn_family != AF_INET)
694 694 return (-1);
695 695 switch (name) {
696 696 case IP_OPTIONS:
697 697 case T_IP_OPTIONS:
698 698 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
699 699 return (0);
700 700
701 701 len = ipp->ipp_ipv4_options_len;
702 702 if (len > 0) {
703 703 bcopy(ipp->ipp_ipv4_options, ptr, len);
704 704 }
705 705 return (len);
706 706
707 707 case IP_PKTINFO: {
708 708 /*
709 709 * This also handles IP_RECVPKTINFO.
710 710 * IP_PKTINFO and IP_RECVPKTINFO have same value.
711 711 * Differentiation is based on the size of the
712 712 * argument passed in.
713 713 */
714 714 struct in_pktinfo *pktinfo;
715 715
716 716 #ifdef notdef
717 717 /* optcom doesn't provide a length with "get" */
718 718 if (inlen == sizeof (int)) {
719 719 /* This is IP_RECVPKTINFO option. */
720 720 *i1 = connp->conn_recv_ancillary.
721 721 crb_ip_recvpktinfo;
722 722 return (sizeof (int));
723 723 }
724 724 #endif
725 725 /* XXX assumes that caller has room for max size! */
726 726
727 727 pktinfo = (struct in_pktinfo *)ptr;
728 728 pktinfo->ipi_ifindex = ixa->ixa_ifindex;
729 729 if (ipp->ipp_fields & IPPF_ADDR)
730 730 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
731 731 else
732 732 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
733 733 return (sizeof (struct in_pktinfo));
734 734 }
735 735 case IP_DONTFRAG:
736 736 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
737 737 return (sizeof (int));
738 738 case IP_TOS:
739 739 case T_IP_TOS:
740 740 *i1 = (int)ipp->ipp_type_of_service;
741 741 break; /* goto sizeof (int) option return */
742 742 case IP_TTL:
743 743 *i1 = (int)ipp->ipp_unicast_hops;
744 744 break; /* goto sizeof (int) option return */
745 745 case IP_DHCPINIT_IF:
746 746 return (-1);
747 747 case IP_NEXTHOP:
748 748 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
749 749 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
750 750 return (sizeof (ipaddr_t));
751 751 } else {
752 752 return (0);
753 753 }
754 754
755 755 case IP_MULTICAST_IF:
756 756 /* 0 address if not set */
757 757 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
758 758 return (sizeof (ipaddr_t));
759 759 case IP_MULTICAST_TTL:
760 760 *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
761 761 return (sizeof (uchar_t));
762 762 case IP_MULTICAST_LOOP:
763 763 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
764 764 return (sizeof (uint8_t));
765 765 case IP_RECVOPTS:
766 766 *i1 = connp->conn_recv_ancillary.crb_recvopts;
767 767 break; /* goto sizeof (int) option return */
768 768 case IP_RECVDSTADDR:
769 769 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
770 770 break; /* goto sizeof (int) option return */
771 771 case IP_RECVIF:
772 772 *i1 = connp->conn_recv_ancillary.crb_recvif;
773 773 break; /* goto sizeof (int) option return */
774 774 case IP_RECVSLLA:
775 775 *i1 = connp->conn_recv_ancillary.crb_recvslla;
776 776 break; /* goto sizeof (int) option return */
777 777 case IP_RECVTTL:
778 778 *i1 = connp->conn_recv_ancillary.crb_recvttl;
779 779 break; /* goto sizeof (int) option return */
780 780 case IP_ADD_MEMBERSHIP:
781 781 case IP_DROP_MEMBERSHIP:
782 782 case MCAST_JOIN_GROUP:
783 783 case MCAST_LEAVE_GROUP:
784 784 case IP_BLOCK_SOURCE:
785 785 case IP_UNBLOCK_SOURCE:
786 786 case IP_ADD_SOURCE_MEMBERSHIP:
787 787 case IP_DROP_SOURCE_MEMBERSHIP:
788 788 case MCAST_BLOCK_SOURCE:
789 789 case MCAST_UNBLOCK_SOURCE:
790 790 case MCAST_JOIN_SOURCE_GROUP:
791 791 case MCAST_LEAVE_SOURCE_GROUP:
792 792 case MRT_INIT:
793 793 case MRT_DONE:
794 794 case MRT_ADD_VIF:
795 795 case MRT_DEL_VIF:
796 796 case MRT_ADD_MFC:
797 797 case MRT_DEL_MFC:
798 798 /* cannot "get" the value for these */
799 799 return (-1);
800 800 case MRT_VERSION:
801 801 case MRT_ASSERT:
802 802 (void) ip_mrouter_get(name, connp, ptr);
803 803 return (sizeof (int));
804 804 case IP_SEC_OPT:
805 805 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
806 806 IPSEC_AF_V4));
807 807 case IP_BOUND_IF:
808 808 /* Zero if not set */
809 809 *i1 = connp->conn_bound_if;
810 810 break; /* goto sizeof (int) option return */
811 811 case IP_UNSPEC_SRC:
812 812 *i1 = connp->conn_unspec_src;
813 813 break; /* goto sizeof (int) option return */
814 814 case IP_BROADCAST_TTL:
815 815 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
816 816 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
817 817 else
818 818 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
819 819 return (sizeof (uchar_t));
820 820 default:
821 821 return (-1);
822 822 }
823 823 break;
824 824 case IPPROTO_IPV6:
825 825 if (connp->conn_family != AF_INET6)
826 826 return (-1);
827 827 switch (name) {
828 828 case IPV6_UNICAST_HOPS:
829 829 *i1 = (int)ipp->ipp_unicast_hops;
830 830 break; /* goto sizeof (int) option return */
831 831 case IPV6_MULTICAST_IF:
832 832 /* 0 index if not set */
833 833 *i1 = ixa->ixa_multicast_ifindex;
834 834 break; /* goto sizeof (int) option return */
835 835 case IPV6_MULTICAST_HOPS:
836 836 *i1 = ixa->ixa_multicast_ttl;
837 837 break; /* goto sizeof (int) option return */
838 838 case IPV6_MULTICAST_LOOP:
839 839 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
840 840 break; /* goto sizeof (int) option return */
841 841 case IPV6_JOIN_GROUP:
842 842 case IPV6_LEAVE_GROUP:
843 843 case MCAST_JOIN_GROUP:
844 844 case MCAST_LEAVE_GROUP:
845 845 case MCAST_BLOCK_SOURCE:
846 846 case MCAST_UNBLOCK_SOURCE:
847 847 case MCAST_JOIN_SOURCE_GROUP:
848 848 case MCAST_LEAVE_SOURCE_GROUP:
849 849 /* cannot "get" the value for these */
850 850 return (-1);
851 851 case IPV6_BOUND_IF:
852 852 /* Zero if not set */
853 853 *i1 = connp->conn_bound_if;
854 854 break; /* goto sizeof (int) option return */
855 855 case IPV6_UNSPEC_SRC:
856 856 *i1 = connp->conn_unspec_src;
857 857 break; /* goto sizeof (int) option return */
858 858 case IPV6_RECVPKTINFO:
859 859 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
860 860 break; /* goto sizeof (int) option return */
861 861 case IPV6_RECVTCLASS:
862 862 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
863 863 break; /* goto sizeof (int) option return */
864 864 case IPV6_RECVPATHMTU:
865 865 *i1 = connp->conn_ipv6_recvpathmtu;
866 866 break; /* goto sizeof (int) option return */
867 867 case IPV6_RECVHOPLIMIT:
868 868 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
869 869 break; /* goto sizeof (int) option return */
870 870 case IPV6_RECVHOPOPTS:
871 871 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
872 872 break; /* goto sizeof (int) option return */
873 873 case IPV6_RECVDSTOPTS:
874 874 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
875 875 break; /* goto sizeof (int) option return */
876 876 case _OLD_IPV6_RECVDSTOPTS:
877 877 *i1 =
878 878 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
879 879 break; /* goto sizeof (int) option return */
880 880 case IPV6_RECVRTHDRDSTOPTS:
881 881 *i1 = connp->conn_recv_ancillary.
882 882 crb_ipv6_recvrthdrdstopts;
883 883 break; /* goto sizeof (int) option return */
884 884 case IPV6_RECVRTHDR:
885 885 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
886 886 break; /* goto sizeof (int) option return */
887 887 case IPV6_PKTINFO: {
888 888 /* XXX assumes that caller has room for max size! */
889 889 struct in6_pktinfo *pkti;
890 890
891 891 pkti = (struct in6_pktinfo *)ptr;
892 892 pkti->ipi6_ifindex = ixa->ixa_ifindex;
893 893 if (ipp->ipp_fields & IPPF_ADDR)
894 894 pkti->ipi6_addr = ipp->ipp_addr;
895 895 else
896 896 pkti->ipi6_addr = ipv6_all_zeros;
897 897 return (sizeof (struct in6_pktinfo));
898 898 }
899 899 case IPV6_TCLASS:
900 900 *i1 = ipp->ipp_tclass;
901 901 break; /* goto sizeof (int) option return */
902 902 case IPV6_NEXTHOP: {
903 903 sin6_t *sin6 = (sin6_t *)ptr;
904 904
905 905 if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
906 906 return (0);
907 907
908 908 *sin6 = sin6_null;
909 909 sin6->sin6_family = AF_INET6;
910 910 sin6->sin6_addr = ixa->ixa_nexthop_v6;
911 911
912 912 return (sizeof (sin6_t));
913 913 }
914 914 case IPV6_HOPOPTS:
915 915 if (!(ipp->ipp_fields & IPPF_HOPOPTS))
916 916 return (0);
917 917 bcopy(ipp->ipp_hopopts, ptr,
918 918 ipp->ipp_hopoptslen);
919 919 return (ipp->ipp_hopoptslen);
920 920 case IPV6_RTHDRDSTOPTS:
921 921 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
922 922 return (0);
923 923 bcopy(ipp->ipp_rthdrdstopts, ptr,
924 924 ipp->ipp_rthdrdstoptslen);
925 925 return (ipp->ipp_rthdrdstoptslen);
926 926 case IPV6_RTHDR:
927 927 if (!(ipp->ipp_fields & IPPF_RTHDR))
928 928 return (0);
929 929 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
930 930 return (ipp->ipp_rthdrlen);
931 931 case IPV6_DSTOPTS:
932 932 if (!(ipp->ipp_fields & IPPF_DSTOPTS))
933 933 return (0);
934 934 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
935 935 return (ipp->ipp_dstoptslen);
936 936 case IPV6_PATHMTU:
937 937 return (ip_fill_mtuinfo(connp, ixa,
938 938 (struct ip6_mtuinfo *)ptr));
939 939 case IPV6_SEC_OPT:
940 940 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
941 941 IPSEC_AF_V6));
942 942 case IPV6_SRC_PREFERENCES:
943 943 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
944 944 case IPV6_DONTFRAG:
945 945 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
946 946 return (sizeof (int));
947 947 case IPV6_USE_MIN_MTU:
948 948 if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
949 949 *i1 = ixa->ixa_use_min_mtu;
950 950 else
951 951 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
952 952 break;
953 953 case IPV6_V6ONLY:
954 954 *i1 = connp->conn_ipv6_v6only;
955 955 return (sizeof (int));
956 956 default:
957 957 return (-1);
958 958 }
959 959 break;
960 960 case IPPROTO_UDP:
961 961 switch (name) {
962 962 case UDP_ANONPRIVBIND:
963 963 *i1 = connp->conn_anon_priv_bind;
964 964 break;
965 965 case UDP_EXCLBIND:
966 966 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
967 967 break;
968 968 default:
969 969 return (-1);
970 970 }
971 971 break;
972 972 case IPPROTO_TCP:
973 973 switch (name) {
974 974 case TCP_RECVDSTADDR:
975 975 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
976 976 break;
977 977 case TCP_ANONPRIVBIND:
978 978 *i1 = connp->conn_anon_priv_bind;
979 979 break;
980 980 case TCP_EXCLBIND:
981 981 *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
982 982 break;
983 983 default:
984 984 return (-1);
985 985 }
986 986 break;
987 987 default:
988 988 return (-1);
989 989 }
990 990 return (sizeof (int));
991 991 }
992 992
/*
 * Forward declarations of the per-level option setters that conn_opt_set()
 * dispatches to (SOL_SOCKET, IPPROTO_IP, IPPROTO_IPV6, IPPROTO_UDP and
 * IPPROTO_TCP respectively).  All five take the same argument list and
 * return an int.
 */
993 993 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
994 994 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
995 995 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
996 996 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
997 997 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
998 998 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
999 999 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
1000 1000 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1001 1001 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1002 1002 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1003 1003
1003 1003
1004 1004 /*
1005 1005 * This routine sets the most common socket options including some
1006 1006 * that are transport/ULP specific.
1007 1007 * It returns errno or zero.
1008 1008 *
1009 1009 * For fixed length options, there is no sanity check
1010 1010 * of passed in length is done. It is assumed *_optcom_req()
1011 1011 * routines do the right thing.
1012 1012 */
1013 1013 int
1014 1014 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1015 1015 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1016 1016 {
1017 1017 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1018 1018
1019 1019 /* We have different functions for different levels */
1020 1020 switch (level) {
1021 1021 case SOL_SOCKET:
1022 1022 return (conn_opt_set_socket(coa, name, inlen, invalp,
1023 1023 checkonly, cr));
1024 1024 case IPPROTO_IP:
1025 1025 return (conn_opt_set_ip(coa, name, inlen, invalp,
1026 1026 checkonly, cr));
1027 1027 case IPPROTO_IPV6:
1028 1028 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1029 1029 checkonly, cr));
1030 1030 case IPPROTO_UDP:
1031 1031 return (conn_opt_set_udp(coa, name, inlen, invalp,
1032 1032 checkonly, cr));
1033 1033 case IPPROTO_TCP:
1034 1034 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1035 1035 checkonly, cr));
1036 1036 default:
1037 1037 return (0);
1038 1038 }
1039 1039 }
1040 1040
1041 1041 /*
1042 1042 * Handle SOL_SOCKET
1043 1043 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1044 1044 * it implement their own checks and setting of conn_proto.
1045 1045 */
1046 1046 /* ARGSUSED1 */
1047 1047 static int
1048 1048 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1049 1049 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1050 1050 {
1051 1051 conn_t *connp = coa->coa_connp;
1052 1052 ip_xmit_attr_t *ixa = coa->coa_ixa;
1053 1053 int *i1 = (int *)invalp;
1054 1054 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1055 1055
1056 1056 switch (name) {
1057 1057 case SO_ALLZONES:
1058 1058 if (IPCL_IS_BOUND(connp))
1059 1059 return (EINVAL);
1060 1060 break;
1061 1061 case SO_VRRP:
1062 1062 if (secpolicy_ip_config(cr, checkonly) != 0)
1063 1063 return (EACCES);
1064 1064 break;
1065 1065 case SO_MAC_EXEMPT:
1066 1066 if (secpolicy_net_mac_aware(cr) != 0)
1067 1067 return (EACCES);
1068 1068 if (IPCL_IS_BOUND(connp))
1069 1069 return (EINVAL);
1070 1070 break;
1071 1071 case SO_MAC_IMPLICIT:
1072 1072 if (secpolicy_net_mac_implicit(cr) != 0)
1073 1073 return (EACCES);
1074 1074 break;
1075 1075 }
1076 1076 if (checkonly)
1077 1077 return (0);
1078 1078
1079 1079 mutex_enter(&connp->conn_lock);
1080 1080 /* Here we set the actual option value */
1081 1081 switch (name) {
1082 1082 case SO_DEBUG:
1083 1083 connp->conn_debug = onoff;
1084 1084 break;
1085 1085 case SO_KEEPALIVE:
1086 1086 connp->conn_keepalive = onoff;
1087 1087 break;
1088 1088 case SO_LINGER: {
1089 1089 struct linger *lgr = (struct linger *)invalp;
1090 1090
1091 1091 if (lgr->l_onoff) {
1092 1092 connp->conn_linger = 1;
1093 1093 connp->conn_lingertime = lgr->l_linger;
1094 1094 } else {
1095 1095 connp->conn_linger = 0;
1096 1096 connp->conn_lingertime = 0;
1097 1097 }
1098 1098 break;
1099 1099 }
1100 1100 case SO_OOBINLINE:
1101 1101 connp->conn_oobinline = onoff;
1102 1102 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1103 1103 break;
1104 1104 case SO_REUSEADDR:
1105 1105 connp->conn_reuseaddr = onoff;
1106 1106 break;
1107 1107 case SO_DONTROUTE:
1108 1108 if (onoff)
1109 1109 ixa->ixa_flags |= IXAF_DONTROUTE;
1110 1110 else
1111 1111 ixa->ixa_flags &= ~IXAF_DONTROUTE;
1112 1112 coa->coa_changed |= COA_ROUTE_CHANGED;
1113 1113 break;
1114 1114 case SO_USELOOPBACK:
1115 1115 connp->conn_useloopback = onoff;
1116 1116 break;
1117 1117 case SO_BROADCAST:
1118 1118 connp->conn_broadcast = onoff;
1119 1119 break;
1120 1120 case SO_SNDBUF:
1121 1121 /* ULP has range checked the value */
1122 1122 connp->conn_sndbuf = *i1;
1123 1123 coa->coa_changed |= COA_SNDBUF_CHANGED;
1124 1124 break;
1125 1125 case SO_RCVBUF:
1126 1126 /* ULP has range checked the value */
1127 1127 connp->conn_rcvbuf = *i1;
1128 1128 coa->coa_changed |= COA_RCVBUF_CHANGED;
1129 1129 break;
1130 1130 case SO_RCVTIMEO:
1131 1131 case SO_SNDTIMEO:
1132 1132 /*
1133 1133 * Pass these two options in order for third part
1134 1134 * protocol usage.
1135 1135 */
1136 1136 break;
1137 1137 case SO_DGRAM_ERRIND:
1138 1138 connp->conn_dgram_errind = onoff;
1139 1139 break;
1140 1140 case SO_RECVUCRED:
1141 1141 connp->conn_recv_ancillary.crb_recvucred = onoff;
1142 1142 break;
1143 1143 case SO_ALLZONES:
1144 1144 connp->conn_allzones = onoff;
1145 1145 coa->coa_changed |= COA_ROUTE_CHANGED;
1146 1146 if (onoff)
1147 1147 ixa->ixa_zoneid = ALL_ZONES;
1148 1148 else
1149 1149 ixa->ixa_zoneid = connp->conn_zoneid;
1150 1150 break;
1151 1151 case SO_TIMESTAMP:
1152 1152 connp->conn_recv_ancillary.crb_timestamp = onoff;
1153 1153 break;
1154 1154 case SO_VRRP:
1155 1155 connp->conn_isvrrp = onoff;
1156 1156 break;
1157 1157 case SO_ANON_MLP:
1158 1158 connp->conn_anon_mlp = onoff;
1159 1159 break;
1160 1160 case SO_MAC_EXEMPT:
1161 1161 connp->conn_mac_mode = onoff ?
1162 1162 CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1163 1163 break;
1164 1164 case SO_MAC_IMPLICIT:
1165 1165 connp->conn_mac_mode = onoff ?
1166 1166 CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1167 1167 break;
1168 1168 case SO_EXCLBIND:
1169 1169 connp->conn_exclbind = onoff;
1170 1170 break;
1171 1171 }
1172 1172 mutex_exit(&connp->conn_lock);
1173 1173 return (0);
1174 1174 }
1175 1175
1176 1176 /* Handle IPPROTO_IP */
1177 1177 static int
1178 1178 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1179 1179 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1180 1180 {
1181 1181 conn_t *connp = coa->coa_connp;
1182 1182 ip_xmit_attr_t *ixa = coa->coa_ixa;
1183 1183 ip_pkt_t *ipp = coa->coa_ipp;
1184 1184 int *i1 = (int *)invalp;
1185 1185 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1186 1186 ipaddr_t addr = (ipaddr_t)*i1;
1187 1187 uint_t ifindex;
1188 1188 zoneid_t zoneid = IPCL_ZONEID(connp);
1189 1189 ipif_t *ipif;
1190 1190 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1191 1191 int error;
1192 1192
1193 1193 if (connp->conn_family == AF_INET6 &&
1194 1194 connp->conn_ipversion == IPV4_VERSION) {
1195 1195 /*
1196 1196 * Allow certain IPv4 options to be set on an AF_INET6 socket
1197 1197 * if the connection is still IPv4.
1198 1198 */
1199 1199 switch (name) {
1200 1200 case IP_TOS:
1201 1201 case T_IP_TOS:
1202 1202 case IP_TTL:
1203 1203 case IP_DONTFRAG:
1204 1204 break;
1205 1205 default:
1206 1206 return (EINVAL);
1207 1207 }
1208 1208 } else if (connp->conn_family != AF_INET) {
1209 1209 return (EINVAL);
1210 1210 }
1211 1211
1212 1212 switch (name) {
1213 1213 case IP_TTL:
1214 1214 /* Don't allow zero */
1215 1215 if (*i1 < 1 || *i1 > 255)
1216 1216 return (EINVAL);
1217 1217 break;
1218 1218 case IP_MULTICAST_IF:
1219 1219 if (addr == INADDR_ANY) {
1220 1220 /* Clear */
1221 1221 ifindex = 0;
1222 1222 break;
1223 1223 }
1224 1224 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1225 1225 if (ipif == NULL)
1226 1226 return (EHOSTUNREACH);
1227 1227 /* not supported by the virtual network iface */
1228 1228 if (IS_VNI(ipif->ipif_ill)) {
1229 1229 ipif_refrele(ipif);
1230 1230 return (EINVAL);
1231 1231 }
1232 1232 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1233 1233 ipif_refrele(ipif);
1234 1234 break;
1235 1235 case IP_NEXTHOP: {
1236 1236 ire_t *ire;
1237 1237
1238 1238 if (addr == INADDR_ANY) {
1239 1239 /* Clear */
1240 1240 break;
1241 1241 }
1242 1242 /* Verify that the next-hop is on-link */
1243 1243 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1244 1244 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1245 1245 if (ire == NULL)
1246 1246 return (EHOSTUNREACH);
1247 1247 ire_refrele(ire);
1248 1248 break;
1249 1249 }
1250 1250 case IP_OPTIONS:
1251 1251 case T_IP_OPTIONS: {
1252 1252 uint_t newlen;
1253 1253
1254 1254 if (ipp->ipp_fields & IPPF_LABEL_V4)
1255 1255 newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1256 1256 else
1257 1257 newlen = inlen;
1258 1258 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1259 1259 return (EINVAL);
1260 1260 }
1261 1261 break;
1262 1262 }
1263 1263 case IP_PKTINFO: {
1264 1264 struct in_pktinfo *pktinfo;
1265 1265
1266 1266 /* Two different valid lengths */
1267 1267 if (inlen != sizeof (int) &&
1268 1268 inlen != sizeof (struct in_pktinfo))
1269 1269 return (EINVAL);
1270 1270 if (inlen == sizeof (int))
1271 1271 break;
1272 1272
1273 1273 pktinfo = (struct in_pktinfo *)invalp;
1274 1274 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1275 1275 switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1276 1276 zoneid, ipst, B_FALSE)) {
1277 1277 case IPVL_UNICAST_UP:
1278 1278 case IPVL_UNICAST_DOWN:
1279 1279 break;
1280 1280 default:
1281 1281 return (EADDRNOTAVAIL);
1282 1282 }
1283 1283 }
1284 1284 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1285 1285 B_FALSE, ipst))
1286 1286 return (ENXIO);
1287 1287 break;
1288 1288 }
1289 1289 case IP_BOUND_IF:
1290 1290 ifindex = *(uint_t *)i1;
1291 1291
1292 1292 /* Just check it is ok. */
1293 1293 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1294 1294 return (ENXIO);
1295 1295 break;
1296 1296 }
1297 1297 if (checkonly)
1298 1298 return (0);
1299 1299
1300 1300 /* Here we set the actual option value */
1301 1301 /*
1302 1302 * conn_lock protects the bitfields, and is used to
1303 1303 * set the fields atomically. Not needed for ixa settings since
1304 1304 * the caller has an exclusive copy of the ixa.
1305 1305 * We can not hold conn_lock across the multicast options though.
1306 1306 */
1307 1307 switch (name) {
1308 1308 case IP_OPTIONS:
1309 1309 case T_IP_OPTIONS:
1310 1310 /* Save options for use by IP. */
1311 1311 mutex_enter(&connp->conn_lock);
1312 1312 error = optcom_pkt_set(invalp, inlen,
1313 1313 (uchar_t **)&ipp->ipp_ipv4_options,
1314 1314 &ipp->ipp_ipv4_options_len);
1315 1315 if (error != 0) {
1316 1316 mutex_exit(&connp->conn_lock);
1317 1317 return (error);
1318 1318 }
1319 1319 if (ipp->ipp_ipv4_options_len == 0) {
1320 1320 ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1321 1321 } else {
1322 1322 ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1323 1323 }
1324 1324 mutex_exit(&connp->conn_lock);
1325 1325 coa->coa_changed |= COA_HEADER_CHANGED;
1326 1326 coa->coa_changed |= COA_WROFF_CHANGED;
1327 1327 break;
1328 1328
1329 1329 case IP_TTL:
1330 1330 mutex_enter(&connp->conn_lock);
1331 1331 ipp->ipp_unicast_hops = *i1;
1332 1332 mutex_exit(&connp->conn_lock);
1333 1333 coa->coa_changed |= COA_HEADER_CHANGED;
1334 1334 break;
1335 1335 case IP_TOS:
1336 1336 case T_IP_TOS:
1337 1337 mutex_enter(&connp->conn_lock);
1338 1338 if (*i1 == -1) {
1339 1339 ipp->ipp_type_of_service = 0;
1340 1340 } else {
1341 1341 ipp->ipp_type_of_service = *i1;
1342 1342 }
1343 1343 mutex_exit(&connp->conn_lock);
1344 1344 coa->coa_changed |= COA_HEADER_CHANGED;
1345 1345 break;
1346 1346 case IP_MULTICAST_IF:
1347 1347 ixa->ixa_multicast_ifindex = ifindex;
1348 1348 ixa->ixa_multicast_ifaddr = addr;
1349 1349 coa->coa_changed |= COA_ROUTE_CHANGED;
1350 1350 break;
1351 1351 case IP_MULTICAST_TTL:
1352 1352 ixa->ixa_multicast_ttl = *invalp;
1353 1353 /* Handled automatically by ip_output */
1354 1354 break;
1355 1355 case IP_MULTICAST_LOOP:
1356 1356 if (*invalp != 0)
1357 1357 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1358 1358 else
1359 1359 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1360 1360 /* Handled automatically by ip_output */
1361 1361 break;
1362 1362 case IP_RECVOPTS:
1363 1363 mutex_enter(&connp->conn_lock);
1364 1364 connp->conn_recv_ancillary.crb_recvopts = onoff;
1365 1365 mutex_exit(&connp->conn_lock);
1366 1366 break;
1367 1367 case IP_RECVDSTADDR:
1368 1368 mutex_enter(&connp->conn_lock);
1369 1369 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1370 1370 mutex_exit(&connp->conn_lock);
1371 1371 break;
1372 1372 case IP_RECVIF:
1373 1373 mutex_enter(&connp->conn_lock);
1374 1374 connp->conn_recv_ancillary.crb_recvif = onoff;
1375 1375 mutex_exit(&connp->conn_lock);
1376 1376 break;
1377 1377 case IP_RECVSLLA:
1378 1378 mutex_enter(&connp->conn_lock);
1379 1379 connp->conn_recv_ancillary.crb_recvslla = onoff;
1380 1380 mutex_exit(&connp->conn_lock);
1381 1381 break;
1382 1382 case IP_RECVTTL:
1383 1383 mutex_enter(&connp->conn_lock);
1384 1384 connp->conn_recv_ancillary.crb_recvttl = onoff;
1385 1385 mutex_exit(&connp->conn_lock);
1386 1386 break;
1387 1387 case IP_PKTINFO: {
1388 1388 /*
1389 1389 * This also handles IP_RECVPKTINFO.
1390 1390 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1391 1391 * Differentiation is based on the size of the
1392 1392 * argument passed in.
1393 1393 */
1394 1394 struct in_pktinfo *pktinfo;
1395 1395
1396 1396 if (inlen == sizeof (int)) {
1397 1397 /* This is IP_RECVPKTINFO option. */
1398 1398 mutex_enter(&connp->conn_lock);
1399 1399 connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1400 1400 onoff;
1401 1401 mutex_exit(&connp->conn_lock);
1402 1402 break;
1403 1403 }
1404 1404
1405 1405 /* This is IP_PKTINFO option. */
1406 1406 mutex_enter(&connp->conn_lock);
1407 1407 pktinfo = (struct in_pktinfo *)invalp;
1408 1408 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1409 1409 ipp->ipp_fields |= IPPF_ADDR;
1410 1410 IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1411 1411 &ipp->ipp_addr);
1412 1412 } else {
1413 1413 ipp->ipp_fields &= ~IPPF_ADDR;
1414 1414 ipp->ipp_addr = ipv6_all_zeros;
1415 1415 }
1416 1416 mutex_exit(&connp->conn_lock);
1417 1417 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1418 1418 coa->coa_changed |= COA_ROUTE_CHANGED;
1419 1419 coa->coa_changed |= COA_HEADER_CHANGED;
1420 1420 break;
1421 1421 }
1422 1422 case IP_DONTFRAG:
1423 1423 if (onoff) {
1424 1424 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1425 1425 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1426 1426 } else {
1427 1427 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1428 1428 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1429 1429 }
1430 1430 /* Need to redo ip_attr_connect */
1431 1431 coa->coa_changed |= COA_ROUTE_CHANGED;
1432 1432 break;
1433 1433 case IP_ADD_MEMBERSHIP:
1434 1434 case IP_DROP_MEMBERSHIP:
1435 1435 case MCAST_JOIN_GROUP:
1436 1436 case MCAST_LEAVE_GROUP:
1437 1437 return (ip_opt_set_multicast_group(connp, name,
1438 1438 invalp, B_FALSE, checkonly));
1439 1439
1440 1440 case IP_BLOCK_SOURCE:
1441 1441 case IP_UNBLOCK_SOURCE:
1442 1442 case IP_ADD_SOURCE_MEMBERSHIP:
1443 1443 case IP_DROP_SOURCE_MEMBERSHIP:
1444 1444 case MCAST_BLOCK_SOURCE:
1445 1445 case MCAST_UNBLOCK_SOURCE:
1446 1446 case MCAST_JOIN_SOURCE_GROUP:
1447 1447 case MCAST_LEAVE_SOURCE_GROUP:
1448 1448 return (ip_opt_set_multicast_sources(connp, name,
1449 1449 invalp, B_FALSE, checkonly));
1450 1450
1451 1451 case IP_SEC_OPT:
1452 1452 mutex_enter(&connp->conn_lock);
1453 1453 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1454 1454 mutex_exit(&connp->conn_lock);
1455 1455 if (error != 0) {
1456 1456 return (error);
1457 1457 }
1458 1458 /* This is an IPsec policy change - redo ip_attr_connect */
1459 1459 coa->coa_changed |= COA_ROUTE_CHANGED;
1460 1460 break;
1461 1461 case IP_NEXTHOP:
1462 1462 ixa->ixa_nexthop_v4 = addr;
1463 1463 if (addr != INADDR_ANY)
1464 1464 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1465 1465 else
1466 1466 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1467 1467 coa->coa_changed |= COA_ROUTE_CHANGED;
1468 1468 break;
1469 1469
1470 1470 case IP_BOUND_IF:
1471 1471 ixa->ixa_ifindex = ifindex; /* Send */
1472 1472 mutex_enter(&connp->conn_lock);
1473 1473 connp->conn_incoming_ifindex = ifindex; /* Receive */
1474 1474 connp->conn_bound_if = ifindex; /* getsockopt */
1475 1475 mutex_exit(&connp->conn_lock);
1476 1476 coa->coa_changed |= COA_ROUTE_CHANGED;
1477 1477 break;
1478 1478 case IP_UNSPEC_SRC:
1479 1479 mutex_enter(&connp->conn_lock);
1480 1480 connp->conn_unspec_src = onoff;
1481 1481 if (onoff)
1482 1482 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1483 1483 else
1484 1484 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1485 1485
1486 1486 mutex_exit(&connp->conn_lock);
1487 1487 break;
1488 1488 case IP_BROADCAST_TTL:
1489 1489 ixa->ixa_broadcast_ttl = *invalp;
1490 1490 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1491 1491 /* Handled automatically by ip_output */
1492 1492 break;
1493 1493 case MRT_INIT:
1494 1494 case MRT_DONE:
1495 1495 case MRT_ADD_VIF:
1496 1496 case MRT_DEL_VIF:
1497 1497 case MRT_ADD_MFC:
1498 1498 case MRT_DEL_MFC:
1499 1499 case MRT_ASSERT:
1500 1500 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1501 1501 return (error);
1502 1502 }
1503 1503 error = ip_mrouter_set((int)name, connp, checkonly,
1504 1504 (uchar_t *)invalp, inlen);
1505 1505 if (error) {
1506 1506 return (error);
1507 1507 }
1508 1508 return (0);
1509 1509
1510 1510 }
1511 1511 return (0);
1512 1512 }
1513 1513
1514 1514 /* Handle IPPROTO_IPV6 */
1515 1515 static int
1516 1516 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1517 1517 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1518 1518 {
1519 1519 conn_t *connp = coa->coa_connp;
1520 1520 ip_xmit_attr_t *ixa = coa->coa_ixa;
1521 1521 ip_pkt_t *ipp = coa->coa_ipp;
1522 1522 int *i1 = (int *)invalp;
1523 1523 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1524 1524 uint_t ifindex;
1525 1525 zoneid_t zoneid = IPCL_ZONEID(connp);
1526 1526 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1527 1527 int error;
1528 1528
1529 1529 if (connp->conn_family != AF_INET6)
1530 1530 return (EINVAL);
1531 1531
1532 1532 switch (name) {
1533 1533 case IPV6_MULTICAST_IF:
1534 1534 /*
1535 1535 * The only possible error is EINVAL.
1536 1536 * We call this option on both V4 and V6
1537 1537 * If both fail, then this call returns
1538 1538 * EINVAL. If at least one of them succeeds we
1539 1539 * return success.
1540 1540 */
1541 1541 ifindex = *(uint_t *)i1;
1542 1542
1543 1543 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1544 1544 !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1545 1545 return (EINVAL);
1546 1546 break;
1547 1547 case IPV6_UNICAST_HOPS:
1548 1548 /* Don't allow zero. -1 means to use default */
1549 1549 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1550 1550 return (EINVAL);
1551 1551 break;
1552 1552 case IPV6_MULTICAST_HOPS:
1553 1553 /* -1 means use default */
1554 1554 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1555 1555 return (EINVAL);
1556 1556 break;
1557 1557 case IPV6_MULTICAST_LOOP:
1558 1558 if (*i1 != 0 && *i1 != 1)
1559 1559 return (EINVAL);
1560 1560 break;
1561 1561 case IPV6_BOUND_IF:
1562 1562 ifindex = *(uint_t *)i1;
1563 1563
1564 1564 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1565 1565 return (ENXIO);
1566 1566 break;
1567 1567 case IPV6_PKTINFO: {
1568 1568 struct in6_pktinfo *pkti;
1569 1569 boolean_t isv6;
1570 1570
1571 1571 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1572 1572 return (EINVAL);
1573 1573 if (inlen == 0)
1574 1574 break; /* Clear values below */
1575 1575
1576 1576 /*
1577 1577 * Verify the source address and ifindex. Privileged users
1578 1578 * can use any source address.
1579 1579 */
1580 1580 pkti = (struct in6_pktinfo *)invalp;
1581 1581
1582 1582 /*
1583 1583 * For link-local addresses we use the ipi6_ifindex when
1584 1584 * we verify the local address.
1585 1585 * If net_rawaccess then any source address can be used.
1586 1586 */
1587 1587 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1588 1588 secpolicy_net_rawaccess(cr) != 0) {
1589 1589 uint_t scopeid = 0;
1590 1590 in6_addr_t *v6src = &pkti->ipi6_addr;
1591 1591 ipaddr_t v4src;
1592 1592 ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1593 1593
1594 1594 if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1595 1595 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1596 1596 if (v4src != INADDR_ANY) {
1597 1597 laddr_type = ip_laddr_verify_v4(v4src,
1598 1598 zoneid, ipst, B_FALSE);
1599 1599 }
1600 1600 } else {
1601 1601 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1602 1602 scopeid = pkti->ipi6_ifindex;
1603 1603
1604 1604 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1605 1605 ipst, B_FALSE, scopeid);
1606 1606 }
1607 1607 switch (laddr_type) {
1608 1608 case IPVL_UNICAST_UP:
1609 1609 case IPVL_UNICAST_DOWN:
1610 1610 break;
1611 1611 default:
1612 1612 return (EADDRNOTAVAIL);
1613 1613 }
1614 1614 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1615 1615 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1616 1616 /* Allow any source */
1617 1617 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1618 1618 }
1619 1619 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1620 1620 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1621 1621 ipst))
1622 1622 return (ENXIO);
1623 1623 break;
1624 1624 }
1625 1625 case IPV6_HOPLIMIT:
1626 1626 /* It is only allowed as ancilary data */
1627 1627 if (!coa->coa_ancillary)
1628 1628 return (EINVAL);
1629 1629
1630 1630 if (inlen != 0 && inlen != sizeof (int))
1631 1631 return (EINVAL);
1632 1632 if (inlen == sizeof (int)) {
1633 1633 if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1634 1634 return (EINVAL);
1635 1635 }
1636 1636 break;
1637 1637 case IPV6_TCLASS:
1638 1638 if (inlen != 0 && inlen != sizeof (int))
1639 1639 return (EINVAL);
1640 1640 if (inlen == sizeof (int)) {
1641 1641 if (*i1 > 255 || *i1 < -1)
1642 1642 return (EINVAL);
1643 1643 }
1644 1644 break;
1645 1645 case IPV6_NEXTHOP:
1646 1646 if (inlen != 0 && inlen != sizeof (sin6_t))
1647 1647 return (EINVAL);
1648 1648 if (inlen == sizeof (sin6_t)) {
1649 1649 sin6_t *sin6 = (sin6_t *)invalp;
1650 1650 ire_t *ire;
1651 1651
1652 1652 if (sin6->sin6_family != AF_INET6)
1653 1653 return (EAFNOSUPPORT);
1654 1654 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1655 1655 return (EADDRNOTAVAIL);
1656 1656
1657 1657 /* Verify that the next-hop is on-link */
1658 1658 ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1659 1659 0, 0, IRE_ONLINK, NULL, zoneid,
1660 1660 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1661 1661 if (ire == NULL)
1662 1662 return (EHOSTUNREACH);
1663 1663 ire_refrele(ire);
1664 1664 break;
1665 1665 }
1666 1666 break;
1667 1667 case IPV6_RTHDR:
1668 1668 case IPV6_DSTOPTS:
1669 1669 case IPV6_RTHDRDSTOPTS:
1670 1670 case IPV6_HOPOPTS: {
1671 1671 /* All have the length field in the same place */
1672 1672 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1673 1673 /*
1674 1674 * Sanity checks - minimum size, size a multiple of
1675 1675 * eight bytes, and matching size passed in.
1676 1676 */
1677 1677 if (inlen != 0 &&
1678 1678 inlen != (8 * (hopts->ip6h_len + 1)))
1679 1679 return (EINVAL);
1680 1680 break;
1681 1681 }
1682 1682 case IPV6_PATHMTU:
1683 1683 /* Can't be set */
1684 1684 return (EINVAL);
1685 1685
1686 1686 case IPV6_USE_MIN_MTU:
1687 1687 if (inlen != sizeof (int))
1688 1688 return (EINVAL);
1689 1689 if (*i1 < -1 || *i1 > 1)
1690 1690 return (EINVAL);
1691 1691 break;
1692 1692 case IPV6_SRC_PREFERENCES:
1693 1693 if (inlen != sizeof (uint32_t))
1694 1694 return (EINVAL);
1695 1695 break;
1696 1696 case IPV6_V6ONLY:
1697 1697 if (*i1 < 0 || *i1 > 1) {
1698 1698 return (EINVAL);
1699 1699 }
1700 1700 break;
1701 1701 }
1702 1702 if (checkonly)
1703 1703 return (0);
1704 1704
1705 1705 /* Here we set the actual option value */
1706 1706 /*
1707 1707 * conn_lock protects the bitfields, and is used to
1708 1708 * set the fields atomically. Not needed for ixa settings since
1709 1709 * the caller has an exclusive copy of the ixa.
1710 1710 * We can not hold conn_lock across the multicast options though.
1711 1711 */
1712 1712 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1713 1713 switch (name) {
1714 1714 case IPV6_MULTICAST_IF:
1715 1715 ixa->ixa_multicast_ifindex = ifindex;
1716 1716 /* Need to redo ip_attr_connect */
1717 1717 coa->coa_changed |= COA_ROUTE_CHANGED;
1718 1718 break;
1719 1719 case IPV6_UNICAST_HOPS:
1720 1720 /* -1 means use default */
1721 1721 mutex_enter(&connp->conn_lock);
1722 1722 if (*i1 == -1) {
1723 1723 ipp->ipp_unicast_hops = connp->conn_default_ttl;
1724 1724 } else {
1725 1725 ipp->ipp_unicast_hops = (uint8_t)*i1;
1726 1726 }
1727 1727 mutex_exit(&connp->conn_lock);
1728 1728 coa->coa_changed |= COA_HEADER_CHANGED;
1729 1729 break;
1730 1730 case IPV6_MULTICAST_HOPS:
1731 1731 /* -1 means use default */
1732 1732 if (*i1 == -1) {
1733 1733 ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1734 1734 } else {
1735 1735 ixa->ixa_multicast_ttl = (uint8_t)*i1;
1736 1736 }
1737 1737 /* Handled automatically by ip_output */
1738 1738 break;
1739 1739 case IPV6_MULTICAST_LOOP:
1740 1740 if (*i1 != 0)
1741 1741 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1742 1742 else
1743 1743 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1744 1744 /* Handled automatically by ip_output */
1745 1745 break;
1746 1746 case IPV6_JOIN_GROUP:
1747 1747 case IPV6_LEAVE_GROUP:
1748 1748 case MCAST_JOIN_GROUP:
1749 1749 case MCAST_LEAVE_GROUP:
1750 1750 return (ip_opt_set_multicast_group(connp, name,
1751 1751 invalp, B_TRUE, checkonly));
1752 1752
1753 1753 case MCAST_BLOCK_SOURCE:
1754 1754 case MCAST_UNBLOCK_SOURCE:
1755 1755 case MCAST_JOIN_SOURCE_GROUP:
1756 1756 case MCAST_LEAVE_SOURCE_GROUP:
1757 1757 return (ip_opt_set_multicast_sources(connp, name,
1758 1758 invalp, B_TRUE, checkonly));
1759 1759
1760 1760 case IPV6_BOUND_IF:
1761 1761 ixa->ixa_ifindex = ifindex; /* Send */
1762 1762 mutex_enter(&connp->conn_lock);
1763 1763 connp->conn_incoming_ifindex = ifindex; /* Receive */
1764 1764 connp->conn_bound_if = ifindex; /* getsockopt */
1765 1765 mutex_exit(&connp->conn_lock);
1766 1766 coa->coa_changed |= COA_ROUTE_CHANGED;
1767 1767 break;
1768 1768 case IPV6_UNSPEC_SRC:
1769 1769 mutex_enter(&connp->conn_lock);
1770 1770 connp->conn_unspec_src = onoff;
1771 1771 if (onoff)
1772 1772 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1773 1773 else
1774 1774 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1775 1775 mutex_exit(&connp->conn_lock);
1776 1776 break;
1777 1777 case IPV6_RECVPKTINFO:
1778 1778 mutex_enter(&connp->conn_lock);
1779 1779 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1780 1780 mutex_exit(&connp->conn_lock);
1781 1781 break;
1782 1782 case IPV6_RECVTCLASS:
1783 1783 mutex_enter(&connp->conn_lock);
1784 1784 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1785 1785 mutex_exit(&connp->conn_lock);
1786 1786 break;
1787 1787 case IPV6_RECVPATHMTU:
1788 1788 mutex_enter(&connp->conn_lock);
1789 1789 connp->conn_ipv6_recvpathmtu = onoff;
1790 1790 mutex_exit(&connp->conn_lock);
1791 1791 break;
1792 1792 case IPV6_RECVHOPLIMIT:
1793 1793 mutex_enter(&connp->conn_lock);
1794 1794 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1795 1795 onoff;
1796 1796 mutex_exit(&connp->conn_lock);
1797 1797 break;
1798 1798 case IPV6_RECVHOPOPTS:
1799 1799 mutex_enter(&connp->conn_lock);
1800 1800 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1801 1801 mutex_exit(&connp->conn_lock);
1802 1802 break;
1803 1803 case IPV6_RECVDSTOPTS:
1804 1804 mutex_enter(&connp->conn_lock);
1805 1805 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1806 1806 mutex_exit(&connp->conn_lock);
1807 1807 break;
1808 1808 case _OLD_IPV6_RECVDSTOPTS:
1809 1809 mutex_enter(&connp->conn_lock);
1810 1810 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1811 1811 onoff;
1812 1812 mutex_exit(&connp->conn_lock);
1813 1813 break;
1814 1814 case IPV6_RECVRTHDRDSTOPTS:
1815 1815 mutex_enter(&connp->conn_lock);
1816 1816 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1817 1817 onoff;
1818 1818 mutex_exit(&connp->conn_lock);
1819 1819 break;
1820 1820 case IPV6_RECVRTHDR:
1821 1821 mutex_enter(&connp->conn_lock);
1822 1822 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1823 1823 mutex_exit(&connp->conn_lock);
1824 1824 break;
1825 1825 case IPV6_PKTINFO:
1826 1826 mutex_enter(&connp->conn_lock);
1827 1827 if (inlen == 0) {
1828 1828 ipp->ipp_fields &= ~IPPF_ADDR;
1829 1829 ipp->ipp_addr = ipv6_all_zeros;
1830 1830 ixa->ixa_ifindex = 0;
1831 1831 } else {
1832 1832 struct in6_pktinfo *pkti;
1833 1833
1834 1834 pkti = (struct in6_pktinfo *)invalp;
1835 1835 ipp->ipp_addr = pkti->ipi6_addr;
1836 1836 if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1837 1837 ipp->ipp_fields |= IPPF_ADDR;
1838 1838 else
1839 1839 ipp->ipp_fields &= ~IPPF_ADDR;
1840 1840 ixa->ixa_ifindex = pkti->ipi6_ifindex;
1841 1841 }
1842 1842 mutex_exit(&connp->conn_lock);
1843 1843 /* Source and ifindex might have changed */
1844 1844 coa->coa_changed |= COA_HEADER_CHANGED;
1845 1845 coa->coa_changed |= COA_ROUTE_CHANGED;
1846 1846 break;
1847 1847 case IPV6_HOPLIMIT:
1848 1848 mutex_enter(&connp->conn_lock);
1849 1849 if (inlen == 0 || *i1 == -1) {
1850 1850 /* Revert to default */
1851 1851 ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1852 1852 ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1853 1853 } else {
1854 1854 ipp->ipp_hoplimit = *i1;
1855 1855 ipp->ipp_fields |= IPPF_HOPLIMIT;
1856 1856 /* Ensure that it sticks for multicast packets */
1857 1857 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1858 1858 }
1859 1859 mutex_exit(&connp->conn_lock);
1860 1860 coa->coa_changed |= COA_HEADER_CHANGED;
1861 1861 break;
1862 1862 case IPV6_TCLASS:
1863 1863 /*
1864 1864 * IPV6_TCLASS accepts -1 as use kernel default
1865 1865 * and [0, 255] as the actual traffic class.
1866 1866 */
1867 1867 mutex_enter(&connp->conn_lock);
1868 1868 if (inlen == 0 || *i1 == -1) {
1869 1869 ipp->ipp_tclass = 0;
1870 1870 ipp->ipp_fields &= ~IPPF_TCLASS;
1871 1871 } else {
1872 1872 ipp->ipp_tclass = *i1;
1873 1873 ipp->ipp_fields |= IPPF_TCLASS;
1874 1874 }
1875 1875 mutex_exit(&connp->conn_lock);
1876 1876 coa->coa_changed |= COA_HEADER_CHANGED;
1877 1877 break;
1878 1878 case IPV6_NEXTHOP:
1879 1879 if (inlen == 0) {
1880 1880 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1881 1881 } else {
1882 1882 sin6_t *sin6 = (sin6_t *)invalp;
1883 1883
1884 1884 ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1885 1885 if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1886 1886 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1887 1887 else
1888 1888 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1889 1889 }
1890 1890 coa->coa_changed |= COA_ROUTE_CHANGED;
1891 1891 break;
1892 1892 case IPV6_HOPOPTS:
1893 1893 mutex_enter(&connp->conn_lock);
1894 1894 error = optcom_pkt_set(invalp, inlen,
1895 1895 (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1896 1896 if (error != 0) {
1897 1897 mutex_exit(&connp->conn_lock);
1898 1898 return (error);
1899 1899 }
1900 1900 if (ipp->ipp_hopoptslen == 0) {
1901 1901 ipp->ipp_fields &= ~IPPF_HOPOPTS;
1902 1902 } else {
1903 1903 ipp->ipp_fields |= IPPF_HOPOPTS;
1904 1904 }
1905 1905 mutex_exit(&connp->conn_lock);
1906 1906 coa->coa_changed |= COA_HEADER_CHANGED;
1907 1907 coa->coa_changed |= COA_WROFF_CHANGED;
1908 1908 break;
1909 1909 case IPV6_RTHDRDSTOPTS:
1910 1910 mutex_enter(&connp->conn_lock);
1911 1911 error = optcom_pkt_set(invalp, inlen,
1912 1912 (uchar_t **)&ipp->ipp_rthdrdstopts,
1913 1913 &ipp->ipp_rthdrdstoptslen);
1914 1914 if (error != 0) {
1915 1915 mutex_exit(&connp->conn_lock);
1916 1916 return (error);
1917 1917 }
1918 1918 if (ipp->ipp_rthdrdstoptslen == 0) {
1919 1919 ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1920 1920 } else {
1921 1921 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1922 1922 }
1923 1923 mutex_exit(&connp->conn_lock);
1924 1924 coa->coa_changed |= COA_HEADER_CHANGED;
1925 1925 coa->coa_changed |= COA_WROFF_CHANGED;
1926 1926 break;
1927 1927 case IPV6_DSTOPTS:
1928 1928 mutex_enter(&connp->conn_lock);
1929 1929 error = optcom_pkt_set(invalp, inlen,
1930 1930 (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1931 1931 if (error != 0) {
1932 1932 mutex_exit(&connp->conn_lock);
1933 1933 return (error);
1934 1934 }
1935 1935 if (ipp->ipp_dstoptslen == 0) {
1936 1936 ipp->ipp_fields &= ~IPPF_DSTOPTS;
1937 1937 } else {
1938 1938 ipp->ipp_fields |= IPPF_DSTOPTS;
1939 1939 }
1940 1940 mutex_exit(&connp->conn_lock);
1941 1941 coa->coa_changed |= COA_HEADER_CHANGED;
1942 1942 coa->coa_changed |= COA_WROFF_CHANGED;
1943 1943 break;
1944 1944 case IPV6_RTHDR:
1945 1945 mutex_enter(&connp->conn_lock);
1946 1946 error = optcom_pkt_set(invalp, inlen,
1947 1947 (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1948 1948 if (error != 0) {
1949 1949 mutex_exit(&connp->conn_lock);
1950 1950 return (error);
1951 1951 }
1952 1952 if (ipp->ipp_rthdrlen == 0) {
1953 1953 ipp->ipp_fields &= ~IPPF_RTHDR;
1954 1954 } else {
1955 1955 ipp->ipp_fields |= IPPF_RTHDR;
1956 1956 }
1957 1957 mutex_exit(&connp->conn_lock);
1958 1958 coa->coa_changed |= COA_HEADER_CHANGED;
1959 1959 coa->coa_changed |= COA_WROFF_CHANGED;
1960 1960 break;
1961 1961
1962 1962 case IPV6_DONTFRAG:
1963 1963 if (onoff) {
1964 1964 ixa->ixa_flags |= IXAF_DONTFRAG;
1965 1965 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1966 1966 } else {
1967 1967 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1968 1968 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1969 1969 }
1970 1970 /* Need to redo ip_attr_connect */
1971 1971 coa->coa_changed |= COA_ROUTE_CHANGED;
1972 1972 break;
1973 1973
1974 1974 case IPV6_USE_MIN_MTU:
1975 1975 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1976 1976 ixa->ixa_use_min_mtu = *i1;
1977 1977 /* Need to redo ip_attr_connect */
1978 1978 coa->coa_changed |= COA_ROUTE_CHANGED;
1979 1979 break;
1980 1980
1981 1981 case IPV6_SEC_OPT:
1982 1982 mutex_enter(&connp->conn_lock);
1983 1983 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1984 1984 mutex_exit(&connp->conn_lock);
1985 1985 if (error != 0) {
1986 1986 return (error);
1987 1987 }
1988 1988 /* This is an IPsec policy change - redo ip_attr_connect */
1989 1989 coa->coa_changed |= COA_ROUTE_CHANGED;
1990 1990 break;
1991 1991 case IPV6_SRC_PREFERENCES:
1992 1992 /*
1993 1993 * This socket option only affects connected
1994 1994 * sockets that haven't already bound to a specific
1995 1995 * IPv6 address. In other words, sockets that
1996 1996 * don't call bind() with an address other than the
1997 1997 * unspecified address and that call connect().
1998 1998 * ip_set_destination_v6() passes these preferences
1999 1999 * to the ipif_select_source_v6() function.
2000 2000 */
2001 2001 mutex_enter(&connp->conn_lock);
2002 2002 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
2003 2003 mutex_exit(&connp->conn_lock);
2004 2004 if (error != 0) {
2005 2005 return (error);
2006 2006 }
2007 2007 break;
2008 2008 case IPV6_V6ONLY:
2009 2009 mutex_enter(&connp->conn_lock);
2010 2010 connp->conn_ipv6_v6only = onoff;
2011 2011 mutex_exit(&connp->conn_lock);
2012 2012 break;
2013 2013 }
2014 2014 return (0);
2015 2015 }
2016 2016
2017 2017 /* Handle IPPROTO_UDP */
2018 2018 /* ARGSUSED1 */
2019 2019 static int
2020 2020 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2021 2021 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2022 2022 {
2023 2023 conn_t *connp = coa->coa_connp;
2024 2024 int *i1 = (int *)invalp;
2025 2025 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2026 2026 int error;
2027 2027
2028 2028 switch (name) {
2029 2029 case UDP_ANONPRIVBIND:
2030 2030 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2031 2031 return (error);
2032 2032 }
2033 2033 break;
2034 2034 }
2035 2035 if (checkonly)
2036 2036 return (0);
2037 2037
2038 2038 /* Here we set the actual option value */
2039 2039 mutex_enter(&connp->conn_lock);
2040 2040 switch (name) {
2041 2041 case UDP_ANONPRIVBIND:
2042 2042 connp->conn_anon_priv_bind = onoff;
2043 2043 break;
2044 2044 case UDP_EXCLBIND:
2045 2045 connp->conn_exclbind = onoff;
2046 2046 break;
2047 2047 }
2048 2048 mutex_exit(&connp->conn_lock);
2049 2049 return (0);
2050 2050 }
2051 2051
2052 2052 /* Handle IPPROTO_TCP */
2053 2053 /* ARGSUSED1 */
2054 2054 static int
2055 2055 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2056 2056 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2057 2057 {
2058 2058 conn_t *connp = coa->coa_connp;
2059 2059 int *i1 = (int *)invalp;
2060 2060 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2061 2061 int error;
2062 2062
2063 2063 switch (name) {
2064 2064 case TCP_ANONPRIVBIND:
2065 2065 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2066 2066 return (error);
2067 2067 }
2068 2068 break;
2069 2069 }
2070 2070 if (checkonly)
2071 2071 return (0);
2072 2072
2073 2073 /* Here we set the actual option value */
2074 2074 mutex_enter(&connp->conn_lock);
2075 2075 switch (name) {
2076 2076 case TCP_ANONPRIVBIND:
2077 2077 connp->conn_anon_priv_bind = onoff;
2078 2078 break;
2079 2079 case TCP_EXCLBIND:
2080 2080 connp->conn_exclbind = onoff;
2081 2081 break;
2082 2082 case TCP_RECVDSTADDR:
2083 2083 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2084 2084 break;
2085 2085 }
2086 2086 mutex_exit(&connp->conn_lock);
2087 2087 return (0);
2088 2088 }
2089 2089
2090 2090 int
2091 2091 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2092 2092 {
2093 2093 sin_t *sin;
2094 2094 sin6_t *sin6;
2095 2095
2096 2096 if (connp->conn_family == AF_INET) {
2097 2097 if (*salenp < sizeof (sin_t))
2098 2098 return (EINVAL);
2099 2099
2100 2100 *salenp = sizeof (sin_t);
2101 2101 /* Fill zeroes and then initialize non-zero fields */
2102 2102 sin = (sin_t *)sa;
2103 2103 *sin = sin_null;
2104 2104 sin->sin_family = AF_INET;
2105 2105 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2106 2106 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2107 2107 sin->sin_addr.s_addr = connp->conn_saddr_v4;
2108 2108 } else {
2109 2109 /*
2110 2110 * INADDR_ANY
2111 2111 * conn_saddr is not set, we might be bound to
2112 2112 * broadcast/multicast. Use conn_bound_addr as
2113 2113 * local address instead (that could
2114 2114 * also still be INADDR_ANY)
2115 2115 */
2116 2116 sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2117 2117 }
2118 2118 sin->sin_port = connp->conn_lport;
2119 2119 } else {
2120 2120 if (*salenp < sizeof (sin6_t))
2121 2121 return (EINVAL);
2122 2122
2123 2123 *salenp = sizeof (sin6_t);
2124 2124 /* Fill zeroes and then initialize non-zero fields */
2125 2125 sin6 = (sin6_t *)sa;
2126 2126 *sin6 = sin6_null;
2127 2127 sin6->sin6_family = AF_INET6;
2128 2128 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2129 2129 sin6->sin6_addr = connp->conn_saddr_v6;
2130 2130 } else {
2131 2131 /*
2132 2132 * conn_saddr is not set, we might be bound to
2133 2133 * broadcast/multicast. Use conn_bound_addr as
2134 2134 * local address instead (which could
2135 2135 * also still be unspecified)
2136 2136 */
2137 2137 sin6->sin6_addr = connp->conn_bound_addr_v6;
2138 2138 }
2139 2139 sin6->sin6_port = connp->conn_lport;
2140 2140 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2141 2141 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2142 2142 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2143 2143 }
2144 2144 return (0);
2145 2145 }
2146 2146
2147 2147 int
2148 2148 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2149 2149 {
2150 2150 struct sockaddr_in *sin;
2151 2151 struct sockaddr_in6 *sin6;
2152 2152
2153 2153 if (connp->conn_family == AF_INET) {
2154 2154 if (*salenp < sizeof (sin_t))
2155 2155 return (EINVAL);
2156 2156
2157 2157 *salenp = sizeof (sin_t);
2158 2158 /* initialize */
2159 2159 sin = (sin_t *)sa;
2160 2160 *sin = sin_null;
2161 2161 sin->sin_family = AF_INET;
2162 2162 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2163 2163 sin->sin_port = connp->conn_fport;
2164 2164 } else {
2165 2165 if (*salenp < sizeof (sin6_t))
2166 2166 return (EINVAL);
2167 2167
2168 2168 *salenp = sizeof (sin6_t);
2169 2169 /* initialize */
2170 2170 sin6 = (sin6_t *)sa;
2171 2171 *sin6 = sin6_null;
2172 2172 sin6->sin6_family = AF_INET6;
2173 2173 sin6->sin6_addr = connp->conn_faddr_v6;
2174 2174 sin6->sin6_port = connp->conn_fport;
2175 2175 sin6->sin6_flowinfo = connp->conn_flowinfo;
2176 2176 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2177 2177 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2178 2178 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2179 2179 }
2180 2180 return (0);
2181 2181 }
2182 2182
2183 2183 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2184 2184 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2185 2185
2186 2186 /*
2187 2187 * Allocate and fill in conn_ht_iphc based on the current information
2188 2188 * in the conn.
2189 2189 * Normally used when we bind() and connect().
2190 2190 * Returns failure if can't allocate memory, or if there is a problem
2191 2191 * with a routing header/option.
2192 2192 *
2193 2193 * We allocate space for the transport header (ulp_hdr_len + extra) and
2194 2194 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2195 2195 * The extra is there for transports that want some spare room for future
2196 2196 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2197 2197 * excludes the extra part.
2198 2198 *
2199 2199 * We massage an routing option/header and store the ckecksum difference
2200 2200 * in conn_sum.
2201 2201 *
2202 2202 * Caller needs to update conn_wroff if desired.
2203 2203 */
2204 2204 int
2205 2205 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2206 2206 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2207 2207 {
2208 2208 ip_xmit_attr_t *ixa = connp->conn_ixa;
2209 2209 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
2210 2210 uint_t ip_hdr_length;
2211 2211 uchar_t *hdrs;
2212 2212 uint_t hdrs_len;
2213 2213
2214 2214 ASSERT(MUTEX_HELD(&connp->conn_lock));
2215 2215
2216 2216 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2217 2217 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2218 2218 /* In case of TX label and IP options it can be too much */
2219 2219 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2220 2220 /* Preserves existing TX errno for this */
2221 2221 return (EHOSTUNREACH);
2222 2222 }
2223 2223 } else {
2224 2224 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2225 2225 }
2226 2226 ixa->ixa_ip_hdr_length = ip_hdr_length;
2227 2227 hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2228 2228 ASSERT(hdrs_len != 0);
2229 2229
2230 2230 if (hdrs_len != connp->conn_ht_iphc_allocated) {
2231 2231 /* Allocate new before we free any old */
2232 2232 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2233 2233 if (hdrs == NULL)
2234 2234 return (ENOMEM);
2235 2235
2236 2236 if (connp->conn_ht_iphc != NULL) {
2237 2237 kmem_free(connp->conn_ht_iphc,
2238 2238 connp->conn_ht_iphc_allocated);
2239 2239 }
2240 2240 connp->conn_ht_iphc = hdrs;
2241 2241 connp->conn_ht_iphc_allocated = hdrs_len;
2242 2242 } else {
2243 2243 hdrs = connp->conn_ht_iphc;
2244 2244 }
2245 2245 hdrs_len -= extra;
2246 2246 connp->conn_ht_iphc_len = hdrs_len;
2247 2247
2248 2248 connp->conn_ht_ulp = hdrs + ip_hdr_length;
2249 2249 connp->conn_ht_ulp_len = ulp_hdr_length;
2250 2250
2251 2251 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2252 2252 ipha_t *ipha = (ipha_t *)hdrs;
2253 2253
2254 2254 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2255 2255 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2256 2256 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2257 2257 ipha->ipha_length = htons(hdrs_len);
2258 2258 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2259 2259 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2260 2260 else
2261 2261 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2262 2262
2263 2263 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2264 2264 connp->conn_sum = cksum_massage_options_v4(ipha,
2265 2265 connp->conn_netstack);
2266 2266 } else {
2267 2267 connp->conn_sum = 0;
2268 2268 }
2269 2269 } else {
2270 2270 ip6_t *ip6h = (ip6_t *)hdrs;
2271 2271
2272 2272 ip6h->ip6_src = *v6src;
2273 2273 ip6h->ip6_dst = *v6dst;
2274 2274 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2275 2275 flowinfo);
2276 2276 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2277 2277
2278 2278 if (ipp->ipp_fields & IPPF_RTHDR) {
2279 2279 connp->conn_sum = cksum_massage_options_v6(ip6h,
2280 2280 ip_hdr_length, connp->conn_netstack);
2281 2281
2282 2282 /*
2283 2283 * Verify that the first hop isn't a mapped address.
2284 2284 * Routers along the path need to do this verification
2285 2285 * for subsequent hops.
2286 2286 */
2287 2287 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2288 2288 return (EADDRNOTAVAIL);
2289 2289
2290 2290 } else {
2291 2291 connp->conn_sum = 0;
2292 2292 }
2293 2293 }
2294 2294 return (0);
2295 2295 }
2296 2296
2297 2297 /*
2298 2298 * Prepend a header template to data_mp based on the ip_pkt_t
2299 2299 * and the passed in source, destination and protocol.
2300 2300 *
2301 2301 * Returns failure if can't allocate memory, in which case data_mp is freed.
2302 2302 * We allocate space for the transport header (ulp_hdr_len) and
2303 2303 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2304 2304 *
2305 2305 * We massage an routing option/header and return the ckecksum difference
2306 2306 * in *sump. This is in host byte order.
2307 2307 *
2308 2308 * Caller needs to update conn_wroff if desired.
2309 2309 */
2310 2310 mblk_t *
2311 2311 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2312 2312 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2313 2313 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2314 2314 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2315 2315 {
2316 2316 uint_t ip_hdr_length;
2317 2317 uchar_t *hdrs;
2318 2318 uint_t hdrs_len;
2319 2319 mblk_t *mp;
2320 2320
2321 2321 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2322 2322 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2323 2323 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2324 2324 } else {
2325 2325 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2326 2326 }
2327 2327 hdrs_len = ip_hdr_length + ulp_hdr_length;
2328 2328 ASSERT(hdrs_len != 0);
2329 2329
2330 2330 ixa->ixa_ip_hdr_length = ip_hdr_length;
2331 2331
2332 2332 /* Can we prepend to data_mp? */
2333 2333 if (data_mp != NULL &&
2334 2334 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2335 2335 data_mp->b_datap->db_ref == 1) {
2336 2336 hdrs = data_mp->b_rptr - hdrs_len;
2337 2337 data_mp->b_rptr = hdrs;
2338 2338 mp = data_mp;
2339 2339 } else {
2340 2340 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2341 2341 if (mp == NULL) {
2342 2342 freemsg(data_mp);
2343 2343 *errorp = ENOMEM;
2344 2344 return (NULL);
2345 2345 }
2346 2346 mp->b_wptr = mp->b_datap->db_lim;
2347 2347 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2348 2348 mp->b_cont = data_mp;
2349 2349 }
2350 2350
2351 2351 /*
2352 2352 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2353 2353 * if PKTINFO (aka IPPF_ADDR) was set.
2354 2354 */
2355 2355 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2356 2356 ipha_t *ipha = (ipha_t *)hdrs;
2357 2357
2358 2358 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2359 2359 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2360 2360 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2361 2361 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2362 2362 ipha->ipha_length = htons(hdrs_len + data_length);
2363 2363 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2364 2364 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2365 2365 else
2366 2366 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2367 2367
2368 2368 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2369 2369 *sump = cksum_massage_options_v4(ipha,
2370 2370 ixa->ixa_ipst->ips_netstack);
2371 2371 } else {
2372 2372 *sump = 0;
2373 2373 }
2374 2374 } else {
2375 2375 ip6_t *ip6h = (ip6_t *)hdrs;
2376 2376
2377 2377 ip6h->ip6_src = *v6src;
2378 2378 ip6h->ip6_dst = *v6dst;
2379 2379 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2380 2380 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2381 2381
2382 2382 if (ipp->ipp_fields & IPPF_RTHDR) {
2383 2383 *sump = cksum_massage_options_v6(ip6h,
2384 2384 ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2385 2385
2386 2386 /*
2387 2387 * Verify that the first hop isn't a mapped address.
2388 2388 * Routers along the path need to do this verification
2389 2389 * for subsequent hops.
2390 2390 */
2391 2391 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2392 2392 *errorp = EADDRNOTAVAIL;
2393 2393 freemsg(mp);
2394 2394 return (NULL);
2395 2395 }
2396 2396 } else {
2397 2397 *sump = 0;
2398 2398 }
2399 2399 }
2400 2400 return (mp);
2401 2401 }
2402 2402
2403 2403 /*
2404 2404 * Massage a source route if any putting the first hop
2405 2405 * in ipha_dst. Compute a starting value for the checksum which
2406 2406 * takes into account that the original ipha_dst should be
2407 2407 * included in the checksum but that IP will include the
2408 2408 * first hop from the source route in the tcp checksum.
2409 2409 */
2410 2410 static uint32_t
2411 2411 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2412 2412 {
2413 2413 in_addr_t dst;
2414 2414 uint32_t cksum;
2415 2415
2416 2416 /* Get last hop then diff against first hop */
2417 2417 cksum = ip_massage_options(ipha, ns);
2418 2418 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2419 2419 dst = ipha->ipha_dst;
2420 2420 cksum -= ((dst >> 16) + (dst & 0xffff));
2421 2421 if ((int)cksum < 0)
2422 2422 cksum--;
2423 2423 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2424 2424 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2425 2425 ASSERT(cksum < 0x10000);
2426 2426 return (ntohs(cksum));
2427 2427 }
2428 2428
2429 2429 static uint32_t
2430 2430 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2431 2431 {
2432 2432 uint8_t *end;
2433 2433 ip6_rthdr_t *rth;
2434 2434 uint32_t cksum;
2435 2435
2436 2436 end = (uint8_t *)ip6h + ip_hdr_len;
2437 2437 rth = ip_find_rthdr_v6(ip6h, end);
2438 2438 if (rth == NULL)
2439 2439 return (0);
2440 2440
2441 2441 cksum = ip_massage_options_v6(ip6h, rth, ns);
2442 2442 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2443 2443 ASSERT(cksum < 0x10000);
2444 2444 return (ntohs(cksum));
2445 2445 }
2446 2446
2447 2447 /*
2448 2448 * ULPs that change the destination address need to call this for each
2449 2449 * change to discard any state about a previous destination that might
2450 2450 * have been multicast or multirt.
2451 2451 */
2452 2452 void
2453 2453 ip_attr_newdst(ip_xmit_attr_t *ixa)
2454 2454 {
2455 2455 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2456 2456 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2457 2457 IXAF_NO_LOOP_ZONEID_SET);
2458 2458 }
2459 2459
2460 2460 /*
2461 2461 * Determine the nexthop which will be used.
2462 2462 * Normally this is just the destination, but if a IPv4 source route, or
2463 2463 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2464 2464 * there.
2465 2465 */
2466 2466 void
2467 2467 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2468 2468 const in6_addr_t *dst, in6_addr_t *nexthop)
2469 2469 {
2470 2470 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2471 2471 *nexthop = *dst;
2472 2472 return;
2473 2473 }
2474 2474 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2475 2475 ipaddr_t v4dst;
2476 2476 ipaddr_t v4nexthop;
2477 2477
2478 2478 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2479 2479 v4nexthop = ip_pkt_source_route_v4(ipp);
2480 2480 if (v4nexthop == INADDR_ANY)
2481 2481 v4nexthop = v4dst;
2482 2482
2483 2483 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2484 2484 } else {
2485 2485 const in6_addr_t *v6nexthop;
2486 2486
2487 2487 v6nexthop = ip_pkt_source_route_v6(ipp);
2488 2488 if (v6nexthop == NULL)
2489 2489 v6nexthop = dst;
2490 2490
2491 2491 *nexthop = *v6nexthop;
2492 2492 }
2493 2493 }
2494 2494
2495 2495 /*
2496 2496 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2497 2497 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2498 2498 * case (connected latching is done in conn_connect).
2499 2499 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2500 2500 * set, but doesn't otherwise use the conn_t.
2501 2501 *
2502 2502 * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2503 2503 * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2504 2504 *
2505 2505 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2506 2506 * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2507 2507 *
2508 2508 * Updates laddrp and uinfo if they are non-NULL.
2509 2509 *
2510 2510 * TSOL notes: The callers if ip_attr_connect must check if the destination
2511 2511 * is different than before and in that case redo conn_update_label.
2512 2512 * The callers of conn_connect do not need that since conn_connect
2513 2513 * performs the conn_update_label.
2514 2514 */
2515 2515 int
2516 2516 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2517 2517 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2518 2518 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2519 2519 iulp_t *uinfo, uint32_t flags)
2520 2520 {
2521 2521 in6_addr_t laddr = *v6src;
2522 2522 int error;
2523 2523
2524 2524 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2525 2525
2526 2526 if (connp->conn_zone_is_global)
2527 2527 flags |= IPDF_ZONE_IS_GLOBAL;
2528 2528 else
2529 2529 flags &= ~IPDF_ZONE_IS_GLOBAL;
2530 2530
2531 2531 /*
2532 2532 * Lookup the route to determine a source address and the uinfo.
2533 2533 * If the ULP has a source route option then the caller will
2534 2534 * have set v6nexthop to be the first hop.
2535 2535 */
2536 2536 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2537 2537 ipaddr_t v4dst;
2538 2538 ipaddr_t v4src, v4nexthop;
2539 2539
2540 2540 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2541 2541 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2542 2542 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2543 2543
2544 2544 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2545 2545 flags &= ~IPDF_SELECT_SRC;
2546 2546 else
2547 2547 flags |= IPDF_SELECT_SRC;
2548 2548
2549 2549 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2550 2550 uinfo, flags, connp->conn_mac_mode);
2551 2551 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2552 2552 } else {
2553 2553 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2554 2554 flags &= ~IPDF_SELECT_SRC;
2555 2555 else
2556 2556 flags |= IPDF_SELECT_SRC;
2557 2557
2558 2558 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2559 2559 uinfo, flags, connp->conn_mac_mode);
2560 2560 }
2561 2561 /* Pass out some address even if we hit a RTF_REJECT etc */
2562 2562 if (laddrp != NULL)
2563 2563 *laddrp = laddr;
2564 2564
2565 2565 if (error != 0)
2566 2566 return (error);
2567 2567
2568 2568 if (flags & IPDF_IPSEC) {
2569 2569 /*
2570 2570 * Set any IPsec policy in ixa. Routine also looks at ULP
2571 2571 * ports.
2572 2572 */
2573 2573 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2574 2574 }
2575 2575 return (0);
2576 2576 }
2577 2577
2578 2578 /*
2579 2579 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2580 2580 * Assumes that conn_faddr and conn_fport are already set. As such it is not
2581 2581 * usable for SCTP, since SCTP has multiple faddrs.
2582 2582 *
2583 2583 * Caller must hold conn_lock to provide atomic constency between the
2584 2584 * conn_t's addresses and the ixa.
2585 2585 * NOTE: this function drops and reaquires conn_lock since it can't be
2586 2586 * held across ip_attr_connect/ip_set_destination.
2587 2587 *
2588 2588 * The caller needs to handle inserting in the receive-side fanout when
2589 2589 * appropriate after conn_connect returns.
2590 2590 */
2591 2591 int
2592 2592 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2593 2593 {
2594 2594 ip_xmit_attr_t *ixa = connp->conn_ixa;
2595 2595 in6_addr_t nexthop;
2596 2596 in6_addr_t saddr, faddr;
2597 2597 in_port_t fport;
2598 2598 int error;
2599 2599
2600 2600 ASSERT(MUTEX_HELD(&connp->conn_lock));
2601 2601
2602 2602 if (connp->conn_ipversion == IPV4_VERSION)
2603 2603 ixa->ixa_flags |= IXAF_IS_IPV4;
2604 2604 else
2605 2605 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2606 2606
2607 2607 /* We do IPsec latching below - hence no caching in ip_attr_connect */
2608 2608 flags &= ~IPDF_IPSEC;
2609 2609
2610 2610 /* In case we had previously done an ip_attr_connect */
2611 2611 ip_attr_newdst(ixa);
2612 2612
2613 2613 /*
2614 2614 * Determine the nexthop and copy the addresses before dropping
2615 2615 * conn_lock.
2616 2616 */
2617 2617 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2618 2618 &connp->conn_faddr_v6, &nexthop);
2619 2619 saddr = connp->conn_saddr_v6;
2620 2620 faddr = connp->conn_faddr_v6;
2621 2621 fport = connp->conn_fport;
2622 2622
2623 2623 mutex_exit(&connp->conn_lock);
2624 2624 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2625 2625 &saddr, uinfo, flags | IPDF_VERIFY_DST);
2626 2626 mutex_enter(&connp->conn_lock);
2627 2627
2628 2628 /* Could have changed even if an error */
2629 2629 connp->conn_saddr_v6 = saddr;
2630 2630 if (error != 0)
2631 2631 return (error);
2632 2632
2633 2633 /*
2634 2634 * Check whether Trusted Solaris policy allows communication with this
2635 2635 * host, and pretend that the destination is unreachable if not.
2636 2636 * Compute any needed label and place it in ipp_label_v4/v6.
2637 2637 *
2638 2638 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2639 2639 * the packet.
2640 2640 *
2641 2641 * TSOL Note: Any concurrent threads would pick a different ixa
2642 2642 * (and ipp if they are to change the ipp) so we
2643 2643 * don't have to worry about concurrent threads.
2644 2644 */
2645 2645 if (is_system_labeled()) {
2646 2646 if (connp->conn_mlp_type != mlptSingle)
2647 2647 return (ECONNREFUSED);
2648 2648
2649 2649 /*
2650 2650 * conn_update_label will set ipp_label* which will later
2651 2651 * be used by conn_build_hdr_template.
2652 2652 */
2653 2653 error = conn_update_label(connp, ixa,
2654 2654 &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2655 2655 if (error != 0)
2656 2656 return (error);
2657 2657 }
2658 2658
2659 2659 /*
2660 2660 * Ensure that we match on the selected local address.
2661 2661 * This overrides conn_laddr in the case we had earlier bound to a
2662 2662 * multicast or broadcast address.
2663 2663 */
2664 2664 connp->conn_laddr_v6 = connp->conn_saddr_v6;
2665 2665
2666 2666 /*
2667 2667 * Allow setting new policies.
2668 2668 * The addresses/ports are already set, thus the IPsec policy calls
2669 2669 * can handle their passed-in conn's.
2670 2670 */
2671 2671 connp->conn_policy_cached = B_FALSE;
2672 2672
2673 2673 /*
2674 2674 * Cache IPsec policy in this conn. If we have per-socket policy,
2675 2675 * we'll cache that. If we don't, we'll inherit global policy.
2676 2676 *
2677 2677 * This is done before the caller inserts in the receive-side fanout.
2678 2678 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2679 2679 * for connections where we don't have a policy. This is to prevent
2680 2680 * global policy lookups in the inbound path.
2681 2681 *
2682 2682 * If we insert before we set conn_policy_cached,
2683 2683 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2684 2684 * because global policy cound be non-empty. We normally call
2685 2685 * ipsec_check_policy() for conn_policy_cached connections only if
2686 2686 * conn_in_enforce_policy is set. But in this case,
2687 2687 * conn_policy_cached can get set anytime since we made the
2688 2688 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2689 2689 * called, which will make the above assumption false. Thus, we
2690 2690 * need to insert after we set conn_policy_cached.
2691 2691 */
2692 2692 error = ipsec_conn_cache_policy(connp,
2693 2693 connp->conn_ipversion == IPV4_VERSION);
2694 2694 if (error != 0)
2695 2695 return (error);
2696 2696
2697 2697 /*
2698 2698 * We defer to do LSO check until here since now we have better idea
2699 2699 * whether IPsec is present. If the underlying ill is LSO capable,
2700 2700 * copy its capability in so the ULP can decide whether to enable LSO
2701 2701 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2702 2702 * claim LSO for IPv6.
2703 2703 *
2704 2704 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2705 2705 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2706 2706 */
2707 2707 ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2708 2708
2709 2709 ASSERT(ixa->ixa_ire != NULL);
2710 2710 if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2711 2711 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2712 2712 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2713 2713 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2714 2714 (ixa->ixa_nce != NULL) &&
2715 2715 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2716 2716 ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2717 2717 ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2718 2718 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2719 2719 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2720 2720 }
2721 2721
2722 2722 /* Check whether ZEROCOPY capability is usable for this connection. */
2723 2723 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2724 2724
2725 2725 if ((flags & IPDF_ZCOPY) &&
2726 2726 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2727 2727 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2728 2728 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2729 2729 (ixa->ixa_nce != NULL) &&
2730 2730 ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2731 2731 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2732 2732 }
2733 2733 return (0);
2734 2734 }
2735 2735
2736 2736 /*
2737 2737 * Predicates to check if the addresses match conn_last*
2738 2738 */
2739 2739
2740 2740 /*
2741 2741 * Compare the conn against an address.
2742 2742 * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2743 2743 */
2744 2744 boolean_t
2745 2745 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2746 2746 {
2747 2747 ASSERT(connp->conn_family == AF_INET);
2748 2748 return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2749 2749 sin->sin_port == connp->conn_lastdstport);
2750 2750 }
2751 2751
2752 2752 /*
2753 2753 * Compare, including for mapped addresses
2754 2754 */
2755 2755 boolean_t
2756 2756 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2757 2757 {
2758 2758 return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2759 2759 sin6->sin6_port == connp->conn_lastdstport &&
2760 2760 sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2761 2761 sin6->sin6_scope_id == connp->conn_lastscopeid);
2762 2762 }
2763 2763
2764 2764 /*
2765 2765 * Compute a label and place it in the ip_packet_t.
2766 2766 * Handles IPv4 and IPv6.
2767 2767 * The caller should have a correct ixa_tsl and ixa_zoneid and have
2768 2768 * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2769 2769 * has been called.
2770 2770 */
2771 2771 int
2772 2772 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2773 2773 const in6_addr_t *v6dst, ip_pkt_t *ipp)
2774 2774 {
2775 2775 int err;
2776 2776 ipaddr_t v4dst;
2777 2777
2778 2778 if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2779 2779 uchar_t opt_storage[IP_MAX_OPT_LENGTH];
2780 2780
2781 2781 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2782 2782
2783 2783 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2784 2784 v4dst, opt_storage, ixa->ixa_ipst);
2785 2785 if (err == 0) {
2786 2786 /* Length contained in opt_storage[IPOPT_OLEN] */
2787 2787 err = optcom_pkt_set(opt_storage,
2788 2788 opt_storage[IPOPT_OLEN],
2789 2789 (uchar_t **)&ipp->ipp_label_v4,
2790 2790 &ipp->ipp_label_len_v4);
2791 2791 }
2792 2792 if (err != 0) {
2793 2793 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2794 2794 char *, "conn(1) failed to update options(2) "
2795 2795 "on ixa(3)",
2796 2796 conn_t *, connp, char *, opt_storage,
2797 2797 ip_xmit_attr_t *, ixa);
2798 2798 }
2799 2799 if (ipp->ipp_label_len_v4 != 0)
2800 2800 ipp->ipp_fields |= IPPF_LABEL_V4;
2801 2801 else
2802 2802 ipp->ipp_fields &= ~IPPF_LABEL_V4;
2803 2803 } else {
2804 2804 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
2805 2805 uint_t optlen;
2806 2806
2807 2807 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2808 2808 v6dst, opt_storage, ixa->ixa_ipst);
2809 2809 if (err == 0) {
2810 2810 /*
2811 2811 * Note that ipp_label_v6 is just the option - not
2812 2812 * the hopopts extension header.
2813 2813 *
2814 2814 * Length contained in opt_storage[IPOPT_OLEN], but
2815 2815 * that doesn't include the two byte options header.
2816 2816 */
2817 2817 optlen = opt_storage[IPOPT_OLEN];
2818 2818 if (optlen != 0)
2819 2819 optlen += 2;
2820 2820
2821 2821 err = optcom_pkt_set(opt_storage, optlen,
2822 2822 (uchar_t **)&ipp->ipp_label_v6,
2823 2823 &ipp->ipp_label_len_v6);
2824 2824 }
2825 2825 if (err != 0) {
2826 2826 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2827 2827 char *, "conn(1) failed to update options(2) "
2828 2828 "on ixa(3)",
2829 2829 conn_t *, connp, char *, opt_storage,
2830 2830 ip_xmit_attr_t *, ixa);
2831 2831 }
2832 2832 if (ipp->ipp_label_len_v6 != 0)
2833 2833 ipp->ipp_fields |= IPPF_LABEL_V6;
2834 2834 else
2835 2835 ipp->ipp_fields &= ~IPPF_LABEL_V6;
2836 2836 }
2837 2837 return (err);
2838 2838 }
2839 2839
2840 2840 /*
2841 2841 * Inherit all options settings from the parent/listener to the eager.
2842 2842 * Returns zero on success; ENOMEM if memory allocation failed.
2843 2843 *
2844 2844 * We assume that the eager has not had any work done i.e., the conn_ixa
2845 2845 * and conn_xmit_ipp are all zero.
2846 2846 * Furthermore we assume that no other thread can access the eager (because
2847 2847 * it isn't inserted in any fanout list).
2848 2848 */
2849 2849 int
2850 2850 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2851 2851 {
2852 2852 cred_t *credp;
2853 2853 int err;
2854 2854 void *notify_cookie;
2855 2855 uint32_t xmit_hint;
2856 2856
2857 2857 econnp->conn_family = lconnp->conn_family;
2858 2858 econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2859 2859 econnp->conn_wq = lconnp->conn_wq;
2860 2860 econnp->conn_rq = lconnp->conn_rq;
2861 2861
2862 2862 /*
2863 2863 * Make a safe copy of the transmit attributes.
2864 2864 * conn_connect will later be used by the caller to setup the ire etc.
2865 2865 */
2866 2866 ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2867 2867 ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2868 2868 ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2869 2869 ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2870 2870
2871 2871 /* Preserve ixa_notify_cookie and xmit_hint */
2872 2872 notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2873 2873 xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2874 2874 ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2875 2875 econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2876 2876 econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2877 2877
2878 2878 econnp->conn_bound_if = lconnp->conn_bound_if;
2879 2879 econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2880 2880
2881 2881 /* Inherit all RECV options */
2882 2882 econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2883 2883
2884 2884 err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2885 2885 KM_NOSLEEP);
2886 2886 if (err != 0)
2887 2887 return (err);
2888 2888
2889 2889 econnp->conn_zoneid = lconnp->conn_zoneid;
2890 2890 econnp->conn_allzones = lconnp->conn_allzones;
2891 2891
2892 2892 /* This is odd. Pick a flowlabel for each connection instead? */
2893 2893 econnp->conn_flowinfo = lconnp->conn_flowinfo;
2894 2894
2895 2895 econnp->conn_default_ttl = lconnp->conn_default_ttl;
2896 2896
2897 2897 /*
2898 2898 * TSOL: tsol_input_proc() needs the eager's cred before the
2899 2899 * eager is accepted
2900 2900 */
2901 2901 ASSERT(lconnp->conn_cred != NULL);
2902 2902 econnp->conn_cred = credp = lconnp->conn_cred;
2903 2903 crhold(credp);
2904 2904 econnp->conn_cpid = lconnp->conn_cpid;
2905 2905 econnp->conn_open_time = ddi_get_lbolt64();
2906 2906
2907 2907 /*
2908 2908 * Cache things in the ixa without any refhold.
2909 2909 * Listener might not have set up ixa_cred
2910 2910 */
2911 2911 ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2912 2912 econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2913 2913 econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2914 2914 if (is_system_labeled())
2915 2915 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2916 2916
2917 2917 /*
2918 2918 * If the caller has the process-wide flag set, then default to MAC
2919 2919 * exempt mode. This allows read-down to unlabeled hosts.
2920 2920 */
2921 2921 if (getpflags(NET_MAC_AWARE, credp) != 0)
2922 2922 econnp->conn_mac_mode = CONN_MAC_AWARE;
2923 2923
2924 2924 econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2925 2925
2926 2926 /*
2927 2927 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2928 2928 * via soaccept()->soinheritoptions() which essentially applies
2929 2929 * all the listener options to the new connection. The options that we
2930 2930 * need to take care of are:
2931 2931 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2932 2932 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2933 2933 * SO_SNDBUF, SO_RCVBUF.
2934 2934 *
2935 2935 * SO_RCVBUF: conn_rcvbuf is set.
2936 2936 * SO_SNDBUF: conn_sndbuf is set.
2937 2937 */
2938 2938
2939 2939 /* Could we define a struct and use a struct copy for this? */
2940 2940 econnp->conn_sndbuf = lconnp->conn_sndbuf;
2941 2941 econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2942 2942 econnp->conn_sndlowat = lconnp->conn_sndlowat;
2943 2943 econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2944 2944 econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2945 2945 econnp->conn_oobinline = lconnp->conn_oobinline;
2946 2946 econnp->conn_debug = lconnp->conn_debug;
2947 2947 econnp->conn_keepalive = lconnp->conn_keepalive;
2948 2948 econnp->conn_linger = lconnp->conn_linger;
2949 2949 econnp->conn_lingertime = lconnp->conn_lingertime;
2950 2950
2951 2951 /* Set the IP options */
2952 2952 econnp->conn_broadcast = lconnp->conn_broadcast;
2953 2953 econnp->conn_useloopback = lconnp->conn_useloopback;
2954 2954 econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2955 2955 return (0);
2956 2956 }
|
↓ open down ↓ |
2956 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX