1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Joyent, Inc.
25 */
26 /* Copyright (c) 1990 Mentat Inc. */
27
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #define _SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/xti_inet.h>
34 #include <sys/ucred.h>
35 #include <sys/zone.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/atomic.h>
41 #include <sys/policy.h>
42
43 #include <sys/systm.h>
44 #include <sys/param.h>
45 #include <sys/kmem.h>
46 #include <sys/sdt.h>
47 #include <sys/socket.h>
48 #include <sys/ethernet.h>
49 #include <sys/mac.h>
50 #include <net/if.h>
51 #include <net/if_types.h>
52 #include <net/if_arp.h>
53 #include <net/route.h>
54 #include <sys/sockio.h>
55 #include <netinet/in.h>
56 #include <net/if_dl.h>
57
58 #include <inet/common.h>
59 #include <inet/mi.h>
60 #include <inet/mib2.h>
61 #include <inet/nd.h>
62 #include <inet/arp.h>
63 #include <inet/snmpcom.h>
64 #include <inet/kstatcom.h>
65
66 #include <netinet/igmp_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet/icmp6.h>
69 #include <netinet/sctp.h>
70
71 #include <inet/ip.h>
72 #include <inet/ip_impl.h>
73 #include <inet/ip6.h>
74 #include <inet/ip6_asp.h>
75 #include <inet/tcp.h>
76 #include <inet/ip_multi.h>
77 #include <inet/ip_if.h>
78 #include <inet/ip_ire.h>
79 #include <inet/ip_ftable.h>
80 #include <inet/ip_rts.h>
81 #include <inet/optcom.h>
82 #include <inet/ip_ndp.h>
83 #include <inet/ip_listutils.h>
84 #include <netinet/igmp.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/udp.h>
87 #include <inet/ipp_common.h>
88
89 #include <net/pfkeyv2.h>
90 #include <inet/sadb.h>
91 #include <inet/ipsec_impl.h>
92 #include <inet/ipdrop.h>
93 #include <inet/ip_netinfo.h>
94
95 #include <inet/ipclassifier.h>
96 #include <inet/sctp_ip.h>
97 #include <inet/sctp/sctp_impl.h>
98 #include <inet/udp_impl.h>
99 #include <sys/sunddi.h>
100
101 #include <sys/tsol/label.h>
102 #include <sys/tsol/tnet.h>
103
104 /*
105 * Return how much size is needed for the different ancillary data items
106 */
107 uint_t
108 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
109 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
110 {
111 uint_t ancil_size;
112 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
113
114 /*
115 * If IP_RECVDSTADDR is set we include the destination IP
116 * address as an option. With IP_RECVOPTS we include all
117 * the IP options.
118 */
119 ancil_size = 0;
120 if (recv_ancillary.crb_recvdstaddr &&
121 (ira->ira_flags & IRAF_IS_IPV4)) {
122 ancil_size += sizeof (struct T_opthdr) +
123 sizeof (struct in_addr);
124 IP_STAT(ipst, conn_in_recvdstaddr);
125 }
126
127 /*
128 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
129 * are different
130 */
131 if (recv_ancillary.crb_ip_recvpktinfo &&
132 connp->conn_family == AF_INET) {
133 ancil_size += sizeof (struct T_opthdr) +
134 sizeof (struct in_pktinfo);
135 IP_STAT(ipst, conn_in_recvpktinfo);
136 }
137
138 if ((recv_ancillary.crb_recvopts) &&
139 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
140 ancil_size += sizeof (struct T_opthdr) +
141 ipp->ipp_ipv4_options_len;
142 IP_STAT(ipst, conn_in_recvopts);
143 }
144
145 if (recv_ancillary.crb_recvslla) {
146 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
147 ill_t *ill;
148
149 /* Make sure ira_l2src is setup if not already */
150 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
151 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
152 ipst);
153 if (ill != NULL) {
154 ip_setl2src(mp, ira, ill);
155 ill_refrele(ill);
156 }
157 }
158 ancil_size += sizeof (struct T_opthdr) +
159 sizeof (struct sockaddr_dl);
160 IP_STAT(ipst, conn_in_recvslla);
161 }
162
163 if (recv_ancillary.crb_recvif) {
164 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
165 IP_STAT(ipst, conn_in_recvif);
166 }
167
168 /*
169 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
170 * are different
171 */
172 if (recv_ancillary.crb_ip_recvpktinfo &&
173 connp->conn_family == AF_INET6) {
174 ancil_size += sizeof (struct T_opthdr) +
175 sizeof (struct in6_pktinfo);
176 IP_STAT(ipst, conn_in_recvpktinfo);
177 }
178
179 if (recv_ancillary.crb_ipv6_recvhoplimit) {
180 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
181 IP_STAT(ipst, conn_in_recvhoplimit);
182 }
183
184 if (recv_ancillary.crb_ipv6_recvtclass) {
185 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
186 IP_STAT(ipst, conn_in_recvtclass);
187 }
188
189 if (recv_ancillary.crb_ipv6_recvhopopts &&
190 (ipp->ipp_fields & IPPF_HOPOPTS)) {
191 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
192 IP_STAT(ipst, conn_in_recvhopopts);
193 }
194 /*
195 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
196 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
197 * options that appear before a routing header.
198 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
199 */
200 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
201 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
202 (recv_ancillary.crb_ipv6_recvdstopts &&
203 recv_ancillary.crb_ipv6_recvrthdr)) {
204 ancil_size += sizeof (struct T_opthdr) +
205 ipp->ipp_rthdrdstoptslen;
206 IP_STAT(ipst, conn_in_recvrthdrdstopts);
207 }
208 }
209 if ((recv_ancillary.crb_ipv6_recvrthdr) &&
210 (ipp->ipp_fields & IPPF_RTHDR)) {
211 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
212 IP_STAT(ipst, conn_in_recvrthdr);
213 }
214 if ((recv_ancillary.crb_ipv6_recvdstopts ||
215 recv_ancillary.crb_old_ipv6_recvdstopts) &&
216 (ipp->ipp_fields & IPPF_DSTOPTS)) {
217 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
218 IP_STAT(ipst, conn_in_recvdstopts);
219 }
220 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
221 ancil_size += sizeof (struct T_opthdr) +
222 ucredminsize(ira->ira_cred);
223 IP_STAT(ipst, conn_in_recvucred);
224 }
225
226 /*
227 * If SO_TIMESTAMP is set allocate the appropriate sized
228 * buffer. Since gethrestime() expects a pointer aligned
229 * argument, we allocate space necessary for extra
230 * alignment (even though it might not be used).
231 */
232 if (recv_ancillary.crb_timestamp) {
233 ancil_size += sizeof (struct T_opthdr) +
234 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
235 IP_STAT(ipst, conn_in_timestamp);
236 }
237
238 /*
239 * If IP_RECVTTL is set allocate the appropriate sized buffer
240 */
241 if (recv_ancillary.crb_recvttl &&
242 (ira->ira_flags & IRAF_IS_IPV4)) {
243 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
244 IP_STAT(ipst, conn_in_recvttl);
245 }
246
247 return (ancil_size);
248 }
249
250 /*
251 * Lay down the ancillary data items at "ancil_buf".
252 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
253 * large buffer - ancil_size.
254 */
255 void
256 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
257 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
258 {
259 /*
260 * Copy in destination address before options to avoid
261 * any padding issues.
262 */
263 if (recv_ancillary.crb_recvdstaddr &&
264 (ira->ira_flags & IRAF_IS_IPV4)) {
265 struct T_opthdr *toh;
266 ipaddr_t *dstptr;
267
268 toh = (struct T_opthdr *)ancil_buf;
269 toh->level = IPPROTO_IP;
270 toh->name = IP_RECVDSTADDR;
271 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
272 toh->status = 0;
273 ancil_buf += sizeof (struct T_opthdr);
274 dstptr = (ipaddr_t *)ancil_buf;
275 *dstptr = ipp->ipp_addr_v4;
276 ancil_buf += sizeof (ipaddr_t);
277 ancil_size -= toh->len;
278 }
279
280 /*
281 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
282 * are different
283 */
284 if (recv_ancillary.crb_ip_recvpktinfo &&
285 connp->conn_family == AF_INET) {
286 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
287 struct T_opthdr *toh;
288 struct in_pktinfo *pktinfop;
289 ill_t *ill;
290 ipif_t *ipif;
291
292 toh = (struct T_opthdr *)ancil_buf;
293 toh->level = IPPROTO_IP;
294 toh->name = IP_PKTINFO;
295 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
296 toh->status = 0;
297 ancil_buf += sizeof (struct T_opthdr);
298 pktinfop = (struct in_pktinfo *)ancil_buf;
299
300 pktinfop->ipi_ifindex = ira->ira_ruifindex;
301 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
302
303 /* Find a good address to report */
304 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
305 if (ill != NULL) {
306 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
307 if (ipif != NULL) {
308 pktinfop->ipi_spec_dst.s_addr =
309 ipif->ipif_lcl_addr;
310 ipif_refrele(ipif);
311 }
312 ill_refrele(ill);
313 }
314 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
315 ancil_buf += sizeof (struct in_pktinfo);
316 ancil_size -= toh->len;
317 }
318
319 if ((recv_ancillary.crb_recvopts) &&
320 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
321 struct T_opthdr *toh;
322
323 toh = (struct T_opthdr *)ancil_buf;
324 toh->level = IPPROTO_IP;
325 toh->name = IP_RECVOPTS;
326 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
327 toh->status = 0;
328 ancil_buf += sizeof (struct T_opthdr);
329 bcopy(ipp->ipp_ipv4_options, ancil_buf,
330 ipp->ipp_ipv4_options_len);
331 ancil_buf += ipp->ipp_ipv4_options_len;
332 ancil_size -= toh->len;
333 }
334
335 if (recv_ancillary.crb_recvslla) {
336 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
337 struct T_opthdr *toh;
338 struct sockaddr_dl *dstptr;
339 ill_t *ill;
340 int alen = 0;
341
342 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
343 if (ill != NULL)
344 alen = ill->ill_phys_addr_length;
345
346 /*
347 * For loopback multicast and broadcast the packet arrives
348 * with ira_ruifdex being the physical interface, but
349 * ira_l2src is all zero since ip_postfrag_loopback doesn't
350 * know our l2src. We don't report the address in that case.
351 */
352 if (ira->ira_flags & IRAF_LOOPBACK)
353 alen = 0;
354
355 toh = (struct T_opthdr *)ancil_buf;
356 toh->level = IPPROTO_IP;
357 toh->name = IP_RECVSLLA;
358 toh->len = sizeof (struct T_opthdr) +
359 sizeof (struct sockaddr_dl);
360 toh->status = 0;
361 ancil_buf += sizeof (struct T_opthdr);
362 dstptr = (struct sockaddr_dl *)ancil_buf;
363 dstptr->sdl_family = AF_LINK;
364 dstptr->sdl_index = ira->ira_ruifindex;
365 if (ill != NULL)
366 dstptr->sdl_type = ill->ill_type;
367 else
368 dstptr->sdl_type = 0;
369 dstptr->sdl_nlen = 0;
370 dstptr->sdl_alen = alen;
371 dstptr->sdl_slen = 0;
372 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
373 ancil_buf += sizeof (struct sockaddr_dl);
374 ancil_size -= toh->len;
375 if (ill != NULL)
376 ill_refrele(ill);
377 }
378
379 if (recv_ancillary.crb_recvif) {
380 struct T_opthdr *toh;
381 uint_t *dstptr;
382
383 toh = (struct T_opthdr *)ancil_buf;
384 toh->level = IPPROTO_IP;
385 toh->name = IP_RECVIF;
386 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
387 toh->status = 0;
388 ancil_buf += sizeof (struct T_opthdr);
389 dstptr = (uint_t *)ancil_buf;
390 *dstptr = ira->ira_ruifindex;
391 ancil_buf += sizeof (uint_t);
392 ancil_size -= toh->len;
393 }
394
395 /*
396 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
397 * are different
398 */
399 if (recv_ancillary.crb_ip_recvpktinfo &&
400 connp->conn_family == AF_INET6) {
401 struct T_opthdr *toh;
402 struct in6_pktinfo *pkti;
403
404 toh = (struct T_opthdr *)ancil_buf;
405 toh->level = IPPROTO_IPV6;
406 toh->name = IPV6_PKTINFO;
407 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
408 toh->status = 0;
409 ancil_buf += sizeof (struct T_opthdr);
410 pkti = (struct in6_pktinfo *)ancil_buf;
411 if (ira->ira_flags & IRAF_IS_IPV4) {
412 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
413 &pkti->ipi6_addr);
414 } else {
415 pkti->ipi6_addr = ipp->ipp_addr;
416 }
417 pkti->ipi6_ifindex = ira->ira_ruifindex;
418
419 ancil_buf += sizeof (*pkti);
420 ancil_size -= toh->len;
421 }
422 if (recv_ancillary.crb_ipv6_recvhoplimit) {
423 struct T_opthdr *toh;
424
425 toh = (struct T_opthdr *)ancil_buf;
426 toh->level = IPPROTO_IPV6;
427 toh->name = IPV6_HOPLIMIT;
428 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
429 toh->status = 0;
430 ancil_buf += sizeof (struct T_opthdr);
431 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
432 ancil_buf += sizeof (uint_t);
433 ancil_size -= toh->len;
434 }
435 if (recv_ancillary.crb_ipv6_recvtclass) {
436 struct T_opthdr *toh;
437
438 toh = (struct T_opthdr *)ancil_buf;
439 toh->level = IPPROTO_IPV6;
440 toh->name = IPV6_TCLASS;
441 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
442 toh->status = 0;
443 ancil_buf += sizeof (struct T_opthdr);
444
445 if (ira->ira_flags & IRAF_IS_IPV4)
446 *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
447 else
448 *(uint_t *)ancil_buf = ipp->ipp_tclass;
449 ancil_buf += sizeof (uint_t);
450 ancil_size -= toh->len;
451 }
452 if (recv_ancillary.crb_ipv6_recvhopopts &&
453 (ipp->ipp_fields & IPPF_HOPOPTS)) {
454 struct T_opthdr *toh;
455
456 toh = (struct T_opthdr *)ancil_buf;
457 toh->level = IPPROTO_IPV6;
458 toh->name = IPV6_HOPOPTS;
459 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
460 toh->status = 0;
461 ancil_buf += sizeof (struct T_opthdr);
462 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
463 ancil_buf += ipp->ipp_hopoptslen;
464 ancil_size -= toh->len;
465 }
466 /*
467 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
468 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
469 * options that appear before a routing header.
470 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
471 */
472 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
473 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
474 (recv_ancillary.crb_ipv6_recvdstopts &&
475 recv_ancillary.crb_ipv6_recvrthdr)) {
476 struct T_opthdr *toh;
477
478 toh = (struct T_opthdr *)ancil_buf;
479 toh->level = IPPROTO_IPV6;
480 toh->name = IPV6_DSTOPTS;
481 toh->len = sizeof (struct T_opthdr) +
482 ipp->ipp_rthdrdstoptslen;
483 toh->status = 0;
484 ancil_buf += sizeof (struct T_opthdr);
485 bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
486 ipp->ipp_rthdrdstoptslen);
487 ancil_buf += ipp->ipp_rthdrdstoptslen;
488 ancil_size -= toh->len;
489 }
490 }
491 if (recv_ancillary.crb_ipv6_recvrthdr &&
492 (ipp->ipp_fields & IPPF_RTHDR)) {
493 struct T_opthdr *toh;
494
495 toh = (struct T_opthdr *)ancil_buf;
496 toh->level = IPPROTO_IPV6;
497 toh->name = IPV6_RTHDR;
498 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
499 toh->status = 0;
500 ancil_buf += sizeof (struct T_opthdr);
501 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
502 ancil_buf += ipp->ipp_rthdrlen;
503 ancil_size -= toh->len;
504 }
505 if ((recv_ancillary.crb_ipv6_recvdstopts ||
506 recv_ancillary.crb_old_ipv6_recvdstopts) &&
507 (ipp->ipp_fields & IPPF_DSTOPTS)) {
508 struct T_opthdr *toh;
509
510 toh = (struct T_opthdr *)ancil_buf;
511 toh->level = IPPROTO_IPV6;
512 toh->name = IPV6_DSTOPTS;
513 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
514 toh->status = 0;
515 ancil_buf += sizeof (struct T_opthdr);
516 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
517 ancil_buf += ipp->ipp_dstoptslen;
518 ancil_size -= toh->len;
519 }
520
521 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
522 struct T_opthdr *toh;
523 cred_t *rcr = connp->conn_cred;
524
525 toh = (struct T_opthdr *)ancil_buf;
526 toh->level = SOL_SOCKET;
527 toh->name = SCM_UCRED;
528 toh->len = sizeof (struct T_opthdr) +
529 ucredminsize(ira->ira_cred);
530 toh->status = 0;
531 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
532 ancil_buf += toh->len;
533 ancil_size -= toh->len;
534 }
535 if (recv_ancillary.crb_timestamp) {
536 struct T_opthdr *toh;
537
538 toh = (struct T_opthdr *)ancil_buf;
539 toh->level = SOL_SOCKET;
540 toh->name = SCM_TIMESTAMP;
541 toh->len = sizeof (struct T_opthdr) +
542 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
543 toh->status = 0;
544 ancil_buf += sizeof (struct T_opthdr);
545 /* Align for gethrestime() */
546 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
547 sizeof (intptr_t));
548 gethrestime((timestruc_t *)ancil_buf);
549 ancil_buf = (uchar_t *)toh + toh->len;
550 ancil_size -= toh->len;
551 }
552
553 /*
554 * CAUTION:
555 * Due to aligment issues
556 * Processing of IP_RECVTTL option
557 * should always be the last. Adding
558 * any option processing after this will
559 * cause alignment panic.
560 */
561 if (recv_ancillary.crb_recvttl &&
562 (ira->ira_flags & IRAF_IS_IPV4)) {
563 struct T_opthdr *toh;
564 uint8_t *dstptr;
565
566 toh = (struct T_opthdr *)ancil_buf;
567 toh->level = IPPROTO_IP;
568 toh->name = IP_RECVTTL;
569 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
570 toh->status = 0;
571 ancil_buf += sizeof (struct T_opthdr);
572 dstptr = (uint8_t *)ancil_buf;
573 *dstptr = ipp->ipp_hoplimit;
574 ancil_buf += sizeof (uint8_t);
575 ancil_size -= toh->len;
576 }
577
578 /* Consumed all of allocated space */
579 ASSERT(ancil_size == 0);
580
581 }
582
583 /*
584 * This routine retrieves the current status of socket options.
585 * It returns the size of the option retrieved, or -1.
586 */
587 int
588 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
589 uchar_t *ptr)
590 {
591 int *i1 = (int *)ptr;
592 conn_t *connp = coa->coa_connp;
593 ip_xmit_attr_t *ixa = coa->coa_ixa;
594 ip_pkt_t *ipp = coa->coa_ipp;
595 ip_stack_t *ipst = ixa->ixa_ipst;
596 uint_t len;
597
598 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
599
600 switch (level) {
601 case SOL_SOCKET:
602 switch (name) {
603 case SO_DEBUG:
604 *i1 = connp->conn_debug ? SO_DEBUG : 0;
605 break; /* goto sizeof (int) option return */
606 case SO_KEEPALIVE:
607 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
608 break;
609 case SO_LINGER: {
610 struct linger *lgr = (struct linger *)ptr;
611
612 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
613 lgr->l_linger = connp->conn_lingertime;
614 }
615 return (sizeof (struct linger));
616
617 case SO_OOBINLINE:
618 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
619 break;
620 case SO_REUSEADDR:
621 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
622 break; /* goto sizeof (int) option return */
623 case SO_REUSEPORT:
624 *i1 = connp->conn_reuseport;
625 break; /* goto sizeof (int) option return */
626 case SO_TYPE:
627 *i1 = connp->conn_so_type;
628 break; /* goto sizeof (int) option return */
629 case SO_DONTROUTE:
630 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
631 SO_DONTROUTE : 0;
632 break; /* goto sizeof (int) option return */
633 case SO_USELOOPBACK:
634 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
635 break; /* goto sizeof (int) option return */
636 case SO_BROADCAST:
637 *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
638 break; /* goto sizeof (int) option return */
639
640 case SO_SNDBUF:
641 *i1 = connp->conn_sndbuf;
642 break; /* goto sizeof (int) option return */
643 case SO_RCVBUF:
644 *i1 = connp->conn_rcvbuf;
645 break; /* goto sizeof (int) option return */
646 case SO_RCVTIMEO:
647 case SO_SNDTIMEO:
648 /*
649 * Pass these two options in order for third part
650 * protocol usage. Here just return directly.
651 */
652 *i1 = 0;
653 break;
654 case SO_DGRAM_ERRIND:
655 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
656 break; /* goto sizeof (int) option return */
657 case SO_RECVUCRED:
658 *i1 = connp->conn_recv_ancillary.crb_recvucred;
659 break; /* goto sizeof (int) option return */
660 case SO_TIMESTAMP:
661 *i1 = connp->conn_recv_ancillary.crb_timestamp;
662 break; /* goto sizeof (int) option return */
663 case SO_VRRP:
664 *i1 = connp->conn_isvrrp;
665 break; /* goto sizeof (int) option return */
666 case SO_ANON_MLP:
667 *i1 = connp->conn_anon_mlp;
668 break; /* goto sizeof (int) option return */
669 case SO_MAC_EXEMPT:
670 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
671 break; /* goto sizeof (int) option return */
672 case SO_MAC_IMPLICIT:
673 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
674 break; /* goto sizeof (int) option return */
675 case SO_ALLZONES:
676 *i1 = connp->conn_allzones;
677 break; /* goto sizeof (int) option return */
678 case SO_EXCLBIND:
679 *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
680 break;
681 case SO_PROTOTYPE:
682 *i1 = connp->conn_proto;
683 break;
684
685 case SO_DOMAIN:
686 *i1 = connp->conn_family;
687 break;
688 default:
689 return (-1);
690 }
691 break;
692 case IPPROTO_IP:
693 if (connp->conn_family != AF_INET)
694 return (-1);
695 switch (name) {
696 case IP_OPTIONS:
697 case T_IP_OPTIONS:
698 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
699 return (0);
700
701 len = ipp->ipp_ipv4_options_len;
702 if (len > 0) {
703 bcopy(ipp->ipp_ipv4_options, ptr, len);
704 }
705 return (len);
706
707 case IP_PKTINFO: {
708 /*
709 * This also handles IP_RECVPKTINFO.
710 * IP_PKTINFO and IP_RECVPKTINFO have same value.
711 * Differentiation is based on the size of the
712 * argument passed in.
713 */
714 struct in_pktinfo *pktinfo;
715
716 #ifdef notdef
717 /* optcom doesn't provide a length with "get" */
718 if (inlen == sizeof (int)) {
719 /* This is IP_RECVPKTINFO option. */
720 *i1 = connp->conn_recv_ancillary.
721 crb_ip_recvpktinfo;
722 return (sizeof (int));
723 }
724 #endif
725 /* XXX assumes that caller has room for max size! */
726
727 pktinfo = (struct in_pktinfo *)ptr;
728 pktinfo->ipi_ifindex = ixa->ixa_ifindex;
729 if (ipp->ipp_fields & IPPF_ADDR)
730 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
731 else
732 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
733 return (sizeof (struct in_pktinfo));
734 }
735 case IP_DONTFRAG:
736 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
737 return (sizeof (int));
738 case IP_TOS:
739 case T_IP_TOS:
740 *i1 = (int)ipp->ipp_type_of_service;
741 break; /* goto sizeof (int) option return */
742 case IP_TTL:
743 *i1 = (int)ipp->ipp_unicast_hops;
744 break; /* goto sizeof (int) option return */
745 case IP_DHCPINIT_IF:
746 return (-1);
747 case IP_NEXTHOP:
748 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
749 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
750 return (sizeof (ipaddr_t));
751 } else {
752 return (0);
753 }
754
755 case IP_MULTICAST_IF:
756 /* 0 address if not set */
757 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
758 return (sizeof (ipaddr_t));
759 case IP_MULTICAST_TTL:
760 *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
761 return (sizeof (uchar_t));
762 case IP_MULTICAST_LOOP:
763 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
764 return (sizeof (uint8_t));
765 case IP_RECVOPTS:
766 *i1 = connp->conn_recv_ancillary.crb_recvopts;
767 break; /* goto sizeof (int) option return */
768 case IP_RECVDSTADDR:
769 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
770 break; /* goto sizeof (int) option return */
771 case IP_RECVIF:
772 *i1 = connp->conn_recv_ancillary.crb_recvif;
773 break; /* goto sizeof (int) option return */
774 case IP_RECVSLLA:
775 *i1 = connp->conn_recv_ancillary.crb_recvslla;
776 break; /* goto sizeof (int) option return */
777 case IP_RECVTTL:
778 *i1 = connp->conn_recv_ancillary.crb_recvttl;
779 break; /* goto sizeof (int) option return */
780 case IP_ADD_MEMBERSHIP:
781 case IP_DROP_MEMBERSHIP:
782 case MCAST_JOIN_GROUP:
783 case MCAST_LEAVE_GROUP:
784 case IP_BLOCK_SOURCE:
785 case IP_UNBLOCK_SOURCE:
786 case IP_ADD_SOURCE_MEMBERSHIP:
787 case IP_DROP_SOURCE_MEMBERSHIP:
788 case MCAST_BLOCK_SOURCE:
789 case MCAST_UNBLOCK_SOURCE:
790 case MCAST_JOIN_SOURCE_GROUP:
791 case MCAST_LEAVE_SOURCE_GROUP:
792 case MRT_INIT:
793 case MRT_DONE:
794 case MRT_ADD_VIF:
795 case MRT_DEL_VIF:
796 case MRT_ADD_MFC:
797 case MRT_DEL_MFC:
798 /* cannot "get" the value for these */
799 return (-1);
800 case MRT_VERSION:
801 case MRT_ASSERT:
802 (void) ip_mrouter_get(name, connp, ptr);
803 return (sizeof (int));
804 case IP_SEC_OPT:
805 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
806 IPSEC_AF_V4));
807 case IP_BOUND_IF:
808 /* Zero if not set */
809 *i1 = connp->conn_bound_if;
810 break; /* goto sizeof (int) option return */
811 case IP_UNSPEC_SRC:
812 *i1 = connp->conn_unspec_src;
813 break; /* goto sizeof (int) option return */
814 case IP_BROADCAST_TTL:
815 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
816 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
817 else
818 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
819 return (sizeof (uchar_t));
820 default:
821 return (-1);
822 }
823 break;
824 case IPPROTO_IPV6:
825 if (connp->conn_family != AF_INET6)
826 return (-1);
827 switch (name) {
828 case IPV6_UNICAST_HOPS:
829 *i1 = (int)ipp->ipp_unicast_hops;
830 break; /* goto sizeof (int) option return */
831 case IPV6_MULTICAST_IF:
832 /* 0 index if not set */
833 *i1 = ixa->ixa_multicast_ifindex;
834 break; /* goto sizeof (int) option return */
835 case IPV6_MULTICAST_HOPS:
836 *i1 = ixa->ixa_multicast_ttl;
837 break; /* goto sizeof (int) option return */
838 case IPV6_MULTICAST_LOOP:
839 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
840 break; /* goto sizeof (int) option return */
841 case IPV6_JOIN_GROUP:
842 case IPV6_LEAVE_GROUP:
843 case MCAST_JOIN_GROUP:
844 case MCAST_LEAVE_GROUP:
845 case MCAST_BLOCK_SOURCE:
846 case MCAST_UNBLOCK_SOURCE:
847 case MCAST_JOIN_SOURCE_GROUP:
848 case MCAST_LEAVE_SOURCE_GROUP:
849 /* cannot "get" the value for these */
850 return (-1);
851 case IPV6_BOUND_IF:
852 /* Zero if not set */
853 *i1 = connp->conn_bound_if;
854 break; /* goto sizeof (int) option return */
855 case IPV6_UNSPEC_SRC:
856 *i1 = connp->conn_unspec_src;
857 break; /* goto sizeof (int) option return */
858 case IPV6_RECVPKTINFO:
859 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
860 break; /* goto sizeof (int) option return */
861 case IPV6_RECVTCLASS:
862 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
863 break; /* goto sizeof (int) option return */
864 case IPV6_RECVPATHMTU:
865 *i1 = connp->conn_ipv6_recvpathmtu;
866 break; /* goto sizeof (int) option return */
867 case IPV6_RECVHOPLIMIT:
868 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
869 break; /* goto sizeof (int) option return */
870 case IPV6_RECVHOPOPTS:
871 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
872 break; /* goto sizeof (int) option return */
873 case IPV6_RECVDSTOPTS:
874 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
875 break; /* goto sizeof (int) option return */
876 case _OLD_IPV6_RECVDSTOPTS:
877 *i1 =
878 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
879 break; /* goto sizeof (int) option return */
880 case IPV6_RECVRTHDRDSTOPTS:
881 *i1 = connp->conn_recv_ancillary.
882 crb_ipv6_recvrthdrdstopts;
883 break; /* goto sizeof (int) option return */
884 case IPV6_RECVRTHDR:
885 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
886 break; /* goto sizeof (int) option return */
887 case IPV6_PKTINFO: {
888 /* XXX assumes that caller has room for max size! */
889 struct in6_pktinfo *pkti;
890
891 pkti = (struct in6_pktinfo *)ptr;
892 pkti->ipi6_ifindex = ixa->ixa_ifindex;
893 if (ipp->ipp_fields & IPPF_ADDR)
894 pkti->ipi6_addr = ipp->ipp_addr;
895 else
896 pkti->ipi6_addr = ipv6_all_zeros;
897 return (sizeof (struct in6_pktinfo));
898 }
899 case IPV6_TCLASS:
900 *i1 = ipp->ipp_tclass;
901 break; /* goto sizeof (int) option return */
902 case IPV6_NEXTHOP: {
903 sin6_t *sin6 = (sin6_t *)ptr;
904
905 if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
906 return (0);
907
908 *sin6 = sin6_null;
909 sin6->sin6_family = AF_INET6;
910 sin6->sin6_addr = ixa->ixa_nexthop_v6;
911
912 return (sizeof (sin6_t));
913 }
914 case IPV6_HOPOPTS:
915 if (!(ipp->ipp_fields & IPPF_HOPOPTS))
916 return (0);
917 bcopy(ipp->ipp_hopopts, ptr,
918 ipp->ipp_hopoptslen);
919 return (ipp->ipp_hopoptslen);
920 case IPV6_RTHDRDSTOPTS:
921 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
922 return (0);
923 bcopy(ipp->ipp_rthdrdstopts, ptr,
924 ipp->ipp_rthdrdstoptslen);
925 return (ipp->ipp_rthdrdstoptslen);
926 case IPV6_RTHDR:
927 if (!(ipp->ipp_fields & IPPF_RTHDR))
928 return (0);
929 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
930 return (ipp->ipp_rthdrlen);
931 case IPV6_DSTOPTS:
932 if (!(ipp->ipp_fields & IPPF_DSTOPTS))
933 return (0);
934 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
935 return (ipp->ipp_dstoptslen);
936 case IPV6_PATHMTU:
937 return (ip_fill_mtuinfo(connp, ixa,
938 (struct ip6_mtuinfo *)ptr));
939 case IPV6_SEC_OPT:
940 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
941 IPSEC_AF_V6));
942 case IPV6_SRC_PREFERENCES:
943 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
944 case IPV6_DONTFRAG:
945 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
946 return (sizeof (int));
947 case IPV6_USE_MIN_MTU:
948 if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
949 *i1 = ixa->ixa_use_min_mtu;
950 else
951 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
952 break;
953 case IPV6_V6ONLY:
954 *i1 = connp->conn_ipv6_v6only;
955 return (sizeof (int));
956 default:
957 return (-1);
958 }
959 break;
960 case IPPROTO_UDP:
961 switch (name) {
962 case UDP_ANONPRIVBIND:
963 *i1 = connp->conn_anon_priv_bind;
964 break;
965 case UDP_EXCLBIND:
966 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
967 break;
968 default:
969 return (-1);
970 }
971 break;
972 case IPPROTO_TCP:
973 switch (name) {
974 case TCP_RECVDSTADDR:
975 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
976 break;
977 case TCP_ANONPRIVBIND:
978 *i1 = connp->conn_anon_priv_bind;
979 break;
980 case TCP_EXCLBIND:
981 *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
982 break;
983 default:
984 return (-1);
985 }
986 break;
987 default:
988 return (-1);
989 }
990 return (sizeof (int));
991 }
992
993 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
994 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
995 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
996 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
997 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
998 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
999 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
1000 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1001 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1002 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1003
1004 /*
1005 * This routine sets the most common socket options including some
1006 * that are transport/ULP specific.
1007 * It returns errno or zero.
1008 *
1009 * For fixed length options, there is no sanity check
1010 * of passed in length is done. It is assumed *_optcom_req()
1011 * routines do the right thing.
1012 */
1013 int
1014 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1015 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1016 {
1017 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1018
1019 /* We have different functions for different levels */
1020 switch (level) {
1021 case SOL_SOCKET:
1022 return (conn_opt_set_socket(coa, name, inlen, invalp,
1023 checkonly, cr));
1024 case IPPROTO_IP:
1025 return (conn_opt_set_ip(coa, name, inlen, invalp,
1026 checkonly, cr));
1027 case IPPROTO_IPV6:
1028 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1029 checkonly, cr));
1030 case IPPROTO_UDP:
1031 return (conn_opt_set_udp(coa, name, inlen, invalp,
1032 checkonly, cr));
1033 case IPPROTO_TCP:
1034 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1035 checkonly, cr));
1036 default:
1037 return (0);
1038 }
1039 }
1040
1041 /*
1042 * Handle SOL_SOCKET
1043 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1044 * it implement their own checks and setting of conn_proto.
1045 */
1046 /* ARGSUSED1 */
1047 static int
1048 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1049 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1050 {
1051 conn_t *connp = coa->coa_connp;
1052 ip_xmit_attr_t *ixa = coa->coa_ixa;
1053 int *i1 = (int *)invalp;
1054 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1055
1056 switch (name) {
1057 case SO_ALLZONES:
1058 if (IPCL_IS_BOUND(connp))
1059 return (EINVAL);
1060 break;
1061 case SO_VRRP:
1062 if (secpolicy_ip_config(cr, checkonly) != 0)
1063 return (EACCES);
1064 break;
1065 case SO_MAC_EXEMPT:
1066 if (secpolicy_net_mac_aware(cr) != 0)
1067 return (EACCES);
1068 if (IPCL_IS_BOUND(connp))
1069 return (EINVAL);
1070 break;
1071 case SO_MAC_IMPLICIT:
1072 if (secpolicy_net_mac_implicit(cr) != 0)
1073 return (EACCES);
1074 break;
1075 }
1076 if (checkonly)
1077 return (0);
1078
1079 mutex_enter(&connp->conn_lock);
1080 /* Here we set the actual option value */
1081 switch (name) {
1082 case SO_DEBUG:
1083 connp->conn_debug = onoff;
1084 break;
1085 case SO_KEEPALIVE:
1086 connp->conn_keepalive = onoff;
1087 break;
1088 case SO_LINGER: {
1089 struct linger *lgr = (struct linger *)invalp;
1090
1091 if (lgr->l_onoff) {
1092 connp->conn_linger = 1;
1093 connp->conn_lingertime = lgr->l_linger;
1094 } else {
1095 connp->conn_linger = 0;
1096 connp->conn_lingertime = 0;
1097 }
1098 break;
1099 }
1100 case SO_OOBINLINE:
1101 connp->conn_oobinline = onoff;
1102 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1103 break;
1104 case SO_REUSEADDR:
1105 connp->conn_reuseaddr = onoff;
1106 break;
1107 case SO_DONTROUTE:
1108 if (onoff)
1109 ixa->ixa_flags |= IXAF_DONTROUTE;
1110 else
1111 ixa->ixa_flags &= ~IXAF_DONTROUTE;
1112 coa->coa_changed |= COA_ROUTE_CHANGED;
1113 break;
1114 case SO_USELOOPBACK:
1115 connp->conn_useloopback = onoff;
1116 break;
1117 case SO_BROADCAST:
1118 connp->conn_broadcast = onoff;
1119 break;
1120 case SO_SNDBUF:
1121 /* ULP has range checked the value */
1122 connp->conn_sndbuf = *i1;
1123 coa->coa_changed |= COA_SNDBUF_CHANGED;
1124 break;
1125 case SO_RCVBUF:
1126 /* ULP has range checked the value */
1127 connp->conn_rcvbuf = *i1;
1128 coa->coa_changed |= COA_RCVBUF_CHANGED;
1129 break;
1130 case SO_RCVTIMEO:
1131 case SO_SNDTIMEO:
1132 /*
1133 * Pass these two options in order for third part
1134 * protocol usage.
1135 */
1136 break;
1137 case SO_DGRAM_ERRIND:
1138 connp->conn_dgram_errind = onoff;
1139 break;
1140 case SO_RECVUCRED:
1141 connp->conn_recv_ancillary.crb_recvucred = onoff;
1142 break;
1143 case SO_ALLZONES:
1144 connp->conn_allzones = onoff;
1145 coa->coa_changed |= COA_ROUTE_CHANGED;
1146 if (onoff)
1147 ixa->ixa_zoneid = ALL_ZONES;
1148 else
1149 ixa->ixa_zoneid = connp->conn_zoneid;
1150 break;
1151 case SO_TIMESTAMP:
1152 connp->conn_recv_ancillary.crb_timestamp = onoff;
1153 break;
1154 case SO_VRRP:
1155 connp->conn_isvrrp = onoff;
1156 break;
1157 case SO_ANON_MLP:
1158 connp->conn_anon_mlp = onoff;
1159 break;
1160 case SO_MAC_EXEMPT:
1161 connp->conn_mac_mode = onoff ?
1162 CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1163 break;
1164 case SO_MAC_IMPLICIT:
1165 connp->conn_mac_mode = onoff ?
1166 CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1167 break;
1168 case SO_EXCLBIND:
1169 connp->conn_exclbind = onoff;
1170 break;
1171 }
1172 mutex_exit(&connp->conn_lock);
1173 return (0);
1174 }
1175
1176 /* Handle IPPROTO_IP */
1177 static int
1178 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1179 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1180 {
1181 conn_t *connp = coa->coa_connp;
1182 ip_xmit_attr_t *ixa = coa->coa_ixa;
1183 ip_pkt_t *ipp = coa->coa_ipp;
1184 int *i1 = (int *)invalp;
1185 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1186 ipaddr_t addr = (ipaddr_t)*i1;
1187 uint_t ifindex;
1188 zoneid_t zoneid = IPCL_ZONEID(connp);
1189 ipif_t *ipif;
1190 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1191 int error;
1192
1193 if (connp->conn_family == AF_INET6 &&
1194 connp->conn_ipversion == IPV4_VERSION) {
1195 /*
1196 * Allow certain IPv4 options to be set on an AF_INET6 socket
1197 * if the connection is still IPv4.
1198 */
1199 switch (name) {
1200 case IP_TOS:
1201 case T_IP_TOS:
1202 case IP_TTL:
1203 case IP_DONTFRAG:
1204 break;
1205 default:
1206 return (EINVAL);
1207 }
1208 } else if (connp->conn_family != AF_INET) {
1209 return (EINVAL);
1210 }
1211
1212 switch (name) {
1213 case IP_TTL:
1214 /* Don't allow zero */
1215 if (*i1 < 1 || *i1 > 255)
1216 return (EINVAL);
1217 break;
1218 case IP_MULTICAST_IF:
1219 if (addr == INADDR_ANY) {
1220 /* Clear */
1221 ifindex = 0;
1222 break;
1223 }
1224 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1225 if (ipif == NULL)
1226 return (EHOSTUNREACH);
1227 /* not supported by the virtual network iface */
1228 if (IS_VNI(ipif->ipif_ill)) {
1229 ipif_refrele(ipif);
1230 return (EINVAL);
1231 }
1232 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1233 ipif_refrele(ipif);
1234 break;
1235 case IP_NEXTHOP: {
1236 ire_t *ire;
1237
1238 if (addr == INADDR_ANY) {
1239 /* Clear */
1240 break;
1241 }
1242 /* Verify that the next-hop is on-link */
1243 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1244 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1245 if (ire == NULL)
1246 return (EHOSTUNREACH);
1247 ire_refrele(ire);
1248 break;
1249 }
1250 case IP_OPTIONS:
1251 case T_IP_OPTIONS: {
1252 uint_t newlen;
1253
1254 if (ipp->ipp_fields & IPPF_LABEL_V4)
1255 newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1256 else
1257 newlen = inlen;
1258 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1259 return (EINVAL);
1260 }
1261 break;
1262 }
1263 case IP_PKTINFO: {
1264 struct in_pktinfo *pktinfo;
1265
1266 /* Two different valid lengths */
1267 if (inlen != sizeof (int) &&
1268 inlen != sizeof (struct in_pktinfo))
1269 return (EINVAL);
1270 if (inlen == sizeof (int))
1271 break;
1272
1273 pktinfo = (struct in_pktinfo *)invalp;
1274 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1275 switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1276 zoneid, ipst, B_FALSE)) {
1277 case IPVL_UNICAST_UP:
1278 case IPVL_UNICAST_DOWN:
1279 break;
1280 default:
1281 return (EADDRNOTAVAIL);
1282 }
1283 }
1284 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1285 B_FALSE, ipst))
1286 return (ENXIO);
1287 break;
1288 }
1289 case IP_BOUND_IF:
1290 ifindex = *(uint_t *)i1;
1291
1292 /* Just check it is ok. */
1293 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1294 return (ENXIO);
1295 break;
1296 }
1297 if (checkonly)
1298 return (0);
1299
1300 /* Here we set the actual option value */
1301 /*
1302 * conn_lock protects the bitfields, and is used to
1303 * set the fields atomically. Not needed for ixa settings since
1304 * the caller has an exclusive copy of the ixa.
1305 * We can not hold conn_lock across the multicast options though.
1306 */
1307 switch (name) {
1308 case IP_OPTIONS:
1309 case T_IP_OPTIONS:
1310 /* Save options for use by IP. */
1311 mutex_enter(&connp->conn_lock);
1312 error = optcom_pkt_set(invalp, inlen,
1313 (uchar_t **)&ipp->ipp_ipv4_options,
1314 &ipp->ipp_ipv4_options_len);
1315 if (error != 0) {
1316 mutex_exit(&connp->conn_lock);
1317 return (error);
1318 }
1319 if (ipp->ipp_ipv4_options_len == 0) {
1320 ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1321 } else {
1322 ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1323 }
1324 mutex_exit(&connp->conn_lock);
1325 coa->coa_changed |= COA_HEADER_CHANGED;
1326 coa->coa_changed |= COA_WROFF_CHANGED;
1327 break;
1328
1329 case IP_TTL:
1330 mutex_enter(&connp->conn_lock);
1331 ipp->ipp_unicast_hops = *i1;
1332 mutex_exit(&connp->conn_lock);
1333 coa->coa_changed |= COA_HEADER_CHANGED;
1334 break;
1335 case IP_TOS:
1336 case T_IP_TOS:
1337 mutex_enter(&connp->conn_lock);
1338 if (*i1 == -1) {
1339 ipp->ipp_type_of_service = 0;
1340 } else {
1341 ipp->ipp_type_of_service = *i1;
1342 }
1343 mutex_exit(&connp->conn_lock);
1344 coa->coa_changed |= COA_HEADER_CHANGED;
1345 break;
1346 case IP_MULTICAST_IF:
1347 ixa->ixa_multicast_ifindex = ifindex;
1348 ixa->ixa_multicast_ifaddr = addr;
1349 coa->coa_changed |= COA_ROUTE_CHANGED;
1350 break;
1351 case IP_MULTICAST_TTL:
1352 ixa->ixa_multicast_ttl = *invalp;
1353 /* Handled automatically by ip_output */
1354 break;
1355 case IP_MULTICAST_LOOP:
1356 if (*invalp != 0)
1357 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1358 else
1359 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1360 /* Handled automatically by ip_output */
1361 break;
1362 case IP_RECVOPTS:
1363 mutex_enter(&connp->conn_lock);
1364 connp->conn_recv_ancillary.crb_recvopts = onoff;
1365 mutex_exit(&connp->conn_lock);
1366 break;
1367 case IP_RECVDSTADDR:
1368 mutex_enter(&connp->conn_lock);
1369 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1370 mutex_exit(&connp->conn_lock);
1371 break;
1372 case IP_RECVIF:
1373 mutex_enter(&connp->conn_lock);
1374 connp->conn_recv_ancillary.crb_recvif = onoff;
1375 mutex_exit(&connp->conn_lock);
1376 break;
1377 case IP_RECVSLLA:
1378 mutex_enter(&connp->conn_lock);
1379 connp->conn_recv_ancillary.crb_recvslla = onoff;
1380 mutex_exit(&connp->conn_lock);
1381 break;
1382 case IP_RECVTTL:
1383 mutex_enter(&connp->conn_lock);
1384 connp->conn_recv_ancillary.crb_recvttl = onoff;
1385 mutex_exit(&connp->conn_lock);
1386 break;
1387 case IP_PKTINFO: {
1388 /*
1389 * This also handles IP_RECVPKTINFO.
1390 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1391 * Differentiation is based on the size of the
1392 * argument passed in.
1393 */
1394 struct in_pktinfo *pktinfo;
1395
1396 if (inlen == sizeof (int)) {
1397 /* This is IP_RECVPKTINFO option. */
1398 mutex_enter(&connp->conn_lock);
1399 connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1400 onoff;
1401 mutex_exit(&connp->conn_lock);
1402 break;
1403 }
1404
1405 /* This is IP_PKTINFO option. */
1406 mutex_enter(&connp->conn_lock);
1407 pktinfo = (struct in_pktinfo *)invalp;
1408 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1409 ipp->ipp_fields |= IPPF_ADDR;
1410 IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1411 &ipp->ipp_addr);
1412 } else {
1413 ipp->ipp_fields &= ~IPPF_ADDR;
1414 ipp->ipp_addr = ipv6_all_zeros;
1415 }
1416 mutex_exit(&connp->conn_lock);
1417 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1418 coa->coa_changed |= COA_ROUTE_CHANGED;
1419 coa->coa_changed |= COA_HEADER_CHANGED;
1420 break;
1421 }
1422 case IP_DONTFRAG:
1423 if (onoff) {
1424 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1425 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1426 } else {
1427 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1428 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1429 }
1430 /* Need to redo ip_attr_connect */
1431 coa->coa_changed |= COA_ROUTE_CHANGED;
1432 break;
1433 case IP_ADD_MEMBERSHIP:
1434 case IP_DROP_MEMBERSHIP:
1435 case MCAST_JOIN_GROUP:
1436 case MCAST_LEAVE_GROUP:
1437 return (ip_opt_set_multicast_group(connp, name,
1438 invalp, B_FALSE, checkonly));
1439
1440 case IP_BLOCK_SOURCE:
1441 case IP_UNBLOCK_SOURCE:
1442 case IP_ADD_SOURCE_MEMBERSHIP:
1443 case IP_DROP_SOURCE_MEMBERSHIP:
1444 case MCAST_BLOCK_SOURCE:
1445 case MCAST_UNBLOCK_SOURCE:
1446 case MCAST_JOIN_SOURCE_GROUP:
1447 case MCAST_LEAVE_SOURCE_GROUP:
1448 return (ip_opt_set_multicast_sources(connp, name,
1449 invalp, B_FALSE, checkonly));
1450
1451 case IP_SEC_OPT:
1452 mutex_enter(&connp->conn_lock);
1453 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1454 mutex_exit(&connp->conn_lock);
1455 if (error != 0) {
1456 return (error);
1457 }
1458 /* This is an IPsec policy change - redo ip_attr_connect */
1459 coa->coa_changed |= COA_ROUTE_CHANGED;
1460 break;
1461 case IP_NEXTHOP:
1462 ixa->ixa_nexthop_v4 = addr;
1463 if (addr != INADDR_ANY)
1464 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1465 else
1466 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1467 coa->coa_changed |= COA_ROUTE_CHANGED;
1468 break;
1469
1470 case IP_BOUND_IF:
1471 ixa->ixa_ifindex = ifindex; /* Send */
1472 mutex_enter(&connp->conn_lock);
1473 connp->conn_incoming_ifindex = ifindex; /* Receive */
1474 connp->conn_bound_if = ifindex; /* getsockopt */
1475 mutex_exit(&connp->conn_lock);
1476 coa->coa_changed |= COA_ROUTE_CHANGED;
1477 break;
1478 case IP_UNSPEC_SRC:
1479 mutex_enter(&connp->conn_lock);
1480 connp->conn_unspec_src = onoff;
1481 if (onoff)
1482 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1483 else
1484 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1485
1486 mutex_exit(&connp->conn_lock);
1487 break;
1488 case IP_BROADCAST_TTL:
1489 ixa->ixa_broadcast_ttl = *invalp;
1490 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1491 /* Handled automatically by ip_output */
1492 break;
1493 case MRT_INIT:
1494 case MRT_DONE:
1495 case MRT_ADD_VIF:
1496 case MRT_DEL_VIF:
1497 case MRT_ADD_MFC:
1498 case MRT_DEL_MFC:
1499 case MRT_ASSERT:
1500 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1501 return (error);
1502 }
1503 error = ip_mrouter_set((int)name, connp, checkonly,
1504 (uchar_t *)invalp, inlen);
1505 if (error) {
1506 return (error);
1507 }
1508 return (0);
1509
1510 }
1511 return (0);
1512 }
1513
1514 /* Handle IPPROTO_IPV6 */
1515 static int
1516 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1517 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1518 {
1519 conn_t *connp = coa->coa_connp;
1520 ip_xmit_attr_t *ixa = coa->coa_ixa;
1521 ip_pkt_t *ipp = coa->coa_ipp;
1522 int *i1 = (int *)invalp;
1523 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1524 uint_t ifindex;
1525 zoneid_t zoneid = IPCL_ZONEID(connp);
1526 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1527 int error;
1528
1529 if (connp->conn_family != AF_INET6)
1530 return (EINVAL);
1531
1532 switch (name) {
1533 case IPV6_MULTICAST_IF:
1534 /*
1535 * The only possible error is EINVAL.
1536 * We call this option on both V4 and V6
1537 * If both fail, then this call returns
1538 * EINVAL. If at least one of them succeeds we
1539 * return success.
1540 */
1541 ifindex = *(uint_t *)i1;
1542
1543 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1544 !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1545 return (EINVAL);
1546 break;
1547 case IPV6_UNICAST_HOPS:
1548 /* Don't allow zero. -1 means to use default */
1549 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1550 return (EINVAL);
1551 break;
1552 case IPV6_MULTICAST_HOPS:
1553 /* -1 means use default */
1554 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1555 return (EINVAL);
1556 break;
1557 case IPV6_MULTICAST_LOOP:
1558 if (*i1 != 0 && *i1 != 1)
1559 return (EINVAL);
1560 break;
1561 case IPV6_BOUND_IF:
1562 ifindex = *(uint_t *)i1;
1563
1564 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1565 return (ENXIO);
1566 break;
1567 case IPV6_PKTINFO: {
1568 struct in6_pktinfo *pkti;
1569 boolean_t isv6;
1570
1571 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1572 return (EINVAL);
1573 if (inlen == 0)
1574 break; /* Clear values below */
1575
1576 /*
1577 * Verify the source address and ifindex. Privileged users
1578 * can use any source address.
1579 */
1580 pkti = (struct in6_pktinfo *)invalp;
1581
1582 /*
1583 * For link-local addresses we use the ipi6_ifindex when
1584 * we verify the local address.
1585 * If net_rawaccess then any source address can be used.
1586 */
1587 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1588 secpolicy_net_rawaccess(cr) != 0) {
1589 uint_t scopeid = 0;
1590 in6_addr_t *v6src = &pkti->ipi6_addr;
1591 ipaddr_t v4src;
1592 ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1593
1594 if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1595 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1596 if (v4src != INADDR_ANY) {
1597 laddr_type = ip_laddr_verify_v4(v4src,
1598 zoneid, ipst, B_FALSE);
1599 }
1600 } else {
1601 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1602 scopeid = pkti->ipi6_ifindex;
1603
1604 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1605 ipst, B_FALSE, scopeid);
1606 }
1607 switch (laddr_type) {
1608 case IPVL_UNICAST_UP:
1609 case IPVL_UNICAST_DOWN:
1610 break;
1611 default:
1612 return (EADDRNOTAVAIL);
1613 }
1614 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1615 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1616 /* Allow any source */
1617 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1618 }
1619 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1620 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1621 ipst))
1622 return (ENXIO);
1623 break;
1624 }
1625 case IPV6_HOPLIMIT:
1626 /* It is only allowed as ancilary data */
1627 if (!coa->coa_ancillary)
1628 return (EINVAL);
1629
1630 if (inlen != 0 && inlen != sizeof (int))
1631 return (EINVAL);
1632 if (inlen == sizeof (int)) {
1633 if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1634 return (EINVAL);
1635 }
1636 break;
1637 case IPV6_TCLASS:
1638 if (inlen != 0 && inlen != sizeof (int))
1639 return (EINVAL);
1640 if (inlen == sizeof (int)) {
1641 if (*i1 > 255 || *i1 < -1)
1642 return (EINVAL);
1643 }
1644 break;
1645 case IPV6_NEXTHOP:
1646 if (inlen != 0 && inlen != sizeof (sin6_t))
1647 return (EINVAL);
1648 if (inlen == sizeof (sin6_t)) {
1649 sin6_t *sin6 = (sin6_t *)invalp;
1650 ire_t *ire;
1651
1652 if (sin6->sin6_family != AF_INET6)
1653 return (EAFNOSUPPORT);
1654 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1655 return (EADDRNOTAVAIL);
1656
1657 /* Verify that the next-hop is on-link */
1658 ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1659 0, 0, IRE_ONLINK, NULL, zoneid,
1660 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1661 if (ire == NULL)
1662 return (EHOSTUNREACH);
1663 ire_refrele(ire);
1664 break;
1665 }
1666 break;
1667 case IPV6_RTHDR:
1668 case IPV6_DSTOPTS:
1669 case IPV6_RTHDRDSTOPTS:
1670 case IPV6_HOPOPTS: {
1671 /* All have the length field in the same place */
1672 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1673 /*
1674 * Sanity checks - minimum size, size a multiple of
1675 * eight bytes, and matching size passed in.
1676 */
1677 if (inlen != 0 &&
1678 inlen != (8 * (hopts->ip6h_len + 1)))
1679 return (EINVAL);
1680 break;
1681 }
1682 case IPV6_PATHMTU:
1683 /* Can't be set */
1684 return (EINVAL);
1685
1686 case IPV6_USE_MIN_MTU:
1687 if (inlen != sizeof (int))
1688 return (EINVAL);
1689 if (*i1 < -1 || *i1 > 1)
1690 return (EINVAL);
1691 break;
1692 case IPV6_SRC_PREFERENCES:
1693 if (inlen != sizeof (uint32_t))
1694 return (EINVAL);
1695 break;
1696 case IPV6_V6ONLY:
1697 if (*i1 < 0 || *i1 > 1) {
1698 return (EINVAL);
1699 }
1700 break;
1701 }
1702 if (checkonly)
1703 return (0);
1704
1705 /* Here we set the actual option value */
1706 /*
1707 * conn_lock protects the bitfields, and is used to
1708 * set the fields atomically. Not needed for ixa settings since
1709 * the caller has an exclusive copy of the ixa.
1710 * We can not hold conn_lock across the multicast options though.
1711 */
1712 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1713 switch (name) {
1714 case IPV6_MULTICAST_IF:
1715 ixa->ixa_multicast_ifindex = ifindex;
1716 /* Need to redo ip_attr_connect */
1717 coa->coa_changed |= COA_ROUTE_CHANGED;
1718 break;
1719 case IPV6_UNICAST_HOPS:
1720 /* -1 means use default */
1721 mutex_enter(&connp->conn_lock);
1722 if (*i1 == -1) {
1723 ipp->ipp_unicast_hops = connp->conn_default_ttl;
1724 } else {
1725 ipp->ipp_unicast_hops = (uint8_t)*i1;
1726 }
1727 mutex_exit(&connp->conn_lock);
1728 coa->coa_changed |= COA_HEADER_CHANGED;
1729 break;
1730 case IPV6_MULTICAST_HOPS:
1731 /* -1 means use default */
1732 if (*i1 == -1) {
1733 ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1734 } else {
1735 ixa->ixa_multicast_ttl = (uint8_t)*i1;
1736 }
1737 /* Handled automatically by ip_output */
1738 break;
1739 case IPV6_MULTICAST_LOOP:
1740 if (*i1 != 0)
1741 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1742 else
1743 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1744 /* Handled automatically by ip_output */
1745 break;
1746 case IPV6_JOIN_GROUP:
1747 case IPV6_LEAVE_GROUP:
1748 case MCAST_JOIN_GROUP:
1749 case MCAST_LEAVE_GROUP:
1750 return (ip_opt_set_multicast_group(connp, name,
1751 invalp, B_TRUE, checkonly));
1752
1753 case MCAST_BLOCK_SOURCE:
1754 case MCAST_UNBLOCK_SOURCE:
1755 case MCAST_JOIN_SOURCE_GROUP:
1756 case MCAST_LEAVE_SOURCE_GROUP:
1757 return (ip_opt_set_multicast_sources(connp, name,
1758 invalp, B_TRUE, checkonly));
1759
1760 case IPV6_BOUND_IF:
1761 ixa->ixa_ifindex = ifindex; /* Send */
1762 mutex_enter(&connp->conn_lock);
1763 connp->conn_incoming_ifindex = ifindex; /* Receive */
1764 connp->conn_bound_if = ifindex; /* getsockopt */
1765 mutex_exit(&connp->conn_lock);
1766 coa->coa_changed |= COA_ROUTE_CHANGED;
1767 break;
1768 case IPV6_UNSPEC_SRC:
1769 mutex_enter(&connp->conn_lock);
1770 connp->conn_unspec_src = onoff;
1771 if (onoff)
1772 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1773 else
1774 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1775 mutex_exit(&connp->conn_lock);
1776 break;
1777 case IPV6_RECVPKTINFO:
1778 mutex_enter(&connp->conn_lock);
1779 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1780 mutex_exit(&connp->conn_lock);
1781 break;
1782 case IPV6_RECVTCLASS:
1783 mutex_enter(&connp->conn_lock);
1784 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1785 mutex_exit(&connp->conn_lock);
1786 break;
1787 case IPV6_RECVPATHMTU:
1788 mutex_enter(&connp->conn_lock);
1789 connp->conn_ipv6_recvpathmtu = onoff;
1790 mutex_exit(&connp->conn_lock);
1791 break;
1792 case IPV6_RECVHOPLIMIT:
1793 mutex_enter(&connp->conn_lock);
1794 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1795 onoff;
1796 mutex_exit(&connp->conn_lock);
1797 break;
1798 case IPV6_RECVHOPOPTS:
1799 mutex_enter(&connp->conn_lock);
1800 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1801 mutex_exit(&connp->conn_lock);
1802 break;
1803 case IPV6_RECVDSTOPTS:
1804 mutex_enter(&connp->conn_lock);
1805 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1806 mutex_exit(&connp->conn_lock);
1807 break;
1808 case _OLD_IPV6_RECVDSTOPTS:
1809 mutex_enter(&connp->conn_lock);
1810 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1811 onoff;
1812 mutex_exit(&connp->conn_lock);
1813 break;
1814 case IPV6_RECVRTHDRDSTOPTS:
1815 mutex_enter(&connp->conn_lock);
1816 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1817 onoff;
1818 mutex_exit(&connp->conn_lock);
1819 break;
1820 case IPV6_RECVRTHDR:
1821 mutex_enter(&connp->conn_lock);
1822 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1823 mutex_exit(&connp->conn_lock);
1824 break;
1825 case IPV6_PKTINFO:
1826 mutex_enter(&connp->conn_lock);
1827 if (inlen == 0) {
1828 ipp->ipp_fields &= ~IPPF_ADDR;
1829 ipp->ipp_addr = ipv6_all_zeros;
1830 ixa->ixa_ifindex = 0;
1831 } else {
1832 struct in6_pktinfo *pkti;
1833
1834 pkti = (struct in6_pktinfo *)invalp;
1835 ipp->ipp_addr = pkti->ipi6_addr;
1836 if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1837 ipp->ipp_fields |= IPPF_ADDR;
1838 else
1839 ipp->ipp_fields &= ~IPPF_ADDR;
1840 ixa->ixa_ifindex = pkti->ipi6_ifindex;
1841 }
1842 mutex_exit(&connp->conn_lock);
1843 /* Source and ifindex might have changed */
1844 coa->coa_changed |= COA_HEADER_CHANGED;
1845 coa->coa_changed |= COA_ROUTE_CHANGED;
1846 break;
1847 case IPV6_HOPLIMIT:
1848 mutex_enter(&connp->conn_lock);
1849 if (inlen == 0 || *i1 == -1) {
1850 /* Revert to default */
1851 ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1852 ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1853 } else {
1854 ipp->ipp_hoplimit = *i1;
1855 ipp->ipp_fields |= IPPF_HOPLIMIT;
1856 /* Ensure that it sticks for multicast packets */
1857 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1858 }
1859 mutex_exit(&connp->conn_lock);
1860 coa->coa_changed |= COA_HEADER_CHANGED;
1861 break;
1862 case IPV6_TCLASS:
1863 /*
1864 * IPV6_TCLASS accepts -1 as use kernel default
1865 * and [0, 255] as the actualy traffic class.
1866 */
1867 mutex_enter(&connp->conn_lock);
1868 if (inlen == 0 || *i1 == -1) {
1869 ipp->ipp_tclass = 0;
1870 ipp->ipp_fields &= ~IPPF_TCLASS;
1871 } else {
1872 ipp->ipp_tclass = *i1;
1873 ipp->ipp_fields |= IPPF_TCLASS;
1874 }
1875 mutex_exit(&connp->conn_lock);
1876 coa->coa_changed |= COA_HEADER_CHANGED;
1877 break;
1878 case IPV6_NEXTHOP:
1879 if (inlen == 0) {
1880 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1881 } else {
1882 sin6_t *sin6 = (sin6_t *)invalp;
1883
1884 ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1885 if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1886 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1887 else
1888 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1889 }
1890 coa->coa_changed |= COA_ROUTE_CHANGED;
1891 break;
1892 case IPV6_HOPOPTS:
1893 mutex_enter(&connp->conn_lock);
1894 error = optcom_pkt_set(invalp, inlen,
1895 (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1896 if (error != 0) {
1897 mutex_exit(&connp->conn_lock);
1898 return (error);
1899 }
1900 if (ipp->ipp_hopoptslen == 0) {
1901 ipp->ipp_fields &= ~IPPF_HOPOPTS;
1902 } else {
1903 ipp->ipp_fields |= IPPF_HOPOPTS;
1904 }
1905 mutex_exit(&connp->conn_lock);
1906 coa->coa_changed |= COA_HEADER_CHANGED;
1907 coa->coa_changed |= COA_WROFF_CHANGED;
1908 break;
1909 case IPV6_RTHDRDSTOPTS:
1910 mutex_enter(&connp->conn_lock);
1911 error = optcom_pkt_set(invalp, inlen,
1912 (uchar_t **)&ipp->ipp_rthdrdstopts,
1913 &ipp->ipp_rthdrdstoptslen);
1914 if (error != 0) {
1915 mutex_exit(&connp->conn_lock);
1916 return (error);
1917 }
1918 if (ipp->ipp_rthdrdstoptslen == 0) {
1919 ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1920 } else {
1921 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1922 }
1923 mutex_exit(&connp->conn_lock);
1924 coa->coa_changed |= COA_HEADER_CHANGED;
1925 coa->coa_changed |= COA_WROFF_CHANGED;
1926 break;
1927 case IPV6_DSTOPTS:
1928 mutex_enter(&connp->conn_lock);
1929 error = optcom_pkt_set(invalp, inlen,
1930 (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1931 if (error != 0) {
1932 mutex_exit(&connp->conn_lock);
1933 return (error);
1934 }
1935 if (ipp->ipp_dstoptslen == 0) {
1936 ipp->ipp_fields &= ~IPPF_DSTOPTS;
1937 } else {
1938 ipp->ipp_fields |= IPPF_DSTOPTS;
1939 }
1940 mutex_exit(&connp->conn_lock);
1941 coa->coa_changed |= COA_HEADER_CHANGED;
1942 coa->coa_changed |= COA_WROFF_CHANGED;
1943 break;
1944 case IPV6_RTHDR:
1945 mutex_enter(&connp->conn_lock);
1946 error = optcom_pkt_set(invalp, inlen,
1947 (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1948 if (error != 0) {
1949 mutex_exit(&connp->conn_lock);
1950 return (error);
1951 }
1952 if (ipp->ipp_rthdrlen == 0) {
1953 ipp->ipp_fields &= ~IPPF_RTHDR;
1954 } else {
1955 ipp->ipp_fields |= IPPF_RTHDR;
1956 }
1957 mutex_exit(&connp->conn_lock);
1958 coa->coa_changed |= COA_HEADER_CHANGED;
1959 coa->coa_changed |= COA_WROFF_CHANGED;
1960 break;
1961
1962 case IPV6_DONTFRAG:
1963 if (onoff) {
1964 ixa->ixa_flags |= IXAF_DONTFRAG;
1965 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1966 } else {
1967 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1968 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1969 }
1970 /* Need to redo ip_attr_connect */
1971 coa->coa_changed |= COA_ROUTE_CHANGED;
1972 break;
1973
1974 case IPV6_USE_MIN_MTU:
1975 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1976 ixa->ixa_use_min_mtu = *i1;
1977 /* Need to redo ip_attr_connect */
1978 coa->coa_changed |= COA_ROUTE_CHANGED;
1979 break;
1980
1981 case IPV6_SEC_OPT:
1982 mutex_enter(&connp->conn_lock);
1983 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1984 mutex_exit(&connp->conn_lock);
1985 if (error != 0) {
1986 return (error);
1987 }
1988 /* This is an IPsec policy change - redo ip_attr_connect */
1989 coa->coa_changed |= COA_ROUTE_CHANGED;
1990 break;
1991 case IPV6_SRC_PREFERENCES:
1992 /*
1993 * This socket option only affects connected
1994 * sockets that haven't already bound to a specific
1995 * IPv6 address. In other words, sockets that
1996 * don't call bind() with an address other than the
1997 * unspecified address and that call connect().
1998 * ip_set_destination_v6() passes these preferences
1999 * to the ipif_select_source_v6() function.
2000 */
2001 mutex_enter(&connp->conn_lock);
2002 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
2003 mutex_exit(&connp->conn_lock);
2004 if (error != 0) {
2005 return (error);
2006 }
2007 break;
2008 case IPV6_V6ONLY:
2009 mutex_enter(&connp->conn_lock);
2010 connp->conn_ipv6_v6only = onoff;
2011 mutex_exit(&connp->conn_lock);
2012 break;
2013 }
2014 return (0);
2015 }
2016
2017 /* Handle IPPROTO_UDP */
2018 /* ARGSUSED1 */
2019 static int
2020 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2021 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2022 {
2023 conn_t *connp = coa->coa_connp;
2024 int *i1 = (int *)invalp;
2025 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2026 int error;
2027
2028 switch (name) {
2029 case UDP_ANONPRIVBIND:
2030 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2031 return (error);
2032 }
2033 break;
2034 }
2035 if (checkonly)
2036 return (0);
2037
2038 /* Here we set the actual option value */
2039 mutex_enter(&connp->conn_lock);
2040 switch (name) {
2041 case UDP_ANONPRIVBIND:
2042 connp->conn_anon_priv_bind = onoff;
2043 break;
2044 case UDP_EXCLBIND:
2045 connp->conn_exclbind = onoff;
2046 break;
2047 }
2048 mutex_exit(&connp->conn_lock);
2049 return (0);
2050 }
2051
2052 /* Handle IPPROTO_TCP */
2053 /* ARGSUSED1 */
2054 static int
2055 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2056 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2057 {
2058 conn_t *connp = coa->coa_connp;
2059 int *i1 = (int *)invalp;
2060 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2061 int error;
2062
2063 switch (name) {
2064 case TCP_ANONPRIVBIND:
2065 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2066 return (error);
2067 }
2068 break;
2069 }
2070 if (checkonly)
2071 return (0);
2072
2073 /* Here we set the actual option value */
2074 mutex_enter(&connp->conn_lock);
2075 switch (name) {
2076 case TCP_ANONPRIVBIND:
2077 connp->conn_anon_priv_bind = onoff;
2078 break;
2079 case TCP_EXCLBIND:
2080 connp->conn_exclbind = onoff;
2081 break;
2082 case TCP_RECVDSTADDR:
2083 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2084 break;
2085 }
2086 mutex_exit(&connp->conn_lock);
2087 return (0);
2088 }
2089
2090 int
2091 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2092 {
2093 sin_t *sin;
2094 sin6_t *sin6;
2095
2096 if (connp->conn_family == AF_INET) {
2097 if (*salenp < sizeof (sin_t))
2098 return (EINVAL);
2099
2100 *salenp = sizeof (sin_t);
2101 /* Fill zeroes and then initialize non-zero fields */
2102 sin = (sin_t *)sa;
2103 *sin = sin_null;
2104 sin->sin_family = AF_INET;
2105 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2106 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2107 sin->sin_addr.s_addr = connp->conn_saddr_v4;
2108 } else {
2109 /*
2110 * INADDR_ANY
2111 * conn_saddr is not set, we might be bound to
2112 * broadcast/multicast. Use conn_bound_addr as
2113 * local address instead (that could
2114 * also still be INADDR_ANY)
2115 */
2116 sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2117 }
2118 sin->sin_port = connp->conn_lport;
2119 } else {
2120 if (*salenp < sizeof (sin6_t))
2121 return (EINVAL);
2122
2123 *salenp = sizeof (sin6_t);
2124 /* Fill zeroes and then initialize non-zero fields */
2125 sin6 = (sin6_t *)sa;
2126 *sin6 = sin6_null;
2127 sin6->sin6_family = AF_INET6;
2128 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2129 sin6->sin6_addr = connp->conn_saddr_v6;
2130 } else {
2131 /*
2132 * conn_saddr is not set, we might be bound to
2133 * broadcast/multicast. Use conn_bound_addr as
2134 * local address instead (which could
2135 * also still be unspecified)
2136 */
2137 sin6->sin6_addr = connp->conn_bound_addr_v6;
2138 }
2139 sin6->sin6_port = connp->conn_lport;
2140 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2141 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2142 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2143 }
2144 return (0);
2145 }
2146
2147 int
2148 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2149 {
2150 struct sockaddr_in *sin;
2151 struct sockaddr_in6 *sin6;
2152
2153 if (connp->conn_family == AF_INET) {
2154 if (*salenp < sizeof (sin_t))
2155 return (EINVAL);
2156
2157 *salenp = sizeof (sin_t);
2158 /* initialize */
2159 sin = (sin_t *)sa;
2160 *sin = sin_null;
2161 sin->sin_family = AF_INET;
2162 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2163 sin->sin_port = connp->conn_fport;
2164 } else {
2165 if (*salenp < sizeof (sin6_t))
2166 return (EINVAL);
2167
2168 *salenp = sizeof (sin6_t);
2169 /* initialize */
2170 sin6 = (sin6_t *)sa;
2171 *sin6 = sin6_null;
2172 sin6->sin6_family = AF_INET6;
2173 sin6->sin6_addr = connp->conn_faddr_v6;
2174 sin6->sin6_port = connp->conn_fport;
2175 sin6->sin6_flowinfo = connp->conn_flowinfo;
2176 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2177 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2178 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2179 }
2180 return (0);
2181 }
2182
2183 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2184 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2185
2186 /*
2187 * Allocate and fill in conn_ht_iphc based on the current information
2188 * in the conn.
2189 * Normally used when we bind() and connect().
2190 * Returns failure if can't allocate memory, or if there is a problem
2191 * with a routing header/option.
2192 *
2193 * We allocate space for the transport header (ulp_hdr_len + extra) and
2194 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2195 * The extra is there for transports that want some spare room for future
2196 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2197 * excludes the extra part.
2198 *
2199 * We massage an routing option/header and store the ckecksum difference
2200 * in conn_sum.
2201 *
2202 * Caller needs to update conn_wroff if desired.
2203 */
2204 int
2205 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2206 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2207 {
2208 ip_xmit_attr_t *ixa = connp->conn_ixa;
2209 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
2210 uint_t ip_hdr_length;
2211 uchar_t *hdrs;
2212 uint_t hdrs_len;
2213
2214 ASSERT(MUTEX_HELD(&connp->conn_lock));
2215
2216 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2217 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2218 /* In case of TX label and IP options it can be too much */
2219 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2220 /* Preserves existing TX errno for this */
2221 return (EHOSTUNREACH);
2222 }
2223 } else {
2224 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2225 }
2226 ixa->ixa_ip_hdr_length = ip_hdr_length;
2227 hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2228 ASSERT(hdrs_len != 0);
2229
2230 if (hdrs_len != connp->conn_ht_iphc_allocated) {
2231 /* Allocate new before we free any old */
2232 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2233 if (hdrs == NULL)
2234 return (ENOMEM);
2235
2236 if (connp->conn_ht_iphc != NULL) {
2237 kmem_free(connp->conn_ht_iphc,
2238 connp->conn_ht_iphc_allocated);
2239 }
2240 connp->conn_ht_iphc = hdrs;
2241 connp->conn_ht_iphc_allocated = hdrs_len;
2242 } else {
2243 hdrs = connp->conn_ht_iphc;
2244 }
2245 hdrs_len -= extra;
2246 connp->conn_ht_iphc_len = hdrs_len;
2247
2248 connp->conn_ht_ulp = hdrs + ip_hdr_length;
2249 connp->conn_ht_ulp_len = ulp_hdr_length;
2250
2251 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2252 ipha_t *ipha = (ipha_t *)hdrs;
2253
2254 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2255 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2256 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2257 ipha->ipha_length = htons(hdrs_len);
2258 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2259 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2260 else
2261 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2262
2263 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2264 connp->conn_sum = cksum_massage_options_v4(ipha,
2265 connp->conn_netstack);
2266 } else {
2267 connp->conn_sum = 0;
2268 }
2269 } else {
2270 ip6_t *ip6h = (ip6_t *)hdrs;
2271
2272 ip6h->ip6_src = *v6src;
2273 ip6h->ip6_dst = *v6dst;
2274 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2275 flowinfo);
2276 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2277
2278 if (ipp->ipp_fields & IPPF_RTHDR) {
2279 connp->conn_sum = cksum_massage_options_v6(ip6h,
2280 ip_hdr_length, connp->conn_netstack);
2281
2282 /*
2283 * Verify that the first hop isn't a mapped address.
2284 * Routers along the path need to do this verification
2285 * for subsequent hops.
2286 */
2287 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2288 return (EADDRNOTAVAIL);
2289
2290 } else {
2291 connp->conn_sum = 0;
2292 }
2293 }
2294 return (0);
2295 }
2296
2297 /*
2298 * Prepend a header template to data_mp based on the ip_pkt_t
2299 * and the passed in source, destination and protocol.
2300 *
2301 * Returns failure if can't allocate memory, in which case data_mp is freed.
2302 * We allocate space for the transport header (ulp_hdr_len) and
2303 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2304 *
2305 * We massage an routing option/header and return the ckecksum difference
2306 * in *sump. This is in host byte order.
2307 *
2308 * Caller needs to update conn_wroff if desired.
2309 */
2310 mblk_t *
2311 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2312 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2313 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2314 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2315 {
2316 uint_t ip_hdr_length;
2317 uchar_t *hdrs;
2318 uint_t hdrs_len;
2319 mblk_t *mp;
2320
2321 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2322 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2323 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2324 } else {
2325 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2326 }
2327 hdrs_len = ip_hdr_length + ulp_hdr_length;
2328 ASSERT(hdrs_len != 0);
2329
2330 ixa->ixa_ip_hdr_length = ip_hdr_length;
2331
2332 /* Can we prepend to data_mp? */
2333 if (data_mp != NULL &&
2334 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2335 data_mp->b_datap->db_ref == 1) {
2336 hdrs = data_mp->b_rptr - hdrs_len;
2337 data_mp->b_rptr = hdrs;
2338 mp = data_mp;
2339 } else {
2340 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2341 if (mp == NULL) {
2342 freemsg(data_mp);
2343 *errorp = ENOMEM;
2344 return (NULL);
2345 }
2346 mp->b_wptr = mp->b_datap->db_lim;
2347 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2348 mp->b_cont = data_mp;
2349 }
2350
2351 /*
2352 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2353 * if PKTINFO (aka IPPF_ADDR) was set.
2354 */
2355 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2356 ipha_t *ipha = (ipha_t *)hdrs;
2357
2358 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2359 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2360 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2361 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2362 ipha->ipha_length = htons(hdrs_len + data_length);
2363 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2364 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2365 else
2366 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2367
2368 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2369 *sump = cksum_massage_options_v4(ipha,
2370 ixa->ixa_ipst->ips_netstack);
2371 } else {
2372 *sump = 0;
2373 }
2374 } else {
2375 ip6_t *ip6h = (ip6_t *)hdrs;
2376
2377 ip6h->ip6_src = *v6src;
2378 ip6h->ip6_dst = *v6dst;
2379 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2380 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2381
2382 if (ipp->ipp_fields & IPPF_RTHDR) {
2383 *sump = cksum_massage_options_v6(ip6h,
2384 ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2385
2386 /*
2387 * Verify that the first hop isn't a mapped address.
2388 * Routers along the path need to do this verification
2389 * for subsequent hops.
2390 */
2391 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2392 *errorp = EADDRNOTAVAIL;
2393 freemsg(mp);
2394 return (NULL);
2395 }
2396 } else {
2397 *sump = 0;
2398 }
2399 }
2400 return (mp);
2401 }
2402
2403 /*
2404 * Massage a source route if any putting the first hop
2405 * in ipha_dst. Compute a starting value for the checksum which
2406 * takes into account that the original ipha_dst should be
2407 * included in the checksum but that IP will include the
2408 * first hop from the source route in the tcp checksum.
2409 */
2410 static uint32_t
2411 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2412 {
2413 in_addr_t dst;
2414 uint32_t cksum;
2415
2416 /* Get last hop then diff against first hop */
2417 cksum = ip_massage_options(ipha, ns);
2418 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2419 dst = ipha->ipha_dst;
2420 cksum -= ((dst >> 16) + (dst & 0xffff));
2421 if ((int)cksum < 0)
2422 cksum--;
2423 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2424 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2425 ASSERT(cksum < 0x10000);
2426 return (ntohs(cksum));
2427 }
2428
2429 static uint32_t
2430 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2431 {
2432 uint8_t *end;
2433 ip6_rthdr_t *rth;
2434 uint32_t cksum;
2435
2436 end = (uint8_t *)ip6h + ip_hdr_len;
2437 rth = ip_find_rthdr_v6(ip6h, end);
2438 if (rth == NULL)
2439 return (0);
2440
2441 cksum = ip_massage_options_v6(ip6h, rth, ns);
2442 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2443 ASSERT(cksum < 0x10000);
2444 return (ntohs(cksum));
2445 }
2446
2447 /*
2448 * ULPs that change the destination address need to call this for each
2449 * change to discard any state about a previous destination that might
2450 * have been multicast or multirt.
2451 */
2452 void
2453 ip_attr_newdst(ip_xmit_attr_t *ixa)
2454 {
2455 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2456 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2457 IXAF_NO_LOOP_ZONEID_SET);
2458 }
2459
2460 /*
2461 * Determine the nexthop which will be used.
2462 * Normally this is just the destination, but if a IPv4 source route, or
2463 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2464 * there.
2465 */
2466 void
2467 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2468 const in6_addr_t *dst, in6_addr_t *nexthop)
2469 {
2470 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2471 *nexthop = *dst;
2472 return;
2473 }
2474 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2475 ipaddr_t v4dst;
2476 ipaddr_t v4nexthop;
2477
2478 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2479 v4nexthop = ip_pkt_source_route_v4(ipp);
2480 if (v4nexthop == INADDR_ANY)
2481 v4nexthop = v4dst;
2482
2483 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2484 } else {
2485 const in6_addr_t *v6nexthop;
2486
2487 v6nexthop = ip_pkt_source_route_v6(ipp);
2488 if (v6nexthop == NULL)
2489 v6nexthop = dst;
2490
2491 *nexthop = *v6nexthop;
2492 }
2493 }
2494
2495 /*
2496 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2497 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2498 * case (connected latching is done in conn_connect).
2499 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2500 * set, but doesn't otherwise use the conn_t.
2501 *
2502 * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2503 * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2504 *
2505 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2506 * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2507 *
2508 * Updates laddrp and uinfo if they are non-NULL.
2509 *
2510 * TSOL notes: The callers if ip_attr_connect must check if the destination
2511 * is different than before and in that case redo conn_update_label.
2512 * The callers of conn_connect do not need that since conn_connect
2513 * performs the conn_update_label.
2514 */
2515 int
2516 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2517 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2518 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2519 iulp_t *uinfo, uint32_t flags)
2520 {
2521 in6_addr_t laddr = *v6src;
2522 int error;
2523
2524 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2525
2526 if (connp->conn_zone_is_global)
2527 flags |= IPDF_ZONE_IS_GLOBAL;
2528 else
2529 flags &= ~IPDF_ZONE_IS_GLOBAL;
2530
2531 /*
2532 * Lookup the route to determine a source address and the uinfo.
2533 * If the ULP has a source route option then the caller will
2534 * have set v6nexthop to be the first hop.
2535 */
2536 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2537 ipaddr_t v4dst;
2538 ipaddr_t v4src, v4nexthop;
2539
2540 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2541 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2542 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2543
2544 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2545 flags &= ~IPDF_SELECT_SRC;
2546 else
2547 flags |= IPDF_SELECT_SRC;
2548
2549 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2550 uinfo, flags, connp->conn_mac_mode);
2551 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2552 } else {
2553 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2554 flags &= ~IPDF_SELECT_SRC;
2555 else
2556 flags |= IPDF_SELECT_SRC;
2557
2558 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2559 uinfo, flags, connp->conn_mac_mode);
2560 }
2561 /* Pass out some address even if we hit a RTF_REJECT etc */
2562 if (laddrp != NULL)
2563 *laddrp = laddr;
2564
2565 if (error != 0)
2566 return (error);
2567
2568 if (flags & IPDF_IPSEC) {
2569 /*
2570 * Set any IPsec policy in ixa. Routine also looks at ULP
2571 * ports.
2572 */
2573 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2574 }
2575 return (0);
2576 }
2577
2578 /*
2579 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2580 * Assumes that conn_faddr and conn_fport are already set. As such it is not
2581 * usable for SCTP, since SCTP has multiple faddrs.
2582 *
2583 * Caller must hold conn_lock to provide atomic constency between the
2584 * conn_t's addresses and the ixa.
2585 * NOTE: this function drops and reaquires conn_lock since it can't be
2586 * held across ip_attr_connect/ip_set_destination.
2587 *
2588 * The caller needs to handle inserting in the receive-side fanout when
2589 * appropriate after conn_connect returns.
2590 */
2591 int
2592 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2593 {
2594 ip_xmit_attr_t *ixa = connp->conn_ixa;
2595 in6_addr_t nexthop;
2596 in6_addr_t saddr, faddr;
2597 in_port_t fport;
2598 int error;
2599
2600 ASSERT(MUTEX_HELD(&connp->conn_lock));
2601
2602 if (connp->conn_ipversion == IPV4_VERSION)
2603 ixa->ixa_flags |= IXAF_IS_IPV4;
2604 else
2605 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2606
2607 /* We do IPsec latching below - hence no caching in ip_attr_connect */
2608 flags &= ~IPDF_IPSEC;
2609
2610 /* In case we had previously done an ip_attr_connect */
2611 ip_attr_newdst(ixa);
2612
2613 /*
2614 * Determine the nexthop and copy the addresses before dropping
2615 * conn_lock.
2616 */
2617 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2618 &connp->conn_faddr_v6, &nexthop);
2619 saddr = connp->conn_saddr_v6;
2620 faddr = connp->conn_faddr_v6;
2621 fport = connp->conn_fport;
2622
2623 mutex_exit(&connp->conn_lock);
2624 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2625 &saddr, uinfo, flags | IPDF_VERIFY_DST);
2626 mutex_enter(&connp->conn_lock);
2627
2628 /* Could have changed even if an error */
2629 connp->conn_saddr_v6 = saddr;
2630 if (error != 0)
2631 return (error);
2632
2633 /*
2634 * Check whether Trusted Solaris policy allows communication with this
2635 * host, and pretend that the destination is unreachable if not.
2636 * Compute any needed label and place it in ipp_label_v4/v6.
2637 *
2638 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2639 * the packet.
2640 *
2641 * TSOL Note: Any concurrent threads would pick a different ixa
2642 * (and ipp if they are to change the ipp) so we
2643 * don't have to worry about concurrent threads.
2644 */
2645 if (is_system_labeled()) {
2646 if (connp->conn_mlp_type != mlptSingle)
2647 return (ECONNREFUSED);
2648
2649 /*
2650 * conn_update_label will set ipp_label* which will later
2651 * be used by conn_build_hdr_template.
2652 */
2653 error = conn_update_label(connp, ixa,
2654 &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2655 if (error != 0)
2656 return (error);
2657 }
2658
2659 /*
2660 * Ensure that we match on the selected local address.
2661 * This overrides conn_laddr in the case we had earlier bound to a
2662 * multicast or broadcast address.
2663 */
2664 connp->conn_laddr_v6 = connp->conn_saddr_v6;
2665
2666 /*
2667 * Allow setting new policies.
2668 * The addresses/ports are already set, thus the IPsec policy calls
2669 * can handle their passed-in conn's.
2670 */
2671 connp->conn_policy_cached = B_FALSE;
2672
2673 /*
2674 * Cache IPsec policy in this conn. If we have per-socket policy,
2675 * we'll cache that. If we don't, we'll inherit global policy.
2676 *
2677 * This is done before the caller inserts in the receive-side fanout.
2678 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2679 * for connections where we don't have a policy. This is to prevent
2680 * global policy lookups in the inbound path.
2681 *
2682 * If we insert before we set conn_policy_cached,
2683 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2684 * because global policy cound be non-empty. We normally call
2685 * ipsec_check_policy() for conn_policy_cached connections only if
2686 * conn_in_enforce_policy is set. But in this case,
2687 * conn_policy_cached can get set anytime since we made the
2688 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2689 * called, which will make the above assumption false. Thus, we
2690 * need to insert after we set conn_policy_cached.
2691 */
2692 error = ipsec_conn_cache_policy(connp,
2693 connp->conn_ipversion == IPV4_VERSION);
2694 if (error != 0)
2695 return (error);
2696
2697 /*
2698 * We defer to do LSO check until here since now we have better idea
2699 * whether IPsec is present. If the underlying ill is LSO capable,
2700 * copy its capability in so the ULP can decide whether to enable LSO
2701 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2702 * claim LSO for IPv6.
2703 *
2704 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2705 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2706 */
2707 ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2708
2709 ASSERT(ixa->ixa_ire != NULL);
2710 if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2711 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2712 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2713 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2714 (ixa->ixa_nce != NULL) &&
2715 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2716 ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2717 ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2718 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2719 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2720 }
2721
2722 /* Check whether ZEROCOPY capability is usable for this connection. */
2723 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2724
2725 if ((flags & IPDF_ZCOPY) &&
2726 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2727 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2728 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2729 (ixa->ixa_nce != NULL) &&
2730 ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2731 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2732 }
2733 return (0);
2734 }
2735
2736 /*
2737 * Predicates to check if the addresses match conn_last*
2738 */
2739
2740 /*
2741 * Compare the conn against an address.
2742 * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2743 */
2744 boolean_t
2745 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2746 {
2747 ASSERT(connp->conn_family == AF_INET);
2748 return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2749 sin->sin_port == connp->conn_lastdstport);
2750 }
2751
2752 /*
2753 * Compare, including for mapped addresses
2754 */
2755 boolean_t
2756 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2757 {
2758 return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2759 sin6->sin6_port == connp->conn_lastdstport &&
2760 sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2761 sin6->sin6_scope_id == connp->conn_lastscopeid);
2762 }
2763
2764 /*
2765 * Compute a label and place it in the ip_packet_t.
2766 * Handles IPv4 and IPv6.
2767 * The caller should have a correct ixa_tsl and ixa_zoneid and have
2768 * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2769 * has been called.
2770 */
2771 int
2772 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2773 const in6_addr_t *v6dst, ip_pkt_t *ipp)
2774 {
2775 int err;
2776 ipaddr_t v4dst;
2777
2778 if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2779 uchar_t opt_storage[IP_MAX_OPT_LENGTH];
2780
2781 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2782
2783 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2784 v4dst, opt_storage, ixa->ixa_ipst);
2785 if (err == 0) {
2786 /* Length contained in opt_storage[IPOPT_OLEN] */
2787 err = optcom_pkt_set(opt_storage,
2788 opt_storage[IPOPT_OLEN],
2789 (uchar_t **)&ipp->ipp_label_v4,
2790 &ipp->ipp_label_len_v4);
2791 }
2792 if (err != 0) {
2793 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2794 char *, "conn(1) failed to update options(2) "
2795 "on ixa(3)",
2796 conn_t *, connp, char *, opt_storage,
2797 ip_xmit_attr_t *, ixa);
2798 }
2799 if (ipp->ipp_label_len_v4 != 0)
2800 ipp->ipp_fields |= IPPF_LABEL_V4;
2801 else
2802 ipp->ipp_fields &= ~IPPF_LABEL_V4;
2803 } else {
2804 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
2805 uint_t optlen;
2806
2807 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2808 v6dst, opt_storage, ixa->ixa_ipst);
2809 if (err == 0) {
2810 /*
2811 * Note that ipp_label_v6 is just the option - not
2812 * the hopopts extension header.
2813 *
2814 * Length contained in opt_storage[IPOPT_OLEN], but
2815 * that doesn't include the two byte options header.
2816 */
2817 optlen = opt_storage[IPOPT_OLEN];
2818 if (optlen != 0)
2819 optlen += 2;
2820
2821 err = optcom_pkt_set(opt_storage, optlen,
2822 (uchar_t **)&ipp->ipp_label_v6,
2823 &ipp->ipp_label_len_v6);
2824 }
2825 if (err != 0) {
2826 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2827 char *, "conn(1) failed to update options(2) "
2828 "on ixa(3)",
2829 conn_t *, connp, char *, opt_storage,
2830 ip_xmit_attr_t *, ixa);
2831 }
2832 if (ipp->ipp_label_len_v6 != 0)
2833 ipp->ipp_fields |= IPPF_LABEL_V6;
2834 else
2835 ipp->ipp_fields &= ~IPPF_LABEL_V6;
2836 }
2837 return (err);
2838 }
2839
2840 /*
2841 * Inherit all options settings from the parent/listener to the eager.
2842 * Returns zero on success; ENOMEM if memory allocation failed.
2843 *
2844 * We assume that the eager has not had any work done i.e., the conn_ixa
2845 * and conn_xmit_ipp are all zero.
2846 * Furthermore we assume that no other thread can access the eager (because
2847 * it isn't inserted in any fanout list).
2848 */
2849 int
2850 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2851 {
2852 cred_t *credp;
2853 int err;
2854 void *notify_cookie;
2855 uint32_t xmit_hint;
2856
2857 econnp->conn_family = lconnp->conn_family;
2858 econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2859 econnp->conn_wq = lconnp->conn_wq;
2860 econnp->conn_rq = lconnp->conn_rq;
2861
2862 /*
2863 * Make a safe copy of the transmit attributes.
2864 * conn_connect will later be used by the caller to setup the ire etc.
2865 */
2866 ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2867 ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2868 ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2869 ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2870
2871 /* Preserve ixa_notify_cookie and xmit_hint */
2872 notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2873 xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2874 ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2875 econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2876 econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2877
2878 econnp->conn_bound_if = lconnp->conn_bound_if;
2879 econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2880
2881 /* Inherit all RECV options */
2882 econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2883
2884 err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2885 KM_NOSLEEP);
2886 if (err != 0)
2887 return (err);
2888
2889 econnp->conn_zoneid = lconnp->conn_zoneid;
2890 econnp->conn_allzones = lconnp->conn_allzones;
2891
2892 /* This is odd. Pick a flowlabel for each connection instead? */
2893 econnp->conn_flowinfo = lconnp->conn_flowinfo;
2894
2895 econnp->conn_default_ttl = lconnp->conn_default_ttl;
2896
2897 /*
2898 * TSOL: tsol_input_proc() needs the eager's cred before the
2899 * eager is accepted
2900 */
2901 ASSERT(lconnp->conn_cred != NULL);
2902 econnp->conn_cred = credp = lconnp->conn_cred;
2903 crhold(credp);
2904 econnp->conn_cpid = lconnp->conn_cpid;
2905 econnp->conn_open_time = ddi_get_lbolt64();
2906
2907 /*
2908 * Cache things in the ixa without any refhold.
2909 * Listener might not have set up ixa_cred
2910 */
2911 ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2912 econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2913 econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2914 if (is_system_labeled())
2915 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2916
2917 /*
2918 * If the caller has the process-wide flag set, then default to MAC
2919 * exempt mode. This allows read-down to unlabeled hosts.
2920 */
2921 if (getpflags(NET_MAC_AWARE, credp) != 0)
2922 econnp->conn_mac_mode = CONN_MAC_AWARE;
2923
2924 econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2925
2926 /*
2927 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2928 * via soaccept()->soinheritoptions() which essentially applies
2929 * all the listener options to the new connection. The options that we
2930 * need to take care of are:
2931 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2932 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2933 * SO_SNDBUF, SO_RCVBUF.
2934 *
2935 * SO_RCVBUF: conn_rcvbuf is set.
2936 * SO_SNDBUF: conn_sndbuf is set.
2937 */
2938
2939 /* Could we define a struct and use a struct copy for this? */
2940 econnp->conn_sndbuf = lconnp->conn_sndbuf;
2941 econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2942 econnp->conn_sndlowat = lconnp->conn_sndlowat;
2943 econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2944 econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2945 econnp->conn_oobinline = lconnp->conn_oobinline;
2946 econnp->conn_debug = lconnp->conn_debug;
2947 econnp->conn_keepalive = lconnp->conn_keepalive;
2948 econnp->conn_linger = lconnp->conn_linger;
2949 econnp->conn_lingertime = lconnp->conn_lingertime;
2950
2951 /* Set the IP options */
2952 econnp->conn_broadcast = lconnp->conn_broadcast;
2953 econnp->conn_useloopback = lconnp->conn_useloopback;
2954 econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2955 return (0);
2956 }