1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
24 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
25 */
26
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/stropts.h>
31 #include <sys/sysmacros.h>
32 #include <sys/strsun.h>
33 #include <sys/strlog.h>
34 #include <sys/strsubr.h>
35 #define _SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/cmn_err.h>
40 #include <sys/debug.h>
41 #include <sys/sdt.h>
42 #include <sys/kobj.h>
43 #include <sys/zone.h>
44 #include <sys/neti.h>
45 #include <sys/hook.h>
46
47 #include <sys/kmem.h>
48 #include <sys/systm.h>
49 #include <sys/param.h>
50 #include <sys/socket.h>
51 #include <sys/vtrace.h>
52 #include <sys/isa_defs.h>
53 #include <sys/atomic.h>
54 #include <sys/policy.h>
55 #include <sys/mac.h>
56 #include <net/if.h>
57 #include <net/if_types.h>
58 #include <net/route.h>
59 #include <net/if_dl.h>
60 #include <sys/sockio.h>
61 #include <netinet/in.h>
62 #include <netinet/ip6.h>
63 #include <netinet/icmp6.h>
64 #include <netinet/sctp.h>
65
66 #include <inet/common.h>
67 #include <inet/mi.h>
68 #include <inet/optcom.h>
69 #include <inet/mib2.h>
70 #include <inet/nd.h>
71 #include <inet/arp.h>
72
73 #include <inet/ip.h>
74 #include <inet/ip_impl.h>
75 #include <inet/ip6.h>
76 #include <inet/ip6_asp.h>
77 #include <inet/tcp.h>
78 #include <inet/tcp_impl.h>
79 #include <inet/udp_impl.h>
80 #include <inet/ipp_common.h>
81
82 #include <inet/ip_multi.h>
83 #include <inet/ip_if.h>
84 #include <inet/ip_ire.h>
85 #include <inet/ip_rts.h>
86 #include <inet/ip_ndp.h>
87 #include <net/pfkeyv2.h>
88 #include <inet/sadb.h>
89 #include <inet/ipsec_impl.h>
90 #include <inet/iptun/iptun_impl.h>
91 #include <inet/sctp_ip.h>
92 #include <sys/pattr.h>
93 #include <inet/ipclassifier.h>
94 #include <inet/ipsecah.h>
95 #include <inet/rawip_impl.h>
96 #include <inet/rts_impl.h>
97 #include <sys/squeue_impl.h>
98 #include <sys/squeue.h>
99
100 #include <sys/tsol/label.h>
101 #include <sys/tsol/tnet.h>
102
103 /* Temporary; for CR 6451644 work-around */
104 #include <sys/ethernet.h>
105
106 /*
107 * Naming conventions:
108 * These rules should be judiciously applied
109 * if there is a need to identify something as IPv6 versus IPv4
110 * IPv6 functions will end with _v6 in the ip module.
111 * IPv6 functions will end with _ipv6 in the transport modules.
112 * IPv6 macros:
113 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
114 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
115 * And then there are ..V4_PART_OF_V6.
116 * The intent is that macros in the ip module end with _V6.
117 * IPv6 global variables will start with ipv6_
118 * IPv6 structures will start with ipv6
119 * IPv6 defined constants should start with IPV6_
120 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
121 */
122
123 /*
124 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
125 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
126 * from IANA. This mechanism will remain in effect until an official
127 * number is obtained.
128 */
129 uchar_t ip6opt_ls;
130
131 const in6_addr_t ipv6_all_ones =
132 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
133 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
134
135 #ifdef _BIG_ENDIAN
136 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
137 #else /* _BIG_ENDIAN */
138 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
139 #endif /* _BIG_ENDIAN */
140
141 #ifdef _BIG_ENDIAN
142 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
143 #else /* _BIG_ENDIAN */
144 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
145 #endif /* _BIG_ENDIAN */
146
147 #ifdef _BIG_ENDIAN
148 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
149 #else /* _BIG_ENDIAN */
150 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
151 #endif /* _BIG_ENDIAN */
152
153 #ifdef _BIG_ENDIAN
154 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
155 #else /* _BIG_ENDIAN */
156 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
157 #endif /* _BIG_ENDIAN */
158
159 #ifdef _BIG_ENDIAN
160 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
161 #else /* _BIG_ENDIAN */
162 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
163 #endif /* _BIG_ENDIAN */
164
165 #ifdef _BIG_ENDIAN
166 const in6_addr_t ipv6_solicited_node_mcast =
167 { 0xff020000U, 0, 0x00000001U, 0xff000000U };
168 #else /* _BIG_ENDIAN */
169 const in6_addr_t ipv6_solicited_node_mcast =
170 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
171 #endif /* _BIG_ENDIAN */
172
173 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
174 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
175 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
176 ip_recv_attr_t *);
177 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
178 ip_recv_attr_t *);
179 static void icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
180 in6_addr_t *, ip_recv_attr_t *);
181 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
182 ip_recv_attr_t *);
183 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
184
185 /*
186 * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
187 * If the ICMP message is consumed by IP, i.e., it should not be delivered
188 * to any IPPROTO_ICMP raw sockets, then it returns NULL.
189 * Likewise, if the ICMP error is misformed (too short, etc), then it
190 * returns NULL. The caller uses this to determine whether or not to send
191 * to raw sockets.
192 *
193 * All error messages are passed to the matching transport stream.
194 *
195 * See comment for icmp_inbound_v4() on how IPsec is handled.
196 */
197 mblk_t *
198 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
199 {
200 icmp6_t *icmp6;
201 ip6_t *ip6h; /* Outer header */
202 int ip_hdr_length; /* Outer header length */
203 boolean_t interested;
204 ill_t *ill = ira->ira_ill;
205 ip_stack_t *ipst = ill->ill_ipst;
206 mblk_t *mp_ret = NULL;
207
208 ip6h = (ip6_t *)mp->b_rptr;
209
210 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
211
212 /* Check for Martian packets */
213 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
214 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
215 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
216 freemsg(mp);
217 return (NULL);
218 }
219
220 /* Make sure ira_l2src is set for ndp_input */
221 if (!(ira->ira_flags & IRAF_L2SRC_SET))
222 ip_setl2src(mp, ira, ira->ira_rill);
223
224 ip_hdr_length = ira->ira_ip_hdr_length;
225 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
226 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
227 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
228 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
229 freemsg(mp);
230 return (NULL);
231 }
232 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
233 if (ip6h == NULL) {
234 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
235 freemsg(mp);
236 return (NULL);
237 }
238 }
239
240 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
241 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
242 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
243 icmp6->icmp6_code));
244
245 /*
246 * We will set "interested" to "true" if we should pass a copy to
247 * the transport i.e., if it is an error message.
248 */
249 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
250
251 switch (icmp6->icmp6_type) {
252 case ICMP6_DST_UNREACH:
253 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
254 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
255 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
256 break;
257
258 case ICMP6_TIME_EXCEEDED:
259 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
260 break;
261
262 case ICMP6_PARAM_PROB:
263 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
264 break;
265
266 case ICMP6_PACKET_TOO_BIG:
267 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
268 break;
269
270 case ICMP6_ECHO_REQUEST:
271 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
272 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
273 !ipst->ips_ipv6_resp_echo_mcast)
274 break;
275
276 /*
277 * We must have exclusive use of the mblk to convert it to
278 * a response.
279 * If not, we copy it.
280 */
281 if (mp->b_datap->db_ref > 1) {
282 mblk_t *mp1;
283
284 mp1 = copymsg(mp);
285 if (mp1 == NULL) {
286 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
287 ip_drop_input("ipIfStatsInDiscards - copymsg",
288 mp, ill);
289 freemsg(mp);
290 return (NULL);
291 }
292 freemsg(mp);
293 mp = mp1;
294 ip6h = (ip6_t *)mp->b_rptr;
295 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
296 }
297
298 icmp6->icmp6_type = ICMP6_ECHO_REPLY;
299 icmp_send_reply_v6(mp, ip6h, icmp6, ira);
300 return (NULL);
301
302 case ICMP6_ECHO_REPLY:
303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
304 break;
305
306 case ND_ROUTER_SOLICIT:
307 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
308 break;
309
310 case ND_ROUTER_ADVERT:
311 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
312 break;
313
314 case ND_NEIGHBOR_SOLICIT:
315 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
316 ndp_input(mp, ira);
317 return (NULL);
318
319 case ND_NEIGHBOR_ADVERT:
320 BUMP_MIB(ill->ill_icmp6_mib,
321 ipv6IfIcmpInNeighborAdvertisements);
322 ndp_input(mp, ira);
323 return (NULL);
324
325 case ND_REDIRECT:
326 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
327
328 if (ipst->ips_ipv6_ignore_redirect)
329 break;
330
331 /* We now allow a RAW socket to receive this. */
332 interested = B_TRUE;
333 break;
334
335 /*
336 * The next three icmp messages will be handled by MLD.
337 * Pass all valid MLD packets up to any process(es)
338 * listening on a raw ICMP socket.
339 */
340 case MLD_LISTENER_QUERY:
341 case MLD_LISTENER_REPORT:
342 case MLD_LISTENER_REDUCTION:
343 mp = mld_input(mp, ira);
344 return (mp);
345 default:
346 break;
347 }
348 /*
349 * See if there is an ICMP client to avoid an extra copymsg/freemsg
350 * if there isn't one.
351 */
352 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
353 /* If there is an ICMP client and we want one too, copy it. */
354
355 if (!interested) {
356 /* Caller will deliver to RAW sockets */
357 return (mp);
358 }
359 mp_ret = copymsg(mp);
360 if (mp_ret == NULL) {
361 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
362 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
363 }
364 } else if (!interested) {
365 /* Neither we nor raw sockets are interested. Drop packet now */
366 freemsg(mp);
367 return (NULL);
368 }
369
370 /*
371 * ICMP error or redirect packet. Make sure we have enough of
372 * the header and that db_ref == 1 since we might end up modifying
373 * the packet.
374 */
375 if (mp->b_cont != NULL) {
376 if (ip_pullup(mp, -1, ira) == NULL) {
377 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
378 ip_drop_input("ipIfStatsInDiscards - ip_pullup",
379 mp, ill);
380 freemsg(mp);
381 return (mp_ret);
382 }
383 }
384
385 if (mp->b_datap->db_ref > 1) {
386 mblk_t *mp1;
387
388 mp1 = copymsg(mp);
389 if (mp1 == NULL) {
390 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
391 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
392 freemsg(mp);
393 return (mp_ret);
394 }
395 freemsg(mp);
396 mp = mp1;
397 }
398
399 /*
400 * In case mp has changed, verify the message before any further
401 * processes.
402 */
403 ip6h = (ip6_t *)mp->b_rptr;
404 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
405 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
406 freemsg(mp);
407 return (mp_ret);
408 }
409
410 switch (icmp6->icmp6_type) {
411 case ND_REDIRECT:
412 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
413 break;
414 case ICMP6_PACKET_TOO_BIG:
415 /* Update DCE and adjust MTU is icmp header if needed */
416 icmp_inbound_too_big_v6(icmp6, ira);
417 /* FALLTHRU */
418 default:
419 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
420 break;
421 }
422
423 return (mp_ret);
424 }
425
426 /*
427 * Send an ICMP echo reply.
428 * The caller has already updated the payload part of the packet.
429 * We handle the ICMP checksum, IP source address selection and feed
430 * the packet into ip_output_simple.
431 */
432 static void
433 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
434 ip_recv_attr_t *ira)
435 {
436 uint_t ip_hdr_length = ira->ira_ip_hdr_length;
437 ill_t *ill = ira->ira_ill;
438 ip_stack_t *ipst = ill->ill_ipst;
439 ip_xmit_attr_t ixas;
440 in6_addr_t origsrc;
441
442 /*
443 * Remove any extension headers (do not reverse a source route)
444 * and clear the flow id (keep traffic class for now).
445 */
446 if (ip_hdr_length != IPV6_HDR_LEN) {
447 int i;
448
449 for (i = 0; i < IPV6_HDR_LEN; i++) {
450 mp->b_rptr[ip_hdr_length - i - 1] =
451 mp->b_rptr[IPV6_HDR_LEN - i - 1];
452 }
453 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
454 ip6h = (ip6_t *)mp->b_rptr;
455 ip6h->ip6_nxt = IPPROTO_ICMPV6;
456 i = ntohs(ip6h->ip6_plen);
457 i -= (ip_hdr_length - IPV6_HDR_LEN);
458 ip6h->ip6_plen = htons(i);
459 ip_hdr_length = IPV6_HDR_LEN;
460 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
461 }
462 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
463
464 /* Reverse the source and destination addresses. */
465 origsrc = ip6h->ip6_src;
466 ip6h->ip6_src = ip6h->ip6_dst;
467 ip6h->ip6_dst = origsrc;
468
469 /* set the hop limit */
470 ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
471
472 /*
473 * Prepare for checksum by putting icmp length in the icmp
474 * checksum field. The checksum is calculated in ip_output
475 */
476 icmp6->icmp6_cksum = ip6h->ip6_plen;
477
478 bzero(&ixas, sizeof (ixas));
479 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
480 ixas.ixa_zoneid = ira->ira_zoneid;
481 ixas.ixa_cred = kcred;
482 ixas.ixa_cpid = NOPID;
483 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */
484 ixas.ixa_ifindex = 0;
485 ixas.ixa_ipst = ipst;
486 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
487
488 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
489 /*
490 * This packet should go out the same way as it
491 * came in i.e in clear, independent of the IPsec
492 * policy for transmitting packets.
493 */
494 ixas.ixa_flags |= IXAF_NO_IPSEC;
495 } else {
496 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
497 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
498 /* Note: mp already consumed and ip_drop_packet done */
499 return;
500 }
501 }
502
503 /* Was the destination (now source) link-local? Send out same group */
504 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
505 ixas.ixa_flags |= IXAF_SCOPEID_SET;
506 if (IS_UNDER_IPMP(ill))
507 ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
508 else
509 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
510 }
511
512 if (ira->ira_flags & IRAF_MULTIBROADCAST) {
513 /*
514 * Not one or our addresses (IRE_LOCALs), thus we let
515 * ip_output_simple pick the source.
516 */
517 ip6h->ip6_src = ipv6_all_zeros;
518 ixas.ixa_flags |= IXAF_SET_SOURCE;
519 }
520
521 /* Should we send using dce_pmtu? */
522 if (ipst->ips_ipv6_icmp_return_pmtu)
523 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
524
525 (void) ip_output_simple(mp, &ixas);
526 ixa_cleanup(&ixas);
527
528 }
529
530 /*
531 * Verify the ICMP messages for either for ICMP error or redirect packet.
532 * The caller should have fully pulled up the message. If it's a redirect
533 * packet, only basic checks on IP header will be done; otherwise, verify
534 * the packet by looking at the included ULP header.
535 *
536 * Called before icmp_inbound_error_fanout_v6 is called.
537 */
538 static boolean_t
539 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
540 {
541 ill_t *ill = ira->ira_ill;
542 uint16_t hdr_length;
543 uint8_t *nexthdrp;
544 uint8_t nexthdr;
545 ip_stack_t *ipst = ill->ill_ipst;
546 conn_t *connp;
547 ip6_t *ip6h; /* Inner header */
548
549 ip6h = (ip6_t *)&icmp6[1];
550 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
551 goto truncated;
552
553 if (icmp6->icmp6_type == ND_REDIRECT) {
554 hdr_length = sizeof (nd_redirect_t);
555 } else {
556 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
557 goto discard_pkt;
558 hdr_length = IPV6_HDR_LEN;
559 }
560
561 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
562 goto truncated;
563
564 /*
565 * Stop here for ICMP_REDIRECT.
566 */
567 if (icmp6->icmp6_type == ND_REDIRECT)
568 return (B_TRUE);
569
570 /*
571 * ICMP errors only.
572 */
573 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
574 goto discard_pkt;
575 nexthdr = *nexthdrp;
576
577 /* Try to pass the ICMP message to clients who need it */
578 switch (nexthdr) {
579 case IPPROTO_UDP:
580 /*
581 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
582 * transport header.
583 */
584 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
585 mp->b_wptr)
586 goto truncated;
587 break;
588 case IPPROTO_TCP: {
589 tcpha_t *tcpha;
590
591 /*
592 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
593 * transport header.
594 */
595 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
596 mp->b_wptr)
597 goto truncated;
598
599 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
600 /*
601 * With IPMP we need to match across group, which we do
602 * since we have the upper ill from ira_ill.
603 */
604 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
605 ill->ill_phyint->phyint_ifindex, ipst);
606 if (connp == NULL)
607 goto discard_pkt;
608
609 if ((connp->conn_verifyicmp != NULL) &&
610 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
611 CONN_DEC_REF(connp);
612 goto discard_pkt;
613 }
614 CONN_DEC_REF(connp);
615 break;
616 }
617 case IPPROTO_SCTP:
618 /*
619 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
620 * transport header.
621 */
622 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
623 mp->b_wptr)
624 goto truncated;
625 break;
626 case IPPROTO_ESP:
627 case IPPROTO_AH:
628 break;
629 case IPPROTO_ENCAP:
630 case IPPROTO_IPV6: {
631 /* Look for self-encapsulated packets that caused an error */
632 ip6_t *in_ip6h;
633
634 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
635 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
636 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
637 goto truncated;
638 break;
639 }
640 default:
641 break;
642 }
643
644 return (B_TRUE);
645
646 discard_pkt:
647 /* Bogus ICMP error. */
648 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
649 return (B_FALSE);
650
651 truncated:
652 /* We pulled up everthing already. Must be truncated */
653 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
654 return (B_FALSE);
655 }
656
657 /*
658 * Process received IPv6 ICMP Packet too big.
659 * The caller is responsible for validating the packet before passing it in
660 * and also to fanout the ICMP error to any matching transport conns. Assumes
661 * the message has been fully pulled up.
662 *
663 * Before getting here, the caller has called icmp_inbound_verify_v6()
664 * that should have verified with ULP to prevent undoing the changes we're
665 * going to make to DCE. For example, TCP might have verified that the packet
666 * which generated error is in the send window.
667 *
668 * In some cases modified this MTU in the ICMP header packet; the caller
669 * should pass to the matching ULP after this returns.
670 */
671 static void
672 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
673 {
674 uint32_t mtu;
675 dce_t *dce;
676 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */
677 ip_stack_t *ipst = ill->ill_ipst;
678 int old_max_frag;
679 in6_addr_t final_dst;
680 ip6_t *ip6h; /* Inner IP header */
681
682 /* Caller has already pulled up everything. */
683 ip6h = (ip6_t *)&icmp6[1];
684 final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
685
686 mtu = ntohl(icmp6->icmp6_mtu);
687 if (mtu < IPV6_MIN_MTU) {
688 /*
689 * RFC 8021 suggests to ignore messages where mtu is
690 * less than the IPv6 minimum.
691 */
692 ip1dbg(("Received mtu less than IPv6 "
693 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
694 DTRACE_PROBE1(icmp6__too__small__mtu, uint32_t, mtu);
695 return;
696 }
697
698 /*
699 * For link local destinations matching simply on address is not
700 * sufficient. Same link local addresses for different ILL's is
701 * possible.
702 */
703 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
704 dce = dce_lookup_and_add_v6(&final_dst,
705 ill->ill_phyint->phyint_ifindex, ipst);
706 } else {
707 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
708 }
709 if (dce == NULL) {
710 /* Couldn't add a unique one - ENOMEM */
711 if (ip_debug > 2) {
712 /* ip1dbg */
713 pr_addr_dbg("icmp_inbound_too_big_v6:"
714 "no dce for dst %s\n", AF_INET6,
715 &final_dst);
716 }
717 return;
718 }
719
720 mutex_enter(&dce->dce_lock);
721 if (dce->dce_flags & DCEF_PMTU)
722 old_max_frag = dce->dce_pmtu;
723 else if (IN6_IS_ADDR_MULTICAST(&final_dst))
724 old_max_frag = ill->ill_mc_mtu;
725 else
726 old_max_frag = ill->ill_mtu;
727
728 ip1dbg(("Received mtu from router: %d\n", mtu));
729 DTRACE_PROBE1(icmp6__received__mtu, uint32_t, mtu);
730 dce->dce_pmtu = MIN(old_max_frag, mtu);
731 icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
732
733 /* We now have a PMTU for sure */
734 dce->dce_flags |= DCEF_PMTU;
735 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
736
737 mutex_exit(&dce->dce_lock);
738 /*
739 * After dropping the lock the new value is visible to everyone.
740 * Then we bump the generation number so any cached values reinspect
741 * the dce_t.
742 */
743 dce_increment_generation(dce);
744 dce_refrele(dce);
745 }
746
747 /*
748 * Fanout received ICMPv6 error packets to the transports.
749 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
750 *
751 * The caller must have called icmp_inbound_verify_v6.
752 */
753 void
754 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
755 {
756 uint16_t *up; /* Pointer to ports in ULP header */
757 uint32_t ports; /* reversed ports for fanout */
758 ip6_t rip6h; /* With reversed addresses */
759 ip6_t *ip6h; /* Inner IP header */
760 uint16_t hdr_length; /* Inner IP header length */
761 uint8_t *nexthdrp;
762 uint8_t nexthdr;
763 tcpha_t *tcpha;
764 conn_t *connp;
765 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */
766 ip_stack_t *ipst = ill->ill_ipst;
767 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
768
769 /* Caller has already pulled up everything. */
770 ip6h = (ip6_t *)&icmp6[1];
771 ASSERT(mp->b_cont == NULL);
772 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
773
774 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
775 goto drop_pkt;
776 nexthdr = *nexthdrp;
777 ira->ira_protocol = nexthdr;
778
779 /*
780 * We need a separate IP header with the source and destination
781 * addresses reversed to do fanout/classification because the ip6h in
782 * the ICMPv6 error is in the form we sent it out.
783 */
784 rip6h.ip6_src = ip6h->ip6_dst;
785 rip6h.ip6_dst = ip6h->ip6_src;
786 rip6h.ip6_nxt = nexthdr;
787
788 /* Try to pass the ICMP message to clients who need it */
789 switch (nexthdr) {
790 case IPPROTO_UDP: {
791 /* Attempt to find a client stream based on port. */
792 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
793
794 /* Note that we send error to all matches. */
795 ira->ira_flags |= IRAF_ICMP_ERROR;
796 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
797 ira->ira_flags &= ~IRAF_ICMP_ERROR;
798 return;
799 }
800 case IPPROTO_TCP: {
801 /*
802 * Attempt to find a client stream based on port.
803 * Note that we do a reverse lookup since the header is
804 * in the form we sent it out.
805 */
806 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
807 /*
808 * With IPMP we need to match across group, which we do
809 * since we have the upper ill from ira_ill.
810 */
811 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
812 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
813 if (connp == NULL) {
814 goto drop_pkt;
815 }
816
817 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
818 (ira->ira_flags & IRAF_IPSEC_SECURE)) {
819 mp = ipsec_check_inbound_policy(mp, connp,
820 NULL, ip6h, ira);
821 if (mp == NULL) {
822 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
823 /* Note that mp is NULL */
824 ip_drop_input("ipIfStatsInDiscards", mp, ill);
825 CONN_DEC_REF(connp);
826 return;
827 }
828 }
829
830 ira->ira_flags |= IRAF_ICMP_ERROR;
831 if (IPCL_IS_TCP(connp)) {
832 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
833 connp->conn_recvicmp, connp, ira, SQ_FILL,
834 SQTAG_TCP6_INPUT_ICMP_ERR);
835 } else {
836 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
837 ill_t *rill = ira->ira_rill;
838
839 ira->ira_ill = ira->ira_rill = NULL;
840 (connp->conn_recv)(connp, mp, NULL, ira);
841 CONN_DEC_REF(connp);
842 ira->ira_ill = ill;
843 ira->ira_rill = rill;
844 }
845 ira->ira_flags &= ~IRAF_ICMP_ERROR;
846 return;
847
848 }
849 case IPPROTO_SCTP:
850 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
851 /* Find a SCTP client stream for this packet. */
852 ((uint16_t *)&ports)[0] = up[1];
853 ((uint16_t *)&ports)[1] = up[0];
854
855 ira->ira_flags |= IRAF_ICMP_ERROR;
856 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
857 ira->ira_flags &= ~IRAF_ICMP_ERROR;
858 return;
859
860 case IPPROTO_ESP:
861 case IPPROTO_AH:
862 if (!ipsec_loaded(ipss)) {
863 ip_proto_not_sup(mp, ira);
864 return;
865 }
866
867 if (nexthdr == IPPROTO_ESP)
868 mp = ipsecesp_icmp_error(mp, ira);
869 else
870 mp = ipsecah_icmp_error(mp, ira);
871 if (mp == NULL)
872 return;
873
874 /* Just in case ipsec didn't preserve the NULL b_cont */
875 if (mp->b_cont != NULL) {
876 if (!pullupmsg(mp, -1))
877 goto drop_pkt;
878 }
879
880 /*
881 * If succesful, the mp has been modified to not include
882 * the ESP/AH header so we can fanout to the ULP's icmp
883 * error handler.
884 */
885 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
886 goto drop_pkt;
887
888 ip6h = (ip6_t *)mp->b_rptr;
889 /* Don't call hdr_length_v6() unless you have to. */
890 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
891 hdr_length = ip_hdr_length_v6(mp, ip6h);
892 else
893 hdr_length = IPV6_HDR_LEN;
894
895 /* Verify the modified message before any further processes. */
896 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
897 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
898 freemsg(mp);
899 return;
900 }
901
902 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
903 return;
904
905 case IPPROTO_IPV6: {
906 /* Look for self-encapsulated packets that caused an error */
907 ip6_t *in_ip6h;
908
909 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
910
911 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
912 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
913 /*
914 * Self-encapsulated case. As in the ipv4 case,
915 * we need to strip the 2nd IP header. Since mp
916 * is already pulled-up, we can simply bcopy
917 * the 3rd header + data over the 2nd header.
918 */
919 uint16_t unused_len;
920
921 /*
922 * Make sure we don't do recursion more than once.
923 */
924 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
925 &unused_len, &nexthdrp) ||
926 *nexthdrp == IPPROTO_IPV6) {
927 goto drop_pkt;
928 }
929
930 /*
931 * Copy the 3rd header + remaining data on top
932 * of the 2nd header.
933 */
934 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
935
936 /*
937 * Subtract length of the 2nd header.
938 */
939 mp->b_wptr -= hdr_length;
940
941 ip6h = (ip6_t *)mp->b_rptr;
942 /* Don't call hdr_length_v6() unless you have to. */
943 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
944 hdr_length = ip_hdr_length_v6(mp, ip6h);
945 else
946 hdr_length = IPV6_HDR_LEN;
947
948 /*
949 * Verify the modified message before any further
950 * processes.
951 */
952 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
953 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
954 freemsg(mp);
955 return;
956 }
957
958 /*
959 * Now recurse, and see what I _really_ should be
960 * doing here.
961 */
962 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
963 return;
964 }
965 /* FALLTHRU */
966 }
967 case IPPROTO_ENCAP:
968 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
969 &rip6h.ip6_dst, ipst)) != NULL) {
970 ira->ira_flags |= IRAF_ICMP_ERROR;
971 connp->conn_recvicmp(connp, mp, NULL, ira);
972 CONN_DEC_REF(connp);
973 ira->ira_flags &= ~IRAF_ICMP_ERROR;
974 return;
975 }
976 /*
977 * No IP tunnel is interested, fallthrough and see
978 * if a raw socket will want it.
979 */
980 /* FALLTHRU */
981 default:
982 ira->ira_flags |= IRAF_ICMP_ERROR;
983 ASSERT(ira->ira_protocol == nexthdr);
984 ip_fanout_proto_v6(mp, &rip6h, ira);
985 ira->ira_flags &= ~IRAF_ICMP_ERROR;
986 return;
987 }
988 /* NOTREACHED */
989 drop_pkt:
990 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
991 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
992 freemsg(mp);
993 }
994
995 /*
996 * Process received IPv6 ICMP Redirect messages.
997 * Assumes the caller has verified that the headers are in the pulled up mblk.
998 * Consumes mp.
999 */
1000 /* ARGSUSED */
1001 static void
1002 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1003 ip_recv_attr_t *ira)
1004 {
1005 ire_t *ire, *nire;
1006 ire_t *prev_ire = NULL;
1007 ire_t *redir_ire;
1008 in6_addr_t *src, *dst, *gateway;
1009 nd_opt_hdr_t *opt;
1010 nce_t *nce;
1011 int ncec_flags = 0;
1012 int err = 0;
1013 boolean_t redirect_to_router = B_FALSE;
1014 int len;
1015 int optlen;
1016 ill_t *ill = ira->ira_rill;
1017 ill_t *rill = ira->ira_rill;
1018 ip_stack_t *ipst = ill->ill_ipst;
1019
1020 /*
1021 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1022 * and make it be the IPMP upper so avoid being confused by a packet
1023 * addressed to a unicast address on a different ill.
1024 */
1025 if (IS_UNDER_IPMP(rill)) {
1026 rill = ipmp_ill_hold_ipmp_ill(rill);
1027 if (rill == NULL) {
1028 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1029 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1030 mp, ill);
1031 freemsg(mp);
1032 return;
1033 }
1034 ASSERT(rill != ira->ira_rill);
1035 }
1036
1037 len = mp->b_wptr - (uchar_t *)rd;
1038 src = &ip6h->ip6_src;
1039 dst = &rd->nd_rd_dst;
1040 gateway = &rd->nd_rd_target;
1041
1042 /* Verify if it is a valid redirect */
1043 if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1044 (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1045 (rd->nd_rd_code != 0) ||
1046 (len < sizeof (nd_redirect_t)) ||
1047 (IN6_IS_ADDR_V4MAPPED(dst)) ||
1048 (IN6_IS_ADDR_MULTICAST(dst))) {
1049 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1050 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1051 goto fail_redirect;
1052 }
1053
1054 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1055 IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1056 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1057 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1058 mp, ill);
1059 goto fail_redirect;
1060 }
1061
1062 optlen = len - sizeof (nd_redirect_t);
1063 if (optlen != 0) {
1064 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1065 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1066 ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1067 mp, ill);
1068 goto fail_redirect;
1069 }
1070 }
1071
1072 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1073 redirect_to_router = B_TRUE;
1074 ncec_flags |= NCE_F_ISROUTER;
1075 } else {
1076 gateway = dst; /* Add nce for dst */
1077 }
1078
1079
1080 /*
1081 * Verify that the IP source address of the redirect is
1082 * the same as the current first-hop router for the specified
1083 * ICMP destination address.
1084 * Also, Make sure we had a route for the dest in question and
1085 * that route was pointing to the old gateway (the source of the
1086 * redirect packet.)
1087 * We do longest match and then compare ire_gateway_addr_v6 below.
1088 */
1089 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1090 ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1091
1092 /*
1093 * Check that
1094 * the redirect was not from ourselves
1095 * old gateway is still directly reachable
1096 */
1097 if (prev_ire == NULL ||
1098 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1099 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1100 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1101 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1102 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1103 goto fail_redirect;
1104 }
1105
1106 ASSERT(prev_ire->ire_ill != NULL);
1107 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1108 ncec_flags |= NCE_F_NONUD;
1109
1110 opt = (nd_opt_hdr_t *)&rd[1];
1111 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1112 if (opt != NULL) {
1113 err = nce_lookup_then_add_v6(rill,
1114 (uchar_t *)&opt[1], /* Link layer address */
1115 rill->ill_phys_addr_length,
1116 gateway, ncec_flags, ND_STALE, &nce);
1117 switch (err) {
1118 case 0:
1119 nce_refrele(nce);
1120 break;
1121 case EEXIST:
1122 /*
1123 * Check to see if link layer address has changed and
1124 * process the ncec_state accordingly.
1125 */
1126 nce_process(nce->nce_common,
1127 (uchar_t *)&opt[1], 0, B_FALSE);
1128 nce_refrele(nce);
1129 break;
1130 default:
1131 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1132 err));
1133 goto fail_redirect;
1134 }
1135 }
1136 if (redirect_to_router) {
1137 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1138
1139 /*
1140 * Create a Route Association. This will allow us to remember
1141 * a router told us to use the particular gateway.
1142 */
1143 ire = ire_create_v6(
1144 dst,
1145 &ipv6_all_ones, /* mask */
1146 gateway, /* gateway addr */
1147 IRE_HOST,
1148 prev_ire->ire_ill,
1149 ALL_ZONES,
1150 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1151 NULL,
1152 ipst);
1153 } else {
1154 ipif_t *ipif;
1155 in6_addr_t gw;
1156
1157 /*
1158 * Just create an on link entry, i.e. interface route.
1159 * The gateway field is our link-local on the ill.
1160 */
1161 mutex_enter(&rill->ill_lock);
1162 for (ipif = rill->ill_ipif; ipif != NULL;
1163 ipif = ipif->ipif_next) {
1164 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1165 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1166 break;
1167 }
1168 if (ipif == NULL) {
1169 /* We have no link-local address! */
1170 mutex_exit(&rill->ill_lock);
1171 goto fail_redirect;
1172 }
1173 gw = ipif->ipif_v6lcl_addr;
1174 mutex_exit(&rill->ill_lock);
1175
1176 ire = ire_create_v6(
1177 dst, /* gateway == dst */
1178 &ipv6_all_ones, /* mask */
1179 &gw, /* gateway addr */
1180 rill->ill_net_type, /* IF_[NO]RESOLVER */
1181 prev_ire->ire_ill,
1182 ALL_ZONES,
1183 (RTF_DYNAMIC | RTF_HOST),
1184 NULL,
1185 ipst);
1186 }
1187
1188 if (ire == NULL)
1189 goto fail_redirect;
1190
1191 nire = ire_add(ire);
1192 /* Check if it was a duplicate entry */
1193 if (nire != NULL && nire != ire) {
1194 ASSERT(nire->ire_identical_ref > 1);
1195 ire_delete(nire);
1196 ire_refrele(nire);
1197 nire = NULL;
1198 }
1199 ire = nire;
1200 if (ire != NULL) {
1201 ire_refrele(ire); /* Held in ire_add */
1202
1203 /* tell routing sockets that we received a redirect */
1204 ip_rts_change_v6(RTM_REDIRECT,
1205 &rd->nd_rd_dst,
1206 &rd->nd_rd_target,
1207 &ipv6_all_ones, 0, src,
1208 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1209 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1210
1211 /*
1212 * Delete any existing IRE_HOST type ires for this destination.
1213 * This together with the added IRE has the effect of
1214 * modifying an existing redirect.
1215 */
1216 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1217 prev_ire->ire_ill, ALL_ZONES, NULL,
1218 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1219 NULL);
1220
1221 if (redir_ire != NULL) {
1222 if (redir_ire->ire_flags & RTF_DYNAMIC)
1223 ire_delete(redir_ire);
1224 ire_refrele(redir_ire);
1225 }
1226 }
1227
1228 ire_refrele(prev_ire);
1229 prev_ire = NULL;
1230
1231 fail_redirect:
1232 if (prev_ire != NULL)
1233 ire_refrele(prev_ire);
1234 freemsg(mp);
1235 if (rill != ira->ira_rill)
1236 ill_refrele(rill);
1237 }
1238
1239 /*
1240 * Build and ship an IPv6 ICMP message using the packet data in mp,
1241 * and the ICMP header pointed to by "stuff". (May be called as
1242 * writer.)
1243 * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1244 * verify that an icmp error packet can be sent.
1245 *
1246 * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1247 * source address (see above function).
1248 */
1249 static void
1250 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1251 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1252 {
1253 ip6_t *ip6h;
1254 in6_addr_t v6dst;
1255 size_t len_needed;
1256 size_t msg_len;
1257 mblk_t *mp1;
1258 icmp6_t *icmp6;
1259 in6_addr_t v6src;
1260 ill_t *ill = ira->ira_ill;
1261 ip_stack_t *ipst = ill->ill_ipst;
1262 ip_xmit_attr_t ixas;
1263
1264 ip6h = (ip6_t *)mp->b_rptr;
1265
1266 bzero(&ixas, sizeof (ixas));
1267 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1268 ixas.ixa_zoneid = ira->ira_zoneid;
1269 ixas.ixa_ifindex = 0;
1270 ixas.ixa_ipst = ipst;
1271 ixas.ixa_cred = kcred;
1272 ixas.ixa_cpid = NOPID;
1273 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */
1274 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1275
1276 /*
1277 * If the source of the original packet was link-local, then
1278 * make sure we send on the same ill (group) as we received it on.
1279 */
1280 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1281 ixas.ixa_flags |= IXAF_SCOPEID_SET;
1282 if (IS_UNDER_IPMP(ill))
1283 ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1284 else
1285 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1286 }
1287
1288 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1289 /*
1290 * Apply IPsec based on how IPsec was applied to
1291 * the packet that had the error.
1292 *
1293 * If it was an outbound packet that caused the ICMP
1294 * error, then the caller will have setup the IRA
1295 * appropriately.
1296 */
1297 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1298 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1299 /* Note: mp already consumed and ip_drop_packet done */
1300 return;
1301 }
1302 } else {
1303 /*
1304 * This is in clear. The icmp message we are building
1305 * here should go out in clear, independent of our policy.
1306 */
1307 ixas.ixa_flags |= IXAF_NO_IPSEC;
1308 }
1309
1310 /*
1311 * If the caller specified the source we use that.
1312 * Otherwise, if the packet was for one of our unicast addresses, make
1313 * sure we respond with that as the source. Otherwise
1314 * have ip_output_simple pick the source address.
1315 */
1316 if (v6src_ptr != NULL) {
1317 v6src = *v6src_ptr;
1318 } else {
1319 ire_t *ire;
1320 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1321
1322 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1323 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1324 match_flags |= MATCH_IRE_ILL;
1325
1326 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1327 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1328 match_flags, 0, ipst, NULL);
1329 if (ire != NULL) {
1330 v6src = ip6h->ip6_dst;
1331 ire_refrele(ire);
1332 } else {
1333 v6src = ipv6_all_zeros;
1334 ixas.ixa_flags |= IXAF_SET_SOURCE;
1335 }
1336 }
1337 v6dst = ip6h->ip6_src;
1338 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1339 msg_len = msgdsize(mp);
1340 if (msg_len > len_needed) {
1341 if (!adjmsg(mp, len_needed - msg_len)) {
1342 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1343 freemsg(mp);
1344 return;
1345 }
1346 msg_len = len_needed;
1347 }
1348 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1349 if (mp1 == NULL) {
1350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1351 freemsg(mp);
1352 return;
1353 }
1354 mp1->b_cont = mp;
1355 mp = mp1;
1356
1357 /*
1358 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1359 * node generates be accepted in peace by all on-host destinations.
1360 * If we do NOT assume that all on-host destinations trust
1361 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1362 * (Look for IXAF_TRUSTED_ICMP).
1363 */
1364 ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1365
1366 ip6h = (ip6_t *)mp->b_rptr;
1367 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1368
1369 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1370 ip6h->ip6_nxt = IPPROTO_ICMPV6;
1371 ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1372 ip6h->ip6_dst = v6dst;
1373 ip6h->ip6_src = v6src;
1374 msg_len += IPV6_HDR_LEN + len;
1375 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1376 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1377 msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1378 }
1379 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1380 icmp6 = (icmp6_t *)&ip6h[1];
1381 bcopy(stuff, (char *)icmp6, len);
1382 /*
1383 * Prepare for checksum by putting icmp length in the icmp
1384 * checksum field. The checksum is calculated in ip_output_wire_v6.
1385 */
1386 icmp6->icmp6_cksum = ip6h->ip6_plen;
1387 if (icmp6->icmp6_type == ND_REDIRECT) {
1388 ip6h->ip6_hops = IPV6_MAX_HOPS;
1389 }
1390
1391 (void) ip_output_simple(mp, &ixas);
1392 ixa_cleanup(&ixas);
1393 }
1394
1395 /*
1396 * Update the output mib when ICMPv6 packets are sent.
1397 */
1398 void
1399 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1400 {
1401 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1402
1403 switch (icmp6->icmp6_type) {
1404 case ICMP6_DST_UNREACH:
1405 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1406 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1407 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1408 break;
1409
1410 case ICMP6_TIME_EXCEEDED:
1411 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1412 break;
1413
1414 case ICMP6_PARAM_PROB:
1415 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1416 break;
1417
1418 case ICMP6_PACKET_TOO_BIG:
1419 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1420 break;
1421
1422 case ICMP6_ECHO_REQUEST:
1423 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1424 break;
1425
1426 case ICMP6_ECHO_REPLY:
1427 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1428 break;
1429
1430 case ND_ROUTER_SOLICIT:
1431 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1432 break;
1433
1434 case ND_ROUTER_ADVERT:
1435 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1436 break;
1437
1438 case ND_NEIGHBOR_SOLICIT:
1439 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1440 break;
1441
1442 case ND_NEIGHBOR_ADVERT:
1443 BUMP_MIB(ill->ill_icmp6_mib,
1444 ipv6IfIcmpOutNeighborAdvertisements);
1445 break;
1446
1447 case ND_REDIRECT:
1448 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1449 break;
1450
1451 case MLD_LISTENER_QUERY:
1452 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1453 break;
1454
1455 case MLD_LISTENER_REPORT:
1456 case MLD_V2_LISTENER_REPORT:
1457 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1458 break;
1459
1460 case MLD_LISTENER_REDUCTION:
1461 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1462 break;
1463 }
1464 }
1465
1466 /*
1467 * Check if it is ok to send an ICMPv6 error packet in
1468 * response to the IP packet in mp.
1469 * Free the message and return null if no
1470 * ICMP error packet should be sent.
1471 */
1472 static mblk_t *
1473 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1474 {
1475 ill_t *ill = ira->ira_ill;
1476 ip_stack_t *ipst = ill->ill_ipst;
1477 boolean_t llbcast;
1478 ip6_t *ip6h;
1479
1480 if (!mp)
1481 return (NULL);
1482
1483 /* We view multicast and broadcast as the same.. */
1484 llbcast = (ira->ira_flags &
1485 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1486 ip6h = (ip6_t *)mp->b_rptr;
1487
1488 /* Check if source address uniquely identifies the host */
1489
1490 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1491 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1492 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1493 freemsg(mp);
1494 return (NULL);
1495 }
1496
1497 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1498 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1499 icmp6_t *icmp6;
1500
1501 if (mp->b_wptr - mp->b_rptr < len_needed) {
1502 if (!pullupmsg(mp, len_needed)) {
1503 BUMP_MIB(ill->ill_icmp6_mib,
1504 ipv6IfIcmpInErrors);
1505 freemsg(mp);
1506 return (NULL);
1507 }
1508 ip6h = (ip6_t *)mp->b_rptr;
1509 }
1510 icmp6 = (icmp6_t *)&ip6h[1];
1511 /* Explicitly do not generate errors in response to redirects */
1512 if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1513 icmp6->icmp6_type == ND_REDIRECT) {
1514 freemsg(mp);
1515 return (NULL);
1516 }
1517 }
1518 /*
1519 * Check that the destination is not multicast and that the packet
1520 * was not sent on link layer broadcast or multicast. (Exception
1521 * is Packet too big message as per the draft - when mcast_ok is set.)
1522 */
1523 if (!mcast_ok &&
1524 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1525 freemsg(mp);
1526 return (NULL);
1527 }
1528 /*
1529 * If this is a labeled system, then check to see if we're allowed to
1530 * send a response to this particular sender. If not, then just drop.
1531 */
1532 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1533 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1534 freemsg(mp);
1535 return (NULL);
1536 }
1537
1538 if (icmp_err_rate_limit(ipst)) {
1539 /*
1540 * Only send ICMP error packets every so often.
1541 * This should be done on a per port/source basis,
1542 * but for now this will suffice.
1543 */
1544 freemsg(mp);
1545 return (NULL);
1546 }
1547 return (mp);
1548 }
1549
1550 /*
1551 * Called when a packet was sent out the same link that it arrived on.
1552 * Check if it is ok to send a redirect and then send it.
1553 */
1554 void
1555 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1556 ip_recv_attr_t *ira)
1557 {
1558 ill_t *ill = ira->ira_ill;
1559 ip_stack_t *ipst = ill->ill_ipst;
1560 in6_addr_t *v6targ;
1561 ire_t *src_ire_v6 = NULL;
1562 mblk_t *mp1;
1563 ire_t *nhop_ire = NULL;
1564
1565 /*
1566 * Don't send a redirect when forwarding a source
1567 * routed packet.
1568 */
1569 if (ip_source_routed_v6(ip6h, mp, ipst))
1570 return;
1571
1572 if (ire->ire_type & IRE_ONLINK) {
1573 /* Target is directly connected */
1574 v6targ = &ip6h->ip6_dst;
1575 } else {
1576 /* Determine the most specific IRE used to send the packets */
1577 nhop_ire = ire_nexthop(ire);
1578 if (nhop_ire == NULL)
1579 return;
1580
1581 /*
1582 * We won't send redirects to a router
1583 * that doesn't have a link local
1584 * address, but will forward.
1585 */
1586 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1587 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1588 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1589 ire_refrele(nhop_ire);
1590 return;
1591 }
1592 v6targ = &nhop_ire->ire_addr_v6;
1593 }
1594 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1595 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1596 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1597
1598 if (src_ire_v6 == NULL) {
1599 if (nhop_ire != NULL)
1600 ire_refrele(nhop_ire);
1601 return;
1602 }
1603
1604 /*
1605 * The source is directly connected.
1606 */
1607 mp1 = copymsg(mp);
1608 if (mp1 != NULL)
1609 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1610
1611 if (nhop_ire != NULL)
1612 ire_refrele(nhop_ire);
1613 ire_refrele(src_ire_v6);
1614 }
1615
1616 /*
1617 * Generate an ICMPv6 redirect message.
1618 * Include target link layer address option if it exits.
1619 * Always include redirect header.
1620 */
1621 static void
1622 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1623 ip_recv_attr_t *ira)
1624 {
1625 nd_redirect_t *rd;
1626 nd_opt_rd_hdr_t *rdh;
1627 uchar_t *buf;
1628 ncec_t *ncec = NULL;
1629 nd_opt_hdr_t *opt;
1630 int len;
1631 int ll_opt_len = 0;
1632 int max_redir_hdr_data_len;
1633 int pkt_len;
1634 in6_addr_t *srcp;
1635 ill_t *ill;
1636 boolean_t need_refrele;
1637 ip_stack_t *ipst = ira->ira_ill->ill_ipst;
1638
1639 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1640 if (mp == NULL)
1641 return;
1642
1643 if (IS_UNDER_IPMP(ira->ira_ill)) {
1644 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1645 if (ill == NULL) {
1646 ill = ira->ira_ill;
1647 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1648 ip_drop_output("no IPMP ill for sending redirect",
1649 mp, ill);
1650 freemsg(mp);
1651 return;
1652 }
1653 need_refrele = B_TRUE;
1654 } else {
1655 ill = ira->ira_ill;
1656 need_refrele = B_FALSE;
1657 }
1658
1659 ncec = ncec_lookup_illgrp_v6(ill, targetp);
1660 if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1661 ncec->ncec_lladdr != NULL) {
1662 ll_opt_len = (sizeof (nd_opt_hdr_t) +
1663 ill->ill_phys_addr_length + 7)/8 * 8;
1664 }
1665 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1666 ASSERT(len % 4 == 0);
1667 buf = kmem_alloc(len, KM_NOSLEEP);
1668 if (buf == NULL) {
1669 if (ncec != NULL)
1670 ncec_refrele(ncec);
1671 if (need_refrele)
1672 ill_refrele(ill);
1673 freemsg(mp);
1674 return;
1675 }
1676
1677 rd = (nd_redirect_t *)buf;
1678 rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1679 rd->nd_rd_code = 0;
1680 rd->nd_rd_reserved = 0;
1681 rd->nd_rd_target = *targetp;
1682 rd->nd_rd_dst = *dest;
1683
1684 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1685 if (ncec != NULL && ll_opt_len != 0) {
1686 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1687 opt->nd_opt_len = ll_opt_len/8;
1688 bcopy((char *)ncec->ncec_lladdr, &opt[1],
1689 ill->ill_phys_addr_length);
1690 }
1691 if (ncec != NULL)
1692 ncec_refrele(ncec);
1693 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1694 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1695 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1696 max_redir_hdr_data_len =
1697 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1698 pkt_len = msgdsize(mp);
1699 /* Make sure mp is 8 byte aligned */
1700 if (pkt_len > max_redir_hdr_data_len) {
1701 rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1702 sizeof (nd_opt_rd_hdr_t))/8;
1703 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1704 } else {
1705 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1706 (void) adjmsg(mp, -(pkt_len % 8));
1707 }
1708 rdh->nd_opt_rh_reserved1 = 0;
1709 rdh->nd_opt_rh_reserved2 = 0;
1710 /* ipif_v6lcl_addr contains the link-local source address */
1711 srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1712
1713 /* Redirects sent by router, and router is global zone */
1714 ASSERT(ira->ira_zoneid == ALL_ZONES);
1715 ira->ira_zoneid = GLOBAL_ZONEID;
1716 icmp_pkt_v6(mp, buf, len, srcp, ira);
1717 kmem_free(buf, len);
1718 if (need_refrele)
1719 ill_refrele(ill);
1720 }
1721
1722
1723 /* Generate an ICMP time exceeded message. (May be called as writer.) */
1724 void
1725 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1726 ip_recv_attr_t *ira)
1727 {
1728 icmp6_t icmp6;
1729
1730 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1731 if (mp == NULL)
1732 return;
1733
1734 bzero(&icmp6, sizeof (icmp6_t));
1735 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1736 icmp6.icmp6_code = code;
1737 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1738 }
1739
1740 /*
1741 * Generate an ICMP unreachable message.
1742 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1743 * constructed by the caller.
1744 */
1745 void
1746 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1747 ip_recv_attr_t *ira)
1748 {
1749 icmp6_t icmp6;
1750
1751 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1752 if (mp == NULL)
1753 return;
1754
1755 bzero(&icmp6, sizeof (icmp6_t));
1756 icmp6.icmp6_type = ICMP6_DST_UNREACH;
1757 icmp6.icmp6_code = code;
1758 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1759 }
1760
1761 /*
1762 * Generate an ICMP pkt too big message.
1763 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1764 * constructed by the caller.
1765 */
1766 void
1767 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1768 ip_recv_attr_t *ira)
1769 {
1770 icmp6_t icmp6;
1771
1772 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1773 if (mp == NULL)
1774 return;
1775
1776 bzero(&icmp6, sizeof (icmp6_t));
1777 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1778 icmp6.icmp6_code = 0;
1779 icmp6.icmp6_mtu = htonl(mtu);
1780
1781 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1782 }
1783
1784 /*
1785 * Generate an ICMP parameter problem message. (May be called as writer.)
1786 * 'offset' is the offset from the beginning of the packet in error.
1787 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1788 * constructed by the caller.
1789 */
1790 static void
1791 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1792 boolean_t mcast_ok, ip_recv_attr_t *ira)
1793 {
1794 icmp6_t icmp6;
1795
1796 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1797 if (mp == NULL)
1798 return;
1799
1800 bzero((char *)&icmp6, sizeof (icmp6_t));
1801 icmp6.icmp6_type = ICMP6_PARAM_PROB;
1802 icmp6.icmp6_code = code;
1803 icmp6.icmp6_pptr = htonl(offset);
1804 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1805 }
1806
1807 void
1808 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1809 ip_recv_attr_t *ira)
1810 {
1811 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
1812 uint16_t hdr_length;
1813 uint8_t *nexthdrp;
1814 uint32_t offset;
1815 ill_t *ill = ira->ira_ill;
1816
1817 /* Determine the offset of the bad nexthdr value */
1818 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
1819 /* Malformed packet */
1820 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1821 ip_drop_input("ipIfStatsInDiscards", mp, ill);
1822 freemsg(mp);
1823 return;
1824 }
1825
1826 offset = nexthdrp - mp->b_rptr;
1827 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1828 mcast_ok, ira);
1829 }
1830
1831 /*
1832 * Verify whether or not the IP address is a valid local address.
1833 * Could be a unicast, including one for a down interface.
1834 * If allow_mcbc then a multicast or broadcast address is also
1835 * acceptable.
1836 *
1837 * In the case of a multicast address, however, the
1838 * upper protocol is expected to reset the src address
1839 * to zero when we return IPVL_MCAST so that
1840 * no packets are emitted with multicast address as
1841 * source address.
1842 * The addresses valid for bind are:
1843 * (1) - in6addr_any
1844 * (2) - IP address of an UP interface
1845 * (3) - IP address of a DOWN interface
1846 * (4) - a multicast address. In this case
1847 * the conn will only receive packets destined to
1848 * the specified multicast address. Note: the
1849 * application still has to issue an
1850 * IPV6_JOIN_GROUP socket option.
1851 *
1852 * In all the above cases, the bound address must be valid in the current zone.
1853 * When the address is loopback or multicast, there might be many matching IREs
1854 * so bind has to look up based on the zone.
1855 */
1856 ip_laddr_t
1857 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1858 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1859 {
1860 ire_t *src_ire;
1861 uint_t match_flags;
1862 ill_t *ill = NULL;
1863
1864 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1865 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1866
1867 match_flags = MATCH_IRE_ZONEONLY;
1868 if (scopeid != 0) {
1869 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1870 if (ill == NULL)
1871 return (IPVL_BAD);
1872 match_flags |= MATCH_IRE_ILL;
1873 }
1874
1875 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1876 ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1877 if (ill != NULL)
1878 ill_refrele(ill);
1879
1880 /*
1881 * If an address other than in6addr_any is requested,
1882 * we verify that it is a valid address for bind
1883 * Note: Following code is in if-else-if form for
1884 * readability compared to a condition check.
1885 */
1886 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1887 /*
1888 * (2) Bind to address of local UP interface
1889 */
1890 ire_refrele(src_ire);
1891 return (IPVL_UNICAST_UP);
1892 } else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1893 /* (4) bind to multicast address. */
1894 if (src_ire != NULL)
1895 ire_refrele(src_ire);
1896
1897 /*
1898 * Note: caller should take IPV6_MULTICAST_IF
1899 * into account when selecting a real source address.
1900 */
1901 if (allow_mcbc)
1902 return (IPVL_MCAST);
1903 else
1904 return (IPVL_BAD);
1905 } else {
1906 ipif_t *ipif;
1907
1908 /*
1909 * (3) Bind to address of local DOWN interface?
1910 * (ipif_lookup_addr() looks up all interfaces
1911 * but we do not get here for UP interfaces
1912 * - case (2) above)
1913 */
1914 if (src_ire != NULL)
1915 ire_refrele(src_ire);
1916
1917 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1918 if (ipif == NULL)
1919 return (IPVL_BAD);
1920
1921 /* Not a useful source? */
1922 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1923 ipif_refrele(ipif);
1924 return (IPVL_BAD);
1925 }
1926 ipif_refrele(ipif);
1927 return (IPVL_UNICAST_DOWN);
1928 }
1929 }
1930
/*
 * Verify that both the source and destination addresses are valid. If
 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
 * i.e. have no route to it.  Protocols like TCP want to verify destination
 * reachability, while tunnels do not.
 *
 * Determine the route, the interface, and (optionally) the source address
 * to use to reach a given destination.
 * Note that we allow connect to broadcast and multicast addresses when
 * IPDF_ALLOW_MCBC is set.
 * first_hop and dst_addr are normally the same, but if source routing
 * they will differ; in that case the first_hop is what we'll use for the
 * routing lookup but the dce and label checks will be done on dst_addr.
 *
 * If uinfo is set, then we fill in the best available information
 * we have for the destination. This is based on (in priority order) any
 * metrics and path MTU stored in a dce_t, route metrics, and finally the
 * ill_mtu/ill_mc_mtu.
 *
 * Tsol note: If we have a source route then dst_addr != firsthop. But we
 * always do the label check on dst_addr.
 *
 * Assumes that the caller has set ixa_scopeid for link-local communication.
 */
1955 int
1956 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1957 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1958 uint32_t flags, uint_t mac_mode)
1959 {
1960 ire_t *ire;
1961 int error = 0;
1962 in6_addr_t setsrc; /* RTF_SETSRC */
1963 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */
1964 ip_stack_t *ipst = ixa->ixa_ipst;
1965 dce_t *dce;
1966 uint_t pmtu;
1967 uint_t ifindex;
1968 uint_t generation;
1969 nce_t *nce;
1970 ill_t *ill = NULL;
1971 boolean_t multirt = B_FALSE;
1972
1973 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1974
1975 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1976
	/*
	 * We never send to zero; the ULPs map it to the loopback address.
	 * We can't allow it since we use zero to mean uninitialized in some
	 * places.
	 */
1982 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1983
1984 if (is_system_labeled()) {
1985 ts_label_t *tsl = NULL;
1986
1987 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
1988 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
1989 if (error != 0)
1990 return (error);
1991 if (tsl != NULL) {
1992 /* Update the label */
1993 ip_xmit_attr_replace_tsl(ixa, tsl);
1994 }
1995 }
1996
1997 setsrc = ipv6_all_zeros;
1998 /*
1999 * Select a route; For IPMP interfaces, we would only select
2000 * a "hidden" route (i.e., going through a specific under_ill)
2001 * if ixa_ifindex has been specified.
2002 */
2003 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
2004 &setsrc, &error, &multirt);
2005 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */
2006 if (error != 0)
2007 goto bad_addr;
2008
2009 /*
2010 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
2011 * If IPDF_VERIFY_DST is set, the destination must be reachable.
2012 * Otherwise the destination needn't be reachable.
2013 *
2014 * If we match on a reject or black hole, then we've got a
2015 * local failure. May as well fail out the connect() attempt,
2016 * since it's never going to succeed.
2017 */
2018 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
		/*
		 * If we're verifying destination reachability, we always want
		 * to complain here.
		 *
		 * If we're not verifying destination reachability but the
		 * destination has a route, we still want to fail on the
		 * temporary address and broadcast address tests.
		 *
		 * In both cases we let the code continue so some reasonable
		 * information is returned to the caller.  That enables the
		 * caller to use (and even cache) the IRE. conn_ip_output will
		 * use the generation mismatch path to check for the unreachable
		 * case thereby avoiding any specific check in the main path.
		 */
2033 ASSERT(generation == IRE_GENERATION_VERIFY);
2034 if (flags & IPDF_VERIFY_DST) {
2035 /*
2036 * Set errno but continue to set up ixa_ire to be
2037 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2038 * That allows callers to use ip_output to get an
2039 * ICMP error back.
2040 */
2041 if (!(ire->ire_type & IRE_HOST))
2042 error = ENETUNREACH;
2043 else
2044 error = EHOSTUNREACH;
2045 }
2046 }
2047
2048 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2049 !(flags & IPDF_ALLOW_MCBC)) {
2050 ire_refrele(ire);
2051 ire = ire_reject(ipst, B_FALSE);
2052 generation = IRE_GENERATION_VERIFY;
2053 error = ENETUNREACH;
2054 }
2055
2056 /* Cache things */
2057 if (ixa->ixa_ire != NULL)
2058 ire_refrele_notr(ixa->ixa_ire);
2059 #ifdef DEBUG
2060 ire_refhold_notr(ire);
2061 ire_refrele(ire);
2062 #endif
2063 ixa->ixa_ire = ire;
2064 ixa->ixa_ire_generation = generation;
2065
2066 /*
2067 * Ensure that ixa_dce is always set any time that ixa_ire is set,
2068 * since some callers will send a packet to conn_ip_output() even if
2069 * there's an error.
2070 */
2071 ifindex = 0;
2072 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2073 /* If we are creating a DCE we'd better have an ifindex */
2074 if (ill != NULL)
2075 ifindex = ill->ill_phyint->phyint_ifindex;
2076 else
2077 flags &= ~IPDF_UNIQUE_DCE;
2078 }
2079
2080 if (flags & IPDF_UNIQUE_DCE) {
2081 /* Fallback to the default dce if allocation fails */
2082 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2083 if (dce != NULL) {
2084 generation = dce->dce_generation;
2085 } else {
2086 dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2087 &generation);
2088 }
2089 } else {
2090 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2091 }
2092 ASSERT(dce != NULL);
2093 if (ixa->ixa_dce != NULL)
2094 dce_refrele_notr(ixa->ixa_dce);
2095 #ifdef DEBUG
2096 dce_refhold_notr(dce);
2097 dce_refrele(dce);
2098 #endif
2099 ixa->ixa_dce = dce;
2100 ixa->ixa_dce_generation = generation;
2101
2102
2103 /*
2104 * For multicast with multirt we have a flag passed back from
2105 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2106 * possible multicast address.
2107 * We also need a flag for multicast since we can't check
2108 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2109 */
2110 if (multirt) {
2111 ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2112 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2113 } else {
2114 ixa->ixa_postfragfn = ire->ire_postfragfn;
2115 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2116 }
2117 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2118 /* Get an nce to cache. */
2119 nce = ire_to_nce(ire, NULL, firsthop);
2120 if (nce == NULL) {
2121 /* Allocation failure? */
2122 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2123 } else {
2124 if (ixa->ixa_nce != NULL)
2125 nce_refrele(ixa->ixa_nce);
2126 ixa->ixa_nce = nce;
2127 }
2128 }
2129
2130 /*
2131 * If the source address is a loopback address, the
2132 * destination had best be local or multicast.
2133 * If we are sending to an IRE_LOCAL using a loopback source then
2134 * it had better be the same zoneid.
2135 */
2136 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2137 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
2138 ire = NULL; /* Stored in ixa_ire */
2139 error = EADDRNOTAVAIL;
2140 goto bad_addr;
2141 }
2142 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2143 ire = NULL; /* Stored in ixa_ire */
2144 error = EADDRNOTAVAIL;
2145 goto bad_addr;
2146 }
2147 }
2148
2149 /*
2150 * Does the caller want us to pick a source address?
2151 */
2152 if (flags & IPDF_SELECT_SRC) {
2153 in6_addr_t src_addr;
2154
2155 /*
2156 * We use ire_nexthop_ill to avoid the under ipmp
2157 * interface for source address selection. Note that for ipmp
2158 * probe packets, ixa_ifindex would have been specified, and
2159 * the ip_select_route() invocation would have picked an ire
2160 * with ire_ill pointing at an under interface.
2161 */
2162 ill = ire_nexthop_ill(ire);
2163
2164 /* If unreachable we have no ill but need some source */
2165 if (ill == NULL) {
2166 src_addr = ipv6_loopback;
2167 /* Make sure we look for a better source address */
2168 generation = SRC_GENERATION_VERIFY;
2169 } else {
2170 error = ip_select_source_v6(ill, &setsrc, dst_addr,
2171 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2172 &src_addr, &generation, NULL);
2173 if (error != 0) {
2174 ire = NULL; /* Stored in ixa_ire */
2175 goto bad_addr;
2176 }
2177 }
2178
2179 /*
2180 * We allow the source address to be down.
2181 * However, we check that we don't use the loopback address
2182 * as a source when sending out on the wire.
2183 */
2184 if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2185 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2186 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2187 ire = NULL; /* Stored in ixa_ire */
2188 error = EADDRNOTAVAIL;
2189 goto bad_addr;
2190 }
2191
2192 *src_addrp = src_addr;
2193 ixa->ixa_src_generation = generation;
2194 }
2195
2196 /*
2197 * Make sure we don't leave an unreachable ixa_nce in place
2198 * since ip_select_route is used when we unplumb i.e., remove
2199 * references on ixa_ire, ixa_nce, and ixa_dce.
2200 */
2201 nce = ixa->ixa_nce;
2202 if (nce != NULL && nce->nce_is_condemned) {
2203 nce_refrele(nce);
2204 ixa->ixa_nce = NULL;
2205 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2206 }
2207
2208 /*
2209 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2210 * multicast. But pmtu discovery is only enabled for connected
2211 * sockets in general.
2212 */
2213
2214 /*
2215 * Set initial value for fragmentation limit. Either conn_ip_output
2216 * or the ULP might update it when there are routing changes.
2217 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2218 */
2219 pmtu = ip_get_pmtu(ixa);
2220 ixa->ixa_fragsize = pmtu;
2221 /* Make sure ixa_fragsize and ixa_pmtu remain identical */
2222 if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2223 ixa->ixa_pmtu = pmtu;
2224
2225 /*
2226 * Extract information useful for some transports.
2227 * First we look for DCE metrics. Then we take what we have in
2228 * the metrics in the route, where the offlink is used if we have
2229 * one.
2230 */
2231 if (uinfo != NULL) {
2232 bzero(uinfo, sizeof (*uinfo));
2233
2234 if (dce->dce_flags & DCEF_UINFO)
2235 *uinfo = dce->dce_uinfo;
2236
2237 rts_merge_metrics(uinfo, &ire->ire_metrics);
2238
2239 /* Allow ire_metrics to decrease the path MTU from above */
2240 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2241 uinfo->iulp_mtu = pmtu;
2242
2243 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2244 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2245 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2246 }
2247
2248 if (ill != NULL)
2249 ill_refrele(ill);
2250
2251 return (error);
2252
2253 bad_addr:
2254 if (ire != NULL)
2255 ire_refrele(ire);
2256
2257 if (ill != NULL)
2258 ill_refrele(ill);
2259
2260 /*
2261 * Make sure we don't leave an unreachable ixa_nce in place
2262 * since ip_select_route is used when we unplumb i.e., remove
2263 * references on ixa_ire, ixa_nce, and ixa_dce.
2264 */
2265 nce = ixa->ixa_nce;
2266 if (nce != NULL && nce->nce_is_condemned) {
2267 nce_refrele(nce);
2268 ixa->ixa_nce = NULL;
2269 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2270 }
2271
2272 return (error);
2273 }
2274
2275 /*
2276 * Handle protocols with which IP is less intimate. There
2277 * can be more than one stream bound to a particular
2278 * protocol. When this is the case, normally each one gets a copy
2279 * of any incoming packets.
2280 *
2281 * Zones notes:
2282 * Packets will be distributed to conns in all zones. This is really only
2283 * useful for ICMPv6 as only applications in the global zone can create raw
2284 * sockets for other protocols.
2285 */
2286 void
2287 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2288 {
2289 mblk_t *mp1;
2290 in6_addr_t laddr = ip6h->ip6_dst;
2291 conn_t *connp, *first_connp, *next_connp;
2292 connf_t *connfp;
2293 ill_t *ill = ira->ira_ill;
2294 ip_stack_t *ipst = ill->ill_ipst;
2295
2296 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2297 mutex_enter(&connfp->connf_lock);
2298 connp = connfp->connf_head;
2299 for (connp = connfp->connf_head; connp != NULL;
2300 connp = connp->conn_next) {
2301 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2302 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2303 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2304 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2305 break;
2306 }
2307
2308 if (connp == NULL) {
2309 /*
2310 * No one bound to this port. Is
2311 * there a client that wants all
2312 * unclaimed datagrams?
2313 */
2314 mutex_exit(&connfp->connf_lock);
2315 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2316 ICMP6_PARAMPROB_NEXTHEADER, ira);
2317 return;
2318 }
2319
2320 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2321
2322 CONN_INC_REF(connp);
2323 first_connp = connp;
2324
2325 /*
2326 * XXX: Fix the multiple protocol listeners case. We should not
2327 * be walking the conn->conn_next list here.
2328 */
2329 connp = connp->conn_next;
2330 for (;;) {
2331 while (connp != NULL) {
2332 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2333 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2334 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2335 tsol_receive_local(mp, &laddr, IPV6_VERSION,
2336 ira, connp)))
2337 break;
2338 connp = connp->conn_next;
2339 }
2340
2341 if (connp == NULL) {
2342 /* No more interested clients */
2343 connp = first_connp;
2344 break;
2345 }
2346 if (((mp1 = dupmsg(mp)) == NULL) &&
2347 ((mp1 = copymsg(mp)) == NULL)) {
2348 /* Memory allocation failed */
2349 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2350 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2351 connp = first_connp;
2352 break;
2353 }
2354
2355 CONN_INC_REF(connp);
2356 mutex_exit(&connfp->connf_lock);
2357
2358 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2359 ira);
2360
2361 mutex_enter(&connfp->connf_lock);
2362 /* Follow the next pointer before releasing the conn. */
2363 next_connp = connp->conn_next;
2364 CONN_DEC_REF(connp);
2365 connp = next_connp;
2366 }
2367
2368 /* Last one. Send it upstream. */
2369 mutex_exit(&connfp->connf_lock);
2370
2371 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2372
2373 CONN_DEC_REF(connp);
2374 }
2375
2376 /*
2377 * Called when it is conceptually a ULP that would sent the packet
2378 * e.g., port unreachable and nexthdr unknown. Check that the packet
2379 * would have passed the IPsec global policy before sending the error.
2380 *
2381 * Send an ICMP error after patching up the packet appropriately.
2382 * Uses ip_drop_input and bumps the appropriate MIB.
2383 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2384 */
2385 void
2386 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2387 ip_recv_attr_t *ira)
2388 {
2389 ip6_t *ip6h;
2390 boolean_t secure;
2391 ill_t *ill = ira->ira_ill;
2392 ip_stack_t *ipst = ill->ill_ipst;
2393 netstack_t *ns = ipst->ips_netstack;
2394 ipsec_stack_t *ipss = ns->netstack_ipsec;
2395
2396 secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2397
2398 /*
2399 * We are generating an icmp error for some inbound packet.
2400 * Called from all ip_fanout_(udp, tcp, proto) functions.
2401 * Before we generate an error, check with global policy
2402 * to see whether this is allowed to enter the system. As
2403 * there is no "conn", we are checking with global policy.
2404 */
2405 ip6h = (ip6_t *)mp->b_rptr;
2406 if (secure || ipss->ipsec_inbound_v6_policy_present) {
2407 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2408 if (mp == NULL)
2409 return;
2410 }
2411
2412 /* We never send errors for protocols that we do implement */
2413 if (ira->ira_protocol == IPPROTO_ICMPV6) {
2414 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2415 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2416 freemsg(mp);
2417 return;
2418 }
2419
2420 switch (icmp_type) {
2421 case ICMP6_DST_UNREACH:
2422 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2423
2424 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2425 ip_drop_input("ipIfStatsNoPorts", mp, ill);
2426
2427 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2428 break;
2429 case ICMP6_PARAM_PROB:
2430 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2431
2432 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2433 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2434
2435 /* Let the system determine the offset for this one */
2436 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2437 break;
2438 default:
2439 #ifdef DEBUG
2440 panic("ip_fanout_send_icmp_v6: wrong type");
2441 /*NOTREACHED*/
2442 #else
2443 freemsg(mp);
2444 break;
2445 #endif
2446 }
2447 }
2448
2449 /*
2450 * Fanout for UDP packets that are multicast or ICMP errors.
2451 * (Unicast fanout is handled in ip_input_v6.)
2452 *
2453 * If SO_REUSEADDR is set all multicast packets
2454 * will be delivered to all conns bound to the same port.
2455 *
2456 * Fanout for UDP packets.
2457 * The caller puts <fport, lport> in the ports parameter.
2458 * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2459 *
2460 * If SO_REUSEADDR is set all multicast and broadcast packets
2461 * will be delivered to all conns bound to the same port.
2462 *
2463 * Zones notes:
2464 * Earlier in ip_input on a system with multiple shared-IP zones we
2465 * duplicate the multicast and broadcast packets and send them up
2466 * with each explicit zoneid that exists on that ill.
2467 * This means that here we can match the zoneid with SO_ALLZONES being special.
2468 */
2469 void
2470 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2471 ip_recv_attr_t *ira)
2472 {
2473 in6_addr_t laddr;
2474 conn_t *connp;
2475 connf_t *connfp;
2476 in6_addr_t faddr;
2477 ill_t *ill = ira->ira_ill;
2478 ip_stack_t *ipst = ill->ill_ipst;
2479
2480 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2481
2482 laddr = ip6h->ip6_dst;
2483 faddr = ip6h->ip6_src;
2484
2485 /* Attempt to find a client stream based on destination port. */
2486 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2487 mutex_enter(&connfp->connf_lock);
2488 connp = connfp->connf_head;
2489 while (connp != NULL) {
2490 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2491 conn_wantpacket_v6(connp, ira, ip6h) &&
2492 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2493 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2494 break;
2495 connp = connp->conn_next;
2496 }
2497
2498 if (connp == NULL)
2499 goto notfound;
2500
2501 CONN_INC_REF(connp);
2502
2503 if (connp->conn_reuseaddr) {
2504 conn_t *first_connp = connp;
2505 conn_t *next_connp;
2506 mblk_t *mp1;
2507
2508 connp = connp->conn_next;
2509 for (;;) {
2510 while (connp != NULL) {
2511 if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2512 fport, faddr) &&
2513 conn_wantpacket_v6(connp, ira, ip6h) &&
2514 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2515 tsol_receive_local(mp, &laddr, IPV6_VERSION,
2516 ira, connp)))
2517 break;
2518 connp = connp->conn_next;
2519 }
2520 if (connp == NULL) {
2521 /* No more interested clients */
2522 connp = first_connp;
2523 break;
2524 }
2525 if (((mp1 = dupmsg(mp)) == NULL) &&
2526 ((mp1 = copymsg(mp)) == NULL)) {
2527 /* Memory allocation failed */
2528 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2529 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2530 connp = first_connp;
2531 break;
2532 }
2533
2534 CONN_INC_REF(connp);
2535 mutex_exit(&connfp->connf_lock);
2536
2537 IP6_STAT(ipst, ip6_udp_fanmb);
2538 ip_fanout_udp_conn(connp, mp1, NULL,
2539 (ip6_t *)mp1->b_rptr, ira);
2540
2541 mutex_enter(&connfp->connf_lock);
2542 /* Follow the next pointer before releasing the conn. */
2543 next_connp = connp->conn_next;
2544 IP6_STAT(ipst, ip6_udp_fanmb);
2545 CONN_DEC_REF(connp);
2546 connp = next_connp;
2547 }
2548 }
2549
2550 /* Last one. Send it upstream. */
2551 mutex_exit(&connfp->connf_lock);
2552
2553 IP6_STAT(ipst, ip6_udp_fanmb);
2554 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2555 CONN_DEC_REF(connp);
2556 return;
2557
2558 notfound:
2559 mutex_exit(&connfp->connf_lock);
2560 /*
2561 * No one bound to this port. Is
2562 * there a client that wants all
2563 * unclaimed datagrams?
2564 */
2565 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2566 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2567 ip_fanout_proto_v6(mp, ip6h, ira);
2568 } else {
2569 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2570 ICMP6_DST_UNREACH_NOPORT, ira);
2571 }
2572 }
2573
2574 /*
2575 * int ip_find_hdr_v6()
2576 *
2577 * This routine is used by the upper layer protocols, iptun, and IPsec:
2578 * - Set extension header pointers to appropriate locations
2579 * - Determine IPv6 header length and return it
2580 * - Return a pointer to the last nexthdr value
2581 *
2582 * The caller must initialize ipp_fields.
2583 * The upper layer protocols normally set label_separate which makes the
2584 * routine put the TX label in ipp_label_v6. If this is not set then
2585 * the hop-by-hop options including the label are placed in ipp_hopopts.
2586 *
2587 * NOTE: If multiple extension headers of the same type are present,
2588 * ip_find_hdr_v6() will set the respective extension header pointers
2589 * to the first one that it encounters in the IPv6 header. It also
2590 * skips fragment headers. This routine deals with malformed packets
2591 * of various sorts in which case the returned length is up to the
2592 * malformed part.
2593 */
2594 int
2595 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2596 uint8_t *nexthdrp)
2597 {
2598 uint_t length, ehdrlen;
2599 uint8_t nexthdr;
2600 uint8_t *whereptr, *endptr;
2601 ip6_dest_t *tmpdstopts;
2602 ip6_rthdr_t *tmprthdr;
2603 ip6_hbh_t *tmphopopts;
2604 ip6_frag_t *tmpfraghdr;
2605
2606 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2607 ipp->ipp_hoplimit = ip6h->ip6_hops;
2608 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2609 ipp->ipp_addr = ip6h->ip6_dst;
2610
2611 length = IPV6_HDR_LEN;
2612 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2613 endptr = mp->b_wptr;
2614
2615 nexthdr = ip6h->ip6_nxt;
2616 while (whereptr < endptr) {
2617 /* Is there enough left for len + nexthdr? */
2618 if (whereptr + MIN_EHDR_LEN > endptr)
2619 goto done;
2620
2621 switch (nexthdr) {
2622 case IPPROTO_HOPOPTS: {
2623 /* We check for any CIPSO */
2624 uchar_t *secopt;
2625 boolean_t hbh_needed;
2626 uchar_t *after_secopt;
2627
2628 tmphopopts = (ip6_hbh_t *)whereptr;
2629 ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2630 if ((uchar_t *)tmphopopts + ehdrlen > endptr)
2631 goto done;
2632 nexthdr = tmphopopts->ip6h_nxt;
2633
2634 if (!label_separate) {
2635 secopt = NULL;
2636 after_secopt = whereptr;
2637 } else {
2638 /*
2639 * We have dropped packets with bad options in
2640 * ip6_input. No need to check return value
2641 * here.
2642 */
2643 (void) tsol_find_secopt_v6(whereptr, ehdrlen,
2644 &secopt, &after_secopt, &hbh_needed);
2645 }
2646 if (secopt != NULL && after_secopt - whereptr > 0) {
2647 ipp->ipp_fields |= IPPF_LABEL_V6;
2648 ipp->ipp_label_v6 = secopt;
2649 ipp->ipp_label_len_v6 = after_secopt - whereptr;
2650 } else {
2651 ipp->ipp_label_len_v6 = 0;
2652 after_secopt = whereptr;
2653 hbh_needed = B_TRUE;
2654 }
2655 /* return only 1st hbh */
2656 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2657 ipp->ipp_fields |= IPPF_HOPOPTS;
2658 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2659 ipp->ipp_hopoptslen = ehdrlen -
2660 ipp->ipp_label_len_v6;
2661 }
2662 break;
2663 }
2664 case IPPROTO_DSTOPTS:
2665 tmpdstopts = (ip6_dest_t *)whereptr;
2666 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2667 if ((uchar_t *)tmpdstopts + ehdrlen > endptr)
2668 goto done;
2669 nexthdr = tmpdstopts->ip6d_nxt;
2670 /*
2671 * ipp_dstopts is set to the destination header after a
2672 * routing header.
2673 * Assume it is a post-rthdr destination header
2674 * and adjust when we find an rthdr.
2675 */
2676 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2677 ipp->ipp_fields |= IPPF_DSTOPTS;
2678 ipp->ipp_dstopts = tmpdstopts;
2679 ipp->ipp_dstoptslen = ehdrlen;
2680 }
2681 break;
2682 case IPPROTO_ROUTING:
2683 tmprthdr = (ip6_rthdr_t *)whereptr;
2684 ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2685 if ((uchar_t *)tmprthdr + ehdrlen > endptr)
2686 goto done;
2687 nexthdr = tmprthdr->ip6r_nxt;
2688 /* return only 1st rthdr */
2689 if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2690 ipp->ipp_fields |= IPPF_RTHDR;
2691 ipp->ipp_rthdr = tmprthdr;
2692 ipp->ipp_rthdrlen = ehdrlen;
2693 }
2694 /*
2695 * Make any destination header we've seen be a
2696 * pre-rthdr destination header.
2697 */
2698 if (ipp->ipp_fields & IPPF_DSTOPTS) {
2699 ipp->ipp_fields &= ~IPPF_DSTOPTS;
2700 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2701 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2702 ipp->ipp_dstopts = NULL;
2703 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2704 ipp->ipp_dstoptslen = 0;
2705 }
2706 break;
2707 case IPPROTO_FRAGMENT:
2708 tmpfraghdr = (ip6_frag_t *)whereptr;
2709 ehdrlen = sizeof (ip6_frag_t);
2710 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2711 goto done;
2712 nexthdr = tmpfraghdr->ip6f_nxt;
2713 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2714 ipp->ipp_fields |= IPPF_FRAGHDR;
2715 ipp->ipp_fraghdr = tmpfraghdr;
2716 ipp->ipp_fraghdrlen = ehdrlen;
2717 }
2718 break;
2719 case IPPROTO_NONE:
2720 default:
2721 goto done;
2722 }
2723 length += ehdrlen;
2724 whereptr += ehdrlen;
2725 }
2726 done:
2727 if (nexthdrp != NULL)
2728 *nexthdrp = nexthdr;
2729 return (length);
2730 }
2731
2732 /*
2733 * Try to determine where and what are the IPv6 header length and
2734 * pointer to nexthdr value for the upper layer protocol (or an
2735 * unknown next hdr).
2736 *
2737 * Parameters returns a pointer to the nexthdr value;
2738 * Must handle malformed packets of various sorts.
2739 * Function returns failure for malformed cases.
2740 */
2741 boolean_t
2742 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2743 uint8_t **nexthdrpp)
2744 {
2745 uint16_t length;
2746 uint_t ehdrlen;
2747 uint8_t *nexthdrp;
2748 uint8_t *whereptr;
2749 uint8_t *endptr;
2750 ip6_dest_t *desthdr;
2751 ip6_rthdr_t *rthdr;
2752 ip6_frag_t *fraghdr;
2753
2754 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2755 length = IPV6_HDR_LEN;
2756 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2757 endptr = mp->b_wptr;
2758
2759 nexthdrp = &ip6h->ip6_nxt;
2760 while (whereptr < endptr) {
2761 /* Is there enough left for len + nexthdr? */
2762 if (whereptr + MIN_EHDR_LEN > endptr)
2763 break;
2764
2765 switch (*nexthdrp) {
2766 case IPPROTO_HOPOPTS:
2767 case IPPROTO_DSTOPTS:
2768 /* Assumes the headers are identical for hbh and dst */
2769 desthdr = (ip6_dest_t *)whereptr;
2770 ehdrlen = 8 * (desthdr->ip6d_len + 1);
2771 if ((uchar_t *)desthdr + ehdrlen > endptr)
2772 return (B_FALSE);
2773 nexthdrp = &desthdr->ip6d_nxt;
2774 break;
2775 case IPPROTO_ROUTING:
2776 rthdr = (ip6_rthdr_t *)whereptr;
2777 ehdrlen = 8 * (rthdr->ip6r_len + 1);
2778 if ((uchar_t *)rthdr + ehdrlen > endptr)
2779 return (B_FALSE);
2780 nexthdrp = &rthdr->ip6r_nxt;
2781 break;
2782 case IPPROTO_FRAGMENT:
2783 fraghdr = (ip6_frag_t *)whereptr;
2784 ehdrlen = sizeof (ip6_frag_t);
2785 if ((uchar_t *)&fraghdr[1] > endptr)
2786 return (B_FALSE);
2787 nexthdrp = &fraghdr->ip6f_nxt;
2788 break;
2789 case IPPROTO_NONE:
2790 /* No next header means we're finished */
2791 default:
2792 *hdr_length_ptr = length;
2793 *nexthdrpp = nexthdrp;
2794 return (B_TRUE);
2795 }
2796 length += ehdrlen;
2797 whereptr += ehdrlen;
2798 *hdr_length_ptr = length;
2799 *nexthdrpp = nexthdrp;
2800 }
2801 switch (*nexthdrp) {
2802 case IPPROTO_HOPOPTS:
2803 case IPPROTO_DSTOPTS:
2804 case IPPROTO_ROUTING:
2805 case IPPROTO_FRAGMENT:
2806 /*
2807 * If any know extension headers are still to be processed,
2808 * the packet's malformed (or at least all the IP header(s) are
2809 * not in the same mblk - and that should never happen.
2810 */
2811 return (B_FALSE);
2812
2813 default:
2814 /*
2815 * If we get here, we know that all of the IP headers were in
2816 * the same mblk, even if the ULP header is in the next mblk.
2817 */
2818 *hdr_length_ptr = length;
2819 *nexthdrpp = nexthdrp;
2820 return (B_TRUE);
2821 }
2822 }
2823
2824 /*
2825 * Return the length of the IPv6 related headers (including extension headers)
2826 * Returns a length even if the packet is malformed.
2827 */
2828 int
2829 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2830 {
2831 uint16_t hdr_len;
2832 uint8_t *nexthdrp;
2833
2834 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2835 return (hdr_len);
2836 }
2837
2838 /*
2839 * Parse and process any hop-by-hop or destination options.
2840 *
2841 * Assumes that q is an ill read queue so that ICMP errors for link-local
2842 * destinations are sent out the correct interface.
2843 *
2844 * Returns -1 if there was an error and mp has been consumed.
2845 * Returns 0 if no special action is needed.
2846 * Returns 1 if the packet contained a router alert option for this node
2847 * which is verified to be "interesting/known" for our implementation.
2848 *
2849 * XXX Note: In future as more hbh or dest options are defined,
2850 * it may be better to have different routines for hbh and dest
2851 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2852 * may have same value in different namespaces. Or is it same namespace ??
2853 * Current code checks for each opt_type (other than pads) if it is in
2854 * the expected nexthdr (hbh or dest)
2855 */
2856 int
2857 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2858 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2859 {
2860 uint8_t opt_type;
2861 uint_t optused;
2862 int ret = 0;
2863 const char *errtype;
2864 ill_t *ill = ira->ira_ill;
2865 ip_stack_t *ipst = ill->ill_ipst;
2866
2867 while (optlen != 0) {
2868 opt_type = *optptr;
2869 if (opt_type == IP6OPT_PAD1) {
2870 optused = 1;
2871 } else {
2872 if (optlen < 2)
2873 goto bad_opt;
2874 errtype = "malformed";
2875 if (opt_type == ip6opt_ls) {
2876 optused = 2 + optptr[1];
2877 if (optused > optlen)
2878 goto bad_opt;
2879 } else switch (opt_type) {
2880 case IP6OPT_PADN:
2881 /*
2882 * Note:We don't verify that (N-2) pad octets
2883 * are zero as required by spec. Adhere to
2884 * "be liberal in what you accept..." part of
2885 * implementation philosophy (RFC791,RFC1122)
2886 */
2887 optused = 2 + optptr[1];
2888 if (optused > optlen)
2889 goto bad_opt;
2890 break;
2891
2892 case IP6OPT_JUMBO:
2893 if (hdr_type != IPPROTO_HOPOPTS)
2894 goto opt_error;
2895 goto opt_error; /* XXX Not implemented! */
2896
2897 case IP6OPT_ROUTER_ALERT: {
2898 struct ip6_opt_router *or;
2899
2900 if (hdr_type != IPPROTO_HOPOPTS)
2901 goto opt_error;
2902 optused = 2 + optptr[1];
2903 if (optused > optlen)
2904 goto bad_opt;
2905 or = (struct ip6_opt_router *)optptr;
2906 /* Check total length and alignment */
2907 if (optused != sizeof (*or) ||
2908 ((uintptr_t)or->ip6or_value & 0x1) != 0)
2909 goto opt_error;
2910 /* Check value */
2911 switch (*((uint16_t *)or->ip6or_value)) {
2912 case IP6_ALERT_MLD:
2913 case IP6_ALERT_RSVP:
2914 ret = 1;
2915 }
2916 break;
2917 }
2918 case IP6OPT_HOME_ADDRESS: {
2919 /*
2920 * Minimal support for the home address option
2921 * (which is required by all IPv6 nodes).
2922 * Implement by just swapping the home address
2923 * and source address.
2924 * XXX Note: this has IPsec implications since
2925 * AH needs to take this into account.
2926 * Also, when IPsec is used we need to ensure
2927 * that this is only processed once
2928 * in the received packet (to avoid swapping
2929 * back and forth).
2930 * NOTE:This option processing is considered
2931 * to be unsafe and prone to a denial of
2932 * service attack.
2933 * The current processing is not safe even with
2934 * IPsec secured IP packets. Since the home
2935 * address option processing requirement still
2936 * is in the IETF draft and in the process of
2937 * being redefined for its usage, it has been
2938 * decided to turn off the option by default.
2939 * If this section of code needs to be executed,
2940 * ndd variable ip6_ignore_home_address_opt
2941 * should be set to 0 at the user's own risk.
2942 */
2943 struct ip6_opt_home_address *oh;
2944 in6_addr_t tmp;
2945
2946 if (ipst->ips_ipv6_ignore_home_address_opt)
2947 goto opt_error;
2948
2949 if (hdr_type != IPPROTO_DSTOPTS)
2950 goto opt_error;
2951 optused = 2 + optptr[1];
2952 if (optused > optlen)
2953 goto bad_opt;
2954
2955 /*
2956 * We did this dest. opt the first time
2957 * around (i.e. before AH processing).
2958 * If we've done AH... stop now.
2959 */
2960 if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2961 ira->ira_ipsec_ah_sa != NULL)
2962 break;
2963
2964 oh = (struct ip6_opt_home_address *)optptr;
2965 /* Check total length and alignment */
2966 if (optused < sizeof (*oh) ||
2967 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2968 goto opt_error;
2969 /* Swap ip6_src and the home address */
2970 tmp = ip6h->ip6_src;
2971 /* XXX Note: only 8 byte alignment option */
2972 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2973 *(in6_addr_t *)oh->ip6oh_addr = tmp;
2974 break;
2975 }
2976
2977 case IP6OPT_TUNNEL_LIMIT:
2978 if (hdr_type != IPPROTO_DSTOPTS) {
2979 goto opt_error;
2980 }
2981 optused = 2 + optptr[1];
2982 if (optused > optlen) {
2983 goto bad_opt;
2984 }
2985 if (optused != 3) {
2986 goto opt_error;
2987 }
2988 break;
2989
2990 default:
2991 errtype = "unknown";
2992 /* FALLTHROUGH */
2993 opt_error:
2994 /* Determine which zone should send error */
2995 switch (IP6OPT_TYPE(opt_type)) {
2996 case IP6OPT_TYPE_SKIP:
2997 optused = 2 + optptr[1];
2998 if (optused > optlen)
2999 goto bad_opt;
3000 ip1dbg(("ip_process_options_v6: %s "
3001 "opt 0x%x skipped\n",
3002 errtype, opt_type));
3003 break;
3004 case IP6OPT_TYPE_DISCARD:
3005 ip1dbg(("ip_process_options_v6: %s "
3006 "opt 0x%x; packet dropped\n",
3007 errtype, opt_type));
3008 BUMP_MIB(ill->ill_ip_mib,
3009 ipIfStatsInHdrErrors);
3010 ip_drop_input("ipIfStatsInHdrErrors",
3011 mp, ill);
3012 freemsg(mp);
3013 return (-1);
3014 case IP6OPT_TYPE_ICMP:
3015 BUMP_MIB(ill->ill_ip_mib,
3016 ipIfStatsInHdrErrors);
3017 ip_drop_input("ipIfStatsInHdrErrors",
3018 mp, ill);
3019 icmp_param_problem_v6(mp,
3020 ICMP6_PARAMPROB_OPTION,
3021 (uint32_t)(optptr -
3022 (uint8_t *)ip6h),
3023 B_FALSE, ira);
3024 return (-1);
3025 case IP6OPT_TYPE_FORCEICMP:
3026 BUMP_MIB(ill->ill_ip_mib,
3027 ipIfStatsInHdrErrors);
3028 ip_drop_input("ipIfStatsInHdrErrors",
3029 mp, ill);
3030 icmp_param_problem_v6(mp,
3031 ICMP6_PARAMPROB_OPTION,
3032 (uint32_t)(optptr -
3033 (uint8_t *)ip6h),
3034 B_TRUE, ira);
3035 return (-1);
3036 default:
3037 ASSERT(0);
3038 }
3039 }
3040 }
3041 optlen -= optused;
3042 optptr += optused;
3043 }
3044 return (ret);
3045
3046 bad_opt:
3047 /* Determine which zone should send error */
3048 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3049 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
3050 (uint32_t)(optptr - (uint8_t *)ip6h),
3051 B_FALSE, ira);
3052 return (-1);
3053 }
3054
3055 /*
3056 * Process a routing header that is not yet empty.
3057 * Because of RFC 5095, we now reject all route headers.
3058 */
3059 void
3060 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
3061 ip_recv_attr_t *ira)
3062 {
3063 ill_t *ill = ira->ira_ill;
3064 ip_stack_t *ipst = ill->ill_ipst;
3065
3066 ASSERT(rth->ip6r_segleft != 0);
3067
3068 if (!ipst->ips_ipv6_forward_src_routed) {
3069 /* XXX Check for source routed out same interface? */
3070 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
3071 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
3072 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
3073 freemsg(mp);
3074 return;
3075 }
3076
3077 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3078 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3079 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3080 B_FALSE, ira);
3081 }
3082
3083 /*
3084 * Read side put procedure for IPv6 module.
3085 */
3086 void
3087 ip_rput_v6(queue_t *q, mblk_t *mp)
3088 {
3089 ill_t *ill;
3090
3091 ill = (ill_t *)q->q_ptr;
3092 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3093 union DL_primitives *dl;
3094
3095 dl = (union DL_primitives *)mp->b_rptr;
3096 /*
3097 * Things are opening or closing - only accept DLPI
3098 * ack messages. If the stream is closing and ip_wsrv
3099 * has completed, ip_close is out of the qwait, but has
3100 * not yet completed qprocsoff. Don't proceed any further
3101 * because the ill has been cleaned up and things hanging
3102 * off the ill have been freed.
3103 */
3104 if ((mp->b_datap->db_type != M_PCPROTO) ||
3105 (dl->dl_primitive == DL_UNITDATA_IND)) {
3106 inet_freemsg(mp);
3107 return;
3108 }
3109 }
3110 if (DB_TYPE(mp) == M_DATA) {
3111 struct mac_header_info_s mhi;
3112
3113 ip_mdata_to_mhi(ill, mp, &mhi);
3114 ip_input_v6(ill, NULL, mp, &mhi);
3115 } else {
3116 ip_rput_notdata(ill, mp);
3117 }
3118 }
3119
3120 /*
3121 * Walk through the IPv6 packet in mp and see if there's an AH header
3122 * in it. See if the AH header needs to get done before other headers in
3123 * the packet. (Worker function for ipsec_early_ah_v6().)
3124 */
3125 #define IPSEC_HDR_DONT_PROCESS 0
3126 #define IPSEC_HDR_PROCESS 1
3127 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */
3128 static int
3129 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3130 {
3131 uint_t length;
3132 uint_t ehdrlen;
3133 uint8_t *whereptr;
3134 uint8_t *endptr;
3135 uint8_t *nexthdrp;
3136 ip6_dest_t *desthdr;
3137 ip6_rthdr_t *rthdr;
3138 ip6_t *ip6h;
3139
3140 /*
3141 * For now just pullup everything. In general, the less pullups,
3142 * the better, but there's so much squirrelling through anyway,
3143 * it's just easier this way.
3144 */
3145 if (!pullupmsg(mp, -1)) {
3146 return (IPSEC_MEMORY_ERROR);
3147 }
3148
3149 ip6h = (ip6_t *)mp->b_rptr;
3150 length = IPV6_HDR_LEN;
3151 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3152 endptr = mp->b_wptr;
3153
3154 /*
3155 * We can't just use the argument nexthdr in the place
3156 * of nexthdrp becaue we don't dereference nexthdrp
3157 * till we confirm whether it is a valid address.
3158 */
3159 nexthdrp = &ip6h->ip6_nxt;
3160 while (whereptr < endptr) {
3161 /* Is there enough left for len + nexthdr? */
3162 if (whereptr + MIN_EHDR_LEN > endptr)
3163 return (IPSEC_MEMORY_ERROR);
3164
3165 switch (*nexthdrp) {
3166 case IPPROTO_HOPOPTS:
3167 case IPPROTO_DSTOPTS:
3168 /* Assumes the headers are identical for hbh and dst */
3169 desthdr = (ip6_dest_t *)whereptr;
3170 ehdrlen = 8 * (desthdr->ip6d_len + 1);
3171 if ((uchar_t *)desthdr + ehdrlen > endptr)
3172 return (IPSEC_MEMORY_ERROR);
3173 /*
3174 * Return DONT_PROCESS because the destination
3175 * options header may be for each hop in a
3176 * routing-header, and we only want AH if we're
3177 * finished with routing headers.
3178 */
3179 if (*nexthdrp == IPPROTO_DSTOPTS)
3180 return (IPSEC_HDR_DONT_PROCESS);
3181 nexthdrp = &desthdr->ip6d_nxt;
3182 break;
3183 case IPPROTO_ROUTING:
3184 rthdr = (ip6_rthdr_t *)whereptr;
3185
3186 /*
3187 * If there's more hops left on the routing header,
3188 * return now with DON'T PROCESS.
3189 */
3190 if (rthdr->ip6r_segleft > 0)
3191 return (IPSEC_HDR_DONT_PROCESS);
3192
3193 ehdrlen = 8 * (rthdr->ip6r_len + 1);
3194 if ((uchar_t *)rthdr + ehdrlen > endptr)
3195 return (IPSEC_MEMORY_ERROR);
3196 nexthdrp = &rthdr->ip6r_nxt;
3197 break;
3198 case IPPROTO_FRAGMENT:
3199 /* Wait for reassembly */
3200 return (IPSEC_HDR_DONT_PROCESS);
3201 case IPPROTO_AH:
3202 *nexthdr = IPPROTO_AH;
3203 return (IPSEC_HDR_PROCESS);
3204 case IPPROTO_NONE:
3205 /* No next header means we're finished */
3206 default:
3207 return (IPSEC_HDR_DONT_PROCESS);
3208 }
3209 length += ehdrlen;
3210 whereptr += ehdrlen;
3211 }
3212 /*
3213 * Malformed/truncated packet.
3214 */
3215 return (IPSEC_MEMORY_ERROR);
3216 }
3217
3218 /*
3219 * Path for AH if options are present.
3220 * Returns NULL if the mblk was consumed.
3221 *
3222 * Sometimes AH needs to be done before other IPv6 headers for security
3223 * reasons. This function (and its ipsec_needs_processing_v6() above)
3224 * indicates if that is so, and fans out to the appropriate IPsec protocol
3225 * for the datagram passed in.
3226 */
3227 mblk_t *
3228 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3229 {
3230 uint8_t nexthdr;
3231 ah_t *ah;
3232 ill_t *ill = ira->ira_ill;
3233 ip_stack_t *ipst = ill->ill_ipst;
3234 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
3235
3236 switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3237 case IPSEC_MEMORY_ERROR:
3238 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3239 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3240 freemsg(mp);
3241 return (NULL);
3242 case IPSEC_HDR_DONT_PROCESS:
3243 return (mp);
3244 }
3245
3246 /* Default means send it to AH! */
3247 ASSERT(nexthdr == IPPROTO_AH);
3248
3249 if (!ipsec_loaded(ipss)) {
3250 ip_proto_not_sup(mp, ira);
3251 return (NULL);
3252 }
3253
3254 mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3255 if (mp == NULL)
3256 return (NULL);
3257 ASSERT(ah != NULL);
3258 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3259 ASSERT(ira->ira_ipsec_ah_sa != NULL);
3260 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3261 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3262
3263 if (mp == NULL) {
3264 /*
3265 * Either it failed or is pending. In the former case
3266 * ipIfStatsInDiscards was increased.
3267 */
3268 return (NULL);
3269 }
3270
3271 /* we're done with IPsec processing, send it up */
3272 ip_input_post_ipsec(mp, ira);
3273 return (NULL);
3274 }
3275
/*
 * Reassemble fragment.
 * When it returns a completed message the first mblk will only contain
 * the headers prior to the fragment header, with the nexthdr value updated
 * to be the header after the fragment header.
 *
 * Arguments:
 *	mp	- the received fragment.  Whenever NULL is returned mp is no
 *		  longer owned by the caller: it has been freed, queued for
 *		  reassembly, or handed to icmp_param_problem_v6().
 *	ip6h	- the IPv6 header of the fragment.
 *	fraghdr	- the fragment extension header of the fragment.
 *	remlen	- added to the fragment offset to obtain the end offset of
 *		  this fragment; presumably the number of bytes that follow
 *		  the fragment header (confirm against the callers).
 *	ira	- receive attributes.  On successful reassembly ira_pktlen,
 *		  ira_ip_hdr_length and ira_protocol are updated.
 *
 * Returns the (re)assembled packet with the fragment header removed, or
 * NULL if reassembly is still incomplete or the fragment was dropped.
 */
mblk_t *
ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
    ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
{
	uint32_t ident = ntohl(fraghdr->ip6f_ident);
	uint16_t offset;
	boolean_t more_frags;
	uint8_t nexthdr = fraghdr->ip6f_nxt;
	in6_addr_t *v6dst_ptr;
	in6_addr_t *v6src_ptr;
	uint_t end;
	uint_t hdr_length;
	size_t count;
	ipf_t *ipf;
	ipf_t **ipfp;
	ipfb_t *ipfb;
	mblk_t *mp1;
	uint8_t ecn_info = 0;
	size_t msg_len;
	mblk_t *tail_mp;
	mblk_t *t_mp;
	boolean_t pruned = B_FALSE;
	uint32_t sum_val;
	uint16_t sum_flags;
	ill_t *ill = ira->ira_ill;
	ip_stack_t *ipst = ill->ill_ipst;
	uint_t prev_nexthdr_offset;
	uint8_t prev_nexthdr;
	uint8_t *ptr;
	uint32_t packet_size;

	/*
	 * We utilize hardware computed checksum info only for UDP since
	 * IP fragmentation is a normal occurrence for the protocol.  In
	 * addition, checksum offload support for IP fragments carrying
	 * UDP payload is commonly implemented across network adapters.
	 */
	ASSERT(ira->ira_rill != NULL);
	if (nexthdr == IPPROTO_UDP && dohwcksum &&
	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
		/* Note: this mp1 shadows the function-scope mp1. */
		mblk_t *mp1 = mp->b_cont;
		int32_t len;

		/* Record checksum information from the packet */
		sum_val = (uint32_t)DB_CKSUM16(mp);
		sum_flags = DB_CKSUMFLAGS(mp);

		/* fragmented payload offset from beginning of mblk */
		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);

		if ((sum_flags & HCK_PARTIALCKSUM) &&
		    (mp1 == NULL || mp1->b_cont == NULL) &&
		    offset >= DB_CKSUMSTART(mp) &&
		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
			uint32_t adj;
			/*
			 * Partial checksum has been calculated by hardware
			 * and attached to the packet; in addition, any
			 * prepended extraneous data is even byte aligned.
			 * If any such data exists, we adjust the checksum;
			 * this would also handle any postpended data.
			 */
			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
			    mp, mp1, len, adj);

			/* One's complement subtract extraneous checksum */
			if (adj >= sum_val)
				sum_val = ~(adj - sum_val) & 0xFFFF;
			else
				sum_val -= adj;
		}
	} else {
		/* No usable hardware checksum for this fragment. */
		sum_val = 0;
		sum_flags = 0;
	}

	/* Clear hardware checksumming flag */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Determine the offset (from the beginning of the IP header)
	 * of the nexthdr value which has IPPROTO_FRAGMENT.  We use
	 * this when removing the fragment header from the packet.
	 * This packet consists of the IPv6 header, a potential
	 * hop-by-hop options header, a potential pre-routing-header
	 * destination options header, and a potential routing header.
	 *
	 * NOTE(review): the extension header lengths are not checked
	 * against the end of the mblk here; presumably the caller has
	 * already validated them - confirm.
	 */
	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
	prev_nexthdr = ip6h->ip6_nxt;
	ptr = (uint8_t *)&ip6h[1];

	if (prev_nexthdr == IPPROTO_HOPOPTS) {
		ip6_hbh_t *hbh_hdr;
		uint_t hdr_len;

		hbh_hdr = (ip6_hbh_t *)ptr;
		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
		prev_nexthdr = hbh_hdr->ip6h_nxt;
		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
		    - (uint8_t *)ip6h;
		ptr += hdr_len;
	}
	if (prev_nexthdr == IPPROTO_DSTOPTS) {
		ip6_dest_t *dest_hdr;
		uint_t hdr_len;

		dest_hdr = (ip6_dest_t *)ptr;
		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
		prev_nexthdr = dest_hdr->ip6d_nxt;
		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
		    - (uint8_t *)ip6h;
		ptr += hdr_len;
	}
	if (prev_nexthdr == IPPROTO_ROUTING) {
		ip6_rthdr_t *rthdr;
		uint_t hdr_len;

		rthdr = (ip6_rthdr_t *)ptr;
		prev_nexthdr = rthdr->ip6r_nxt;
		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
		    - (uint8_t *)ip6h;
		hdr_len = 8 * (rthdr->ip6r_len + 1);
		ptr += hdr_len;
	}
	if (prev_nexthdr != IPPROTO_FRAGMENT) {
		/* Can't handle other headers before the fragment header */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
		freemsg(mp);
		return (NULL);
	}

	/*
	 * Note: Fragment offset in header is in 8-octet units.
	 * Clearing least significant 3 bits not only extracts
	 * it but also gets it in units of octets.
	 */
	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);

	/*
	 * Is the more frags flag on and the payload length not a multiple
	 * of eight?
	 */
	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((char *)&ip6h->ip6_plen -
		    (char *)ip6h), B_FALSE, ira);
		return (NULL);
	}

	v6src_ptr = &ip6h->ip6_src;
	v6dst_ptr = &ip6h->ip6_dst;
	end = remlen;

	/* Length of everything up to and including the fragment header. */
	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
	end += offset;

	/*
	 * Would fragment cause reassembled packet to have a payload length
	 * greater than IP_MAXPACKET - the max payload size?
	 */
	if (end > IP_MAXPACKET) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("Reassembled packet too large", mp, ill);
		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
		    (char *)ip6h), B_FALSE, ira);
		return (NULL);
	}

	/*
	 * This packet just has one fragment.  Reassembly not
	 * needed.
	 */
	if (!more_frags && offset == 0) {
		goto reass_done;
	}

	/*
	 * Drop the fragment as early as possible, if
	 * we don't have resource(s) to re-assemble.
	 */
	if (ipst->ips_ip_reass_queue_bytes == 0) {
		freemsg(mp);
		return (NULL);
	}

	/* Record the ECN field info. */
	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
	/*
	 * If this is not the first fragment, dump the unfragmentable
	 * portion of the packet.
	 */
	if (offset)
		mp->b_rptr = (uchar_t *)&fraghdr[1];

	/*
	 * Fragmentation reassembly.  Each ILL has a hash table for
	 * queueing packets undergoing reassembly for all IPIFs
	 * associated with the ILL.  The hash is based on the packet
	 * IP ident field.  The ILL frag hash table was allocated
	 * as a timer block at the time the ILL was created.  Whenever
	 * there is anything on the reassembly queue, the timer will
	 * be running.
	 */
	/*
	 * Handle vnic loopback of fragments: an mblk whose data block has
	 * more than two references is not charged to the reassembly
	 * byte count.
	 */
	if (mp->b_datap->db_ref > 2)
		msg_len = 0;
	else
		msg_len = MBLKSIZE(mp);

	/* Find the last mblk of the fragment and total up its size. */
	tail_mp = mp;
	while (tail_mp->b_cont != NULL) {
		tail_mp = tail_mp->b_cont;
		if (tail_mp->b_datap->db_ref <= 2)
			msg_len += MBLKSIZE(tail_mp);
	}
	/*
	 * If the reassembly list for this ILL will get too big
	 * prune it.
	 */

	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
	    ipst->ips_ip_reass_queue_bytes) {
		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
		    uint_t, ill->ill_frag_count,
		    uint_t, ipst->ips_ip_reass_queue_bytes);
		ill_frag_prune(ill,
		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
		    (ipst->ips_ip_reass_queue_bytes - msg_len));
		pruned = B_TRUE;
	}

	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
	mutex_enter(&ipfb->ipfb_lock);

	ipfp = &ipfb->ipfb_ipf;
	/* Try to find an existing fragment queue for this packet. */
	for (;;) {
		ipf = ipfp[0];
		if (ipf) {
			/*
			 * It has to match on ident, source address, and
			 * dest address.
			 */
			if (ipf->ipf_ident == ident &&
			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {

				/*
				 * If we have received too many
				 * duplicate fragments for this packet
				 * free it.
				 */
				if (ipf->ipf_num_dups > ip_max_frag_dups) {
					ill_frag_free_pkts(ill, ipfb, ipf, 1);
					freemsg(mp);
					mutex_exit(&ipfb->ipfb_lock);
					return (NULL);
				}

				break;
			}
			ipfp = &ipf->ipf_hash_next;
			continue;
		}


		/*
		 * End of the hash chain: no reassembly queue exists yet
		 * for this packet.
		 *
		 * If we pruned the list, do we want to store this new
		 * fragment?.  We apply an optimization here based on the
		 * fact that most fragments will be received in order.
		 * So if the offset of this incoming fragment is zero,
		 * it is the first fragment of a new packet.  We will
		 * keep it.  Otherwise drop the fragment, as we have
		 * probably pruned the packet already (since the
		 * packet cannot be found).
		 */

		if (pruned && offset != 0) {
			mutex_exit(&ipfb->ipfb_lock);
			freemsg(mp);
			return (NULL);
		}

		/* New guy.  Allocate a frag message. */
		mp1 = allocb(sizeof (*ipf), BPRI_MED);
		if (!mp1) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			/*
			 * Common exit for every path that leaves with
			 * ipfb_lock held and no completed packet to return.
			 * It is also the target of gotos further down,
			 * including ones outside this loop.
			 */
partial_reass_done:
			mutex_exit(&ipfb->ipfb_lock);
			return (NULL);
		}

		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) {
			/*
			 * Too many fragmented packets in this hash bucket.
			 * Free the oldest.
			 */
			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
		}

		mp1->b_cont = mp;

		/* Initialize the fragment header. */
		ipf = (ipf_t *)mp1->b_rptr;
		ipf->ipf_mp = mp1;
		ipf->ipf_ptphn = ipfp;
		ipfp[0] = ipf;
		ipf->ipf_hash_next = NULL;
		ipf->ipf_ident = ident;
		ipf->ipf_v6src = *v6src_ptr;
		ipf->ipf_v6dst = *v6dst_ptr;
		/* Record reassembly start time. */
		ipf->ipf_timestamp = gethrestime_sec();
		/* Record ipf generation and account for frag header */
		ipf->ipf_gen = ill->ill_ipf_gen++;
		ipf->ipf_count = MBLKSIZE(mp1);
		ipf->ipf_protocol = nexthdr;
		ipf->ipf_nf_hdr_len = 0;
		ipf->ipf_prev_nexthdr_offset = 0;
		ipf->ipf_last_frag_seen = B_FALSE;
		ipf->ipf_ecn = ecn_info;
		ipf->ipf_num_dups = 0;
		ipfb->ipfb_frag_pkts++;
		ipf->ipf_checksum = 0;
		ipf->ipf_checksum_flags = 0;

		/* Store checksum value in fragment header */
		if (sum_flags != 0) {
			/* Fold the 32-bit sum down to 16 bits. */
			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
			ipf->ipf_checksum = sum_val;
			ipf->ipf_checksum_flags = sum_flags;
		}

		/*
		 * We handle reassembly two ways.  In the easy case,
		 * where all the fragments show up in order, we do
		 * minimal bookkeeping, and just clip new pieces on
		 * the end.  If we ever see a hole, then we go off
		 * to ip_reassemble which has to mark the pieces and
		 * keep track of the number of holes, etc.  Obviously,
		 * the point of having both mechanisms is so we can
		 * handle the easy case as efficiently as possible.
		 */
		if (offset == 0) {
			/* Easy case, in-order reassembly so far. */
			/* Update the byte count */
			ipf->ipf_count += msg_len;
			ipf->ipf_tail_mp = tail_mp;
			/*
			 * Keep track of next expected offset in
			 * ipf_end.
			 */
			ipf->ipf_end = end;
			ipf->ipf_nf_hdr_len = hdr_length;
			ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
		} else {
			/* Hard case, hole at the beginning. */
			ipf->ipf_tail_mp = NULL;
			/*
			 * ipf_end == 0 means that we have given up
			 * on easy reassembly.
			 */
			ipf->ipf_end = 0;

			/* Forget checksum offload from now on */
			ipf->ipf_checksum_flags = 0;

			/*
			 * ipf_hole_cnt is set by ip_reassemble.
			 * ipf_count is updated by ip_reassemble.
			 * No need to check for return value here
			 * as we don't expect reassembly to complete or
			 * fail for the first fragment itself.
			 */
			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
			    msg_len);
		}
		/* Update per ipfb and ill byte counts */
		ipfb->ipfb_count += ipf->ipf_count;
		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
		/* If the frag timer wasn't already going, start it. */
		mutex_enter(&ill->ill_lock);
		ill_frag_timer_start(ill);
		mutex_exit(&ill->ill_lock);
		goto partial_reass_done;
	}

	/*
	 * From here on ipf is the existing reassembly queue that matched
	 * this fragment, and ipfb_lock is still held.
	 */

	/*
	 * If the packet's flag has changed (it could be coming up
	 * from an interface different than the previous, therefore
	 * possibly different checksum capability), then forget about
	 * any stored checksum states.  Otherwise add the value to
	 * the existing one stored in the fragment header.
	 */
	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
		sum_val += ipf->ipf_checksum;
		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
		ipf->ipf_checksum = sum_val;
	} else if (ipf->ipf_checksum_flags != 0) {
		/* Forget checksum offload from now on */
		ipf->ipf_checksum_flags = 0;
	}

	/*
	 * We have a new piece of a datagram which is already being
	 * reassembled.  Update the ECN info if all IP fragments
	 * are ECN capable.  If there is one which is not, clear
	 * all the info.  If there is at least one which has CE
	 * code point, IP needs to report that up to transport.
	 */
	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
		if (ecn_info == IPH_ECN_CE)
			ipf->ipf_ecn = IPH_ECN_CE;
	} else {
		ipf->ipf_ecn = IPH_ECN_NECT;
	}

	if (offset && ipf->ipf_end == offset) {
		/* The new fragment fits at the end */
		ipf->ipf_tail_mp->b_cont = mp;
		/* Update the byte count */
		ipf->ipf_count += msg_len;
		/* Update per ipfb and ill byte counts */
		ipfb->ipfb_count += msg_len;
		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
		atomic_add_32(&ill->ill_frag_count, msg_len);
		if (more_frags) {
			/* More to come. */
			ipf->ipf_end = end;
			ipf->ipf_tail_mp = tail_mp;
			goto partial_reass_done;
		}
		/* Last fragment arrived in order: fall out to completion. */
	} else {
		/*
		 * Go do the hard cases.
		 * Call ip_reassemble().
		 */
		int ret;

		if (offset == 0) {
			/*
			 * First fragment arriving after later ones: record
			 * the unfragmentable header information unless an
			 * earlier first fragment already did.
			 */
			if (ipf->ipf_prev_nexthdr_offset == 0) {
				ipf->ipf_nf_hdr_len = hdr_length;
				ipf->ipf_prev_nexthdr_offset =
				    prev_nexthdr_offset;
			}
		}
		/* Save current byte count */
		count = ipf->ipf_count;
		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);

		/* Count of bytes added and subtracted (freeb()ed) */
		count = ipf->ipf_count - count;
		if (count) {
			/* Update per ipfb and ill byte counts */
			ipfb->ipfb_count += count;
			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
			atomic_add_32(&ill->ill_frag_count, count);
		}
		if (ret == IP_REASS_PARTIAL) {
			goto partial_reass_done;
		} else if (ret == IP_REASS_FAILED) {
			/* Reassembly failed.  Free up all resources */
			ill_frag_free_pkts(ill, ipfb, ipf, 1);
			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
				IP_REASS_SET_START(t_mp, 0);
				IP_REASS_SET_END(t_mp, 0);
			}
			freemsg(mp);
			goto partial_reass_done;
		}

		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
	}
	/*
	 * We have completed reassembly.  Unhook the frag header from
	 * the reassembly list.
	 *
	 * Grab the unfragmentable header length next header value out
	 * of the first fragment
	 */
	ASSERT(ipf->ipf_nf_hdr_len != 0);
	hdr_length = ipf->ipf_nf_hdr_len;

	/*
	 * Before we free the frag header, record the ECN info
	 * to report back to the transport.
	 */
	ecn_info = ipf->ipf_ecn;

	/*
	 * Store the nextheader field in the header preceding the fragment
	 * header
	 */
	nexthdr = ipf->ipf_protocol;
	prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
	ipfp = ipf->ipf_ptphn;

	/* We need to supply these to caller */
	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
		sum_val = ipf->ipf_checksum;
	else
		sum_val = 0;

	/* Unlink ipf from the hash chain and undo its byte accounting. */
	mp1 = ipf->ipf_mp;
	count = ipf->ipf_count;
	ipf = ipf->ipf_hash_next;
	if (ipf)
		ipf->ipf_ptphn = ipfp;
	ipfp[0] = ipf;
	atomic_add_32(&ill->ill_frag_count, -count);
	ASSERT(ipfb->ipfb_count >= count);
	ipfb->ipfb_count -= count;
	ipfb->ipfb_frag_pkts--;
	mutex_exit(&ipfb->ipfb_lock);
	/* Ditch the frag header. */
	mp = mp1->b_cont;
	freeb(mp1);

	/*
	 * Make sure the packet is good by doing some sanity
	 * check.  If bad we can silently drop the packet.
	 *
	 * This label is also reached directly (without taking ipfb_lock)
	 * for a packet that consists of a single fragment.
	 */
reass_done:
	if (hdr_length < sizeof (ip6_frag_t)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
		ip1dbg(("ip_input_fragment_v6: bad packet\n"));
		freemsg(mp);
		return (NULL);
	}

	/*
	 * Remove the fragment header from the initial header by
	 * splitting the mblk into the non-fragmentable header and
	 * everything after the fragment extension header.  This has the
	 * side effect of putting all the headers that need destination
	 * processing into the b_cont block-- on return this fact is
	 * used in order to avoid having to look at the extensions
	 * already processed.
	 *
	 * Note that this code assumes that the unfragmentable portion
	 * of the header is in the first mblk and increments
	 * the read pointer past it.  If this assumption is broken
	 * this code fails badly.
	 */
	if (mp->b_rptr + hdr_length != mp->b_wptr) {
		mblk_t *nmp;

		if (!(nmp = dupb(mp))) {
			ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return (NULL);
		}
		nmp->b_cont = mp->b_cont;
		mp->b_cont = nmp;
		nmp->b_rptr += hdr_length;
	}
	/* Trim the first mblk so that it ends just before the frag header. */
	mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);

	/* Overwrite the IPPROTO_FRAGMENT next-header value. */
	ip6h = (ip6_t *)mp->b_rptr;
	((char *)ip6h)[prev_nexthdr_offset] = nexthdr;

	/* Restore original IP length in header. */
	packet_size = msgdsize(mp);
	ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
	/* Record the ECN info. */
	ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
	ip6h->ip6_vcf |= htonl(ecn_info << 20);

	/* Update the receive attributes */
	ira->ira_pktlen = packet_size;
	ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
	ira->ira_protocol = nexthdr;

	/* Reassembly is successful; set checksum information in packet */
	DB_CKSUM16(mp) = (uint16_t)sum_val;
	DB_CKSUMFLAGS(mp) = sum_flags;
	DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;

	return (mp);
}
3875
3876 /*
3877 * Given an mblk and a ptr, find the destination address in an IPv6 routing
3878 * header.
3879 */
3880 static in6_addr_t
3881 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3882 {
3883 ip6_rthdr0_t *rt0;
3884 int segleft, numaddr;
3885 in6_addr_t *ap, rv = oldrv;
3886
3887 rt0 = (ip6_rthdr0_t *)whereptr;
3888 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3889 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3890 uint8_t *, whereptr);
3891 return (rv);
3892 }
3893 segleft = rt0->ip6r0_segleft;
3894 numaddr = rt0->ip6r0_len / 2;
3895
3896 if ((rt0->ip6r0_len & 0x1) ||
3897 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3898 (segleft > rt0->ip6r0_len / 2)) {
3899 /*
3900 * Corrupt packet. Either the routing header length is odd
3901 * (can't happen) or mismatched compared to the packet, or the
3902 * number of addresses is. Return what we can. This will
3903 * only be a problem on forwarded packets that get squeezed
3904 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3905 */
3906 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3907 whereptr);
3908 return (rv);
3909 }
3910
3911 if (segleft != 0) {
3912 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3913 rv = ap[numaddr - 1];
3914 }
3915
3916 return (rv);
3917 }
3918
3919 /*
3920 * Walk through the options to see if there is a routing header.
3921 * If present get the destination which is the last address of
3922 * the option.
3923 * mp needs to be provided in cases when the extension headers might span
3924 * b_cont; mp is never modified by this function.
3925 */
3926 in6_addr_t
3927 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3928 {
3929 const mblk_t *current_mp = mp;
3930 uint8_t nexthdr;
3931 uint8_t *whereptr;
3932 int ehdrlen;
3933 in6_addr_t rv;
3934
3935 whereptr = (uint8_t *)ip6h;
3936 ehdrlen = sizeof (ip6_t);
3937
3938 /* We assume at least the IPv6 base header is within one mblk. */
3939 ASSERT(mp == NULL ||
3940 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3941
3942 rv = ip6h->ip6_dst;
3943 nexthdr = ip6h->ip6_nxt;
3944 if (is_fragment != NULL)
3945 *is_fragment = B_FALSE;
3946
3947 /*
3948 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3949 * no extension headers will be split across mblks.
3950 */
3951
3952 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3953 nexthdr == IPPROTO_ROUTING) {
3954 if (nexthdr == IPPROTO_ROUTING)
3955 rv = pluck_out_dst(current_mp, whereptr, rv);
3956
3957 /*
3958 * All IPv6 extension headers have the next-header in byte
3959 * 0, and the (length - 8) in 8-byte-words.
3960 */
3961 while (current_mp != NULL &&
3962 whereptr + ehdrlen >= current_mp->b_wptr) {
3963 ehdrlen -= (current_mp->b_wptr - whereptr);
3964 current_mp = current_mp->b_cont;
3965 if (current_mp == NULL) {
3966 /* Bad packet. Return what we can. */
3967 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3968 mp, mblk_t *, current_mp, ip6_t *, ip6h);
3969 goto done;
3970 }
3971 whereptr = current_mp->b_rptr;
3972 }
3973 whereptr += ehdrlen;
3974
3975 nexthdr = *whereptr;
3976 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3977 ehdrlen = (*(whereptr + 1) + 1) * 8;
3978 }
3979
3980 done:
3981 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3982 *is_fragment = B_TRUE;
3983 return (rv);
3984 }
3985
3986 /*
3987 * ip_source_routed_v6:
3988 * This function is called by redirect code (called from ip_input_v6) to
3989 * know whether this packet is source routed through this node i.e
3990 * whether this node (router) is part of the journey. This
3991 * function is called under two cases :
3992 *
3993 * case 1 : Routing header was processed by this node and
3994 * ip_process_rthdr replaced ip6_dst with the next hop
3995 * and we are forwarding the packet to the next hop.
3996 *
3997 * case 2 : Routing header was not processed by this node and we
3998 * are just forwarding the packet.
3999 *
4000 * For case (1) we don't want to send redirects. For case(2) we
4001 * want to send redirects.
4002 */
4003 static boolean_t
4004 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
4005 {
4006 uint8_t nexthdr;
4007 in6_addr_t *addrptr;
4008 ip6_rthdr0_t *rthdr;
4009 uint8_t numaddr;
4010 ip6_hbh_t *hbhhdr;
4011 uint_t ehdrlen;
4012 uint8_t *byteptr;
4013
4014 ip2dbg(("ip_source_routed_v6\n"));
4015 nexthdr = ip6h->ip6_nxt;
4016 ehdrlen = IPV6_HDR_LEN;
4017
4018 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */
4019 while (nexthdr == IPPROTO_HOPOPTS ||
4020 nexthdr == IPPROTO_DSTOPTS) {
4021 byteptr = (uint8_t *)ip6h + ehdrlen;
4022 /*
4023 * Check if we have already processed
4024 * packets or we are just a forwarding
4025 * router which only pulled up msgs up
4026 * to IPV6HDR and one HBH ext header
4027 */
4028 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4029 ip2dbg(("ip_source_routed_v6: Extension"
4030 " headers not processed\n"));
4031 return (B_FALSE);
4032 }
4033 hbhhdr = (ip6_hbh_t *)byteptr;
4034 nexthdr = hbhhdr->ip6h_nxt;
4035 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
4036 }
4037 switch (nexthdr) {
4038 case IPPROTO_ROUTING:
4039 byteptr = (uint8_t *)ip6h + ehdrlen;
4040 /*
4041 * If for some reason, we haven't pulled up
4042 * the routing hdr data mblk, then we must
4043 * not have processed it at all. So for sure
4044 * we are not part of the source routed journey.
4045 */
4046 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4047 ip2dbg(("ip_source_routed_v6: Routing"
4048 " header not processed\n"));
4049 return (B_FALSE);
4050 }
4051 rthdr = (ip6_rthdr0_t *)byteptr;
4052 /*
4053 * Either we are an intermediate router or the
4054 * last hop before destination and we have
4055 * already processed the routing header.
4056 * If segment_left is greater than or equal to zero,
4057 * then we must be the (numaddr - segleft) entry
4058 * of the routing header. Although ip6r0_segleft
4059 * is a unit8_t variable, we still check for zero
4060 * or greater value, if in case the data type
4061 * is changed someday in future.
4062 */
4063 if (rthdr->ip6r0_segleft > 0 ||
4064 rthdr->ip6r0_segleft == 0) {
4065 numaddr = rthdr->ip6r0_len / 2;
4066 addrptr = (in6_addr_t *)((char *)rthdr +
4067 sizeof (*rthdr));
4068 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
4069 if (addrptr != NULL) {
4070 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
4071 return (B_TRUE);
4072 ip1dbg(("ip_source_routed_v6: Not local\n"));
4073 }
4074 }
4075 /* FALLTHRU */
4076 default:
4077 ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4078 return (B_FALSE);
4079 }
4080 }
4081
4082 /*
4083 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation.
4084 * We have not optimized this in terms of number of mblks
4085 * allocated. For instance, for each fragment sent we always allocate a
4086 * mblk to hold the IPv6 header and fragment header.
4087 *
4088 * Assumes that all the extension headers are contained in the first mblk
4089 * and that the fragment header has has already been added by calling
4090 * ip_fraghdr_add_v6.
4091 */
4092 int
4093 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4094 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4095 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4096 {
4097 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
4098 ip6_t *fip6h;
4099 mblk_t *hmp;
4100 mblk_t *hmp0;
4101 mblk_t *dmp;
4102 ip6_frag_t *fraghdr;
4103 size_t unfragmentable_len;
4104 size_t mlen;
4105 size_t max_chunk;
4106 uint16_t off_flags;
4107 uint16_t offset = 0;
4108 ill_t *ill = nce->nce_ill;
4109 uint8_t nexthdr;
4110 uint8_t *ptr;
4111 ip_stack_t *ipst = ill->ill_ipst;
4112 uint_t priority = mp->b_band;
4113 int error = 0;
4114
4115 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4116 if (max_frag == 0) {
4117 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4118 ip_drop_output("FragFails: zero max_frag", mp, ill);
4119 freemsg(mp);
4120 return (EINVAL);
4121 }
4122
4123 /*
4124 * Caller should have added fraghdr_t to pkt_len, and also
4125 * updated ip6_plen.
4126 */
4127 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4128 ASSERT(msgdsize(mp) == pkt_len);
4129
4130 /*
4131 * Determine the length of the unfragmentable portion of this
4132 * datagram. This consists of the IPv6 header, a potential
4133 * hop-by-hop options header, a potential pre-routing-header
4134 * destination options header, and a potential routing header.
4135 */
4136 nexthdr = ip6h->ip6_nxt;
4137 ptr = (uint8_t *)&ip6h[1];
4138
4139 if (nexthdr == IPPROTO_HOPOPTS) {
4140 ip6_hbh_t *hbh_hdr;
4141 uint_t hdr_len;
4142
4143 hbh_hdr = (ip6_hbh_t *)ptr;
4144 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4145 nexthdr = hbh_hdr->ip6h_nxt;
4146 ptr += hdr_len;
4147 }
4148 if (nexthdr == IPPROTO_DSTOPTS) {
4149 ip6_dest_t *dest_hdr;
4150 uint_t hdr_len;
4151
4152 dest_hdr = (ip6_dest_t *)ptr;
4153 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4154 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4155 nexthdr = dest_hdr->ip6d_nxt;
4156 ptr += hdr_len;
4157 }
4158 }
4159 if (nexthdr == IPPROTO_ROUTING) {
4160 ip6_rthdr_t *rthdr;
4161 uint_t hdr_len;
4162
4163 rthdr = (ip6_rthdr_t *)ptr;
4164 nexthdr = rthdr->ip6r_nxt;
4165 hdr_len = 8 * (rthdr->ip6r_len + 1);
4166 ptr += hdr_len;
4167 }
4168 if (nexthdr != IPPROTO_FRAGMENT) {
4169 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4170 ip_drop_output("FragFails: bad nexthdr", mp, ill);
4171 freemsg(mp);
4172 return (EINVAL);
4173 }
4174 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4175 unfragmentable_len += sizeof (ip6_frag_t);
4176
4177 max_chunk = (max_frag - unfragmentable_len) & ~7;
4178
4179 /*
4180 * Allocate an mblk with enough room for the link-layer
4181 * header and the unfragmentable part of the datagram, which includes
4182 * the fragment header. This (or a copy) will be used as the
4183 * first mblk for each fragment we send.
4184 */
4185 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4186 if (hmp == NULL) {
4187 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4188 ip_drop_output("FragFails: no hmp", mp, ill);
4189 freemsg(mp);
4190 return (ENOBUFS);
4191 }
4192 hmp->b_rptr += ipst->ips_ip_wroff_extra;
4193 hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4194
4195 fip6h = (ip6_t *)hmp->b_rptr;
4196 bcopy(ip6h, fip6h, unfragmentable_len);
4197
4198 /*
4199 * pkt_len is set to the total length of the fragmentable data in this
4200 * datagram. For each fragment sent, we will decrement pkt_len
4201 * by the amount of fragmentable data sent in that fragment
4202 * until len reaches zero.
4203 */
4204 pkt_len -= unfragmentable_len;
4205
4206 /*
4207 * Move read ptr past unfragmentable portion, we don't want this part
4208 * of the data in our fragments.
4209 */
4210 mp->b_rptr += unfragmentable_len;
4211 if (mp->b_rptr == mp->b_wptr) {
4212 mblk_t *mp1 = mp->b_cont;
4213 freeb(mp);
4214 mp = mp1;
4215 }
4216
4217 while (pkt_len != 0) {
4218 mlen = MIN(pkt_len, max_chunk);
4219 pkt_len -= mlen;
4220 if (pkt_len != 0) {
4221 /* Not last */
4222 hmp0 = copyb(hmp);
4223 if (hmp0 == NULL) {
4224 BUMP_MIB(ill->ill_ip_mib,
4225 ipIfStatsOutFragFails);
4226 ip_drop_output("FragFails: copyb failed",
4227 mp, ill);
4228 freeb(hmp);
4229 freemsg(mp);
4230 ip1dbg(("ip_fragment_v6: copyb failed\n"));
4231 return (ENOBUFS);
4232 }
4233 off_flags = IP6F_MORE_FRAG;
4234 } else {
4235 /* Last fragment */
4236 hmp0 = hmp;
4237 hmp = NULL;
4238 off_flags = 0;
4239 }
4240 fip6h = (ip6_t *)(hmp0->b_rptr);
4241 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4242 sizeof (ip6_frag_t));
4243
4244 fip6h->ip6_plen = htons((uint16_t)(mlen +
4245 unfragmentable_len - IPV6_HDR_LEN));
4246 /*
4247 * Note: Optimization alert.
4248 * In IPv6 (and IPv4) protocol header, Fragment Offset
4249 * ("offset") is 13 bits wide and in 8-octet units.
4250 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4251 * it occupies the most significant 13 bits.
4252 * (least significant 13 bits in IPv4).
4253 * We do not do any shifts here. Not shifting is same effect
4254 * as taking offset value in octet units, dividing by 8 and
4255 * then shifting 3 bits left to line it up in place in proper
4256 * place protocol header.
4257 */
4258 fraghdr->ip6f_offlg = htons(offset) | off_flags;
4259
4260 if (!(dmp = ip_carve_mp(&mp, mlen))) {
4261 /* mp has already been freed by ip_carve_mp() */
4262 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4263 ip_drop_output("FragFails: could not carve mp",
4264 hmp0, ill);
4265 if (hmp != NULL)
4266 freeb(hmp);
4267 freeb(hmp0);
4268 ip1dbg(("ip_carve_mp: failed\n"));
4269 return (ENOBUFS);
4270 }
4271 hmp0->b_cont = dmp;
4272 /* Get the priority marking, if any */
4273 hmp0->b_band = priority;
4274
4275 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4276
4277 error = postfragfn(hmp0, nce, ixaflags,
4278 mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4279 ixa_cookie);
4280 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4281 /* No point in sending the other fragments */
4282 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4283 ip_drop_output("FragFails: postfragfn failed",
4284 hmp, ill);
4285 freeb(hmp);
4286 freemsg(mp);
4287 return (error);
4288 }
4289 /* No need to redo state machine in loop */
4290 ixaflags &= ~IXAF_REACH_CONF;
4291
4292 offset += mlen;
4293 }
4294 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4295 return (error);
4296 }
4297
4298 /*
4299 * Add a fragment header to an IPv6 packet.
4300 * Assumes that all the extension headers are contained in the first mblk.
4301 *
4302 * The fragment header is inserted after an hop-by-hop options header
4303 * and after [an optional destinations header followed by] a routing header.
4304 */
4305 mblk_t *
4306 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4307 {
4308 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
4309 ip6_t *fip6h;
4310 mblk_t *hmp;
4311 ip6_frag_t *fraghdr;
4312 size_t unfragmentable_len;
4313 uint8_t nexthdr;
4314 uint_t prev_nexthdr_offset;
4315 uint8_t *ptr;
4316 uint_t priority = mp->b_band;
4317 ip_stack_t *ipst = ixa->ixa_ipst;
4318
4319 /*
4320 * Determine the length of the unfragmentable portion of this
4321 * datagram. This consists of the IPv6 header, a potential
4322 * hop-by-hop options header, a potential pre-routing-header
4323 * destination options header, and a potential routing header.
4324 */
4325 nexthdr = ip6h->ip6_nxt;
4326 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4327 ptr = (uint8_t *)&ip6h[1];
4328
4329 if (nexthdr == IPPROTO_HOPOPTS) {
4330 ip6_hbh_t *hbh_hdr;
4331 uint_t hdr_len;
4332
4333 hbh_hdr = (ip6_hbh_t *)ptr;
4334 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4335 nexthdr = hbh_hdr->ip6h_nxt;
4336 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4337 - (uint8_t *)ip6h;
4338 ptr += hdr_len;
4339 }
4340 if (nexthdr == IPPROTO_DSTOPTS) {
4341 ip6_dest_t *dest_hdr;
4342 uint_t hdr_len;
4343
4344 dest_hdr = (ip6_dest_t *)ptr;
4345 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4346 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4347 nexthdr = dest_hdr->ip6d_nxt;
4348 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4349 - (uint8_t *)ip6h;
4350 ptr += hdr_len;
4351 }
4352 }
4353 if (nexthdr == IPPROTO_ROUTING) {
4354 ip6_rthdr_t *rthdr;
4355 uint_t hdr_len;
4356
4357 rthdr = (ip6_rthdr_t *)ptr;
4358 nexthdr = rthdr->ip6r_nxt;
4359 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4360 - (uint8_t *)ip6h;
4361 hdr_len = 8 * (rthdr->ip6r_len + 1);
4362 ptr += hdr_len;
4363 }
4364 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4365
4366 /*
4367 * Allocate an mblk with enough room for the link-layer
4368 * header, the unfragmentable part of the datagram, and the
4369 * fragment header.
4370 */
4371 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4372 ipst->ips_ip_wroff_extra, mp);
4373 if (hmp == NULL) {
4374 ill_t *ill = ixa->ixa_nce->nce_ill;
4375
4376 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4377 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4378 freemsg(mp);
4379 return (NULL);
4380 }
4381 hmp->b_rptr += ipst->ips_ip_wroff_extra;
4382 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4383
4384 fip6h = (ip6_t *)hmp->b_rptr;
4385 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4386
4387 bcopy(ip6h, fip6h, unfragmentable_len);
4388 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4389 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4390
4391 fraghdr->ip6f_nxt = nexthdr;
4392 fraghdr->ip6f_reserved = 0;
4393 fraghdr->ip6f_offlg = 0;
4394 fraghdr->ip6f_ident = htonl(ident);
4395
4396 /* Get the priority marking, if any */
4397 hmp->b_band = priority;
4398
4399 /*
4400 * Move read ptr past unfragmentable portion, we don't want this part
4401 * of the data in our fragments.
4402 */
4403 mp->b_rptr += unfragmentable_len;
4404 hmp->b_cont = mp;
4405 return (hmp);
4406 }
4407
4408 /*
4409 * Determine if the ill and multicast aspects of that packets
4410 * "matches" the conn.
4411 */
4412 boolean_t
4413 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4414 {
4415 ill_t *ill = ira->ira_rill;
4416 zoneid_t zoneid = ira->ira_zoneid;
4417 uint_t in_ifindex;
4418 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst;
4419 in6_addr_t *v6src_ptr = &ip6h->ip6_src;
4420
4421 /*
4422 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4423 * scopeid. This is used to limit
4424 * unicast and multicast reception to conn_incoming_ifindex.
4425 * conn_wantpacket_v6 is called both for unicast and
4426 * multicast packets.
4427 */
4428 in_ifindex = connp->conn_incoming_ifindex;
4429
4430 /* mpathd can bind to the under IPMP interface, which we allow */
4431 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4432 if (!IS_UNDER_IPMP(ill))
4433 return (B_FALSE);
4434
4435 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4436 return (B_FALSE);
4437 }
4438
4439 if (!IPCL_ZONE_MATCH(connp, zoneid))
4440 return (B_FALSE);
4441
4442 if (!(ira->ira_flags & IRAF_MULTICAST))
4443 return (B_TRUE);
4444
4445 if (connp->conn_multi_router)
4446 return (B_TRUE);
4447
4448 if (ira->ira_protocol == IPPROTO_RSVP)
4449 return (B_TRUE);
4450
4451 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4452 ira->ira_ill));
4453 }
4454
4455 /*
4456 * pr_addr_dbg function provides the needed buffer space to call
4457 * inet_ntop() function's 3rd argument. This function should be
4458 * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4459 * stack buffer space in it's own stack frame. This function uses
4460 * a buffer from it's own stack and prints the information.
4461 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4462 *
4463 * Note: This function can call inet_ntop() once.
4464 */
4465 void
4466 pr_addr_dbg(char *fmt1, int af, const void *addr)
4467 {
4468 char buf[INET6_ADDRSTRLEN];
4469
4470 if (fmt1 == NULL) {
4471 ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4472 return;
4473 }
4474
4475 /*
4476 * This does not compare debug level and just prints
4477 * out. Thus it is the responsibility of the caller
4478 * to check the appropriate debug-level before calling
4479 * this function.
4480 */
4481 if (ip_debug > 0) {
4482 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4483 }
4484
4485
4486 }
4487
4488
4489 /*
4490 * Return the length in bytes of the IPv6 headers (base header
4491 * extension headers) that will be needed based on the
4492 * ip_pkt_t structure passed by the caller.
4493 *
4494 * The returned length does not include the length of the upper level
4495 * protocol (ULP) header.
4496 */
4497 int
4498 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4499 {
4500 int len;
4501
4502 len = IPV6_HDR_LEN;
4503
4504 /*
4505 * If there's a security label here, then we ignore any hop-by-hop
4506 * options the user may try to set.
4507 */
4508 if (ipp->ipp_fields & IPPF_LABEL_V6) {
4509 uint_t hopoptslen;
4510 /*
4511 * Note that ipp_label_len_v6 is just the option - not
4512 * the hopopts extension header. It also needs to be padded
4513 * to a multiple of 8 bytes.
4514 */
4515 ASSERT(ipp->ipp_label_len_v6 != 0);
4516 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4517 hopoptslen = (hopoptslen + 7)/8 * 8;
4518 len += hopoptslen;
4519 } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4520 ASSERT(ipp->ipp_hopoptslen != 0);
4521 len += ipp->ipp_hopoptslen;
4522 }
4523
4524 /*
4525 * En-route destination options
4526 * Only do them if there's a routing header as well
4527 */
4528 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4529 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4530 ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4531 len += ipp->ipp_rthdrdstoptslen;
4532 }
4533 if (ipp->ipp_fields & IPPF_RTHDR) {
4534 ASSERT(ipp->ipp_rthdrlen != 0);
4535 len += ipp->ipp_rthdrlen;
4536 }
4537 if (ipp->ipp_fields & IPPF_DSTOPTS) {
4538 ASSERT(ipp->ipp_dstoptslen != 0);
4539 len += ipp->ipp_dstoptslen;
4540 }
4541 return (len);
4542 }
4543
4544 /*
4545 * All-purpose routine to build a header chain of an IPv6 header
4546 * followed by any required extension headers and a proto header.
4547 *
4548 * The caller has to set the source and destination address as well as
4549 * ip6_plen. The caller has to massage any routing header and compensate
4550 * for the ULP pseudo-header checksum due to the source route.
4551 *
4552 * The extension headers will all be fully filled in.
4553 */
4554 void
4555 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4556 uint8_t protocol, uint32_t flowinfo)
4557 {
4558 uint8_t *nxthdr_ptr;
4559 uint8_t *cp;
4560 ip6_t *ip6h = (ip6_t *)buf;
4561
4562 /* Initialize IPv6 header */
4563 ip6h->ip6_vcf =
4564 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4565 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4566
4567 if (ipp->ipp_fields & IPPF_TCLASS) {
4568 /* Overrides the class part of flowinfo */
4569 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4570 ipp->ipp_tclass);
4571 }
4572
4573 if (ipp->ipp_fields & IPPF_HOPLIMIT)
4574 ip6h->ip6_hops = ipp->ipp_hoplimit;
4575 else
4576 ip6h->ip6_hops = ipp->ipp_unicast_hops;
4577
4578 if ((ipp->ipp_fields & IPPF_ADDR) &&
4579 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4580 ip6h->ip6_src = ipp->ipp_addr;
4581
4582 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4583 cp = (uint8_t *)&ip6h[1];
4584 /*
4585 * Here's where we have to start stringing together
4586 * any extension headers in the right order:
4587 * Hop-by-hop, destination, routing, and final destination opts.
4588 */
4589 /*
4590 * If there's a security label here, then we ignore any hop-by-hop
4591 * options the user may try to set.
4592 */
4593 if (ipp->ipp_fields & IPPF_LABEL_V6) {
4594 /*
4595 * Hop-by-hop options with the label.
4596 * Note that ipp_label_v6 is just the option - not
4597 * the hopopts extension header. It also needs to be padded
4598 * to a multiple of 8 bytes.
4599 */
4600 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4601 uint_t hopoptslen;
4602 uint_t padlen;
4603
4604 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4605 hopoptslen = (padlen + 7)/8 * 8;
4606 padlen = hopoptslen - padlen;
4607
4608 *nxthdr_ptr = IPPROTO_HOPOPTS;
4609 nxthdr_ptr = &hbh->ip6h_nxt;
4610 hbh->ip6h_len = hopoptslen/8 - 1;
4611 cp += sizeof (ip6_hbh_t);
4612 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4613 cp += ipp->ipp_label_len_v6;
4614
4615 ASSERT(padlen <= 7);
4616 switch (padlen) {
4617 case 0:
4618 break;
4619 case 1:
4620 cp[0] = IP6OPT_PAD1;
4621 break;
4622 default:
4623 cp[0] = IP6OPT_PADN;
4624 cp[1] = padlen - 2;
4625 bzero(&cp[2], padlen - 2);
4626 break;
4627 }
4628 cp += padlen;
4629 } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4630 /* Hop-by-hop options */
4631 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4632
4633 *nxthdr_ptr = IPPROTO_HOPOPTS;
4634 nxthdr_ptr = &hbh->ip6h_nxt;
4635
4636 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4637 cp += ipp->ipp_hopoptslen;
4638 }
4639 /*
4640 * En-route destination options
4641 * Only do them if there's a routing header as well
4642 */
4643 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4644 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4645 ip6_dest_t *dst = (ip6_dest_t *)cp;
4646
4647 *nxthdr_ptr = IPPROTO_DSTOPTS;
4648 nxthdr_ptr = &dst->ip6d_nxt;
4649
4650 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4651 cp += ipp->ipp_rthdrdstoptslen;
4652 }
4653 /*
4654 * Routing header next
4655 */
4656 if (ipp->ipp_fields & IPPF_RTHDR) {
4657 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4658
4659 *nxthdr_ptr = IPPROTO_ROUTING;
4660 nxthdr_ptr = &rt->ip6r_nxt;
4661
4662 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4663 cp += ipp->ipp_rthdrlen;
4664 }
4665 /*
4666 * Do ultimate destination options
4667 */
4668 if (ipp->ipp_fields & IPPF_DSTOPTS) {
4669 ip6_dest_t *dest = (ip6_dest_t *)cp;
4670
4671 *nxthdr_ptr = IPPROTO_DSTOPTS;
4672 nxthdr_ptr = &dest->ip6d_nxt;
4673
4674 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4675 cp += ipp->ipp_dstoptslen;
4676 }
4677 /*
4678 * Now set the last header pointer to the proto passed in
4679 */
4680 *nxthdr_ptr = protocol;
4681 ASSERT((int)(cp - buf) == buf_len);
4682 }
4683
4684 /*
4685 * Return a pointer to the routing header extension header
4686 * in the IPv6 header(s) chain passed in.
4687 * If none found, return NULL
4688 * Assumes that all extension headers are in same mblk as the v6 header
4689 */
4690 ip6_rthdr_t *
4691 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4692 {
4693 ip6_dest_t *desthdr;
4694 ip6_frag_t *fraghdr;
4695 uint_t hdrlen;
4696 uint8_t nexthdr;
4697 uint8_t *ptr = (uint8_t *)&ip6h[1];
4698
4699 if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4700 return ((ip6_rthdr_t *)ptr);
4701
4702 /*
4703 * The routing header will precede all extension headers
4704 * other than the hop-by-hop and destination options
4705 * extension headers, so if we see anything other than those,
4706 * we're done and didn't find it.
4707 * We could see a destination options header alone but no
4708 * routing header, in which case we'll return NULL as soon as
4709 * we see anything after that.
4710 * Hop-by-hop and destination option headers are identical,
4711 * so we can use either one we want as a template.
4712 */
4713 nexthdr = ip6h->ip6_nxt;
4714 while (ptr < endptr) {
4715 /* Is there enough left for len + nexthdr? */
4716 if (ptr + MIN_EHDR_LEN > endptr)
4717 return (NULL);
4718
4719 switch (nexthdr) {
4720 case IPPROTO_HOPOPTS:
4721 case IPPROTO_DSTOPTS:
4722 /* Assumes the headers are identical for hbh and dst */
4723 desthdr = (ip6_dest_t *)ptr;
4724 hdrlen = 8 * (desthdr->ip6d_len + 1);
4725 nexthdr = desthdr->ip6d_nxt;
4726 break;
4727
4728 case IPPROTO_ROUTING:
4729 return ((ip6_rthdr_t *)ptr);
4730
4731 case IPPROTO_FRAGMENT:
4732 fraghdr = (ip6_frag_t *)ptr;
4733 hdrlen = sizeof (ip6_frag_t);
4734 nexthdr = fraghdr->ip6f_nxt;
4735 break;
4736
4737 default:
4738 return (NULL);
4739 }
4740 ptr += hdrlen;
4741 }
4742 return (NULL);
4743 }
4744
4745 /*
4746 * Called for source-routed packets originating on this node.
4747 * Manipulates the original routing header by moving every entry up
4748 * one slot, placing the first entry in the v6 header's v6_dst field,
4749 * and placing the ultimate destination in the routing header's last
4750 * slot.
4751 *
4752 * Returns the checksum diference between the ultimate destination
4753 * (last hop in the routing header when the packet is sent) and
4754 * the first hop (ip6_dst when the packet is sent)
4755 */
4756 /* ARGSUSED2 */
4757 uint32_t
4758 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4759 {
4760 uint_t numaddr;
4761 uint_t i;
4762 in6_addr_t *addrptr;
4763 in6_addr_t tmp;
4764 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth;
4765 uint32_t cksm;
4766 uint32_t addrsum = 0;
4767 uint16_t *ptr;
4768
4769 /*
4770 * Perform any processing needed for source routing.
4771 * We know that all extension headers will be in the same mblk
4772 * as the IPv6 header.
4773 */
4774
4775 /*
4776 * If no segments left in header, or the header length field is zero,
4777 * don't move hop addresses around;
4778 * Checksum difference is zero.
4779 */
4780 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4781 return (0);
4782
4783 ptr = (uint16_t *)&ip6h->ip6_dst;
4784 cksm = 0;
4785 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4786 cksm += ptr[i];
4787 }
4788 cksm = (cksm & 0xFFFF) + (cksm >> 16);
4789
4790 /*
4791 * Here's where the fun begins - we have to
4792 * move all addresses up one spot, take the
4793 * first hop and make it our first ip6_dst,
4794 * and place the ultimate destination in the
4795 * newly-opened last slot.
4796 */
4797 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4798 numaddr = rthdr->ip6r0_len / 2;
4799 tmp = *addrptr;
4800 for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4801 *addrptr = addrptr[1];
4802 }
4803 *addrptr = ip6h->ip6_dst;
4804 ip6h->ip6_dst = tmp;
4805
4806 /*
4807 * From the checksummed ultimate destination subtract the checksummed
4808 * current ip6_dst (the first hop address). Return that number.
4809 * (In the v4 case, the second part of this is done in each routine
4810 * that calls ip_massage_options(). We do it all in this one place
4811 * for v6).
4812 */
4813 ptr = (uint16_t *)&ip6h->ip6_dst;
4814 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4815 addrsum += ptr[i];
4816 }
4817 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4818 if ((int)cksm < 0)
4819 cksm--;
4820 cksm = (cksm & 0xFFFF) + (cksm >> 16);
4821
4822 return (cksm);
4823 }
4824
4825 void
4826 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4827 {
4828 kstat_t *ksp;
4829
4830 ip6_stat_t template = {
4831 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 },
4832 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 },
4833 { "ip6_recv_pullup", KSTAT_DATA_UINT64 },
4834 { "ip6_db_ref", KSTAT_DATA_UINT64 },
4835 { "ip6_notaligned", KSTAT_DATA_UINT64 },
4836 { "ip6_multimblk", KSTAT_DATA_UINT64 },
4837 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 },
4838 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 },
4839 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
4840 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 },
4841 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
4842 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
4843 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
4844 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
4845 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
4846 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
4847 };
4848 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4849 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4850 KSTAT_FLAG_VIRTUAL, stackid);
4851
4852 if (ksp == NULL)
4853 return (NULL);
4854
4855 bcopy(&template, ip6_statisticsp, sizeof (template));
4856 ksp->ks_data = (void *)ip6_statisticsp;
4857 ksp->ks_private = (void *)(uintptr_t)stackid;
4858
4859 kstat_install(ksp);
4860 return (ksp);
4861 }
4862
4863 void
4864 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4865 {
4866 if (ksp != NULL) {
4867 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4868 kstat_delete_netstack(ksp, stackid);
4869 }
4870 }
4871
4872 /*
4873 * The following two functions set and get the value for the
4874 * IPV6_SRC_PREFERENCES socket option.
4875 */
4876 int
4877 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4878 {
4879 /*
4880 * We only support preferences that are covered by
4881 * IPV6_PREFER_SRC_MASK.
4882 */
4883 if (prefs & ~IPV6_PREFER_SRC_MASK)
4884 return (EINVAL);
4885
4886 /*
4887 * Look for conflicting preferences or default preferences. If
4888 * both bits of a related pair are clear, the application wants the
4889 * system's default value for that pair. Both bits in a pair can't
4890 * be set.
4891 */
4892 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4893 prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4894 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4895 IPV6_PREFER_SRC_MIPMASK) {
4896 return (EINVAL);
4897 }
4898 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4899 prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4900 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4901 IPV6_PREFER_SRC_TMPMASK) {
4902 return (EINVAL);
4903 }
4904 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4905 prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4906 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4907 IPV6_PREFER_SRC_CGAMASK) {
4908 return (EINVAL);
4909 }
4910
4911 ixa->ixa_src_preferences = prefs;
4912 return (0);
4913 }
4914
4915 size_t
4916 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4917 {
4918 *val = ixa->ixa_src_preferences;
4919 return (sizeof (ixa->ixa_src_preferences));
4920 }
4921
4922 /*
4923 * Get the size of the IP options (including the IP headers size)
4924 * without including the AH header's size. If till_ah is B_FALSE,
4925 * and if AH header is present, dest options beyond AH header will
4926 * also be included in the returned size.
4927 */
4928 int
4929 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4930 {
4931 ip6_t *ip6h;
4932 uint8_t nexthdr;
4933 uint8_t *whereptr;
4934 ip6_hbh_t *hbhhdr;
4935 ip6_dest_t *dsthdr;
4936 ip6_rthdr_t *rthdr;
4937 int ehdrlen;
4938 int size;
4939 ah_t *ah;
4940
4941 ip6h = (ip6_t *)mp->b_rptr;
4942 size = IPV6_HDR_LEN;
4943 nexthdr = ip6h->ip6_nxt;
4944 whereptr = (uint8_t *)&ip6h[1];
4945 for (;;) {
4946 /* Assume IP has already stripped it */
4947 ASSERT(nexthdr != IPPROTO_FRAGMENT);
4948 switch (nexthdr) {
4949 case IPPROTO_HOPOPTS:
4950 hbhhdr = (ip6_hbh_t *)whereptr;
4951 nexthdr = hbhhdr->ip6h_nxt;
4952 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4953 break;
4954 case IPPROTO_DSTOPTS:
4955 dsthdr = (ip6_dest_t *)whereptr;
4956 nexthdr = dsthdr->ip6d_nxt;
4957 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4958 break;
4959 case IPPROTO_ROUTING:
4960 rthdr = (ip6_rthdr_t *)whereptr;
4961 nexthdr = rthdr->ip6r_nxt;
4962 ehdrlen = 8 * (rthdr->ip6r_len + 1);
4963 break;
4964 default :
4965 if (till_ah) {
4966 ASSERT(nexthdr == IPPROTO_AH);
4967 return (size);
4968 }
4969 /*
4970 * If we don't have a AH header to traverse,
4971 * return now. This happens normally for
4972 * outbound datagrams where we have not inserted
4973 * the AH header.
4974 */
4975 if (nexthdr != IPPROTO_AH) {
4976 return (size);
4977 }
4978
4979 /*
4980 * We don't include the AH header's size
4981 * to be symmetrical with other cases where
4982 * we either don't have a AH header (outbound)
4983 * or peek into the AH header yet (inbound and
4984 * not pulled up yet).
4985 */
4986 ah = (ah_t *)whereptr;
4987 nexthdr = ah->ah_nexthdr;
4988 ehdrlen = (ah->ah_length << 2) + 8;
4989
4990 if (nexthdr == IPPROTO_DSTOPTS) {
4991 if (whereptr + ehdrlen >= mp->b_wptr) {
4992 /*
4993 * The destination options header
4994 * is not part of the first mblk.
4995 */
4996 whereptr = mp->b_cont->b_rptr;
4997 } else {
4998 whereptr += ehdrlen;
4999 }
5000
5001 dsthdr = (ip6_dest_t *)whereptr;
5002 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
5003 size += ehdrlen;
5004 }
5005 return (size);
5006 }
5007 whereptr += ehdrlen;
5008 size += ehdrlen;
5009 }
5010 }
5011
5012 /*
5013 * Utility routine that checks if `v6srcp' is a valid address on underlying
5014 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif
5015 * associated with `v6srcp' on success. NOTE: if this is not called from
5016 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
5017 * group during or after this lookup.
5018 */
5019 boolean_t
5020 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
5021 {
5022 ipif_t *ipif;
5023
5024
5025 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
5026 if (ipif != NULL) {
5027 if (ipifp != NULL)
5028 *ipifp = ipif;
5029 else
5030 ipif_refrele(ipif);
5031 return (B_TRUE);
5032 }
5033
5034 if (ip_debug > 2) {
5035 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
5036 "src %s\n", AF_INET6, v6srcp);
5037 }
5038 return (B_FALSE);
5039 }