1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved
  24  */
  25 /* Copyright (c) 1990 Mentat Inc. */
  26 
  27 #include <sys/types.h>
  28 #include <sys/stream.h>
  29 #include <sys/dlpi.h>
  30 #include <sys/stropts.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/strsubr.h>
  33 #include <sys/strlog.h>
  34 #include <sys/strsun.h>
  35 #include <sys/zone.h>
  36 #define _SUN_TPI_VERSION 2
  37 #include <sys/tihdr.h>
  38 #include <sys/xti_inet.h>
  39 #include <sys/ddi.h>
  40 #include <sys/sunddi.h>
  41 #include <sys/cmn_err.h>
  42 #include <sys/debug.h>
  43 #include <sys/kobj.h>
  44 #include <sys/modctl.h>
  45 #include <sys/atomic.h>
  46 #include <sys/policy.h>
  47 #include <sys/priv.h>
  48 
  49 #include <sys/systm.h>
  50 #include <sys/param.h>
  51 #include <sys/kmem.h>
  52 #include <sys/sdt.h>
  53 #include <sys/socket.h>
  54 #include <sys/vtrace.h>
  55 #include <sys/isa_defs.h>
  56 #include <sys/mac.h>
  57 #include <net/if.h>
  58 #include <net/if_arp.h>
  59 #include <net/route.h>
  60 #include <sys/sockio.h>
  61 #include <netinet/in.h>
  62 #include <net/if_dl.h>
  63 
  64 #include <inet/common.h>
  65 #include <inet/mi.h>
  66 #include <inet/mib2.h>
  67 #include <inet/nd.h>
  68 #include <inet/arp.h>
  69 #include <inet/snmpcom.h>
  70 #include <inet/kstatcom.h>
  71 
  72 #include <netinet/igmp_var.h>
  73 #include <netinet/ip6.h>
  74 #include <netinet/icmp6.h>
  75 #include <netinet/sctp.h>
  76 
  77 #include <inet/ip.h>
  78 #include <inet/ip_impl.h>
  79 #include <inet/ip6.h>
  80 #include <inet/ip6_asp.h>
  81 #include <inet/optcom.h>
  82 #include <inet/tcp.h>
  83 #include <inet/tcp_impl.h>
  84 #include <inet/ip_multi.h>
  85 #include <inet/ip_if.h>
  86 #include <inet/ip_ire.h>
  87 #include <inet/ip_ftable.h>
  88 #include <inet/ip_rts.h>
  89 #include <inet/ip_ndp.h>
  90 #include <inet/ip_listutils.h>
  91 #include <netinet/igmp.h>
  92 #include <netinet/ip_mroute.h>
  93 #include <inet/ipp_common.h>
  94 
  95 #include <net/pfkeyv2.h>
  96 #include <inet/sadb.h>
  97 #include <inet/ipsec_impl.h>
  98 #include <inet/ipdrop.h>
  99 #include <inet/ip_netinfo.h>
 100 #include <inet/ilb_ip.h>
 101 #include <sys/squeue_impl.h>
 102 #include <sys/squeue.h>
 103 
 104 #include <sys/ethernet.h>
 105 #include <net/if_types.h>
 106 #include <sys/cpuvar.h>
 107 
 108 #include <ipp/ipp.h>
 109 #include <ipp/ipp_impl.h>
 110 #include <ipp/ipgpc/ipgpc.h>
 111 
 112 #include <sys/pattr.h>
 113 #include <inet/ipclassifier.h>
 114 #include <inet/sctp_ip.h>
 115 #include <inet/sctp/sctp_impl.h>
 116 #include <inet/udp_impl.h>
 117 #include <sys/sunddi.h>
 118 
 119 #include <sys/tsol/label.h>
 120 #include <sys/tsol/tnet.h>
 121 
 122 #include <sys/clock_impl.h>       /* For LBOLT_FASTPATH{,64} */
 123 
 124 #ifdef  DEBUG
 125 extern boolean_t skip_sctp_cksum;
 126 #endif
 127 
 128 static void     ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);
 129 
 130 static void     ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
 131     ip_recv_attr_t *);
 132 
 133 #pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)
 134 
 135 /*
 136  * Direct read side procedure capable of dealing with chains. GLDv3 based
 137  * drivers call this function directly with mblk chains while STREAMS
 138  * read side procedure ip_rput() calls this for single packet with ip_ring
 139  * set to NULL to process one packet at a time.
 140  *
 141  * The ill will always be valid if this function is called directly from
 142  * the driver.
 143  *
 144  * If ip_input_v6() is called from GLDv3:
 145  *
 146  *   - This must be a non-VLAN IP stream.
 147  *   - 'mp' is either an untagged or a special priority-tagged packet.
 148  *   - Any VLAN tag that was in the MAC header has been stripped.
 149  *
 150  * If the IP header in packet is not 32-bit aligned, every message in the
 151  * chain will be aligned before further operations. This is required on SPARC
 152  * platform.
 153  */
 154 void
 155 ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
 156     struct mac_header_info_s *mhip)
 157 {
 158         (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL,
 159             NULL);
 160 }
 161 
 162 /*
 163  * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
 164  * a chain of packets in the poll mode. The packets have gone through the
 165  * data link processing but not IP processing. For performance and latency
 166  * reasons, the squeue wants to process the chain in line instead of feeding
 167  * it back via ip_input path.
 168  *
 169  * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v6
 170  * will pass back any TCP packets matching the target sqp to
 171  * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
 172  * ip_input_v6 and ip_fanout_v6 as normal.
 173  * The TCP packets that match the target squeue are returned to the caller
 174  * as a b_next chain after each packet has been prepend with an mblk
 175  * from ip_recv_attr_to_mblk.
 176  */
 177 mblk_t *
 178 ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
 179     mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
 180 {
 181         return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp,
 182             last, cnt));
 183 }
 184 
 185 /*
 186  * Used by ip_input_v6 and ip_accept_tcp_v6
 187  * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
 188  * only used by ip_input_v6.
 189  */
 190 mblk_t *
 191 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
 192     struct mac_header_info_s *mhip, squeue_t *target_sqp,
 193     mblk_t **last, uint_t *cnt)
 194 {
 195         mblk_t          *mp;
 196         ip6_t           *ip6h;
 197         ip_recv_attr_t  iras;   /* Receive attributes */
 198         rtc_t           rtc;
 199         iaflags_t       chain_flags = 0;        /* Fixed for chain */
 200         mblk_t          *ahead = NULL;  /* Accepted head */
 201         mblk_t          *atail = NULL;  /* Accepted tail */
 202         uint_t          acnt = 0;       /* Accepted count */
 203 
 204         ASSERT(mp_chain != NULL);
 205         ASSERT(ill != NULL);
 206 
 207         /* These ones do not change as we loop over packets */
 208         iras.ira_ill = iras.ira_rill = ill;
 209         iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
 210         iras.ira_rifindex = iras.ira_ruifindex;
 211         iras.ira_sqp = NULL;
 212         iras.ira_ring = ip_ring;
 213         /* For ECMP and outbound transmit ring selection */
 214         iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);
 215 
 216         iras.ira_target_sqp = target_sqp;
 217         iras.ira_target_sqp_mp = NULL;
 218         if (target_sqp != NULL)
 219                 chain_flags |= IRAF_TARGET_SQP;
 220 
 221         /*
 222          * We try to have a mhip pointer when possible, but
 223          * it might be NULL in some cases. In those cases we
 224          * have to assume unicast.
 225          */
 226         iras.ira_mhip = mhip;
 227         iras.ira_flags = 0;
 228         if (mhip != NULL) {
 229                 switch (mhip->mhi_dsttype) {
 230                 case MAC_ADDRTYPE_MULTICAST :
 231                         chain_flags |= IRAF_L2DST_MULTICAST;
 232                         break;
 233                 case MAC_ADDRTYPE_BROADCAST :
 234                         chain_flags |= IRAF_L2DST_BROADCAST;
 235                         break;
 236                 }
 237         }
 238 
 239         /*
 240          * Initialize the one-element route cache.
 241          *
 242          * We do ire caching from one iteration to
 243          * another. In the event the packet chain contains
 244          * all packets from the same dst, this caching saves
 245          * an ire_route_recursive for each of the succeeding
 246          * packets in a packet chain.
 247          */
 248         rtc.rtc_ire = NULL;
 249         rtc.rtc_ip6addr = ipv6_all_zeros;
 250 
 251         /* Loop over b_next */
 252         for (mp = mp_chain; mp != NULL; mp = mp_chain) {
 253                 mp_chain = mp->b_next;
 254                 mp->b_next = NULL;
 255 
 256                 /*
 257                  * if db_ref > 1 then copymsg and free original. Packet
 258                  * may be changed and we do not want the other entity
 259                  * who has a reference to this message to trip over the
 260                  * changes. This is a blind change because trying to
 261                  * catch all places that might change the packet is too
 262                  * difficult.
 263                  *
 264                  * This corresponds to the fast path case, where we have
 265                  * a chain of M_DATA mblks.  We check the db_ref count
 266                  * of only the 1st data block in the mblk chain. There
 267                  * doesn't seem to be a reason why a device driver would
 268                  * send up data with varying db_ref counts in the mblk
 269                  * chain. In any case the Fast path is a private
 270                  * interface, and our drivers don't do such a thing.
 271                  * Given the above assumption, there is no need to walk
 272                  * down the entire mblk chain (which could have a
 273                  * potential performance problem)
 274                  *
 275                  * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
 276                  * to here because of exclusive ip stacks and vnics.
 277                  * Packets transmitted from exclusive stack over vnic
 278                  * can have db_ref > 1 and when it gets looped back to
 279                  * another vnic in a different zone, you have ip_input()
 280                  * getting dblks with db_ref > 1. So if someone
 281                  * complains of TCP performance under this scenario,
 282                  * take a serious look here on the impact of copymsg().
 283                  */
 284                 if (DB_REF(mp) > 1) {
 285                         if ((mp = ip_fix_dbref(mp, &iras)) == NULL)
 286                                 continue;
 287                 }
 288 
 289                 /*
 290                  * IP header ptr not aligned?
 291                  * OR IP header not complete in first mblk
 292                  */
 293                 ip6h = (ip6_t *)mp->b_rptr;
 294                 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) {
 295                         mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras);
 296                         if (mp == NULL)
 297                                 continue;
 298                         ip6h = (ip6_t *)mp->b_rptr;
 299                 }
 300 
 301                 /* Protect against a mix of Ethertypes and IP versions */
 302                 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
 303                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 304                         ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
 305                         freemsg(mp);
 306                         /* mhip might point into 1st packet in the chain. */
 307                         iras.ira_mhip = NULL;
 308                         continue;
 309                 }
 310 
 311                 /*
 312                  * Check for Martian addrs; we have to explicitly
 313                  * test for for zero dst since this is also used as
 314                  * an indication that the rtc is not used.
 315                  */
 316                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) {
 317                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 318                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 319                         freemsg(mp);
 320                         /* mhip might point into 1st packet in the chain. */
 321                         iras.ira_mhip = NULL;
 322                         continue;
 323                 }
 324                 /*
 325                  * Keep L2SRC from a previous packet in chain since mhip
 326                  * might point into an earlier packet in the chain.
 327                  */
 328                 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET);
 329 
 330                 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags;
 331                 iras.ira_free_flags = 0;
 332                 iras.ira_cred = NULL;
 333                 iras.ira_cpid = NOPID;
 334                 iras.ira_tsl = NULL;
 335                 iras.ira_zoneid = ALL_ZONES;    /* Default for forwarding */
 336 
 337                 /*
 338                  * We must count all incoming packets, even if they end
 339                  * up being dropped later on. Defer counting bytes until
 340                  * we have the whole IP header in first mblk.
 341                  */
 342                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
 343 
 344                 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 345                 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
 346                     iras.ira_pktlen);
 347 
 348                 /*
 349                  * Call one of:
 350                  *      ill_input_full_v6
 351                  *      ill_input_short_v6
 352                  * The former is used in the case of TX. See ill_set_inputfn().
 353                  */
 354                 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
 355 
 356                 /* Any references to clean up? No hold on ira_ill */
 357                 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
 358                         ira_cleanup(&iras, B_FALSE);
 359 
 360                 if (iras.ira_target_sqp_mp != NULL) {
 361                         /* Better be called from ip_accept_tcp */
 362                         ASSERT(target_sqp != NULL);
 363 
 364                         /* Found one packet to accept */
 365                         mp = iras.ira_target_sqp_mp;
 366                         iras.ira_target_sqp_mp = NULL;
 367                         ASSERT(ip_recv_attr_is_mblk(mp));
 368 
 369                         if (atail != NULL)
 370                                 atail->b_next = mp;
 371                         else
 372                                 ahead = mp;
 373                         atail = mp;
 374                         acnt++;
 375                         mp = NULL;
 376                 }
 377                 /* mhip might point into 1st packet in the chain. */
 378                 iras.ira_mhip = NULL;
 379         }
 380         /* Any remaining references to the route cache? */
 381         if (rtc.rtc_ire != NULL) {
 382                 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
 383                 ire_refrele(rtc.rtc_ire);
 384         }
 385 
 386         if (ahead != NULL) {
 387                 /* Better be called from ip_accept_tcp */
 388                 ASSERT(target_sqp != NULL);
 389                 *last = atail;
 390                 *cnt = acnt;
 391                 return (ahead);
 392         }
 393 
 394         return (NULL);
 395 }
 396 
 397 /*
 398  * This input function is used when
 399  *  - is_system_labeled()
 400  *
 401  * Note that for IPv6 CGTP filtering is handled only when receiving fragment
 402  * headers, and RSVP uses router alert options, thus we don't need anything
 403  * extra for them.
 404  */
 405 void
 406 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 407     ip_recv_attr_t *ira, rtc_t *rtc)
 408 {
 409         ip6_t           *ip6h = (ip6_t *)iph_arg;
 410         in6_addr_t      *nexthop = (in6_addr_t *)nexthop_arg;
 411         ill_t           *ill = ira->ira_ill;
 412 
 413         ASSERT(ira->ira_tsl == NULL);
 414 
 415         /*
 416          * Attach any necessary label information to
 417          * this packet
 418          */
 419         if (is_system_labeled()) {
 420                 ira->ira_flags |= IRAF_SYSTEM_LABELED;
 421 
 422                 /*
 423                  * This updates ira_cred, ira_tsl and ira_free_flags based
 424                  * on the label.
 425                  */
 426                 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) {
 427                         if (ip6opt_ls != 0)
 428                                 ip0dbg(("tsol_get_pkt_label v6 failed\n"));
 429                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 430                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
 431                         freemsg(mp);
 432                         return;
 433                 }
 434                 /* Note that ira_tsl can be NULL here. */
 435 
 436                 /* tsol_get_pkt_label sometimes does pullupmsg */
 437                 ip6h = (ip6_t *)mp->b_rptr;
 438         }
 439         ill_input_short_v6(mp, ip6h, nexthop, ira, rtc);
 440 }
 441 
 442 /*
 443  * Check for IPv6 addresses that should not appear on the wire
 444  * as either source or destination.
 445  * If we ever implement Stateless IPv6 Translators (SIIT) we'd have
 446  * to revisit the IPv4-mapped part.
 447  */
 448 static boolean_t
 449 ip6_bad_address(in6_addr_t *addr, boolean_t is_src)
 450 {
 451         if (IN6_IS_ADDR_V4MAPPED(addr)) {
 452                 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr"));
 453                 return (B_TRUE);
 454         }
 455         if (IN6_IS_ADDR_LOOPBACK(addr)) {
 456                 ip1dbg(("ip_input_v6: pkt with loopback addr"));
 457                 return (B_TRUE);
 458         }
 459         if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) {
 460                 /*
 461                  * having :: in the src is ok: it's used for DAD.
 462                  */
 463                 ip1dbg(("ip_input_v6: pkt with unspecified addr"));
 464                 return (B_TRUE);
 465         }
 466         return (B_FALSE);
 467 }
 468 
 469 /*
 470  * Routing lookup for IPv6 link-locals.
 471  * First we look on the inbound interface, then we check for IPMP and
 472  * look on the upper interface.
 473  * We update ira_ruifindex if we find the IRE on the upper interface.
 474  */
 475 static ire_t *
 476 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira,
 477     uint_t irr_flags, ip_stack_t *ipst)
 478 {
 479         int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL;
 480         ire_t *ire;
 481 
 482         ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop));
 483         ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
 484             match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 485         if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
 486             !IS_UNDER_IPMP(ill))
 487                 return (ire);
 488 
 489         /*
 490          * When we are using IMP we need to look for an IRE on both the
 491          * under and upper interfaces since there are different
 492          * link-local addresses for the under and upper.
 493          */
 494         ill = ipmp_ill_hold_ipmp_ill(ill);
 495         if (ill == NULL)
 496                 return (ire);
 497 
 498         ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
 499 
 500         ire_refrele(ire);
 501         ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
 502             match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 503         ill_refrele(ill);
 504         return (ire);
 505 }
 506 
 507 /*
 508  * This is the tail-end of the full receive side packet handling.
 509  * It can be used directly when the configuration is simple.
 510  */
 511 void
 512 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 513     ip_recv_attr_t *ira, rtc_t *rtc)
 514 {
 515         ire_t           *ire;
 516         ill_t           *ill = ira->ira_ill;
 517         ip_stack_t      *ipst = ill->ill_ipst;
 518         uint_t          pkt_len;
 519         ssize_t         len;
 520         ip6_t           *ip6h = (ip6_t *)iph_arg;
 521         in6_addr_t      nexthop = *(in6_addr_t *)nexthop_arg;
 522         ilb_stack_t     *ilbs = ipst->ips_netstack->netstack_ilb;
 523         uint_t          irr_flags;
 524 #define rptr    ((uchar_t *)ip6h)
 525 
 526         ASSERT(DB_TYPE(mp) == M_DATA);
 527 
 528         /*
 529          * Check for source/dest being a bad address: loopback, any, or
 530          * v4mapped. All of them start with a 64 bits of zero.
 531          */
 532         if (ip6h->ip6_src.s6_addr32[0] == 0 &&
 533             ip6h->ip6_src.s6_addr32[1] == 0) {
 534                 if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
 535                         ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
 536                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 537                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 538                         freemsg(mp);
 539                         return;
 540                 }
 541         }
 542         if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
 543             ip6h->ip6_dst.s6_addr32[1] == 0) {
 544                 if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
 545                         ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
 546                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 547                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 548                         freemsg(mp);
 549                         return;
 550                 }
 551         }
 552 
 553         len = mp->b_wptr - rptr;
 554         pkt_len = ira->ira_pktlen;
 555 
 556         /* multiple mblk or too short */
 557         len -= pkt_len;
 558         if (len != 0) {
 559                 mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
 560                 if (mp == NULL)
 561                         return;
 562                 ip6h = (ip6_t *)mp->b_rptr;
 563         }
 564 
 565         DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
 566             ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
 567             int, 0);
 568         /*
 569          * The event for packets being received from a 'physical'
 570          * interface is placed after validation of the source and/or
 571          * destination address as being local so that packets can be
 572          * redirected to loopback addresses using ipnat.
 573          */
 574         DTRACE_PROBE4(ip6__physical__in__start,
 575             ill_t *, ill, ill_t *, NULL,
 576             ip6_t *, ip6h, mblk_t *, mp);
 577 
 578         if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
 579                 int     ll_multicast = 0;
 580                 int     error;
 581                 in6_addr_t orig_dst = ip6h->ip6_dst;
 582 
 583                 if (ira->ira_flags & IRAF_L2DST_MULTICAST)
 584                         ll_multicast = HPE_MULTICAST;
 585                 else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
 586                         ll_multicast = HPE_BROADCAST;
 587 
 588                 FW_HOOKS6(ipst->ips_ip6_physical_in_event,
 589                     ipst->ips_ipv6firewall_physical_in,
 590                     ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);
 591 
 592                 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);
 593 
 594                 if (mp == NULL)
 595                         return;
 596 
 597                 /* The length could have changed */
 598                 ip6h = (ip6_t *)mp->b_rptr;
 599                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 600                 pkt_len = ira->ira_pktlen;
 601 
 602                 /*
 603                  * In case the destination changed we override any previous
 604                  * change to nexthop.
 605                  */
 606                 if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
 607                         nexthop = ip6h->ip6_dst;
 608 
 609                 if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
 610                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 611                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 612                         freemsg(mp);
 613                         return;
 614                 }
 615 
 616         }
 617 
 618         if (ipst->ips_ip6_observe.he_interested) {
 619                 zoneid_t dzone;
 620 
 621                 /*
 622                  * On the inbound path the src zone will be unknown as
 623                  * this packet has come from the wire.
 624                  */
 625                 dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
 626                 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
 627         }
 628 
 629         if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
 630             IPV6_DEFAULT_VERS_AND_FLOW) {
 631                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 632                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
 633                 ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
 634                 freemsg(mp);
 635                 return;
 636         }
 637 
 638         /*
 639          * For IPv6 we update ira_ip_hdr_length and ira_protocol as
 640          * we parse the headers, starting with the hop-by-hop options header.
 641          */
 642         ira->ira_ip_hdr_length = IPV6_HDR_LEN;
 643         if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
 644                 ip6_hbh_t       *hbhhdr;
 645                 uint_t          ehdrlen;
 646                 uint8_t         *optptr;
 647 
 648                 if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
 649                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 650                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 651                         freemsg(mp);
 652                         return;
 653                 }
 654                 if (mp->b_cont != NULL &&
 655                     rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
 656                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
 657                         if (ip6h == NULL) {
 658                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 659                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 660                                 freemsg(mp);
 661                                 return;
 662                         }
 663                 }
 664                 hbhhdr = (ip6_hbh_t *)&ip6h[1];
 665                 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
 666 
 667                 if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
 668                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 669                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 670                         freemsg(mp);
 671                         return;
 672                 }
 673                 if (mp->b_cont != NULL &&
 674                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
 675                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
 676                         if (ip6h == NULL) {
 677                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 678                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 679                                 freemsg(mp);
 680                                 return;
 681                         }
 682                         hbhhdr = (ip6_hbh_t *)&ip6h[1];
 683                 }
 684 
 685                 /*
 686                  * Update ira_ip_hdr_length to skip the hop-by-hop header
 687                  * once we get to ip_fanout_v6
 688                  */
 689                 ira->ira_ip_hdr_length += ehdrlen;
 690                 ira->ira_protocol = hbhhdr->ip6h_nxt;
 691 
 692                 optptr = (uint8_t *)&hbhhdr[1];
 693                 switch (ip_process_options_v6(mp, ip6h, optptr,
 694                     ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
 695                 case -1:
 696                         /*
 697                          * Packet has been consumed and any
 698                          * needed ICMP messages sent.
 699                          */
 700                         return;
 701                 case 0:
 702                         /* no action needed */
 703                         break;
 704                 case 1:
 705                         /*
                         * Known router alert. Make us handle it as local
 707                          * by setting the nexthop to be the all-host multicast
 708                          * address, and skip multicast membership filter by
 709                          * marking as a router alert.
 710                          */
 711                         ira->ira_flags |= IRAF_ROUTER_ALERT;
 712                         nexthop = ipv6_all_hosts_mcast;
 713                         break;
 714                 }
 715         }
 716 
 717         /*
         * Here we check to see if the machine is set up as an
         * L3 load balancer and if the incoming packet is for a VIP
 720          *
 721          * Check the following:
 722          * - there is at least a rule
 723          * - protocol of the packet is supported
 724          *
 725          * We don't load balance IPv6 link-locals.
 726          */
 727         if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
 728             !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 729                 in6_addr_t      lb_dst;
 730                 int             lb_ret;
 731 
 732                 /* For convenience, we just pull up the mblk. */
 733                 if (mp->b_cont != NULL) {
 734                         if (pullupmsg(mp, -1) == 0) {
 735                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 736                                 ip_drop_input("ipIfStatsInDiscards - pullupmsg",
 737                                     mp, ill);
 738                                 freemsg(mp);
 739                                 return;
 740                         }
 741                         ip6h = (ip6_t *)mp->b_rptr;
 742                 }
 743                 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
 744                     (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
 745                 if (lb_ret == ILB_DROPPED) {
 746                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 747                         ip_drop_input("ILB_DROPPED", mp, ill);
 748                         freemsg(mp);
 749                         return;
 750                 }
 751                 if (lb_ret == ILB_BALANCED) {
 752                         /* Set the dst to that of the chosen server */
 753                         nexthop = lb_dst;
 754                         DB_CKSUMFLAGS(mp) = 0;
 755                 }
 756         }
 757 
 758         if (ill->ill_flags & ILLF_ROUTER)
 759                 irr_flags = IRR_ALLOCATE;
 760         else
 761                 irr_flags = IRR_NONE;
 762 
 763         /* Can not use route cache with TX since the labels can differ */
 764         if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
 765                 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 766                         ire = ire_multicast(ill);
 767                 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 768                         ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 769                             ipst);
 770                 } else {
 771                         /* Match destination and label */
 772                         ire = ire_route_recursive_v6(&nexthop, 0, NULL,
 773                             ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
 774                             irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
 775                             NULL);
 776                 }
 777                 /* Update the route cache so we do the ire_refrele */
 778                 ASSERT(ire != NULL);
 779                 if (rtc->rtc_ire != NULL)
 780                         ire_refrele(rtc->rtc_ire);
 781                 rtc->rtc_ire = ire;
 782                 rtc->rtc_ip6addr = nexthop;
 783         } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr)) {
 784                 /* Use the route cache */
 785                 ASSERT(rtc->rtc_ire != NULL);
 786                 ire = rtc->rtc_ire;
 787         } else {
 788                 /* Update the route cache */
 789                 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 790                         ire = ire_multicast(ill);
 791                 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 792                         ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 793                             ipst);
 794                 } else {
 795                         ire = ire_route_recursive_dstonly_v6(&nexthop,
 796                             irr_flags, ira->ira_xmit_hint, ipst);
 797                 }
 798                 ASSERT(ire != NULL);
 799                 if (rtc->rtc_ire != NULL)
 800                         ire_refrele(rtc->rtc_ire);
 801                 rtc->rtc_ire = ire;
 802                 rtc->rtc_ip6addr = nexthop;
 803         }
 804 
 805         ire->ire_ib_pkt_count++;
 806 
 807         /*
 808          * Based on ire_type and ire_flags call one of:
 809          *      ire_recv_local_v6 - for IRE_LOCAL
 810          *      ire_recv_loopback_v6 - for IRE_LOOPBACK
 811          *      ire_recv_multirt_v6 - if RTF_MULTIRT
 812          *      ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACHOLE
 813          *      ire_recv_multicast_v6 - for IRE_MULTICAST
 814          *      ire_recv_noaccept_v6 - for ire_noaccept ones
 815          *      ire_recv_forward_v6 - for the rest.
 816          */
 817 
 818         (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 819 }
 820 #undef rptr
 821 
 822 /*
 823  * ire_recvfn for IREs that need forwarding
 824  */
 825 void
 826 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
 827 {
 828         ip6_t           *ip6h = (ip6_t *)iph_arg;
 829         ill_t           *ill = ira->ira_ill;
 830         ip_stack_t      *ipst = ill->ill_ipst;
 831         iaflags_t       iraflags = ira->ira_flags;
 832         ill_t           *dst_ill;
 833         nce_t           *nce;
 834         uint32_t        added_tx_len;
 835         uint32_t        mtu, iremtu;
 836 
 837         if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
 838                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 839                 ip_drop_input("l2 multicast not forwarded", mp, ill);
 840                 freemsg(mp);
 841                 return;
 842         }
 843 
 844         if (!(ill->ill_flags & ILLF_ROUTER)) {
 845                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 846                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 847                 freemsg(mp);
 848                 return;
 849         }
 850 
 851         /*
 852          * Either ire_nce_capable or ire_dep_parent would be set for the IRE
 853          * when it is found by ire_route_recursive, but that some other thread
 854          * could have changed the routes with the effect of clearing
 855          * ire_dep_parent. In that case we'd end up dropping the packet, or
 856          * finding a new nce below.
 857          * Get, allocate, or update the nce.
 858          * We get a refhold on ire_nce_cache as a result of this to avoid races
 859          * where ire_nce_cache is deleted.
 860          *
 861          * This ensures that we don't forward if the interface is down since
 862          * ipif_down removes all the nces.
 863          */
 864         mutex_enter(&ire->ire_lock);
 865         nce = ire->ire_nce_cache;
 866         if (nce == NULL) {
 867                 /* Not yet set up - try to set one up */
 868                 mutex_exit(&ire->ire_lock);
 869                 (void) ire_revalidate_nce(ire);
 870                 mutex_enter(&ire->ire_lock);
 871                 nce = ire->ire_nce_cache;
 872                 if (nce == NULL) {
 873                         mutex_exit(&ire->ire_lock);
 874                         /* The ire_dep_parent chain went bad, or no memory */
 875                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 876                         ip_drop_input("No ire_dep_parent", mp, ill);
 877                         freemsg(mp);
 878                         return;
 879                 }
 880         }
 881         nce_refhold(nce);
 882         mutex_exit(&ire->ire_lock);
 883 
 884         if (nce->nce_is_condemned) {
 885                 nce_t *nce1;
 886 
 887                 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE);
 888                 nce_refrele(nce);
 889                 if (nce1 == NULL) {
 890                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 891                         ip_drop_input("No nce", mp, ill);
 892                         freemsg(mp);
 893                         return;
 894                 }
 895                 nce = nce1;
 896         }
 897         dst_ill = nce->nce_ill;
 898 
 899         /*
 900          * Unless we are forwarding, drop the packet.
 901          * Unlike IPv4 we don't allow source routed packets out the same
 902          * interface when we are not a router.
 903          * Note that ill_forward_set() will set the ILLF_ROUTER on
 904          * all the group members when it gets an ipmp-ill or under-ill.
 905          */
 906         if (!(dst_ill->ill_flags & ILLF_ROUTER)) {
 907                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 908                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 909                 freemsg(mp);
 910                 nce_refrele(nce);
 911                 return;
 912         }
 913 
 914         if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
 915                 ire->ire_ib_pkt_count--;
 916                 /*
 917                  * Should only use IREs that are visible from the
 918                  * global zone for forwarding.
 919                  * For IPv6 any source route would have already been
 920                  * advanced in ip_fanout_v6
 921                  */
 922                 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL,
 923                     GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR,
 924                     (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
 925                     ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 926                 ire->ire_ib_pkt_count++;
 927                 (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 928                 ire_refrele(ire);
 929                 nce_refrele(nce);
 930                 return;
 931         }
 932         /*
 933          * ipIfStatsHCInForwDatagrams should only be increment if there
 934          * will be an attempt to forward the packet, which is why we
 935          * increment after the above condition has been checked.
 936          */
 937         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
 938 
 939         /* Initiate Read side IPPF processing */
 940         if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
 941                 /* ip_process translates an IS_UNDER_IPMP */
 942                 mp = ip_process(IPP_FWD_IN, mp, ill, ill);
 943                 if (mp == NULL) {
 944                         /* ip_drop_packet and MIB done */
 945                         ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred "
 946                             "during IPPF processing\n"));
 947                         nce_refrele(nce);
 948                         return;
 949                 }
 950         }
 951 
 952         DTRACE_PROBE4(ip6__forwarding__start,
 953             ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp);
 954 
 955         if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
 956                 int     error;
 957 
 958                 FW_HOOKS(ipst->ips_ip6_forwarding_event,
 959                     ipst->ips_ipv6firewall_forwarding,
 960                     ill, dst_ill, ip6h, mp, mp, 0, ipst, error);
 961 
 962                 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);
 963 
 964                 if (mp == NULL) {
 965                         nce_refrele(nce);
 966                         return;
 967                 }
 968                 /*
 969                  * Even if the destination was changed by the filter we use the
 970                  * forwarding decision that was made based on the address
 971                  * in ip_input.
 972                  */
 973 
 974                 /* Might have changed */
 975                 ip6h = (ip6_t *)mp->b_rptr;
 976                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 977         }
 978 
 979         /* Packet is being forwarded. Turning off hwcksum flag. */
 980         DB_CKSUMFLAGS(mp) = 0;
 981 
 982         /*
 983          * Per RFC 3513 section 2.5.2, we must not forward packets with
 984          * an unspecified source address.
 985          * The loopback address check for both src and dst has already
 986          * been checked in ip_input_v6
 987          * In the future one can envision adding RPF checks using number 3.
 988          */
 989         switch (ipst->ips_src_check) {
 990         case 0:
 991                 break;
 992         case 1:
 993         case 2:
 994                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) ||
 995                     IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
 996                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 997                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 998                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 999                         nce_refrele(nce);
1000                         freemsg(mp);
1001                         return;
1002                 }
1003                 break;
1004         }
1005 
1006         /*
1007          * Check to see if we're forwarding the packet to a
1008          * different link from which it came.  If so, check the
1009          * source and destination addresses since routers must not
1010          * forward any packets with link-local source or
1011          * destination addresses to other links.  Otherwise (if
1012          * we're forwarding onto the same link), conditionally send
1013          * a redirect message.
1014          */
1015         if (!IS_ON_SAME_LAN(dst_ill, ill)) {
1016                 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
1017                     IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
1018                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1019                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1020                         freemsg(mp);
1021                         nce_refrele(nce);
1022                         return;
1023                 }
1024                 /* TBD add site-local check at site boundary? */
1025         } else if (ipst->ips_ipv6_send_redirects) {
1026                 ip_send_potential_redirect_v6(mp, ip6h, ire, ira);
1027         }
1028 
1029         added_tx_len = 0;
1030         if (iraflags & IRAF_SYSTEM_LABELED) {
1031                 mblk_t          *mp1;
1032                 uint32_t        old_pkt_len = ira->ira_pktlen;
1033 
1034                 /*
1035                  * Check if it can be forwarded and add/remove
1036                  * CIPSO options as needed.
1037                  */
1038                 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
1039                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1040                         ip_drop_input("tsol_ip_forward", mp, ill);
1041                         freemsg(mp);
1042                         nce_refrele(nce);
1043                         return;
1044                 }
1045                 /*
1046                  * Size may have changed. Remember amount added in case
1047                  * ip_fragment needs to send an ICMP too big.
1048                  */
1049                 mp = mp1;
1050                 ip6h = (ip6_t *)mp->b_rptr;
1051                 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
1052                 ira->ira_ip_hdr_length = IPV6_HDR_LEN;
1053                 if (ira->ira_pktlen > old_pkt_len)
1054                         added_tx_len = ira->ira_pktlen - old_pkt_len;
1055         }
1056 
1057         mtu = dst_ill->ill_mtu;
1058         if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
1059                 mtu = iremtu;
1060         ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len);
1061         nce_refrele(nce);
1062         return;
1063 
1064 }
1065 
1066 /*
1067  * Used for sending out unicast and multicast packets that are
1068  * forwarded.
1069  */
1070 void
1071 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira,
1072     uint32_t mtu, uint32_t added_tx_len)
1073 {
1074         ill_t           *dst_ill = nce->nce_ill;
1075         uint32_t        pkt_len;
1076         iaflags_t       iraflags = ira->ira_flags;
1077         ip_stack_t      *ipst = dst_ill->ill_ipst;
1078 
1079         if (ip6h->ip6_hops-- <= 1) {
1080                 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1081                 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill);
1082                 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE,
1083                     ira);
1084                 return;
1085         }
1086 
1087         /* Initiate Write side IPPF processing before any fragmentation */
1088         if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
1089                 /* ip_process translates an IS_UNDER_IPMP */
1090                 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
1091                 if (mp == NULL) {
1092                         /* ip_drop_packet and MIB done */
1093                         ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \
1094                             " during IPPF processing\n"));
1095                         return;
1096                 }
1097         }
1098 
1099         pkt_len = ira->ira_pktlen;
1100 
1101         BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
1102 
1103         if (pkt_len > mtu) {
1104                 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
1105                 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
1106                 if (iraflags & IRAF_SYSTEM_LABELED) {
1107                         /*
1108                          * Remove any CIPSO option added by
1109                          * tsol_ip_forward, and make sure we report
1110                          * a path MTU so that there
1111                          * is room to add such a CIPSO option for future
1112                          * packets.
1113                          */
1114                         mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6);
1115                 }
1116                 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira);
1117                 return;
1118         }
1119 
1120         ASSERT(pkt_len ==
1121             ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN);
1122 
1123         if (iraflags & IRAF_LOOPBACK_COPY) {
1124                 /*
1125                  * IXAF_NO_LOOP_ZONEID is not set hence 6th arg
1126                  * is don't care
1127                  */
1128                 (void) ip_postfrag_loopcheck(mp, nce,
1129                     (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL),
1130                     pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1131         } else {
1132                 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL,
1133                     pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1134         }
1135 }
1136 
1137 /*
1138  * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
1139  * which is what ire_route_recursive returns when there is no matching ire.
1140  * Send ICMP unreachable unless blackhole.
1141  */
1142 void
1143 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1144 {
1145         ip6_t           *ip6h = (ip6_t *)iph_arg;
1146         ill_t           *ill = ira->ira_ill;
1147         ip_stack_t      *ipst = ill->ill_ipst;
1148 
1149         /* Would we have forwarded this packet if we had a route? */
1150         if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
1151                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1152                 ip_drop_input("l2 multicast not forwarded", mp, ill);
1153                 freemsg(mp);
1154                 return;
1155         }
1156 
1157         if (!(ill->ill_flags & ILLF_ROUTER)) {
1158                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1159                 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
1160                 freemsg(mp);
1161                 return;
1162         }
1163         /*
1164          * If we had a route this could have been forwarded. Count as such.
1165          *
1166          * ipIfStatsHCInForwDatagrams should only be increment if there
1167          * will be an attempt to forward the packet, which is why we
1168          * increment after the above condition has been checked.
1169          */
1170         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
1171 
1172         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1173 
1174         ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
1175             ipst);
1176 
1177         if (ire->ire_flags & RTF_BLACKHOLE) {
1178                 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
1179                 freemsg(mp);
1180         } else {
1181                 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);
1182 
1183                 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE,
1184                     ira);
1185         }
1186 }
1187 
1188 /*
1189  * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
1190  * VRRP when in noaccept mode.
1191  * We silently drop packets except for Neighbor Solicitations and
1192  * Neighbor Advertisements.
1193  */
1194 void
1195 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1196     ip_recv_attr_t *ira)
1197 {
1198         ip6_t           *ip6h = (ip6_t *)iph_arg;
1199         ill_t           *ill = ira->ira_ill;
1200         icmp6_t         *icmp6;
1201         int             ip_hdr_length;
1202 
1203         if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
1204                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1205                 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1206                 freemsg(mp);
1207                 return;
1208         }
1209         ip_hdr_length = ira->ira_ip_hdr_length;
1210         if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
1211                 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
1212                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
1213                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
1214                         freemsg(mp);
1215                         return;
1216                 }
1217                 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
1218                 if (ip6h == NULL) {
1219                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1220                         freemsg(mp);
1221                         return;
1222                 }
1223         }
1224         icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
1225 
1226         if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT &&
1227             icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) {
1228                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1229                 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1230                 freemsg(mp);
1231                 return;
1232         }
1233         ire_recv_local_v6(ire, mp, ip6h, ira);
1234 }
1235 
1236 /*
1237  * ire_recvfn for IRE_MULTICAST.
1238  */
1239 void
1240 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1241     ip_recv_attr_t *ira)
1242 {
1243         ip6_t           *ip6h = (ip6_t *)iph_arg;
1244         ill_t           *ill = ira->ira_ill;
1245 
1246         ASSERT(ire->ire_ill == ira->ira_ill);
1247 
1248         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
1249         UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen);
1250 
1251         /* Tag for higher-level protocols */
1252         ira->ira_flags |= IRAF_MULTICAST;
1253 
1254         /*
1255          * So that we don't end up with dups, only one ill an IPMP group is
1256          * nominated to receive multicast traffic.
1257          * If we have no cast_ill we are liberal and accept everything.
1258          */
1259         if (IS_UNDER_IPMP(ill)) {
1260                 ip_stack_t      *ipst = ill->ill_ipst;
1261 
1262                 /* For an under ill_grp can change under lock */
1263                 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1264                 if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
1265                     ill->ill_grp->ig_cast_ill != NULL) {
1266                         rw_exit(&ipst->ips_ill_g_lock);
1267                         ip_drop_input("not on cast ill", mp, ill);
1268                         freemsg(mp);
1269                         return;
1270                 }
1271                 rw_exit(&ipst->ips_ill_g_lock);
1272                 /*
1273                  * We switch to the upper ill so that mrouter and hasmembers
1274                  * can operate on upper here and in ip_input_multicast.
1275                  */
1276                 ill = ipmp_ill_hold_ipmp_ill(ill);
1277                 if (ill != NULL) {
1278                         ASSERT(ill != ira->ira_ill);
1279                         ASSERT(ire->ire_ill == ira->ira_ill);
1280                         ira->ira_ill = ill;
1281                         ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1282                 } else {
1283                         ill = ira->ira_ill;
1284                 }
1285         }
1286 
1287 #ifdef notdef
1288         /*
1289          * Check if we are a multicast router - send ip_mforward a copy of
1290          * the packet.
1291          * Due to mroute_decap tunnels we consider forwarding packets even if
1292          * mrouted has not joined the allmulti group on this interface.
1293          */
1294         if (ipst->ips_ip_g_mrouter) {
1295                 int retval;
1296 
1297                 /*
1298                  * Clear the indication that this may have hardware
1299                  * checksum as we are not using it for forwarding.
1300                  */
1301                 DB_CKSUMFLAGS(mp) = 0;
1302 
1303                 /*
1304                  * ip_mforward helps us make these distinctions: If received
1305                  * on tunnel and not IGMP, then drop.
1306                  * If IGMP packet, then don't check membership
1307                  * If received on a phyint and IGMP or PIM, then
1308                  * don't check membership
1309                  */
1310                 retval = ip_mforward_v6(mp, ira);
1311                 /* ip_mforward updates mib variables if needed */
1312 
1313                 switch (retval) {
1314                 case 0:
1315                         /*
1316                          * pkt is okay and arrived on phyint.
1317                          */
1318                         break;
1319                 case -1:
1320                         /* pkt is mal-formed, toss it */
1321                         freemsg(mp);
1322                         goto done;
1323                 case 1:
1324                         /*
1325                          * pkt is okay and arrived on a tunnel
1326                          *
1327                          * If we are running a multicast router
1328                          * we need to see all mld packets, which
1329                          * are marked with router alerts.
1330                          */
1331                         if (ira->ira_flags & IRAF_ROUTER_ALERT)
1332                                 goto forus;
1333                         ip_drop_input("Multicast on tunnel ignored", mp, ill);
1334                         freemsg(mp);
1335                         goto done;
1336                 }
1337         }
1338 #endif /* notdef */
1339 
1340         /*
1341          * If this was a router alert we skip the group membership check.
1342          */
1343         if (ira->ira_flags & IRAF_ROUTER_ALERT)
1344                 goto forus;
1345 
1346         /*
1347          * Check if we have members on this ill. This is not necessary for
1348          * correctness because even if the NIC/GLD had a leaky filter, we
1349          * filter before passing to each conn_t.
1350          */
1351         if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) {
1352                 /*
1353                  * Nobody interested
1354                  *
1355                  * This might just be caused by the fact that
1356                  * multiple IP Multicast addresses map to the same
1357                  * link layer multicast - no need to increment counter!
1358                  */
1359                 ip_drop_input("Multicast with no members", mp, ill);
1360                 freemsg(mp);
1361                 goto done;
1362         }
1363 forus:
1364         ip2dbg(("ire_recv_multicast_v6: multicast for us\n"));
1365 
1366         /*
1367          * After reassembly and IPsec we will need to duplicate the
1368          * multicast packet for all matching zones on the ill.
1369          */
1370         ira->ira_zoneid = ALL_ZONES;
1371 
1372         /* Reassemble on the ill on which the packet arrived */
1373         ip_input_local_v6(ire, mp, ip6h, ira);
1374 done:
1375         if (ill != ire->ire_ill) {
1376                 ill_refrele(ill);
1377                 ira->ira_ill = ire->ire_ill;
1378                 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
1379         }
1380 }
1381 
1382 /*
1383  * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
1384  * Drop packets since we don't forward out multirt routes.
1385  */
1386 /* ARGSUSED */
1387 void
1388 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1389 {
1390         ill_t           *ill = ira->ira_ill;
1391 
1392         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1393         ip_drop_input("Not forwarding out MULTIRT", mp, ill);
1394         freemsg(mp);
1395 }
1396 
1397 /*
1398  * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
1399  * has rewritten the packet to have a loopback destination address (We
1400  * filter out packet with a loopback destination from arriving over the wire).
1401  * We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
1402  */
1403 void
1404 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1405 {
1406         ip6_t           *ip6h = (ip6_t *)iph_arg;
1407         ill_t           *ill = ira->ira_ill;
1408         ill_t           *ire_ill = ire->ire_ill;
1409 
1410         ira->ira_zoneid = GLOBAL_ZONEID;
1411 
1412         /* Switch to the lo0 ill for further processing  */
1413         if (ire_ill != ill) {
1414                 /*
1415                  * Update ira_ill to be the ILL on which the IP address
1416                  * is hosted.
1417                  * No need to hold the ill since we have a hold on the ire
1418                  */
1419                 ASSERT(ira->ira_ill == ira->ira_rill);
1420                 ira->ira_ill = ire_ill;
1421 
1422                 ip_input_local_v6(ire, mp, ip6h, ira);
1423 
1424                 /* Restore */
1425                 ASSERT(ira->ira_ill == ire_ill);
1426                 ira->ira_ill = ill;
1427                 return;
1428 
1429         }
1430         ip_input_local_v6(ire, mp, ip6h, ira);
1431 }
1432 
1433 /*
1434  * ire_recvfn for IRE_LOCAL.
1435  */
1436 void
1437 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1438 {
1439         ip6_t           *ip6h = (ip6_t *)iph_arg;
1440         ill_t           *ill = ira->ira_ill;
1441         ill_t           *ire_ill = ire->ire_ill;
1442 
1443         /* Make a note for DAD that this address is in use */
1444         ire->ire_last_used_time = LBOLT_FASTPATH;
1445 
1446         /* Only target the IRE_LOCAL with the right zoneid. */
1447         ira->ira_zoneid = ire->ire_zoneid;
1448 
1449         /*
1450          * If the packet arrived on the wrong ill, we check that
1451          * this is ok.
1452          * If it is, then we ensure that we do the reassembly on
1453          * the ill on which the address is hosted. We keep ira_rill as
1454          * the one on which the packet arrived, so that IP_PKTINFO and
1455          * friends can report this.
1456          */
1457         if (ire_ill != ill) {
1458                 ire_t *new_ire;
1459 
1460                 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill);
1461                 if (new_ire == NULL) {
1462                         /* Drop packet */
1463                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1464                         ip_drop_input("ipIfStatsInForwProhibits", mp, ill);
1465                         freemsg(mp);
1466                         return;
1467                 }
1468                 /*
1469                  * Update ira_ill to be the ILL on which the IP address
1470                  * is hosted. No need to hold the ill since we have a
1471                  * hold on the ire. Note that we do the switch even if
1472                  * new_ire == ire (for IPMP, ire would be the one corresponding
1473                  * to the IPMP ill).
1474                  */
1475                 ASSERT(ira->ira_ill == ira->ira_rill);
1476                 ira->ira_ill = new_ire->ire_ill;
1477 
1478                 /* ira_ruifindex tracks the upper for ira_rill */
1479                 if (IS_UNDER_IPMP(ill))
1480                         ira->ira_ruifindex = ill_get_upper_ifindex(ill);
1481 
1482                 ip_input_local_v6(new_ire, mp, ip6h, ira);
1483 
1484                 /* Restore */
1485                 ASSERT(ira->ira_ill == new_ire->ire_ill);
1486                 ira->ira_ill = ill;
1487                 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1488 
1489                 if (new_ire != ire)
1490                         ire_refrele(new_ire);
1491                 return;
1492         }
1493 
1494         ip_input_local_v6(ire, mp, ip6h, ira);
1495 }
1496 
1497 /*
1498  * Common function for packets arriving for the host. Handles
1499  * checksum verification, reassembly checks, etc.
1500  */
1501 static void
1502 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1503 {
1504         iaflags_t       iraflags = ira->ira_flags;
1505 
1506         /*
1507          * For multicast we need some extra work before
1508          * we call ip_fanout_v6(), since in the case of shared-IP zones
1509          * we need to pretend that a packet arrived for each zoneid.
1510          */
1511         if (iraflags & IRAF_MULTICAST) {
1512                 ip_input_multicast_v6(ire, mp, ip6h, ira);
1513                 return;
1514         }
1515         ip_fanout_v6(mp, ip6h, ira);
1516 }
1517 
1518 /*
1519  * Handle multiple zones which want to receive the same multicast packets
1520  * on this ill by delivering a packet to each of them.
1521  *
1522  * Note that for packets delivered to transports we could instead do this
1523  * as part of the fanout code, but since we need to handle icmp_inbound
1524  * it is simpler to have multicast work the same as IPv4 broadcast.
1525  *
1526  * The ip_fanout matching for multicast matches based on ilm independent of
1527  * zoneid since the zoneid restriction is applied when joining a multicast
1528  * group.
1529  */
1530 /* ARGSUSED */
1531 static void
1532 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1533 {
1534         ill_t           *ill = ira->ira_ill;
1535         iaflags_t       iraflags = ira->ira_flags;
1536         ip_stack_t      *ipst = ill->ill_ipst;
1537         netstack_t      *ns = ipst->ips_netstack;
1538         zoneid_t        zoneid;
1539         mblk_t          *mp1;
1540         ip6_t           *ip6h1;
1541         uint_t          ira_pktlen = ira->ira_pktlen;
1542         uint16_t        ira_ip_hdr_length = ira->ira_ip_hdr_length;
1543 
1544         /* ire_recv_multicast has switched to the upper ill for IPMP */
1545         ASSERT(!IS_UNDER_IPMP(ill));
1546 
1547         /*
1548          * If we don't have more than one shared-IP zone, or if
1549          * there are no members in anything but the global zone,
1550          * then just set the zoneid and proceed.
1551          */
1552         if (ns->netstack_numzones == 1 ||
1553             !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst,
1554             GLOBAL_ZONEID)) {
1555                 ira->ira_zoneid = GLOBAL_ZONEID;
1556 
1557                 /* If sender didn't want this zone to receive it, drop */
1558                 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1559                     ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1560                         ip_drop_input("Multicast but wrong zoneid", mp, ill);
1561                         freemsg(mp);
1562                         return;
1563                 }
1564                 ip_fanout_v6(mp, ip6h, ira);
1565                 return;
1566         }
1567 
1568         /*
1569          * Here we loop over all zoneids that have members in the group
1570          * and deliver a packet to ip_fanout for each zoneid.
1571          *
1572          * First find any members in the lowest numeric zoneid by looking for
1573          * first zoneid larger than -1 (ALL_ZONES).
1574          * We terminate the loop when we receive -1 (ALL_ZONES).
1575          */
1576         zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES);
1577         for (; zoneid != ALL_ZONES;
1578             zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) {
1579                 /*
1580                  * Avoid an extra copymsg/freemsg by skipping global zone here
1581                  * and doing that at the end.
1582                  */
1583                 if (zoneid == GLOBAL_ZONEID)
1584                         continue;
1585 
1586                 ira->ira_zoneid = zoneid;
1587 
1588                 /* If sender didn't want this zone to receive it, skip */
1589                 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1590                     ira->ira_no_loop_zoneid == ira->ira_zoneid)
1591                         continue;
1592 
1593                 mp1 = copymsg(mp);
1594                 if (mp1 == NULL) {
1595                         /* Failed to deliver to one zone */
1596                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1597                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
1598                         continue;
1599                 }
1600                 ip6h1 = (ip6_t *)mp1->b_rptr;
1601                 ip_fanout_v6(mp1, ip6h1, ira);
1602                 /*
1603                  * IPsec might have modified ira_pktlen and ira_ip_hdr_length
1604                  * so we restore them for a potential next iteration
1605                  */
1606                 ira->ira_pktlen = ira_pktlen;
1607                 ira->ira_ip_hdr_length = ira_ip_hdr_length;
1608         }
1609 
1610         /* Do the main ire */
1611         ira->ira_zoneid = GLOBAL_ZONEID;
1612         /* If sender didn't want this zone to receive it, drop */
1613         if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1614             ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1615                 ip_drop_input("Multicast but wrong zoneid", mp, ill);
1616                 freemsg(mp);
1617         } else {
1618                 ip_fanout_v6(mp, ip6h, ira);
1619         }
1620 }
1621 
1622 
1623 /*
1624  * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions
1625  * is in use. Updates ira_zoneid and ira_flags as a result.
1626  */
1627 static void
1628 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length,
1629     ip_recv_attr_t *ira)
1630 {
1631         uint16_t        *up;
1632         uint16_t        lport;
1633         zoneid_t        zoneid;
1634 
1635         ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED);
1636 
1637         /*
1638          * If the packet is unlabeled we might allow read-down
1639          * for MAC_EXEMPT. Below we clear this if it is a multi-level
1640          * port (MLP).
1641          * Note that ira_tsl can be NULL here.
1642          */
1643         if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED)
1644                 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE;
1645 
1646         if (ira->ira_zoneid != ALL_ZONES)
1647                 return;
1648 
1649         ira->ira_flags |= IRAF_TX_SHARED_ADDR;
1650 
1651         up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
1652         switch (protocol) {
1653         case IPPROTO_TCP:
1654         case IPPROTO_SCTP:
1655         case IPPROTO_UDP:
1656                 /* Caller ensures this */
1657                 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr);
1658 
1659                 /*
1660                  * Only these transports support MLP.
1661                  * We know their destination port numbers is in
1662                  * the same place in the header.
1663                  */
1664                 lport = up[1];
1665 
1666                 /*
1667                  * No need to handle exclusive-stack zones
1668                  * since ALL_ZONES only applies to the shared IP instance.
1669                  */
1670                 zoneid = tsol_mlp_findzone(protocol, lport);
1671                 /*
1672                  * If no shared MLP is found, tsol_mlp_findzone returns
1673                  * ALL_ZONES.  In that case, we assume it's SLP, and
1674                  * search for the zone based on the packet label.
1675                  *
1676                  * If there is such a zone, we prefer to find a
1677                  * connection in it.  Otherwise, we look for a
1678                  * MAC-exempt connection in any zone whose label
1679                  * dominates the default label on the packet.
1680                  */
1681                 if (zoneid == ALL_ZONES)
1682                         zoneid = tsol_attr_to_zoneid(ira);
1683                 else
1684                         ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE;
1685                 break;
1686         default:
1687                 /* Handle shared address for other protocols */
1688                 zoneid = tsol_attr_to_zoneid(ira);
1689                 break;
1690         }
1691         ira->ira_zoneid = zoneid;
1692 }
1693 
1694 /*
1695  * Increment checksum failure statistics
1696  */
1697 static void
1698 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill)
1699 {
1700         ip_stack_t      *ipst = ill->ill_ipst;
1701 
1702         switch (protocol) {
1703         case IPPROTO_TCP:
1704                 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
1705 
1706                 if (hck_flags & HCK_FULLCKSUM)
1707                         IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err);
1708                 else if (hck_flags & HCK_PARTIALCKSUM)
1709                         IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err);
1710                 else
1711                         IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err);
1712                 break;
1713         case IPPROTO_UDP:
1714                 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1715                 if (hck_flags & HCK_FULLCKSUM)
1716                         IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err);
1717                 else if (hck_flags & HCK_PARTIALCKSUM)
1718                         IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err);
1719                 else
1720                         IP6_STAT(ipst, ip6_udp_in_sw_cksum_err);
1721                 break;
1722         case IPPROTO_ICMPV6:
1723                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
1724                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1725                 break;
1726         default:
1727                 ASSERT(0);
1728                 break;
1729         }
1730 }
1731 
1732 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */
1733 uint32_t
1734 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira)
1735 {
1736         uint_t          ulp_len;
1737         uint32_t        cksum;
1738         uint8_t         protocol = ira->ira_protocol;
1739         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1740 
1741 #define iphs    ((uint16_t *)ip6h)
1742 
1743         switch (protocol) {
1744         case IPPROTO_TCP:
1745                 ulp_len = ira->ira_pktlen - ip_hdr_length;
1746 
1747                 /* Protocol and length */
1748                 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP;
1749                 /* IP addresses */
1750                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1751                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1752                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1753                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1754                 break;
1755 
1756         case IPPROTO_UDP: {
1757                 udpha_t         *udpha;
1758 
1759                 udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1760 
1761                 /* Protocol and length */
1762                 cksum = udpha->uha_length + IP_UDP_CSUM_COMP;
1763                 /* IP addresses */
1764                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1765                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1766                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1767                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1768                 break;
1769         }
1770         case IPPROTO_ICMPV6:
1771                 ulp_len = ira->ira_pktlen - ip_hdr_length;
1772 
1773                 /* Protocol and length */
1774                 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP;
1775                 /* IP addresses */
1776                 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1777                     iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1778                     iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1779                     iphs[16] + iphs[17] + iphs[18] + iphs[19];
1780                 break;
1781         default:
1782                 cksum = 0;
1783                 break;
1784         }
1785 #undef  iphs
1786         return (cksum);
1787 }
1788 
1789 
1790 /*
1791  * Software verification of the ULP checksums.
1792  * Returns B_TRUE if ok.
1793  * Increments statistics of failed.
1794  */
1795 static boolean_t
1796 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1797 {
1798         ip_stack_t      *ipst = ira->ira_ill->ill_ipst;
1799         uint32_t        cksum;
1800         uint8_t         protocol = ira->ira_protocol;
1801         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1802 
1803         IP6_STAT(ipst, ip6_in_sw_cksum);
1804 
1805         ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
1806             protocol == IPPROTO_ICMPV6);
1807 
1808         cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1809         cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1810         if (cksum == 0)
1811                 return (B_TRUE);
1812 
1813         ip_input_cksum_err_v6(protocol, 0, ira->ira_ill);
1814         return (B_FALSE);
1815 }
1816 
1817 /*
1818  * Verify the ULP checksums.
1819  * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
1820  * algorithm.
1821  * Increments statistics if failed.
1822  */
1823 static boolean_t
1824 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h,
1825     ip_recv_attr_t *ira)
1826 {
1827         ill_t           *ill = ira->ira_rill;
1828         uint16_t        hck_flags;
1829         uint32_t        cksum;
1830         mblk_t          *mp1;
1831         uint_t          len;
1832         uint8_t         protocol = ira->ira_protocol;
1833         uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1834 
1835 
1836         switch (protocol) {
1837         case IPPROTO_TCP:
1838         case IPPROTO_ICMPV6:
1839                 break;
1840 
1841         case IPPROTO_UDP: {
1842                 udpha_t         *udpha;
1843 
1844                 udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1845                 /*
1846                  *  Before going through the regular checksum
1847                  *  calculation, make sure the received checksum
1848                  *  is non-zero. RFC 2460 says, a 0x0000 checksum
1849                  *  in a UDP packet (within IPv6 packet) is invalid
1850                  *  and should be replaced by 0xffff. This makes
1851                  *  sense as regular checksum calculation will
1852                  *  pass for both the cases i.e. 0x0000 and 0xffff.
1853                  *  Removing one of the case makes error detection
1854                  *  stronger.
1855                  */
1856                 if (udpha->uha_checksum == 0) {
1857                         /* 0x0000 checksum is invalid */
1858                         BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1859                         return (B_FALSE);
1860                 }
1861                 break;
1862         }
1863         case IPPROTO_SCTP: {
1864                 sctp_hdr_t      *sctph;
1865                 uint32_t        pktsum;
1866 
1867                 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length);
1868 #ifdef  DEBUG
1869                 if (skip_sctp_cksum)
1870                         return (B_TRUE);
1871 #endif
1872                 pktsum = sctph->sh_chksum;
1873                 sctph->sh_chksum = 0;
1874                 cksum = sctp_cksum(mp, ip_hdr_length);
1875                 sctph->sh_chksum = pktsum;
1876                 if (cksum == pktsum)
1877                         return (B_TRUE);
1878 
1879                 /*
1880                  * Defer until later whether a bad checksum is ok
1881                  * in order to allow RAW sockets to use Adler checksum
1882                  * with SCTP.
1883                  */
1884                 ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
1885                 return (B_TRUE);
1886         }
1887 
1888         default:
1889                 /* No ULP checksum to verify. */
1890                 return (B_TRUE);
1891         }
1892 
1893         /*
1894          * Revert to software checksum calculation if the interface
1895          * isn't capable of checksum offload.
1896          * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
1897          * Note: IRAF_NO_HW_CKSUM is not currently used.
1898          */
1899         ASSERT(!IS_IPMP(ill));
1900         if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1901             !dohwcksum) {
1902                 return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1903         }
1904 
1905         /*
1906          * We apply this for all ULP protocols. Does the HW know to
1907          * not set the flags for SCTP and other protocols.
1908          */
1909 
1910         hck_flags = DB_CKSUMFLAGS(mp);
1911 
1912         if (hck_flags & HCK_FULLCKSUM_OK) {
1913                 /*
1914                  * Hardware has already verified the checksum.
1915                  */
1916                 return (B_TRUE);
1917         }
1918 
1919         if (hck_flags & HCK_FULLCKSUM) {
1920                 /*
1921                  * Full checksum has been computed by the hardware
1922                  * and has been attached.  If the driver wants us to
1923                  * verify the correctness of the attached value, in
1924                  * order to protect against faulty hardware, compare
1925                  * it against -0 (0xFFFF) to see if it's valid.
1926                  */
1927                 cksum = DB_CKSUM16(mp);
1928                 if (cksum == 0xFFFF)
1929                         return (B_TRUE);
1930                 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1931                 return (B_FALSE);
1932         }
1933 
1934         mp1 = mp->b_cont;
1935         if ((hck_flags & HCK_PARTIALCKSUM) &&
1936             (mp1 == NULL || mp1->b_cont == NULL) &&
1937             ip_hdr_length >= DB_CKSUMSTART(mp) &&
1938             ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
1939                 uint32_t        adj;
1940                 uchar_t         *cksum_start;
1941 
1942                 cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1943 
1944                 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp));
1945 
1946                 /*
1947                  * Partial checksum has been calculated by hardware
1948                  * and attached to the packet; in addition, any
1949                  * prepended extraneous data is even byte aligned,
1950                  * and there are at most two mblks associated with
1951                  * the packet.  If any such data exists, we adjust
1952                  * the checksum; also take care any postpended data.
1953                  */
1954                 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
1955                 /*
1956                  * One's complement subtract extraneous checksum
1957                  */
1958                 cksum += DB_CKSUM16(mp);
1959                 if (adj >= cksum)
1960                         cksum = ~(adj - cksum) & 0xFFFF;
1961                 else
1962                         cksum -= adj;
1963                 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1964                 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1965                 if (!(~cksum & 0xFFFF))
1966                         return (B_TRUE);
1967 
1968                 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1969                 return (B_FALSE);
1970         }
1971         return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1972 }
1973 
1974 
1975 /*
1976  * Handle fanout of received packets.
1977  * Unicast packets that are looped back (from ire_send_local_v6) and packets
1978  * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
1979  *
1980  * IPQoS Notes
1981  * Before sending it to the client, invoke IPPF processing. Policy processing
1982  * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
1983  */
1984 void
1985 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1986 {
1987         ill_t           *ill = ira->ira_ill;
1988         iaflags_t       iraflags = ira->ira_flags;
1989         ip_stack_t      *ipst = ill->ill_ipst;
1990         uint8_t         protocol;
1991         conn_t          *connp;
1992 #define rptr    ((uchar_t *)ip6h)
1993         uint_t          ip_hdr_length;
1994         uint_t          min_ulp_header_length;
1995         int             offset;
1996         ssize_t         len;
1997         netstack_t      *ns = ipst->ips_netstack;
1998         ipsec_stack_t   *ipss = ns->netstack_ipsec;
1999         ill_t           *rill = ira->ira_rill;
2000 
2001         ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
2002 
2003         /*
2004          * We repeat this as we parse over destination options header and
2005          * fragment headers (earlier we've handled any hop-by-hop options
2006          * header.)
2007          * We update ira_protocol and ira_ip_hdr_length as we skip past
2008          * the intermediate headers; they already point past any
2009          * hop-by-hop header.
2010          */
2011 repeat:
2012         protocol = ira->ira_protocol;
2013         ip_hdr_length = ira->ira_ip_hdr_length;
2014 
2015         /*
2016          * Time for IPP once we've done reassembly and IPsec.
2017          * We skip this for loopback packets since we don't do IPQoS
2018          * on loopback.
2019          */
2020         if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
2021             !(iraflags & IRAF_LOOPBACK) &&
2022             (protocol != IPPROTO_ESP || protocol != IPPROTO_AH ||
2023             protocol != IPPROTO_DSTOPTS || protocol != IPPROTO_ROUTING ||
2024             protocol != IPPROTO_FRAGMENT)) {
2025                 /*
2026                  * Use the interface on which the packet arrived - not where
2027                  * the IP address is hosted.
2028                  */
2029                 /* ip_process translates an IS_UNDER_IPMP */
2030                 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
2031                 if (mp == NULL) {
2032                         /* ip_drop_packet and MIB done */
2033                         return;
2034                 }
2035         }
2036 
2037         /* Determine the minimum required size of the upper-layer header */
2038         /* Need to do this for at least the set of ULPs that TX handles. */
2039         switch (protocol) {
2040         case IPPROTO_TCP:
2041                 min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
2042                 break;
2043         case IPPROTO_SCTP:
2044                 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
2045                 break;
2046         case IPPROTO_UDP:
2047                 min_ulp_header_length = UDPH_SIZE;
2048                 break;
2049         case IPPROTO_ICMP:
2050         case IPPROTO_ICMPV6:
2051                 min_ulp_header_length = ICMPH_SIZE;
2052                 break;
2053         case IPPROTO_FRAGMENT:
2054         case IPPROTO_DSTOPTS:
2055         case IPPROTO_ROUTING:
2056                 min_ulp_header_length = MIN_EHDR_LEN;
2057                 break;
2058         default:
2059                 min_ulp_header_length = 0;
2060                 break;
2061         }
2062         /* Make sure we have the min ULP header length */
2063         len = mp->b_wptr - rptr;
2064         if (len < ip_hdr_length + min_ulp_header_length) {
2065                 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length)
2066                         goto pkt_too_short;
2067 
2068                 IP6_STAT(ipst, ip6_recv_pullup);
2069                 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
2070                     ira);
2071                 if (ip6h == NULL)
2072                         goto discard;
2073                 len = mp->b_wptr - rptr;
2074         }
2075 
2076         /*
2077          * If trusted extensions then determine the zoneid and TX specific
2078          * ira_flags.
2079          */
2080         if (iraflags & IRAF_SYSTEM_LABELED) {
2081                 /* This can update ira->ira_flags and ira->ira_zoneid */
2082                 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira);
2083                 iraflags = ira->ira_flags;
2084         }
2085 
2086 
2087         /* Verify ULP checksum. Handles TCP, UDP, and SCTP */
2088         if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
2089                 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) {
2090                         /* Bad checksum. Stats are already incremented */
2091                         ip_drop_input("Bad ULP checksum", mp, ill);
2092                         freemsg(mp);
2093                         return;
2094                 }
2095                 /* IRAF_SCTP_CSUM_ERR could have been set */
2096                 iraflags = ira->ira_flags;
2097         }
2098         switch (protocol) {
2099         case IPPROTO_TCP:
2100                 /* For TCP, discard multicast packets. */
2101                 if (iraflags & IRAF_MULTIBROADCAST)
2102                         goto discard;
2103 
2104                 /* First mblk contains IP+TCP headers per above check */
2105                 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);
2106 
2107                 /* TCP options present? */
2108                 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4;
2109                 if (offset != 5) {
2110                         if (offset < 5)
2111                                 goto discard;
2112 
2113                         /*
2114                          * There must be TCP options.
2115                          * Make sure we can grab them.
2116                          */
2117                         offset <<= 2;
2118                         offset += ip_hdr_length;
2119                         if (len < offset) {
2120                                 if (ira->ira_pktlen < offset)
2121                                         goto pkt_too_short;
2122 
2123                                 IP6_STAT(ipst, ip6_recv_pullup);
2124                                 ip6h = ip_pullup(mp, offset, ira);
2125                                 if (ip6h == NULL)
2126                                         goto discard;
2127                                 len = mp->b_wptr - rptr;
2128                         }
2129                 }
2130 
2131                 /*
2132                  * Pass up a squeue hint to tcp.
2133                  * If ira_sqp is already set (this is loopback) we leave it
2134                  * alone.
2135                  */
2136                 if (ira->ira_sqp == NULL) {
2137                         ira->ira_sqp = ip_squeue_get(ira->ira_ring);
2138                 }
2139 
2140                 /* Look for AF_INET or AF_INET6 that matches */
2141                 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length,
2142                     ira, ipst);
2143                 if (connp == NULL) {
2144                         /* Send the TH_RST */
2145                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2146                         tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2147                         return;
2148                 }
2149                 if (connp->conn_incoming_ifindex != 0 &&
2150                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2151                         CONN_DEC_REF(connp);
2152 
2153                         /* Send the TH_RST */
2154                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2155                         tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2156                         return;
2157                 }
2158                 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2159                     (iraflags & IRAF_IPSEC_SECURE)) {
2160                         mp = ipsec_check_inbound_policy(mp, connp,
2161                             NULL, ip6h, ira);
2162                         if (mp == NULL) {
2163                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2164                                 /* Note that mp is NULL */
2165                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2166                                 CONN_DEC_REF(connp);
2167                                 return;
2168                         }
2169                 }
2170                 /* Found a client; up it goes */
2171                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2172                 ira->ira_ill = ira->ira_rill = NULL;
2173                 if (!IPCL_IS_TCP(connp)) {
2174                         /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
2175                         (connp->conn_recv)(connp, mp, NULL, ira);
2176                         CONN_DEC_REF(connp);
2177                         ira->ira_ill = ill;
2178                         ira->ira_rill = rill;
2179                         return;
2180                 }
2181 
2182                 /*
2183                  * We do different processing whether called from
2184                  * ip_accept_tcp and we match the target, don't match
2185                  * the target, and when we are called by ip_input.
2186                  */
2187                 if (iraflags & IRAF_TARGET_SQP) {
2188                         if (ira->ira_target_sqp == connp->conn_sqp) {
2189                                 mblk_t  *attrmp;
2190 
2191                                 attrmp = ip_recv_attr_to_mblk(ira);
2192                                 if (attrmp == NULL) {
2193                                         BUMP_MIB(ill->ill_ip_mib,
2194                                             ipIfStatsInDiscards);
2195                                         ip_drop_input("ipIfStatsInDiscards",
2196                                             mp, ill);
2197                                         freemsg(mp);
2198                                         CONN_DEC_REF(connp);
2199                                 } else {
2200                                         SET_SQUEUE(attrmp, connp->conn_recv,
2201                                             connp);
2202                                         attrmp->b_cont = mp;
2203                                         ASSERT(ira->ira_target_sqp_mp == NULL);
2204                                         ira->ira_target_sqp_mp = attrmp;
2205                                         /*
2206                                          * Conn ref release when drained from
2207                                          * the squeue.
2208                                          */
2209                                 }
2210                         } else {
2211                                 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2212                                     connp->conn_recv, connp, ira, SQ_FILL,
2213                                     SQTAG_IP6_TCP_INPUT);
2214                         }
2215                 } else {
2216                         SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
2217                             connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
2218                 }
2219                 ira->ira_ill = ill;
2220                 ira->ira_rill = rill;
2221                 return;
2222 
2223         case IPPROTO_SCTP: {
2224                 sctp_hdr_t      *sctph;
2225                 uint32_t        ports;  /* Source and destination ports */
2226                 sctp_stack_t    *sctps = ipst->ips_netstack->netstack_sctp;
2227 
2228                 /* For SCTP, discard multicast packets. */
2229                 if (iraflags & IRAF_MULTIBROADCAST)
2230                         goto discard;
2231 
2232                 /*
2233                  * Since there is no SCTP h/w cksum support yet, just
2234                  * clear the flag.
2235                  */
2236                 DB_CKSUMFLAGS(mp) = 0;
2237 
2238                 /* Length ensured above */
2239                 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
2240                 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);
2241 
2242                 /* get the ports */
2243                 ports = *(uint32_t *)&sctph->sh_sport;
2244 
2245                 if (iraflags & IRAF_SCTP_CSUM_ERR) {
2246                         /*
2247                          * No potential sctp checksum errors go to the Sun
2248                          * sctp stack however they might be Adler-32 summed
2249                          * packets a userland stack bound to a raw IP socket
2250                          * could reasonably use. Note though that Adler-32 is
2251                          * a long deprecated algorithm and customer sctp
2252                          * networks should eventually migrate to CRC-32 at
2253                          * which time this facility should be removed.
2254                          */
2255                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2256                         return;
2257                 }
2258                 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports,
2259                     ira, mp, sctps, sctph);
2260                 if (connp == NULL) {
2261                         /* Check for raw socket or OOTB handling */
2262                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2263                         return;
2264                 }
2265                 if (connp->conn_incoming_ifindex != 0 &&
2266                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2267                         CONN_DEC_REF(connp);
2268 
2269                         /* Check for raw socket or OOTB handling */
2270                         ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2271                         return;
2272                 }
2273 
2274                 /* Found a client; up it goes */
2275                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2276                 sctp_input(connp, NULL, ip6h, mp, ira);
2277                 /* sctp_input does a rele of the sctp_t */
2278                 return;
2279         }
2280 
2281         case IPPROTO_UDP:
2282                 /* First mblk contains IP+UDP headers as checked above */
2283                 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);
2284 
2285                 if (iraflags & IRAF_MULTIBROADCAST) {
2286                         uint16_t *up;   /* Pointer to ports in ULP header */
2287 
2288                         up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
2289 
2290                         ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira);
2291                         return;
2292                 }
2293 
2294                 /* Look for AF_INET or AF_INET6 that matches */
2295                 connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length,
2296                     ira, ipst);
2297                 if (connp == NULL) {
2298         no_udp_match:
2299                         if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].
2300                             connf_head != NULL) {
2301                                 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2302                                 ip_fanout_proto_v6(mp, ip6h, ira);
2303                         } else {
2304                                 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2305                                     ICMP6_DST_UNREACH_NOPORT, ira);
2306                         }
2307                         return;
2308 
2309                 }
2310                 if (connp->conn_incoming_ifindex != 0 &&
2311                     connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2312                         CONN_DEC_REF(connp);
2313                         goto no_udp_match;
2314                 }
2315                 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
2316                     !canputnext(connp->conn_rq)) {
2317                         CONN_DEC_REF(connp);
2318                         BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
2319                         ip_drop_input("udpIfStatsInOverflows", mp, ill);
2320                         freemsg(mp);
2321                         return;
2322                 }
2323                 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2324                     (iraflags & IRAF_IPSEC_SECURE)) {
2325                         mp = ipsec_check_inbound_policy(mp, connp,
2326                             NULL, ip6h, ira);
2327                         if (mp == NULL) {
2328                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2329                                 /* Note that mp is NULL */
2330                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2331                                 CONN_DEC_REF(connp);
2332                                 return;
2333                         }
2334                 }
2335 
2336                 /* Found a client; up it goes */
2337                 IP6_STAT(ipst, ip6_udp_fannorm);
2338                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2339                 ira->ira_ill = ira->ira_rill = NULL;
2340                 (connp->conn_recv)(connp, mp, NULL, ira);
2341                 CONN_DEC_REF(connp);
2342                 ira->ira_ill = ill;
2343                 ira->ira_rill = rill;
2344                 return;
2345         default:
2346                 break;
2347         }
2348 
2349         /*
2350          * Clear hardware checksumming flag as it is currently only
2351          * used by TCP and UDP.
2352          */
2353         DB_CKSUMFLAGS(mp) = 0;
2354 
2355         switch (protocol) {
2356         case IPPROTO_ICMPV6:
2357                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
2358 
2359                 /* Check variable for testing applications */
2360                 if (ipst->ips_ipv6_drop_inbound_icmpv6) {
2361                         ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill);
2362                         freemsg(mp);
2363                         return;
2364                 }
2365                 /*
2366                  * We need to accommodate icmp messages coming in clear
2367                  * until we get everything secure from the wire. If
2368                  * icmp_accept_clear_messages is zero we check with
2369                  * the global policy and act accordingly. If it is
2370                  * non-zero, we accept the message without any checks.
2371                  * But *this does not mean* that this will be delivered
2372                  * to RAW socket clients. By accepting we might send
2373                  * replies back, change our MTU value etc.,
2374                  * but delivery to the ULP/clients depends on their
2375                  * policy dispositions.
2376                  */
2377                 if (ipst->ips_icmp_accept_clear_messages == 0) {
2378                         mp = ipsec_check_global_policy(mp, NULL,
2379                             NULL, ip6h, ira, ns);
2380                         if (mp == NULL)
2381                                 return;
2382                 }
2383 
2384                 /*
2385                  * On a labeled system, we have to check whether the zone
2386                  * itself is permitted to receive raw traffic.
2387                  */
2388                 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2389                         if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2390                                 BUMP_MIB(ill->ill_icmp6_mib,
2391                                     ipv6IfIcmpInErrors);
2392                                 ip_drop_input("tsol_can_accept_raw", mp, ill);
2393                                 freemsg(mp);
2394                                 return;
2395                         }
2396                 }
2397 
2398                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2399                 mp = icmp_inbound_v6(mp, ira);
2400                 if (mp == NULL) {
2401                         /* No need to pass to RAW sockets */
2402                         return;
2403                 }
2404                 break;
2405 
2406         case IPPROTO_DSTOPTS: {
2407                 ip6_dest_t      *desthdr;
2408                 uint_t          ehdrlen;
2409                 uint8_t         *optptr;
2410 
2411                 /* We already check for MIN_EHDR_LEN above */
2412 
2413                 /* Check if AH is present and needs to be processed. */
2414                 mp = ipsec_early_ah_v6(mp, ira);
2415                 if (mp == NULL)
2416                         return;
2417 
2418                 /*
2419                  * Reinitialize pointers, as ipsec_early_ah_v6() does
2420                  * complete pullups.  We don't have to do more pullups
2421                  * as a result.
2422                  */
2423                 ip6h = (ip6_t *)mp->b_rptr;
2424 
2425                 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2426                         goto pkt_too_short;
2427 
2428                 if (mp->b_cont != NULL &&
2429                     rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2430                         ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2431                         if (ip6h == NULL)
2432                                 goto discard;
2433                 }
2434                 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2435                 ehdrlen = 8 * (desthdr->ip6d_len + 1);
2436                 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2437                         goto pkt_too_short;
2438                 if (mp->b_cont != NULL &&
2439                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2440                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2441                         if (ip6h == NULL)
2442                                 goto discard;
2443 
2444                         desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2445                 }
2446                 optptr = (uint8_t *)&desthdr[1];
2447 
2448                 /*
2449                  * Update ira_ip_hdr_length to skip the destination header
2450                  * when we repeat.
2451                  */
2452                 ira->ira_ip_hdr_length += ehdrlen;
2453 
2454                 ira->ira_protocol = desthdr->ip6d_nxt;
2455 
2456                 /*
2457                  * Note: XXX This code does not seem to make
2458                  * distinction between Destination Options Header
2459                  * being before/after Routing Header which can
2460                  * happen if we are at the end of source route.
2461                  * This may become significant in future.
2462                  * (No real significant Destination Options are
2463                  * defined/implemented yet ).
2464                  */
2465                 switch (ip_process_options_v6(mp, ip6h, optptr,
2466                     ehdrlen - 2, IPPROTO_DSTOPTS, ira)) {
2467                 case -1:
2468                         /*
2469                          * Packet has been consumed and any needed
2470                          * ICMP errors sent.
2471                          */
2472                         return;
2473                 case 0:
2474                         /* No action needed  continue */
2475                         break;
2476                 case 1:
2477                         /*
2478                          * Unexpected return value
2479                          * (Router alert is a Hop-by-Hop option)
2480                          */
2481 #ifdef DEBUG
2482                         panic("ip_fanout_v6: router "
2483                             "alert hbh opt indication in dest opt");
2484                         /*NOTREACHED*/
2485 #else
2486                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2487                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
2488                         freemsg(mp);
2489                         return;
2490 #endif
2491                 }
2492                 goto repeat;
2493         }
2494         case IPPROTO_FRAGMENT: {
2495                 ip6_frag_t *fraghdr;
2496 
2497                 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t))
2498                         goto pkt_too_short;
2499 
2500                 if (mp->b_cont != NULL &&
2501                     rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) {
2502                         ip6h = ip_pullup(mp,
2503                             ip_hdr_length + sizeof (ip6_frag_t), ira);
2504                         if (ip6h == NULL)
2505                                 goto discard;
2506                 }
2507 
2508                 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length);
2509                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds);
2510 
2511                 /*
2512                  * Invoke the CGTP (multirouting) filtering module to
2513                  * process the incoming packet. Packets identified as
2514                  * duplicates must be discarded. Filtering is active
2515                  * only if the ip_cgtp_filter ndd variable is
2516                  * non-zero.
2517                  */
2518                 if (ipst->ips_ip_cgtp_filter &&
2519                     ipst->ips_ip_cgtp_filter_ops != NULL) {
2520                         int cgtp_flt_pkt;
2521                         netstackid_t stackid;
2522 
2523                         stackid = ipst->ips_netstack->netstack_stackid;
2524 
2525                         /*
2526                          * CGTP and IPMP are mutually exclusive so
2527                          * phyint_ifindex is fine here.
2528                          */
2529                         cgtp_flt_pkt =
2530                             ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6(
2531                             stackid, ill->ill_phyint->phyint_ifindex,
2532                             ip6h, fraghdr);
2533                         if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
2534                                 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
2535                                 freemsg(mp);
2536                                 return;
2537                         }
2538                 }
2539 
2540                 /*
2541                  * Update ip_hdr_length to skip the frag header
2542                  * ip_input_fragment_v6 will determine the extension header
2543                  * prior to the fragment header and update its nexthdr value,
2544                  * and also set ira_protocol to the nexthdr that follows the
2545                  * completed fragment.
2546                  */
2547                 ip_hdr_length += sizeof (ip6_frag_t);
2548 
2549                 /*
2550                  * Make sure we have ira_l2src before we lose the original
2551                  * mblk
2552                  */
2553                 if (!(ira->ira_flags & IRAF_L2SRC_SET))
2554                         ip_setl2src(mp, ira, ira->ira_rill);
2555 
2556                 mp = ip_input_fragment_v6(mp, ip6h, fraghdr,
2557                     ira->ira_pktlen - ip_hdr_length, ira);
2558                 if (mp == NULL) {
2559                         /* Reassembly is still pending */
2560                         return;
2561                 }
2562                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs);
2563 
2564                 /*
2565                  * The mblk chain has the frag header removed and
2566                  * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the
2567                  * IP header has been updated to reflect the result.
2568                  */
2569                 ip6h = (ip6_t *)mp->b_rptr;
2570                 ip_hdr_length = ira->ira_ip_hdr_length;
2571                 goto repeat;
2572         }
2573         case IPPROTO_HOPOPTS:
2574                 /*
2575                  * Illegal header sequence.
2576                  * (Hop-by-hop headers are processed above
2577                  *  and required to immediately follow IPv6 header)
2578                  */
2579                 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2580                 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2581                 return;
2582 
2583         case IPPROTO_ROUTING: {
2584                 uint_t ehdrlen;
2585                 ip6_rthdr_t *rthdr;
2586 
2587                 /* Check if AH is present and needs to be processed. */
2588                 mp = ipsec_early_ah_v6(mp, ira);
2589                 if (mp == NULL)
2590                         return;
2591 
2592                 /*
2593                  * Reinitialize pointers, as ipsec_early_ah_v6() does
2594                  * complete pullups.  We don't have to do more pullups
2595                  * as a result.
2596                  */
2597                 ip6h = (ip6_t *)mp->b_rptr;
2598 
2599                 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2600                         goto pkt_too_short;
2601 
2602                 if (mp->b_cont != NULL &&
2603                     rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2604                         ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2605                         if (ip6h == NULL)
2606                                 goto discard;
2607                 }
2608                 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2609                 protocol = ira->ira_protocol = rthdr->ip6r_nxt;
2610                 ehdrlen = 8 * (rthdr->ip6r_len + 1);
2611                 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2612                         goto pkt_too_short;
2613                 if (mp->b_cont != NULL &&
2614                     rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2615                         ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2616                         if (ip6h == NULL)
2617                                 goto discard;
2618                         rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2619                 }
2620                 if (rthdr->ip6r_segleft != 0) {
2621                         /* Not end of source route */
2622                         if (ira->ira_flags &
2623                             (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
2624                                 BUMP_MIB(ill->ill_ip_mib,
2625                                     ipIfStatsForwProhibits);
2626                                 ip_drop_input("ipIfStatsInForwProhibits",
2627                                     mp, ill);
2628                                 freemsg(mp);
2629                                 return;
2630                         }
2631                         ip_process_rthdr(mp, ip6h, rthdr, ira);
2632                         return;
2633                 }
2634                 ira->ira_ip_hdr_length += ehdrlen;
2635                 goto repeat;
2636         }
2637 
2638         case IPPROTO_AH:
2639         case IPPROTO_ESP: {
2640                 /*
2641                  * Fast path for AH/ESP.
2642                  */
2643                 netstack_t *ns = ipst->ips_netstack;
2644                 ipsec_stack_t *ipss = ns->netstack_ipsec;
2645 
2646                 IP_STAT(ipst, ipsec_proto_ahesp);
2647 
2648                 if (!ipsec_loaded(ipss)) {
2649                         ip_proto_not_sup(mp, ira);
2650                         return;
2651                 }
2652 
2653                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2654                 /* select inbound SA and have IPsec process the pkt */
2655                 if (protocol == IPPROTO_ESP) {
2656                         esph_t *esph;
2657 
2658                         mp = ipsec_inbound_esp_sa(mp, ira, &esph);
2659                         if (mp == NULL)
2660                                 return;
2661 
2662                         ASSERT(esph != NULL);
2663                         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2664                         ASSERT(ira->ira_ipsec_esp_sa != NULL);
2665                         ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);
2666 
2667                         mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
2668                             ira);
2669                 } else {
2670                         ah_t *ah;
2671 
2672                         mp = ipsec_inbound_ah_sa(mp, ira, &ah);
2673                         if (mp == NULL)
2674                                 return;
2675 
2676                         ASSERT(ah != NULL);
2677                         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2678                         ASSERT(ira->ira_ipsec_ah_sa != NULL);
2679                         ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
2680                         mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
2681                             ira);
2682                 }
2683 
2684                 if (mp == NULL) {
2685                         /*
2686                          * Either it failed or is pending. In the former case
2687                          * ipIfStatsInDiscards was increased.
2688                          */
2689                         return;
2690                 }
2691                 /* we're done with IPsec processing, send it up */
2692                 ip_input_post_ipsec(mp, ira);
2693                 return;
2694         }
2695         case IPPROTO_NONE:
2696                 /* All processing is done. Count as "delivered". */
2697                 freemsg(mp);
2698                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2699                 return;
2700 
2701         case IPPROTO_ENCAP:
2702         case IPPROTO_IPV6:
2703                 /* iptun will verify trusted label */
2704                 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length,
2705                     ira, ipst);
2706                 if (connp != NULL) {
2707                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2708                         ira->ira_ill = ira->ira_rill = NULL;
2709                         connp->conn_recv(connp, mp, NULL, ira);
2710                         CONN_DEC_REF(connp);
2711                         ira->ira_ill = ill;
2712                         ira->ira_rill = rill;
2713                         return;
2714                 }
2715                 /* FALLTHRU */
2716         default:
2717                 /*
2718                  * On a labeled system, we have to check whether the zone
2719                  * itself is permitted to receive raw traffic.
2720                  */
2721                 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2722                         if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2723                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2724                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2725                                 freemsg(mp);
2726                                 return;
2727                         }
2728                 }
2729                 break;
2730         }
2731 
2732         /*
2733          * The above input functions may have returned the pulled up message.
2734          * So ip6h need to be reinitialized.
2735          */
2736         ip6h = (ip6_t *)mp->b_rptr;
2737         ira->ira_protocol = protocol;
2738         if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) {
2739                 /* No user-level listener for these packets */
2740                 ip_proto_not_sup(mp, ira);
2741                 return;
2742         }
2743 
2744         /*
2745          * Handle fanout to raw sockets.  There
2746          * can be more than one stream bound to a particular
2747          * protocol.  When this is the case, each one gets a copy
2748          * of any incoming packets.
2749          */
2750         ASSERT(ira->ira_protocol == protocol);
2751         ip_fanout_proto_v6(mp, ip6h, ira);
2752         return;
2753 
2754 pkt_too_short:
2755         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
2756         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
2757         freemsg(mp);
2758         return;
2759 
2760 discard:
2761         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2762         ip_drop_input("ipIfStatsInDiscards", mp, ill);
2763         freemsg(mp);
2764 #undef rptr
2765 }