Print this page
    
787 Kernel panic in ip_input.c
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ip/ip6_input.c
          +++ new/usr/src/uts/common/inet/ip/ip6_input.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved
       24 + *
       25 + * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  24   26   */
  25   27  /* Copyright (c) 1990 Mentat Inc. */
  26   28  
  27   29  #include <sys/types.h>
  28   30  #include <sys/stream.h>
  29   31  #include <sys/dlpi.h>
  30   32  #include <sys/stropts.h>
  31   33  #include <sys/sysmacros.h>
  32   34  #include <sys/strsubr.h>
  33   35  #include <sys/strlog.h>
  34   36  #include <sys/strsun.h>
  35   37  #include <sys/zone.h>
  36   38  #define _SUN_TPI_VERSION 2
  37   39  #include <sys/tihdr.h>
  38   40  #include <sys/xti_inet.h>
  39   41  #include <sys/ddi.h>
  40   42  #include <sys/sunddi.h>
  41   43  #include <sys/cmn_err.h>
  42   44  #include <sys/debug.h>
  43   45  #include <sys/kobj.h>
  44   46  #include <sys/modctl.h>
  45   47  #include <sys/atomic.h>
  46   48  #include <sys/policy.h>
  47   49  #include <sys/priv.h>
  48   50  
  49   51  #include <sys/systm.h>
  50   52  #include <sys/param.h>
  51   53  #include <sys/kmem.h>
  52   54  #include <sys/sdt.h>
  53   55  #include <sys/socket.h>
  54   56  #include <sys/vtrace.h>
  55   57  #include <sys/isa_defs.h>
  56   58  #include <sys/mac.h>
  57   59  #include <net/if.h>
  58   60  #include <net/if_arp.h>
  59   61  #include <net/route.h>
  60   62  #include <sys/sockio.h>
  61   63  #include <netinet/in.h>
  62   64  #include <net/if_dl.h>
  63   65  
  64   66  #include <inet/common.h>
  65   67  #include <inet/mi.h>
  66   68  #include <inet/mib2.h>
  67   69  #include <inet/nd.h>
  68   70  #include <inet/arp.h>
  69   71  #include <inet/snmpcom.h>
  70   72  #include <inet/kstatcom.h>
  71   73  
  72   74  #include <netinet/igmp_var.h>
  73   75  #include <netinet/ip6.h>
  74   76  #include <netinet/icmp6.h>
  75   77  #include <netinet/sctp.h>
  76   78  
  77   79  #include <inet/ip.h>
  78   80  #include <inet/ip_impl.h>
  79   81  #include <inet/ip6.h>
  80   82  #include <inet/ip6_asp.h>
  81   83  #include <inet/optcom.h>
  82   84  #include <inet/tcp.h>
  83   85  #include <inet/tcp_impl.h>
  84   86  #include <inet/ip_multi.h>
  85   87  #include <inet/ip_if.h>
  86   88  #include <inet/ip_ire.h>
  87   89  #include <inet/ip_ftable.h>
  88   90  #include <inet/ip_rts.h>
  89   91  #include <inet/ip_ndp.h>
  90   92  #include <inet/ip_listutils.h>
  91   93  #include <netinet/igmp.h>
  92   94  #include <netinet/ip_mroute.h>
  93   95  #include <inet/ipp_common.h>
  94   96  
  95   97  #include <net/pfkeyv2.h>
  96   98  #include <inet/sadb.h>
  97   99  #include <inet/ipsec_impl.h>
  98  100  #include <inet/ipdrop.h>
  99  101  #include <inet/ip_netinfo.h>
 100  102  #include <inet/ilb_ip.h>
 101  103  #include <sys/squeue_impl.h>
 102  104  #include <sys/squeue.h>
 103  105  
 104  106  #include <sys/ethernet.h>
 105  107  #include <net/if_types.h>
 106  108  #include <sys/cpuvar.h>
 107  109  
 108  110  #include <ipp/ipp.h>
 109  111  #include <ipp/ipp_impl.h>
 110  112  #include <ipp/ipgpc/ipgpc.h>
 111  113  
 112  114  #include <sys/pattr.h>
 113  115  #include <inet/ipclassifier.h>
 114  116  #include <inet/sctp_ip.h>
 115  117  #include <inet/sctp/sctp_impl.h>
 116  118  #include <inet/udp_impl.h>
 117  119  #include <sys/sunddi.h>
 118  120  
 119  121  #include <sys/tsol/label.h>
 120  122  #include <sys/tsol/tnet.h>
 121  123  
 122  124  #include <sys/clock_impl.h>     /* For LBOLT_FASTPATH{,64} */
 123  125  
 124  126  #ifdef  DEBUG
 125  127  extern boolean_t skip_sctp_cksum;
 126  128  #endif
 127  129  
 128  130  static void     ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);
 129  131  
 130  132  static void     ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
 131  133      ip_recv_attr_t *);
 132  134  
 133  135  #pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)
 134  136  
 135  137  /*
 136  138   * Direct read side procedure capable of dealing with chains. GLDv3 based
 137  139   * drivers call this function directly with mblk chains while STREAMS
  138  140   * read side procedure ip_rput() calls this for a single packet with ip_ring
 139  141   * set to NULL to process one packet at a time.
 140  142   *
 141  143   * The ill will always be valid if this function is called directly from
 142  144   * the driver.
 143  145   *
 144  146   * If ip_input_v6() is called from GLDv3:
 145  147   *
 146  148   *   - This must be a non-VLAN IP stream.
 147  149   *   - 'mp' is either an untagged or a special priority-tagged packet.
 148  150   *   - Any VLAN tag that was in the MAC header has been stripped.
 149  151   *
 150  152   * If the IP header in packet is not 32-bit aligned, every message in the
 151  153   * chain will be aligned before further operations. This is required on SPARC
 152  154   * platform.
 153  155   */
  154  156  void
  155  157  ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
  156  158      struct mac_header_info_s *mhip)
  157  159  {
                    /*
                     * Plain input path: there is no target squeue, so the last three
                     * arguments are NULL and the mblk pointer returned by
                     * ip_input_common_v6() is deliberately discarded.
                     */
  158  160          (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL,
  159  161              NULL);
  160  162  }
 161  163  
 162  164  /*
 163  165   * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
 164  166   * a chain of packets in the poll mode. The packets have gone through the
 165  167   * data link processing but not IP processing. For performance and latency
 166  168   * reasons, the squeue wants to process the chain in line instead of feeding
 167  169   * it back via ip_input path.
 168  170   *
  169  171   * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v6
 170  172   * will pass back any TCP packets matching the target sqp to
 171  173   * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
 172  174   * ip_input_v6 and ip_fanout_v6 as normal.
 173  175   * The TCP packets that match the target squeue are returned to the caller
  174  176   * as a b_next chain after each packet has been prepended with an mblk
 175  177   * from ip_recv_attr_to_mblk.
 176  178   */
  177  179  mblk_t *
  178  180  ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
  179  181      mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
  180  182  {
                    /*
                     * mhip is passed as NULL on this path. ip_input_common_v6()
                     * writes *last and *cnt only when it returns a non-NULL accepted
                     * chain; on a NULL return they are left untouched.
                     */
  181  183          return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp,
  182  184              last, cnt));
  183  185  }
 184  186  
 185  187  /*
 186  188   * Used by ip_input_v6 and ip_accept_tcp_v6
 187  189   * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
 188  190   * only used by ip_input_v6.
 189  191   */
  190  192  mblk_t *
  191  193  ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
  192  194      struct mac_header_info_s *mhip, squeue_t *target_sqp,
  193  195      mblk_t **last, uint_t *cnt)
  194  196  {
  195  197          mblk_t          *mp;
  196  198          ip6_t           *ip6h;
  197  199          ip_recv_attr_t  iras;   /* Receive attributes */
  198  200          rtc_t           rtc;
  199  201          iaflags_t       chain_flags = 0;        /* Fixed for chain */
  200  202          mblk_t          *ahead = NULL;  /* Accepted head */
  201  203          mblk_t          *atail = NULL;  /* Accepted tail */
  202  204          uint_t          acnt = 0;       /* Accepted count */
  203  205  
  204  206          ASSERT(mp_chain != NULL);
  205  207          ASSERT(ill != NULL);
  206  208  
  207  209          /* These ones do not change as we loop over packets */
  208  210          iras.ira_ill = iras.ira_rill = ill;
  209  211          iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
  210  212          iras.ira_rifindex = iras.ira_ruifindex;
  211  213          iras.ira_sqp = NULL;
  212  214          iras.ira_ring = ip_ring;
  213  215          /* For ECMP and outbound transmit ring selection */
  214  216          iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);
  215  217  
  216  218          iras.ira_target_sqp = target_sqp;
  217  219          iras.ira_target_sqp_mp = NULL;
  218  220          if (target_sqp != NULL)
  219  221                  chain_flags |= IRAF_TARGET_SQP;
  220  222  
  221  223          /*
  222  224           * We try to have a mhip pointer when possible, but
  223  225           * it might be NULL in some cases. In those cases we
  224  226           * have to assume unicast.
  225  227           */
  226  228          iras.ira_mhip = mhip;
  227  229          iras.ira_flags = 0;
  228  230          if (mhip != NULL) {
  229  231                  switch (mhip->mhi_dsttype) {
  230  232                  case MAC_ADDRTYPE_MULTICAST :
  231  233                          chain_flags |= IRAF_L2DST_MULTICAST;
  232  234                          break;
  233  235                  case MAC_ADDRTYPE_BROADCAST :
  234  236                          chain_flags |= IRAF_L2DST_BROADCAST;
  235  237                          break;
  236  238                  }
  237  239          }
  238  240  
  239  241          /*
  240  242           * Initialize the one-element route cache.
  241  243           *
  242  244           * We do ire caching from one iteration to
  243  245           * another. In the event the packet chain contains
  244  246           * all packets from the same dst, this caching saves
  245  247           * an ire_route_recursive for each of the succeeding
  246  248           * packets in a packet chain.
  247  249           */
  248  250          rtc.rtc_ire = NULL;
  249  251          rtc.rtc_ip6addr = ipv6_all_zeros;
  250  252  
  251  253          /* Loop over b_next */
  252  254          for (mp = mp_chain; mp != NULL; mp = mp_chain) {
                            /* Detach this packet; mp_chain now heads the remainder. */
  253  255                  mp_chain = mp->b_next;
  254  256                  mp->b_next = NULL;
  255  257  
  256  258                  /*
  257  259                   * if db_ref > 1 then copymsg and free original. Packet
  258  260                   * may be changed and we do not want the other entity
  259  261                   * who has a reference to this message to trip over the
  260  262                   * changes. This is a blind change because trying to
  261  263                   * catch all places that might change the packet is too
  262  264                   * difficult.
  263  265                   *
  264  266                   * This corresponds to the fast path case, where we have
  265  267                   * a chain of M_DATA mblks.  We check the db_ref count
  266  268                   * of only the 1st data block in the mblk chain. There
  267  269                   * doesn't seem to be a reason why a device driver would
  268  270                   * send up data with varying db_ref counts in the mblk
  269  271                   * chain. In any case the Fast path is a private
  270  272                   * interface, and our drivers don't do such a thing.
  271  273                   * Given the above assumption, there is no need to walk
  272  274                   * down the entire mblk chain (which could have a
  273  275                   * potential performance problem)
  274  276                   *
  275  277                   * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
  276  278                   * to here because of exclusive ip stacks and vnics.
  277  279                   * Packets transmitted from exclusive stack over vnic
  278  280                   * can have db_ref > 1 and when it gets looped back to
  279  281                   * another vnic in a different zone, you have ip_input()
  280  282                   * getting dblks with db_ref > 1. So if someone
  281  283                   * complains of TCP performance under this scenario,
  282  284                   * take a serious look here on the impact of copymsg().
  283  285                   */
                            /*
                             * NOTE(review): this failure path and the alignment failure
                             * path below move on to the next packet without clearing
                             * ira_mhip, unlike the later drop paths in this loop.
                             * Confirm that mhip cannot be stale for the next packet.
                             */
  284  286                  if (DB_REF(mp) > 1) {
  285  287                          if ((mp = ip_fix_dbref(mp, &iras)) == NULL)
  286  288                                  continue;
  287  289                  }
  288  290  
  289  291                  /*
  290  292                   * IP header ptr not aligned?
  291  293                   * OR IP header not complete in first mblk
  292  294                   */
  293  295                  ip6h = (ip6_t *)mp->b_rptr;
  294  296                  if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) {
  295  297                          mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras);
  296  298                          if (mp == NULL)
  297  299                                  continue;
  298  300                          ip6h = (ip6_t *)mp->b_rptr;
  299  301                  }
  300  302  
  301  303                  /* Protect against a mix of Ethertypes and IP versions */
  302  304                  if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
  303  305                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
  304  306                          ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
  305  307                          freemsg(mp);
  306  308                          /* mhip might point into 1st packet in the chain. */
  307  309                          iras.ira_mhip = NULL;
  308  310                          continue;
  309  311                  }
  310  312  
  311  313                  /*
  312  314                   * Check for Martian addrs; we have to explicitly
  313  315                   * test for zero dst since this is also used as
  314  316                   * an indication that the rtc is not used.
  315  317                   */
  316  318                  if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) {
  317  319                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
  318  320                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
  319  321                          freemsg(mp);
  320  322                          /* mhip might point into 1st packet in the chain. */
  321  323                          iras.ira_mhip = NULL;
  322  324                          continue;
  323  325                  }
  324  326                  /*
  325  327                   * Keep L2SRC from a previous packet in chain since mhip
  326  328                   * might point into an earlier packet in the chain.
  327  329                   */
  328  330                  chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET);
  329  331  
                            /* Reset the per-packet receive attributes. */
  330  332                  iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags;
  331  333                  iras.ira_free_flags = 0;
  332  334                  iras.ira_cred = NULL;
  333  335                  iras.ira_cpid = NOPID;
  334  336                  iras.ira_tsl = NULL;
  335  337                  iras.ira_zoneid = ALL_ZONES;    /* Default for forwarding */
  336  338  
  337  339                  /*
  338  340                   * We must count all incoming packets, even if they end
  339  341                   * up being dropped later on. Defer counting bytes until
  340  342                   * we have the whole IP header in first mblk.
  341  343                   */
  342  344                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
  343  345  
  344  346                  iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
  345  347                  UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
  346  348                      iras.ira_pktlen);
  347  349  
  348  350                  /*
  349  351                   * Call one of:
  350  352                   *      ill_input_full_v6
  351  353                   *      ill_input_short_v6
  352  354                   * The former is used in the case of TX. See ill_set_inputfn().
  353  355                   */
  354  356                  (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
  355  357  
  356  358                  /* Any references to clean up? No hold on ira_ill */
  357  359                  if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
  358  360                          ira_cleanup(&iras, B_FALSE);
  359  361  
  360  362                  if (iras.ira_target_sqp_mp != NULL) {
  361  363                          /* Better be called from ip_accept_tcp */
  362  364                          ASSERT(target_sqp != NULL);
  363  365  
  364  366                          /* Found one packet to accept */
  365  367                          mp = iras.ira_target_sqp_mp;
  366  368                          iras.ira_target_sqp_mp = NULL;
  367  369                          ASSERT(ip_recv_attr_is_mblk(mp));
  368  370  
                                    /* Append to the accepted b_next chain. */
  369  371                          if (atail != NULL)
  370  372                                  atail->b_next = mp;
  371  373                          else
  372  374                                  ahead = mp;
  373  375                          atail = mp;
  374  376                          acnt++;
  375  377                          mp = NULL;
  376  378                  }
  377  379                  /* mhip might point into 1st packet in the chain. */
  378  380                  iras.ira_mhip = NULL;
  379  381          }
  380  382          /* Any remaining references to the route cache? */
  381  383          if (rtc.rtc_ire != NULL) {
  382  384                  ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
  383  385                  ire_refrele(rtc.rtc_ire);
  384  386          }
  385  387  
                    /*
                     * Return the head of the accepted b_next chain, or NULL if no
                     * packet was accepted. *last and *cnt are written only in the
                     * non-NULL case.
                     */
  386  388          if (ahead != NULL) {
  387  389                  /* Better be called from ip_accept_tcp */
  388  390                  ASSERT(target_sqp != NULL);
  389  391                  *last = atail;
  390  392                  *cnt = acnt;
  391  393                  return (ahead);
  392  394          }
  393  395  
  394  396          return (NULL);
  395  397  }
 396  398  
 397  399  /*
 398  400   * This input function is used when
 399  401   *  - is_system_labeled()
 400  402   *
 401  403   * Note that for IPv6 CGTP filtering is handled only when receiving fragment
 402  404   * headers, and RSVP uses router alert options, thus we don't need anything
 403  405   * extra for them.
 404  406   */
  405  407  void
  406  408  ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
  407  409      ip_recv_attr_t *ira, rtc_t *rtc)
  408  410  {
                    /* iph_arg and nexthop_arg arrive as void * and are cast here. */
  409  411          ip6_t           *ip6h = (ip6_t *)iph_arg;
  410  412          in6_addr_t      *nexthop = (in6_addr_t *)nexthop_arg;
  411  413          ill_t           *ill = ira->ira_ill;
  412  414  
  413  415          ASSERT(ira->ira_tsl == NULL);
  414  416  
  415  417          /*
  416  418           * Attach any necessary label information to
  417  419           * this packet
  418  420           */
  419  421          if (is_system_labeled()) {
  420  422                  ira->ira_flags |= IRAF_SYSTEM_LABELED;
  421  423  
  422  424                  /*
  423  425                   * This updates ira_cred, ira_tsl and ira_free_flags based
  424  426                   * on the label.
  425  427                   */
  426  428                  if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) {
  427  429                          if (ip6opt_ls != 0)
  428  430                                  ip0dbg(("tsol_get_pkt_label v6 failed\n"));
  429  431                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
  430  432                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
  431  433                          freemsg(mp);
  432  434                          return;
  433  435                  }
  434  436                  /* Note that ira_tsl can be NULL here. */
  435  437  
  436  438                  /* tsol_get_pkt_label sometimes does pullupmsg */
  437  439                  ip6h = (ip6_t *)mp->b_rptr;
  438  440          }
                    /*
                     * NOTE(review): ip6h is refreshed after a possible pullup, but
                     * nexthop is passed on unchanged. ip_input_common_v6() passes
                     * &ip6h->ip6_dst as nexthop_arg, so nexthop may point into the
                     * pre-pullup header. Confirm it is still valid at this point.
                     */
  439  441          ill_input_short_v6(mp, ip6h, nexthop, ira, rtc);
  440  442  }
 441  443  
 442  444  /*
 443  445   * Check for IPv6 addresses that should not appear on the wire
 444  446   * as either source or destination.
 445  447   * If we ever implement Stateless IPv6 Translators (SIIT) we'd have
 446  448   * to revisit the IPv4-mapped part.
 447  449   */
  448  450  static boolean_t
  449  451  ip6_bad_address(in6_addr_t *addr, boolean_t is_src)
  450  452  {
                    /*
                     * Returns B_TRUE when addr must be rejected: an IPv4-mapped or
                     * loopback address in either role, or the unspecified address
                     * when it is a destination (is_src is B_FALSE). Returns B_FALSE
                     * otherwise. Only debug messages are emitted here; the visible
                     * callers do the MIB accounting and free the packet.
                     */
  451  453          if (IN6_IS_ADDR_V4MAPPED(addr)) {
  452  454                  ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr"));
  453  455                  return (B_TRUE);
  454  456          }
  455  457          if (IN6_IS_ADDR_LOOPBACK(addr)) {
  456  458                  ip1dbg(("ip_input_v6: pkt with loopback addr"));
  457  459                  return (B_TRUE);
  458  460          }
  459  461          if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) {
  460  462                  /*
  461  463                   * having :: in the src is ok: it's used for DAD.
  462  464                   */
  463  465                  ip1dbg(("ip_input_v6: pkt with unspecified addr"));
  464  466                  return (B_TRUE);
  465  467          }
  466  468          return (B_FALSE);
  467  469  }
 468  470  
 469  471  /*
 470  472   * Routing lookup for IPv6 link-locals.
 471  473   * First we look on the inbound interface, then we check for IPMP and
 472  474   * look on the upper interface.
 473  475   * We update ira_ruifindex if we find the IRE on the upper interface.
 474  476   */
  475  477  static ire_t *
  476  478  ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira,
  477  479      uint_t irr_flags, ip_stack_t *ipst)
  478  480  {
  479  481          int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL;
  480  482          ire_t *ire;
  481  483  
  482  484          ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop));
                    /* First lookup: restricted to the inbound ill via MATCH_IRE_ILL. */
  483  485          ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
  484  486              match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
                    /*
                     * Done if the lookup did not yield a reject/blackhole IRE, or if
                     * the ill is not under IPMP. ire is dereferenced without a NULL
                     * check; presumably ire_route_recursive_v6() never returns NULL.
                     * TODO confirm.
                     */
  485  487          if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
  486  488              !IS_UNDER_IPMP(ill))
  487  489                  return (ire);
  488  490  
  489  491          /*
  490  492           * When we are using IPMP we need to look for an IRE on both the
  491  493           * under and upper interfaces since there are different
  492  494           * link-local addresses for the under and upper.
  493  495           */
  494  496          ill = ipmp_ill_hold_ipmp_ill(ill);
  495  497          if (ill == NULL)
  496  498                  return (ire);
  497  499  
                    /*
                     * NOTE(review): ira_ruifindex is switched to the upper interface
                     * before the second lookup runs, whatever that lookup returns.
                     * Confirm this matches the intent stated for this function.
                     */
  498  500          ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
  499  501  
                    /* Drop the first result and retry on the IPMP (upper) ill. */
  500  502          ire_refrele(ire);
  501  503          ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
  502  504              match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
  503  505          ill_refrele(ill);
  504  506          return (ire);
  505  507  }
 506  508  
 507  509  /*
 508  510   * This is the tail-end of the full receive side packet handling.
 509  511   * It can be used directly when the configuration is simple.
 510  512   */
 511  513  void
 512  514  ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 513  515      ip_recv_attr_t *ira, rtc_t *rtc)
 514  516  {
 515  517          ire_t           *ire;
 516  518          ill_t           *ill = ira->ira_ill;
 517  519          ip_stack_t      *ipst = ill->ill_ipst;
 518  520          uint_t          pkt_len;
 519  521          ssize_t         len;
 520  522          ip6_t           *ip6h = (ip6_t *)iph_arg;
 521  523          in6_addr_t      nexthop = *(in6_addr_t *)nexthop_arg;
 522  524          ilb_stack_t     *ilbs = ipst->ips_netstack->netstack_ilb;
 523  525          uint_t          irr_flags;
 524  526  #define rptr    ((uchar_t *)ip6h)
 525  527  
 526  528          ASSERT(DB_TYPE(mp) == M_DATA);
 527  529  
 528  530          /*
 529  531           * Check for source/dest being a bad address: loopback, any, or
  530  532           * v4mapped. All of them start with 64 bits of zero.
 531  533           */
 532  534          if (ip6h->ip6_src.s6_addr32[0] == 0 &&
 533  535              ip6h->ip6_src.s6_addr32[1] == 0) {
 534  536                  if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
 535  537                          ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
 536  538                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 537  539                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 538  540                          freemsg(mp);
 539  541                          return;
 540  542                  }
 541  543          }
 542  544          if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
 543  545              ip6h->ip6_dst.s6_addr32[1] == 0) {
 544  546                  if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
 545  547                          ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
 546  548                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 547  549                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 548  550                          freemsg(mp);
 549  551                          return;
 550  552                  }
 551  553          }
 552  554  
 553  555          len = mp->b_wptr - rptr;
 554  556          pkt_len = ira->ira_pktlen;
 555  557  
 556  558          /* multiple mblk or too short */
 557  559          len -= pkt_len;
 558  560          if (len != 0) {
 559  561                  mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
 560  562                  if (mp == NULL)
 561  563                          return;
 562  564                  ip6h = (ip6_t *)mp->b_rptr;
 563  565          }
 564  566  
 565  567          DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
 566  568              ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
 567  569              int, 0);
 568  570          /*
 569  571           * The event for packets being received from a 'physical'
 570  572           * interface is placed after validation of the source and/or
 571  573           * destination address as being local so that packets can be
 572  574           * redirected to loopback addresses using ipnat.
 573  575           */
 574  576          DTRACE_PROBE4(ip6__physical__in__start,
 575  577              ill_t *, ill, ill_t *, NULL,
 576  578              ip6_t *, ip6h, mblk_t *, mp);
 577  579  
 578  580          if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
 579  581                  int     ll_multicast = 0;
 580  582                  int     error;
 581  583                  in6_addr_t orig_dst = ip6h->ip6_dst;
 582  584  
 583  585                  if (ira->ira_flags & IRAF_L2DST_MULTICAST)
 584  586                          ll_multicast = HPE_MULTICAST;
 585  587                  else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
 586  588                          ll_multicast = HPE_BROADCAST;
 587  589  
 588  590                  FW_HOOKS6(ipst->ips_ip6_physical_in_event,
 589  591                      ipst->ips_ipv6firewall_physical_in,
 590  592                      ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);
 591  593  
 592  594                  DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);
 593  595  
 594  596                  if (mp == NULL)
 595  597                          return;
 596  598  
 597  599                  /* The length could have changed */
 598  600                  ip6h = (ip6_t *)mp->b_rptr;
 599  601                  ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 600  602                  pkt_len = ira->ira_pktlen;
 601  603  
 602  604                  /*
 603  605                   * In case the destination changed we override any previous
 604  606                   * change to nexthop.
 605  607                   */
 606  608                  if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
 607  609                          nexthop = ip6h->ip6_dst;
 608  610  
 609  611                  if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
 610  612                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 611  613                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 612  614                          freemsg(mp);
 613  615                          return;
 614  616                  }
 615  617  
 616  618          }
 617  619  
 618  620          if (ipst->ips_ip6_observe.he_interested) {
 619  621                  zoneid_t dzone;
 620  622  
 621  623                  /*
 622  624                   * On the inbound path the src zone will be unknown as
 623  625                   * this packet has come from the wire.
 624  626                   */
 625  627                  dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
 626  628                  ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
 627  629          }
 628  630  
 629  631          if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
 630  632              IPV6_DEFAULT_VERS_AND_FLOW) {
 631  633                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 632  634                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
 633  635                  ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
 634  636                  freemsg(mp);
 635  637                  return;
 636  638          }
 637  639  
 638  640          /*
 639  641           * For IPv6 we update ira_ip_hdr_length and ira_protocol as
 640  642           * we parse the headers, starting with the hop-by-hop options header.
 641  643           */
 642  644          ira->ira_ip_hdr_length = IPV6_HDR_LEN;
 643  645          if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
 644  646                  ip6_hbh_t       *hbhhdr;
 645  647                  uint_t          ehdrlen;
 646  648                  uint8_t         *optptr;
 647  649  
 648  650                  if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
 649  651                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 650  652                          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 651  653                          freemsg(mp);
 652  654                          return;
 653  655                  }
 654  656                  if (mp->b_cont != NULL &&
 655  657                      rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
 656  658                          ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
 657  659                          if (ip6h == NULL) {
 658  660                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 659  661                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
 660  662                                  freemsg(mp);
 661  663                                  return;
 662  664                          }
 663  665                  }
 664  666                  hbhhdr = (ip6_hbh_t *)&ip6h[1];
 665  667                  ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
 666  668  
 667  669                  if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
 668  670                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 669  671                          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 670  672                          freemsg(mp);
 671  673                          return;
 672  674                  }
 673  675                  if (mp->b_cont != NULL &&
 674  676                      rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
 675  677                          ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
 676  678                          if (ip6h == NULL) {
 677  679                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 678  680                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
 679  681                                  freemsg(mp);
 680  682                                  return;
 681  683                          }
 682  684                          hbhhdr = (ip6_hbh_t *)&ip6h[1];
 683  685                  }
 684  686  
 685  687                  /*
 686  688                   * Update ira_ip_hdr_length to skip the hop-by-hop header
 687  689                   * once we get to ip_fanout_v6
 688  690                   */
 689  691                  ira->ira_ip_hdr_length += ehdrlen;
 690  692                  ira->ira_protocol = hbhhdr->ip6h_nxt;
 691  693  
 692  694                  optptr = (uint8_t *)&hbhhdr[1];
 693  695                  switch (ip_process_options_v6(mp, ip6h, optptr,
 694  696                      ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
 695  697                  case -1:
 696  698                          /*
 697  699                           * Packet has been consumed and any
 698  700                           * needed ICMP messages sent.
 699  701                           */
 700  702                          return;
 701  703                  case 0:
 702  704                          /* no action needed */
 703  705                          break;
 704  706                  case 1:
 705  707                          /*
 706  708                           * Known router alert. Make us handle it as local
 707  709                           * by setting the nexthop to be the all-host multicast
 708  710                           * address, and skip multicast membership filter by
 709  711                           * marking as a router alert.
 710  712                           */
 711  713                          ira->ira_flags |= IRAF_ROUTER_ALERT;
 712  714                          nexthop = ipv6_all_hosts_mcast;
 713  715                          break;
 714  716                  }
 715  717          }
 716  718  
 717  719          /*
 718  720           * Here we check to see if the machine is set up as an
 719  721           * L3 loadbalancer and if the incoming packet is for a VIP
 720  722           *
 721  723           * Check the following:
 722  724           * - there is at least a rule
 723  725           * - protocol of the packet is supported
 724  726           *
 725  727           * We don't load balance IPv6 link-locals.
 726  728           */
 727  729          if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
 728  730              !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 729  731                  in6_addr_t      lb_dst;
 730  732                  int             lb_ret;
 731  733  
 732  734                  /* For convenience, we just pull up the mblk. */
 733  735                  if (mp->b_cont != NULL) {
 734  736                          if (pullupmsg(mp, -1) == 0) {
 735  737                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 736  738                                  ip_drop_input("ipIfStatsInDiscards - pullupmsg",
 737  739                                      mp, ill);
 738  740                                  freemsg(mp);
 739  741                                  return;
 740  742                          }
 741  743                          ip6h = (ip6_t *)mp->b_rptr;
 742  744                  }
 743  745                  lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
 744  746                      (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
 745  747                  if (lb_ret == ILB_DROPPED) {
 746  748                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 747  749                          ip_drop_input("ILB_DROPPED", mp, ill);
 748  750                          freemsg(mp);
 749  751                          return;
 750  752                  }
 751  753                  if (lb_ret == ILB_BALANCED) {
 752  754                          /* Set the dst to that of the chosen server */
 753  755                          nexthop = lb_dst;
 754  756                          DB_CKSUMFLAGS(mp) = 0;
 755  757                  }
 756  758          }
 757  759  
 758  760          if (ill->ill_flags & ILLF_ROUTER)
 759  761                  irr_flags = IRR_ALLOCATE;
 760  762          else
 761  763                  irr_flags = IRR_NONE;
 762  764  
 763  765          /* Can not use route cache with TX since the labels can differ */
 764  766          if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
 765  767                  if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 766  768                          ire = ire_multicast(ill);
 767  769                  } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 768  770                          ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 769  771                              ipst);
 770  772                  } else {
 771  773                          /* Match destination and label */
 772  774                          ire = ire_route_recursive_v6(&nexthop, 0, NULL,
  
    | 
      ↓ open down ↓ | 
    739 lines elided | 
    
      ↑ open up ↑ | 
  
 773  775                              ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
 774  776                              irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
 775  777                              NULL);
 776  778                  }
 777  779                  /* Update the route cache so we do the ire_refrele */
 778  780                  ASSERT(ire != NULL);
 779  781                  if (rtc->rtc_ire != NULL)
 780  782                          ire_refrele(rtc->rtc_ire);
 781  783                  rtc->rtc_ire = ire;
 782  784                  rtc->rtc_ip6addr = nexthop;
 783      -        } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr)) {
      785 +        } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr) &&
      786 +            rtc->rtc_ire != NULL) {
 784  787                  /* Use the route cache */
 785      -                ASSERT(rtc->rtc_ire != NULL);
 786  788                  ire = rtc->rtc_ire;
 787  789          } else {
 788  790                  /* Update the route cache */
 789  791                  if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 790  792                          ire = ire_multicast(ill);
 791  793                  } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 792  794                          ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 793  795                              ipst);
 794  796                  } else {
 795  797                          ire = ire_route_recursive_dstonly_v6(&nexthop,
 796  798                              irr_flags, ira->ira_xmit_hint, ipst);
 797  799                  }
 798  800                  ASSERT(ire != NULL);
 799  801                  if (rtc->rtc_ire != NULL)
 800  802                          ire_refrele(rtc->rtc_ire);
 801  803                  rtc->rtc_ire = ire;
 802  804                  rtc->rtc_ip6addr = nexthop;
 803  805          }
 804  806  
 805  807          ire->ire_ib_pkt_count++;
 806  808  
 807  809          /*
 808  810           * Based on ire_type and ire_flags call one of:
 809  811           *      ire_recv_local_v6 - for IRE_LOCAL
 810  812           *      ire_recv_loopback_v6 - for IRE_LOOPBACK
 811  813           *      ire_recv_multirt_v6 - if RTF_MULTIRT
 812  814           *      ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
 813  815           *      ire_recv_multicast_v6 - for IRE_MULTICAST
 814  816           *      ire_recv_noaccept_v6 - for ire_noaccept ones
 815  817           *      ire_recv_forward_v6 - for the rest.
 816  818           */
 817  819  
 818  820          (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 819  821  }
 820  822  #undef rptr
 821  823  
 822  824  /*
 823  825   * ire_recvfn for IREs that need forwarding
            *
            * Decides whether the packet may be forwarded and, if so, hands it to
            * ip_forward_xmit_v6() using the nce cached on the ire.
            *
            *   ire     - the IRE whose ire_recvfn was invoked; supplies the nce
            *             cache, zoneid and per-route MTU metric.
            *   mp      - the packet.
            *   iph_arg - the packet's IPv6 header (used as an ip6_t *).
            *   ira     - receive attributes; ira_pktlen and ira_ip_hdr_length may
            *             be rewritten here when hooks or labeling change the
            *             packet.
            *
            * A hold is taken on the nce while it is in use; every return after
            * that point drops the hold.  The packet is either freed on a drop
            * path, consumed by IPPF/firewall hooks (mp comes back NULL), or passed
            * on for transmission.
 824  826   */
 825  827  void
 826  828  ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
 827  829  {
 828  830          ip6_t           *ip6h = (ip6_t *)iph_arg;
 829  831          ill_t           *ill = ira->ira_ill;
 830  832          ip_stack_t      *ipst = ill->ill_ipst;
 831  833          iaflags_t       iraflags = ira->ira_flags;
 832  834          ill_t           *dst_ill;
 833  835          nce_t           *nce;
 834  836          uint32_t        added_tx_len;
 835  837          uint32_t        mtu, iremtu;
 836  838  
                   /* Packets that arrived as link-layer multicast/broadcast are dropped */
 837  839          if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
 838  840                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 839  841                  ip_drop_input("l2 multicast not forwarded", mp, ill);
 840  842                  freemsg(mp);
 841  843                  return;
 842  844          }
 843  845  
                   /* The receiving ill must have ILLF_ROUTER set */
 844  846          if (!(ill->ill_flags & ILLF_ROUTER)) {
 845  847                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 846  848                  ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 847  849                  freemsg(mp);
 848  850                  return;
 849  851          }
 850  852  
 851  853          /*
 852  854           * Either ire_nce_capable or ire_dep_parent would be set for the IRE
 853  855           * when it is found by ire_route_recursive, but that some other thread
 854  856           * could have changed the routes with the effect of clearing
 855  857           * ire_dep_parent. In that case we'd end up dropping the packet, or
 856  858           * finding a new nce below.
 857  859           * Get, allocate, or update the nce.
 858  860           * We get a refhold on ire_nce_cache as a result of this to avoid races
 859  861           * where ire_nce_cache is deleted.
 860  862           *
 861  863           * This ensures that we don't forward if the interface is down since
 862  864           * ipif_down removes all the nces.
 863  865           */
 864  866          mutex_enter(&ire->ire_lock);
 865  867          nce = ire->ire_nce_cache;
 866  868          if (nce == NULL) {
 867  869                  /* Not yet set up - try to set one up */
                           /* ire_lock is dropped around the revalidate call and retaken */
 868  870                  mutex_exit(&ire->ire_lock);
 869  871                  (void) ire_revalidate_nce(ire);
 870  872                  mutex_enter(&ire->ire_lock);
 871  873                  nce = ire->ire_nce_cache;
 872  874                  if (nce == NULL) {
 873  875                          mutex_exit(&ire->ire_lock);
 874  876                          /* The ire_dep_parent chain went bad, or no memory */
 875  877                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 876  878                          ip_drop_input("No ire_dep_parent", mp, ill);
 877  879                          freemsg(mp);
 878  880                          return;
 879  881                  }
 880  882          }
                   /* Take our hold while ire_lock is still held */
 881  883          nce_refhold(nce);
 882  884          mutex_exit(&ire->ire_lock);
 883  885  
                   /*
                    * The nce is marked condemned: ask ire_handle_condemned_nce() for a
                    * replacement and drop our hold on the old one.  The replacement is
                    * released below just like the original would have been.
                    * NOTE(review): presumably nce1 is returned already held - confirm.
                    */
 884  886          if (nce->nce_is_condemned) {
 885  887                  nce_t *nce1;
 886  888  
 887  889                  nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE);
 888  890                  nce_refrele(nce);
 889  891                  if (nce1 == NULL) {
 890  892                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 891  893                          ip_drop_input("No nce", mp, ill);
 892  894                          freemsg(mp);
 893  895                          return;
 894  896                  }
 895  897                  nce = nce1;
 896  898          }
 897  899          dst_ill = nce->nce_ill;
 898  900  
 899  901          /*
 900  902           * Unless we are forwarding, drop the packet.
 901  903           * Unlike IPv4 we don't allow source routed packets out the same
 902  904           * interface when we are not a router.
 903  905           * Note that ill_forward_set() will set the ILLF_ROUTER on
 904  906           * all the group members when it gets an ipmp-ill or under-ill.
 905  907           */
 906  908          if (!(dst_ill->ill_flags & ILLF_ROUTER)) {
 907  909                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 908  910                  ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 909  911                  freemsg(mp);
 910  912                  nce_refrele(nce);
 911  913                  return;
 912  914          }
 913  915  
 914  916          if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
                           /* This ire will not be used; take back its inbound count */
 915  917                  ire->ire_ib_pkt_count--;
 916  918                  /*
 917  919                   * Should only use IREs that are visible from the
 918  920                   * global zone for forwarding.
 919  921                   * For IPv6 any source route would have already been
 920  922                   * advanced in ip_fanout_v6
 921  923                   */
 922  924                  ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL,
 923  925                      GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR,
 924  926                      (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
 925  927                      ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
                           /* Hand the packet to the recvfn of the IRE just looked up */
 926  928                  ire->ire_ib_pkt_count++;
 927  929                  (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 928  930                  ire_refrele(ire);
 929  931                  nce_refrele(nce);
 930  932                  return;
 931  933          }
 932  934          /*
 933  935           * ipIfStatsHCInForwDatagrams should only be incremented if there
 934  936           * will be an attempt to forward the packet, which is why we
 935  937           * increment after the above condition has been checked.
 936  938           */
 937  939          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
 938  940  
 939  941          /* Initiate Read side IPPF processing */
 940  942          if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
 941  943                  /* ip_process translates an IS_UNDER_IPMP */
 942  944                  mp = ip_process(IPP_FWD_IN, mp, ill, ill);
 943  945                  if (mp == NULL) {
 944  946                          /* ip_drop_packet and MIB done */
 945  947                          ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred "
 946  948                              "during IPPF processing\n"));
 947  949                          nce_refrele(nce);
 948  950                          return;
 949  951                  }
 950  952          }
 951  953  
 952  954          DTRACE_PROBE4(ip6__forwarding__start,
 953  955              ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp);
 954  956  
 955  957          if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
 956  958                  int     error;
 957  959  
 958  960                  FW_HOOKS(ipst->ips_ip6_forwarding_event,
 959  961                      ipst->ips_ipv6firewall_forwarding,
 960  962                      ill, dst_ill, ip6h, mp, mp, 0, ipst, error);
 961  963  
 962  964                  DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);
 963  965  
                           /* A NULL mp after the hooks means there is nothing left to send */
 964  966                  if (mp == NULL) {
 965  967                          nce_refrele(nce);
 966  968                          return;
 967  969                  }
 968  970                  /*
 969  971                   * Even if the destination was changed by the filter we use the
 970  972                   * forwarding decision that was made based on the address
 971  973                   * in ip_input.
 972  974                   */
 973  975  
 974  976                  /* Might have changed */
 975  977                  ip6h = (ip6_t *)mp->b_rptr;
 976  978                  ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 977  979          }
 978  980  
 979  981          /* Packet is being forwarded. Turning off hwcksum flag. */
 980  982          DB_CKSUMFLAGS(mp) = 0;
 981  983  
 982  984          /*
 983  985           * Per RFC 3513 section 2.5.2, we must not forward packets with
 984  986           * an unspecified source address.
 985  987           * The loopback address check for both src and dst has already
 986  988           * been checked in ip_input_v6
 987  989           * In the future one can envision adding RPF checks using number 3.
 988  990           */
 989  991          switch (ipst->ips_src_check) {
 990  992          case 0:
 991  993                  break;
 992  994          case 1:
 993  995          case 2:
 994  996                  if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) ||
 995  997                      IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
 996  998                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 997  999                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 998 1000                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 999 1001                          nce_refrele(nce);
1000 1002                          freemsg(mp);
1001 1003                          return;
1002 1004                  }
1003 1005                  break;
1004 1006          }
1005 1007  
1006 1008          /*
1007 1009           * Check to see if we're forwarding the packet to a
1008 1010           * different link from which it came.  If so, check the
1009 1011           * source and destination addresses since routers must not
1010 1012           * forward any packets with link-local source or
1011 1013           * destination addresses to other links.  Otherwise (if
1012 1014           * we're forwarding onto the same link), conditionally send
1013 1015           * a redirect message.
1014 1016           */
1015 1017          if (!IS_ON_SAME_LAN(dst_ill, ill)) {
1016 1018                  if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
1017 1019                      IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
1018 1020                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1019 1021                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1020 1022                          freemsg(mp);
1021 1023                          nce_refrele(nce);
1022 1024                          return;
1023 1025                  }
1024 1026                  /* TBD add site-local check at site boundary? */
1025 1027          } else if (ipst->ips_ipv6_send_redirects) {
1026 1028                  ip_send_potential_redirect_v6(mp, ip6h, ire, ira);
1027 1029          }
1028 1030  
                   /* Stays 0 unless labeling below makes the packet larger */
1029 1031          added_tx_len = 0;
1030 1032          if (iraflags & IRAF_SYSTEM_LABELED) {
1031 1033                  mblk_t          *mp1;
1032 1034                  uint32_t        old_pkt_len = ira->ira_pktlen;
1033 1035  
1034 1036                  /*
1035 1037                   * Check if it can be forwarded and add/remove
1036 1038                   * CIPSO options as needed.
1037 1039                   */
1038 1040                  if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
1039 1041                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1040 1042                          ip_drop_input("tsol_ip_forward", mp, ill);
1041 1043                          freemsg(mp);
1042 1044                          nce_refrele(nce);
1043 1045                          return;
1044 1046                  }
1045 1047                  /*
1046 1048                   * Size may have changed. Remember amount added in case
1047 1049                   * ip_fragment needs to send an ICMP too big.
1048 1050                   */
1049 1051                  mp = mp1;
1050 1052                  ip6h = (ip6_t *)mp->b_rptr;
1051 1053                  ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
1052 1054                  ira->ira_ip_hdr_length = IPV6_HDR_LEN;
1053 1055                  if (ira->ira_pktlen > old_pkt_len)
1054 1056                          added_tx_len = ira->ira_pktlen - old_pkt_len;
1055 1057          }
1056 1058  
                   /*
                    * Effective MTU: the outbound ill's MTU, lowered to the route's
                    * MTU metric when that metric is nonzero and smaller.
                    */
1057 1059          mtu = dst_ill->ill_mtu;
1058 1060          if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
1059 1061                  mtu = iremtu;
1060 1062          ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len);
1061 1063          nce_refrele(nce);
1062 1064          return;
1063 1065  
1064 1066  }
1065 1067  
1066 1068  /*
1067 1069   * Used for sending out unicast and multicast packets that are
1068 1070   * forwarded.
1069 1071   */
1070 1072  void
1071 1073  ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira,
1072 1074      uint32_t mtu, uint32_t added_tx_len)
1073 1075  {
1074 1076          ill_t           *dst_ill = nce->nce_ill;
1075 1077          uint32_t        pkt_len;
1076 1078          iaflags_t       iraflags = ira->ira_flags;
1077 1079          ip_stack_t      *ipst = dst_ill->ill_ipst;
1078 1080  
1079 1081          if (ip6h->ip6_hops-- <= 1) {
1080 1082                  BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1081 1083                  ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill);
1082 1084                  icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE,
1083 1085                      ira);
1084 1086                  return;
1085 1087          }
1086 1088  
1087 1089          /* Initiate Write side IPPF processing before any fragmentation */
1088 1090          if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
1089 1091                  /* ip_process translates an IS_UNDER_IPMP */
1090 1092                  mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
1091 1093                  if (mp == NULL) {
1092 1094                          /* ip_drop_packet and MIB done */
1093 1095                          ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \
1094 1096                              " during IPPF processing\n"));
1095 1097                          return;
1096 1098                  }
1097 1099          }
1098 1100  
1099 1101          pkt_len = ira->ira_pktlen;
1100 1102  
1101 1103          BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
1102 1104  
1103 1105          if (pkt_len > mtu) {
1104 1106                  BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
1105 1107                  ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
1106 1108                  if (iraflags & IRAF_SYSTEM_LABELED) {
1107 1109                          /*
1108 1110                           * Remove any CIPSO option added by
1109 1111                           * tsol_ip_forward, and make sure we report
1110 1112                           * a path MTU so that there
1111 1113                           * is room to add such a CIPSO option for future
1112 1114                           * packets.
1113 1115                           */
1114 1116                          mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6);
1115 1117                  }
1116 1118                  icmp_pkt2big_v6(mp, mtu, B_TRUE, ira);
1117 1119                  return;
1118 1120          }
1119 1121  
1120 1122          ASSERT(pkt_len ==
1121 1123              ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN);
1122 1124  
1123 1125          if (iraflags & IRAF_LOOPBACK_COPY) {
1124 1126                  /*
1125 1127                   * IXAF_NO_LOOP_ZONEID is not set hence 6th arg
1126 1128                   * is don't care
1127 1129                   */
1128 1130                  (void) ip_postfrag_loopcheck(mp, nce,
1129 1131                      (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL),
1130 1132                      pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1131 1133          } else {
1132 1134                  (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL,
1133 1135                      pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1134 1136          }
1135 1137  }
1136 1138  
1137 1139  /*
1138 1140   * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
1139 1141   * which is what ire_route_recursive returns when there is no matching ire.
1140 1142   * Send ICMP unreachable unless blackhole.
1141 1143   */
1142 1144  void
1143 1145  ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1144 1146  {
1145 1147          ip6_t           *ip6h = (ip6_t *)iph_arg;
1146 1148          ill_t           *ill = ira->ira_ill;
1147 1149          ip_stack_t      *ipst = ill->ill_ipst;
1148 1150  
1149 1151          /* Would we have forwarded this packet if we had a route? */
1150 1152          if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
1151 1153                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1152 1154                  ip_drop_input("l2 multicast not forwarded", mp, ill);
1153 1155                  freemsg(mp);
1154 1156                  return;
1155 1157          }
1156 1158  
1157 1159          if (!(ill->ill_flags & ILLF_ROUTER)) {
1158 1160                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1159 1161                  ip_drop_input("ipIfStatsForwProhibits", mp, ill);
1160 1162                  freemsg(mp);
1161 1163                  return;
1162 1164          }
1163 1165          /*
1164 1166           * If we had a route this could have been forwarded. Count as such.
1165 1167           *
1166 1168           * ipIfStatsHCInForwDatagrams should only be increment if there
1167 1169           * will be an attempt to forward the packet, which is why we
1168 1170           * increment after the above condition has been checked.
1169 1171           */
1170 1172          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
1171 1173  
1172 1174          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1173 1175  
1174 1176          ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
1175 1177              ipst);
1176 1178  
1177 1179          if (ire->ire_flags & RTF_BLACKHOLE) {
1178 1180                  ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
1179 1181                  freemsg(mp);
1180 1182          } else {
1181 1183                  ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);
1182 1184  
1183 1185                  icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE,
1184 1186                      ira);
1185 1187          }
1186 1188  }
1187 1189  
1188 1190  /*
1189 1191   * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
1190 1192   * VRRP when in noaccept mode.
1191 1193   * We silently drop packets except for Neighbor Solicitations and
1192 1194   * Neighbor Advertisements.
1193 1195   */
1194 1196  void
1195 1197  ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1196 1198      ip_recv_attr_t *ira)
1197 1199  {
1198 1200          ip6_t           *ip6h = (ip6_t *)iph_arg;
1199 1201          ill_t           *ill = ira->ira_ill;
1200 1202          icmp6_t         *icmp6;
1201 1203          int             ip_hdr_length;
1202 1204  
1203 1205          if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
1204 1206                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1205 1207                  ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1206 1208                  freemsg(mp);
1207 1209                  return;
1208 1210          }
1209 1211          ip_hdr_length = ira->ira_ip_hdr_length;
1210 1212          if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
1211 1213                  if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
1212 1214                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
1213 1215                          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
1214 1216                          freemsg(mp);
1215 1217                          return;
1216 1218                  }
1217 1219                  ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
1218 1220                  if (ip6h == NULL) {
1219 1221                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1220 1222                          freemsg(mp);
1221 1223                          return;
1222 1224                  }
1223 1225          }
1224 1226          icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
1225 1227  
1226 1228          if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT &&
1227 1229              icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) {
1228 1230                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1229 1231                  ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1230 1232                  freemsg(mp);
1231 1233                  return;
1232 1234          }
1233 1235          ire_recv_local_v6(ire, mp, ip6h, ira);
1234 1236  }
1235 1237  
1236 1238  /*
1237 1239   * ire_recvfn for IRE_MULTICAST.
1238 1240   */
1239 1241  void
1240 1242  ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1241 1243      ip_recv_attr_t *ira)
1242 1244  {
1243 1245          ip6_t           *ip6h = (ip6_t *)iph_arg;
1244 1246          ill_t           *ill = ira->ira_ill;
1245 1247  
1246 1248          ASSERT(ire->ire_ill == ira->ira_ill);
1247 1249  
1248 1250          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
1249 1251          UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen);
1250 1252  
1251 1253          /* Tag for higher-level protocols */
1252 1254          ira->ira_flags |= IRAF_MULTICAST;
1253 1255  
1254 1256          /*
1255 1257           * So that we don't end up with dups, only one ill in an IPMP group is
1256 1258           * nominated to receive multicast traffic.
1257 1259           * If we have no cast_ill we are liberal and accept everything.
1258 1260           */
1259 1261          if (IS_UNDER_IPMP(ill)) {
1260 1262                  ip_stack_t      *ipst = ill->ill_ipst;
1261 1263  
1262 1264                  /* For an under ill_grp can change under lock */
1263 1265                  rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1264 1266                  if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
1265 1267                      ill->ill_grp->ig_cast_ill != NULL) {
1266 1268                          rw_exit(&ipst->ips_ill_g_lock);
1267 1269                          ip_drop_input("not on cast ill", mp, ill);
1268 1270                          freemsg(mp);
1269 1271                          return;
1270 1272                  }
1271 1273                  rw_exit(&ipst->ips_ill_g_lock);
1272 1274                  /*
1273 1275                   * We switch to the upper ill so that mrouter and hasmembers
1274 1276                   * can operate on upper here and in ip_input_multicast.
1275 1277                   */
1276 1278                  ill = ipmp_ill_hold_ipmp_ill(ill);
1277 1279                  if (ill != NULL) {
1278 1280                          ASSERT(ill != ira->ira_ill);
1279 1281                          ASSERT(ire->ire_ill == ira->ira_ill);
1280 1282                          ira->ira_ill = ill;
1281 1283                          ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1282 1284                  } else {
1283 1285                          ill = ira->ira_ill;
1284 1286                  }
1285 1287          }
1286 1288  
1287 1289  #ifdef notdef
1288 1290          /*
1289 1291           * Check if we are a multicast router - send ip_mforward a copy of
1290 1292           * the packet.
1291 1293           * Due to mroute_decap tunnels we consider forwarding packets even if
1292 1294           * mrouted has not joined the allmulti group on this interface.
1293 1295           */
1294 1296          if (ipst->ips_ip_g_mrouter) {
1295 1297                  int retval;
1296 1298  
1297 1299                  /*
1298 1300                   * Clear the indication that this may have hardware
1299 1301                   * checksum as we are not using it for forwarding.
1300 1302                   */
1301 1303                  DB_CKSUMFLAGS(mp) = 0;
1302 1304  
1303 1305                  /*
1304 1306                   * ip_mforward helps us make these distinctions: If received
1305 1307                   * on tunnel and not IGMP, then drop.
1306 1308                   * If IGMP packet, then don't check membership
1307 1309                   * If received on a phyint and IGMP or PIM, then
1308 1310                   * don't check membership
1309 1311                   */
1310 1312                  retval = ip_mforward_v6(mp, ira);
1311 1313                  /* ip_mforward updates mib variables if needed */
1312 1314  
1313 1315                  switch (retval) {
1314 1316                  case 0:
1315 1317                          /*
1316 1318                           * pkt is okay and arrived on phyint.
1317 1319                           */
1318 1320                          break;
1319 1321                  case -1:
1320 1322                          /* pkt is mal-formed, toss it */
1321 1323                          freemsg(mp);
1322 1324                          goto done;
1323 1325                  case 1:
1324 1326                          /*
1325 1327                           * pkt is okay and arrived on a tunnel
1326 1328                           *
1327 1329                           * If we are running a multicast router
1328 1330                           * we need to see all mld packets, which
1329 1331                           * are marked with router alerts.
1330 1332                           */
1331 1333                          if (ira->ira_flags & IRAF_ROUTER_ALERT)
1332 1334                                  goto forus;
1333 1335                          ip_drop_input("Multicast on tunnel ignored", mp, ill);
1334 1336                          freemsg(mp);
1335 1337                          goto done;
1336 1338                  }
1337 1339          }
1338 1340  #endif /* notdef */
1339 1341  
1340 1342          /*
1341 1343           * If this was a router alert we skip the group membership check.
1342 1344           */
1343 1345          if (ira->ira_flags & IRAF_ROUTER_ALERT)
1344 1346                  goto forus;
1345 1347  
1346 1348          /*
1347 1349           * Check if we have members on this ill. This is not necessary for
1348 1350           * correctness because even if the NIC/GLD had a leaky filter, we
1349 1351           * filter before passing to each conn_t.
1350 1352           */
1351 1353          if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) {
1352 1354                  /*
1353 1355                   * Nobody interested
1354 1356                   *
1355 1357                   * This might just be caused by the fact that
1356 1358                   * multiple IP Multicast addresses map to the same
1357 1359                   * link layer multicast - no need to increment counter!
1358 1360                   */
1359 1361                  ip_drop_input("Multicast with no members", mp, ill);
1360 1362                  freemsg(mp);
1361 1363                  goto done;
1362 1364          }
1363 1365  forus:
1364 1366          ip2dbg(("ire_recv_multicast_v6: multicast for us\n"));
1365 1367  
1366 1368          /*
1367 1369           * After reassembly and IPsec we will need to duplicate the
1368 1370           * multicast packet for all matching zones on the ill.
1369 1371           */
1370 1372          ira->ira_zoneid = ALL_ZONES;
1371 1373  
1372 1374          /* Reassemble on the ill on which the packet arrived */
1373 1375          ip_input_local_v6(ire, mp, ip6h, ira);
1374 1376  done:
1375 1377          if (ill != ire->ire_ill) {
1376 1378                  ill_refrele(ill);
1377 1379                  ira->ira_ill = ire->ire_ill;
1378 1380                  ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
1379 1381          }
1380 1382  }
1381 1383  
1382 1384  /*
1383 1385   * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
1384 1386   * Drop packets since we don't forward out multirt routes.
1385 1387   */
1386 1388  /* ARGSUSED */
1387 1389  void
1388 1390  ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1389 1391  {
1390 1392          ill_t           *ill = ira->ira_ill;
1391 1393  
1392 1394          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1393 1395          ip_drop_input("Not forwarding out MULTIRT", mp, ill);
1394 1396          freemsg(mp);
1395 1397  }
1396 1398  
1397 1399  /*
1398 1400   * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
1399 1401   * has rewritten the packet to have a loopback destination address (We
1400 1402   * filter out packet with a loopback destination from arriving over the wire).
1401 1403   * We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
1402 1404   */
1403 1405  void
1404 1406  ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1405 1407  {
1406 1408          ip6_t           *ip6h = (ip6_t *)iph_arg;
1407 1409          ill_t           *ill = ira->ira_ill;
1408 1410          ill_t           *ire_ill = ire->ire_ill;
1409 1411  
1410 1412          ira->ira_zoneid = GLOBAL_ZONEID;
1411 1413  
1412 1414          /* Switch to the lo0 ill for further processing  */
1413 1415          if (ire_ill != ill) {
1414 1416                  /*
1415 1417                   * Update ira_ill to be the ILL on which the IP address
1416 1418                   * is hosted.
1417 1419                   * No need to hold the ill since we have a hold on the ire
1418 1420                   */
1419 1421                  ASSERT(ira->ira_ill == ira->ira_rill);
1420 1422                  ira->ira_ill = ire_ill;
1421 1423  
1422 1424                  ip_input_local_v6(ire, mp, ip6h, ira);
1423 1425  
1424 1426                  /* Restore */
1425 1427                  ASSERT(ira->ira_ill == ire_ill);
1426 1428                  ira->ira_ill = ill;
1427 1429                  return;
1428 1430  
1429 1431          }
1430 1432          ip_input_local_v6(ire, mp, ip6h, ira);
1431 1433  }
1432 1434  
1433 1435  /*
1434 1436   * ire_recvfn for IRE_LOCAL.
1435 1437   */
1436 1438  void
1437 1439  ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1438 1440  {
1439 1441          ip6_t           *ip6h = (ip6_t *)iph_arg;
1440 1442          ill_t           *ill = ira->ira_ill;
1441 1443          ill_t           *ire_ill = ire->ire_ill;
1442 1444  
1443 1445          /* Make a note for DAD that this address is in use */
1444 1446          ire->ire_last_used_time = LBOLT_FASTPATH;
1445 1447  
1446 1448          /* Only target the IRE_LOCAL with the right zoneid. */
1447 1449          ira->ira_zoneid = ire->ire_zoneid;
1448 1450  
1449 1451          /*
1450 1452           * If the packet arrived on the wrong ill, we check that
1451 1453           * this is ok.
1452 1454           * If it is, then we ensure that we do the reassembly on
1453 1455           * the ill on which the address is hosted. We keep ira_rill as
1454 1456           * the one on which the packet arrived, so that IP_PKTINFO and
1455 1457           * friends can report this.
1456 1458           */
1457 1459          if (ire_ill != ill) {
1458 1460                  ire_t *new_ire;
1459 1461  
1460 1462                  new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill);
1461 1463                  if (new_ire == NULL) {
1462 1464                          /* Drop packet */
1463 1465                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1464 1466                          ip_drop_input("ipIfStatsInForwProhibits", mp, ill);
1465 1467                          freemsg(mp);
1466 1468                          return;
1467 1469                  }
1468 1470                  /*
1469 1471                   * Update ira_ill to be the ILL on which the IP address
1470 1472                   * is hosted. No need to hold the ill since we have a
1471 1473                   * hold on the ire. Note that we do the switch even if
1472 1474                   * new_ire == ire (for IPMP, ire would be the one corresponding
1473 1475                   * to the IPMP ill).
1474 1476                   */
1475 1477                  ASSERT(ira->ira_ill == ira->ira_rill);
1476 1478                  ira->ira_ill = new_ire->ire_ill;
1477 1479  
1478 1480                  /* ira_ruifindex tracks the upper for ira_rill */
1479 1481                  if (IS_UNDER_IPMP(ill))
1480 1482                          ira->ira_ruifindex = ill_get_upper_ifindex(ill);
1481 1483  
1482 1484                  ip_input_local_v6(new_ire, mp, ip6h, ira);
1483 1485  
1484 1486                  /* Restore */
1485 1487                  ASSERT(ira->ira_ill == new_ire->ire_ill);
1486 1488                  ira->ira_ill = ill;
1487 1489                  ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1488 1490  
1489 1491                  if (new_ire != ire)
1490 1492                          ire_refrele(new_ire);
1491 1493                  return;
1492 1494          }
1493 1495  
1494 1496          ip_input_local_v6(ire, mp, ip6h, ira);
1495 1497  }
1496 1498  
1497 1499  /*
1498 1500   * Common function for packets arriving for the host. Handles
1499 1501   * checksum verification, reassembly checks, etc.
1500 1502   */
1501 1503  static void
1502 1504  ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1503 1505  {
1504 1506          iaflags_t       iraflags = ira->ira_flags;
1505 1507  
1506 1508          /*
1507 1509           * For multicast we need some extra work before
1508 1510           * we call ip_fanout_v6(), since in the case of shared-IP zones
1509 1511           * we need to pretend that a packet arrived for each zoneid.
1510 1512           */
1511 1513          if (iraflags & IRAF_MULTICAST) {
1512 1514                  ip_input_multicast_v6(ire, mp, ip6h, ira);
1513 1515                  return;
1514 1516          }
1515 1517          ip_fanout_v6(mp, ip6h, ira);
1516 1518  }
1517 1519  
1518 1520  /*
1519 1521   * Handle multiple zones which want to receive the same multicast packets
1520 1522   * on this ill by delivering a packet to each of them.
1521 1523   *
1522 1524   * Note that for packets delivered to transports we could instead do this
1523 1525   * as part of the fanout code, but since we need to handle icmp_inbound
1524 1526   * it is simpler to have multicast work the same as IPv4 broadcast.
1525 1527   *
1526 1528   * The ip_fanout matching for multicast matches based on ilm independent of
1527 1529   * zoneid since the zoneid restriction is applied when joining a multicast
1528 1530   * group.
1529 1531   */
1530 1532  /* ARGSUSED */
1531 1533  static void
1532 1534  ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1533 1535  {
1534 1536          ill_t           *ill = ira->ira_ill;
1535 1537          iaflags_t       iraflags = ira->ira_flags;
1536 1538          ip_stack_t      *ipst = ill->ill_ipst;
1537 1539          netstack_t      *ns = ipst->ips_netstack;
1538 1540          zoneid_t        zoneid;
1539 1541          mblk_t          *mp1;
1540 1542          ip6_t           *ip6h1;
1541 1543          uint_t          ira_pktlen = ira->ira_pktlen;
1542 1544          uint16_t        ira_ip_hdr_length = ira->ira_ip_hdr_length;
1543 1545  
1544 1546          /* ire_recv_multicast has switched to the upper ill for IPMP */
1545 1547          ASSERT(!IS_UNDER_IPMP(ill));
1546 1548  
1547 1549          /*
1548 1550           * If we don't have more than one shared-IP zone, or if
1549 1551           * there are no members in anything but the global zone,
1550 1552           * then just set the zoneid and proceed.
1551 1553           */
1552 1554          if (ns->netstack_numzones == 1 ||
1553 1555              !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst,
1554 1556              GLOBAL_ZONEID)) {
1555 1557                  ira->ira_zoneid = GLOBAL_ZONEID;
1556 1558  
1557 1559                  /* If sender didn't want this zone to receive it, drop */
1558 1560                  if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1559 1561                      ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1560 1562                          ip_drop_input("Multicast but wrong zoneid", mp, ill);
1561 1563                          freemsg(mp);
1562 1564                          return;
1563 1565                  }
1564 1566                  ip_fanout_v6(mp, ip6h, ira);
1565 1567                  return;
1566 1568          }
1567 1569  
1568 1570          /*
1569 1571           * Here we loop over all zoneids that have members in the group
1570 1572           * and deliver a packet to ip_fanout for each zoneid.
1571 1573           *
1572 1574           * First find any members in the lowest numeric zoneid by looking for
1573 1575           * first zoneid larger than -1 (ALL_ZONES).
1574 1576           * We terminate the loop when we receive -1 (ALL_ZONES).
1575 1577           */
1576 1578          zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES);
1577 1579          for (; zoneid != ALL_ZONES;
1578 1580              zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) {
1579 1581                  /*
1580 1582                   * Avoid an extra copymsg/freemsg by skipping global zone here
1581 1583                   * and doing that at the end.
1582 1584                   */
1583 1585                  if (zoneid == GLOBAL_ZONEID)
1584 1586                          continue;
1585 1587  
1586 1588                  ira->ira_zoneid = zoneid;
1587 1589  
1588 1590                  /* If sender didn't want this zone to receive it, skip */
1589 1591                  if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1590 1592                      ira->ira_no_loop_zoneid == ira->ira_zoneid)
1591 1593                          continue;
1592 1594  
1593 1595                  mp1 = copymsg(mp);
1594 1596                  if (mp1 == NULL) {
1595 1597                          /* Failed to deliver to one zone */
1596 1598                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1597 1599                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
1598 1600                          continue;
1599 1601                  }
1600 1602                  ip6h1 = (ip6_t *)mp1->b_rptr;
1601 1603                  ip_fanout_v6(mp1, ip6h1, ira);
1602 1604                  /*
1603 1605                   * IPsec might have modified ira_pktlen and ira_ip_hdr_length
1604 1606                   * so we restore them for a potential next iteration
1605 1607                   */
1606 1608                  ira->ira_pktlen = ira_pktlen;
1607 1609                  ira->ira_ip_hdr_length = ira_ip_hdr_length;
1608 1610          }
1609 1611  
1610 1612          /* Do the main ire */
1611 1613          ira->ira_zoneid = GLOBAL_ZONEID;
1612 1614          /* If sender didn't want this zone to receive it, drop */
1613 1615          if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1614 1616              ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1615 1617                  ip_drop_input("Multicast but wrong zoneid", mp, ill);
1616 1618                  freemsg(mp);
1617 1619          } else {
1618 1620                  ip_fanout_v6(mp, ip6h, ira);
1619 1621          }
1620 1622  }
1621 1623  
1622 1624  
1623 1625  /*
1624 1626   * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions
1625 1627   * is in use. Updates ira_zoneid and ira_flags as a result.
1626 1628   */
1627 1629  static void
1628 1630  ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length,
1629 1631      ip_recv_attr_t *ira)
1630 1632  {
1631 1633          uint16_t        *up;
1632 1634          uint16_t        lport;
1633 1635          zoneid_t        zoneid;
1634 1636  
1635 1637          ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED);
1636 1638  
1637 1639          /*
1638 1640           * If the packet is unlabeled we might allow read-down
1639 1641           * for MAC_EXEMPT. Below we clear this if it is a multi-level
1640 1642           * port (MLP).
1641 1643           * Note that ira_tsl can be NULL here.
1642 1644           */
1643 1645          if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED)
1644 1646                  ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE;
1645 1647  
1646 1648          if (ira->ira_zoneid != ALL_ZONES)
1647 1649                  return;
1648 1650  
1649 1651          ira->ira_flags |= IRAF_TX_SHARED_ADDR;
1650 1652  
1651 1653          up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
1652 1654          switch (protocol) {
1653 1655          case IPPROTO_TCP:
1654 1656          case IPPROTO_SCTP:
1655 1657          case IPPROTO_UDP:
1656 1658                  /* Caller ensures this */
1657 1659                  ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr);
1658 1660  
1659 1661                  /*
1660 1662                   * Only these transports support MLP.
1661 1663                   * We know their destination port numbers is in
1662 1664                   * the same place in the header.
1663 1665                   */
1664 1666                  lport = up[1];
1665 1667  
1666 1668                  /*
1667 1669                   * No need to handle exclusive-stack zones
1668 1670                   * since ALL_ZONES only applies to the shared IP instance.
1669 1671                   */
1670 1672                  zoneid = tsol_mlp_findzone(protocol, lport);
1671 1673                  /*
1672 1674                   * If no shared MLP is found, tsol_mlp_findzone returns
1673 1675                   * ALL_ZONES.  In that case, we assume it's SLP, and
1674 1676                   * search for the zone based on the packet label.
1675 1677                   *
1676 1678                   * If there is such a zone, we prefer to find a
1677 1679                   * connection in it.  Otherwise, we look for a
1678 1680                   * MAC-exempt connection in any zone whose label
1679 1681                   * dominates the default label on the packet.
1680 1682                   */
1681 1683                  if (zoneid == ALL_ZONES)
1682 1684                          zoneid = tsol_attr_to_zoneid(ira);
1683 1685                  else
1684 1686                          ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE;
1685 1687                  break;
1686 1688          default:
1687 1689                  /* Handle shared address for other protocols */
1688 1690                  zoneid = tsol_attr_to_zoneid(ira);
1689 1691                  break;
1690 1692          }
1691 1693          ira->ira_zoneid = zoneid;
1692 1694  }
1693 1695  
1694 1696  /*
1695 1697   * Increment checksum failure statistics
1696 1698   */
1697 1699  static void
1698 1700  ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill)
1699 1701  {
1700 1702          ip_stack_t      *ipst = ill->ill_ipst;
1701 1703  
1702 1704          switch (protocol) {
1703 1705          case IPPROTO_TCP:
1704 1706                  BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
1705 1707  
1706 1708                  if (hck_flags & HCK_FULLCKSUM)
1707 1709                          IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err);
1708 1710                  else if (hck_flags & HCK_PARTIALCKSUM)
1709 1711                          IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err);
1710 1712                  else
1711 1713                          IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err);
1712 1714                  break;
1713 1715          case IPPROTO_UDP:
1714 1716                  BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1715 1717                  if (hck_flags & HCK_FULLCKSUM)
1716 1718                          IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err);
1717 1719                  else if (hck_flags & HCK_PARTIALCKSUM)
1718 1720                          IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err);
1719 1721                  else
1720 1722                          IP6_STAT(ipst, ip6_udp_in_sw_cksum_err);
1721 1723                  break;
1722 1724          case IPPROTO_ICMPV6:
1723 1725                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
1724 1726                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1725 1727                  break;
1726 1728          default:
1727 1729                  ASSERT(0);
1728 1730                  break;
1729 1731          }
1730 1732  }
1731 1733  
1732 1734  /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */
1733 1735  uint32_t
1734 1736  ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira)
1735 1737  {
1736 1738          uint_t          ulp_len;
1737 1739          uint32_t        cksum;
1738 1740          uint8_t         protocol = ira->ira_protocol;
1739 1741          uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1740 1742  
1741 1743  #define iphs    ((uint16_t *)ip6h)
1742 1744  
1743 1745          switch (protocol) {
1744 1746          case IPPROTO_TCP:
1745 1747                  ulp_len = ira->ira_pktlen - ip_hdr_length;
1746 1748  
1747 1749                  /* Protocol and length */
1748 1750                  cksum = htons(ulp_len) + IP_TCP_CSUM_COMP;
1749 1751                  /* IP addresses */
1750 1752                  cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1751 1753                      iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1752 1754                      iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1753 1755                      iphs[16] + iphs[17] + iphs[18] + iphs[19];
1754 1756                  break;
1755 1757  
1756 1758          case IPPROTO_UDP: {
1757 1759                  udpha_t         *udpha;
1758 1760  
1759 1761                  udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1760 1762  
1761 1763                  /* Protocol and length */
1762 1764                  cksum = udpha->uha_length + IP_UDP_CSUM_COMP;
1763 1765                  /* IP addresses */
1764 1766                  cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1765 1767                      iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1766 1768                      iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1767 1769                      iphs[16] + iphs[17] + iphs[18] + iphs[19];
1768 1770                  break;
1769 1771          }
1770 1772          case IPPROTO_ICMPV6:
1771 1773                  ulp_len = ira->ira_pktlen - ip_hdr_length;
1772 1774  
1773 1775                  /* Protocol and length */
1774 1776                  cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP;
1775 1777                  /* IP addresses */
1776 1778                  cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1777 1779                      iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1778 1780                      iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1779 1781                      iphs[16] + iphs[17] + iphs[18] + iphs[19];
1780 1782                  break;
1781 1783          default:
1782 1784                  cksum = 0;
1783 1785                  break;
1784 1786          }
1785 1787  #undef  iphs
1786 1788          return (cksum);
1787 1789  }
1788 1790  
1789 1791  
1790 1792  /*
1791 1793   * Software verification of the ULP checksums.
1792 1794   * Returns B_TRUE if ok.
1793 1795   * Increments statistics of failed.
1794 1796   */
1795 1797  static boolean_t
1796 1798  ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1797 1799  {
1798 1800          ip_stack_t      *ipst = ira->ira_ill->ill_ipst;
1799 1801          uint32_t        cksum;
1800 1802          uint8_t         protocol = ira->ira_protocol;
1801 1803          uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1802 1804  
1803 1805          IP6_STAT(ipst, ip6_in_sw_cksum);
1804 1806  
1805 1807          ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
1806 1808              protocol == IPPROTO_ICMPV6);
1807 1809  
1808 1810          cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1809 1811          cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1810 1812          if (cksum == 0)
1811 1813                  return (B_TRUE);
1812 1814  
1813 1815          ip_input_cksum_err_v6(protocol, 0, ira->ira_ill);
1814 1816          return (B_FALSE);
1815 1817  }
1816 1818  
1817 1819  /*
1818 1820   * Verify the ULP checksums.
1819 1821   * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
1820 1822   * algorithm.
1821 1823   * Increments statistics if failed.
1822 1824   */
1823 1825  static boolean_t
1824 1826  ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h,
1825 1827      ip_recv_attr_t *ira)
1826 1828  {
1827 1829          ill_t           *ill = ira->ira_rill;
1828 1830          uint16_t        hck_flags;
1829 1831          uint32_t        cksum;
1830 1832          mblk_t          *mp1;
1831 1833          uint_t          len;
1832 1834          uint8_t         protocol = ira->ira_protocol;
1833 1835          uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1834 1836  
1835 1837  
1836 1838          switch (protocol) {
1837 1839          case IPPROTO_TCP:
1838 1840          case IPPROTO_ICMPV6:
1839 1841                  break;
1840 1842  
1841 1843          case IPPROTO_UDP: {
1842 1844                  udpha_t         *udpha;
1843 1845  
1844 1846                  udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1845 1847                  /*
1846 1848                   *  Before going through the regular checksum
1847 1849                   *  calculation, make sure the received checksum
1848 1850                   *  is non-zero. RFC 2460 says, a 0x0000 checksum
1849 1851                   *  in a UDP packet (within IPv6 packet) is invalid
1850 1852                   *  and should be replaced by 0xffff. This makes
1851 1853                   *  sense as regular checksum calculation will
1852 1854                   *  pass for both the cases i.e. 0x0000 and 0xffff.
1853 1855                   *  Removing one of the case makes error detection
1854 1856                   *  stronger.
1855 1857                   */
1856 1858                  if (udpha->uha_checksum == 0) {
1857 1859                          /* 0x0000 checksum is invalid */
1858 1860                          BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1859 1861                          return (B_FALSE);
1860 1862                  }
1861 1863                  break;
1862 1864          }
1863 1865          case IPPROTO_SCTP: {
1864 1866                  sctp_hdr_t      *sctph;
1865 1867                  uint32_t        pktsum;
1866 1868  
1867 1869                  sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length);
1868 1870  #ifdef  DEBUG
1869 1871                  if (skip_sctp_cksum)
1870 1872                          return (B_TRUE);
1871 1873  #endif
1872 1874                  pktsum = sctph->sh_chksum;
1873 1875                  sctph->sh_chksum = 0;
1874 1876                  cksum = sctp_cksum(mp, ip_hdr_length);
1875 1877                  sctph->sh_chksum = pktsum;
1876 1878                  if (cksum == pktsum)
1877 1879                          return (B_TRUE);
1878 1880  
1879 1881                  /*
1880 1882                   * Defer until later whether a bad checksum is ok
1881 1883                   * in order to allow RAW sockets to use Adler checksum
1882 1884                   * with SCTP.
1883 1885                   */
1884 1886                  ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
1885 1887                  return (B_TRUE);
1886 1888          }
1887 1889  
1888 1890          default:
1889 1891                  /* No ULP checksum to verify. */
1890 1892                  return (B_TRUE);
1891 1893          }
1892 1894  
1893 1895          /*
1894 1896           * Revert to software checksum calculation if the interface
1895 1897           * isn't capable of checksum offload.
1896 1898           * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
1897 1899           * Note: IRAF_NO_HW_CKSUM is not currently used.
1898 1900           */
1899 1901          ASSERT(!IS_IPMP(ill));
1900 1902          if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1901 1903              !dohwcksum) {
1902 1904                  return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1903 1905          }
1904 1906  
1905 1907          /*
1906 1908           * We apply this for all ULP protocols. Does the HW know to
1907 1909           * not set the flags for SCTP and other protocols.
1908 1910           */
1909 1911  
1910 1912          hck_flags = DB_CKSUMFLAGS(mp);
1911 1913  
1912 1914          if (hck_flags & HCK_FULLCKSUM_OK) {
1913 1915                  /*
1914 1916                   * Hardware has already verified the checksum.
1915 1917                   */
1916 1918                  return (B_TRUE);
1917 1919          }
1918 1920  
1919 1921          if (hck_flags & HCK_FULLCKSUM) {
1920 1922                  /*
1921 1923                   * Full checksum has been computed by the hardware
1922 1924                   * and has been attached.  If the driver wants us to
1923 1925                   * verify the correctness of the attached value, in
1924 1926                   * order to protect against faulty hardware, compare
1925 1927                   * it against -0 (0xFFFF) to see if it's valid.
1926 1928                   */
1927 1929                  cksum = DB_CKSUM16(mp);
1928 1930                  if (cksum == 0xFFFF)
1929 1931                          return (B_TRUE);
1930 1932                  ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1931 1933                  return (B_FALSE);
1932 1934          }
1933 1935  
1934 1936          mp1 = mp->b_cont;
1935 1937          if ((hck_flags & HCK_PARTIALCKSUM) &&
1936 1938              (mp1 == NULL || mp1->b_cont == NULL) &&
1937 1939              ip_hdr_length >= DB_CKSUMSTART(mp) &&
1938 1940              ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
1939 1941                  uint32_t        adj;
1940 1942                  uchar_t         *cksum_start;
1941 1943  
1942 1944                  cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1943 1945  
1944 1946                  cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp));
1945 1947  
1946 1948                  /*
1947 1949                   * Partial checksum has been calculated by hardware
1948 1950                   * and attached to the packet; in addition, any
1949 1951                   * prepended extraneous data is even byte aligned,
1950 1952                   * and there are at most two mblks associated with
1951 1953                   * the packet.  If any such data exists, we adjust
1952 1954                   * the checksum; also take care any postpended data.
1953 1955                   */
1954 1956                  IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
1955 1957                  /*
1956 1958                   * One's complement subtract extraneous checksum
1957 1959                   */
1958 1960                  cksum += DB_CKSUM16(mp);
1959 1961                  if (adj >= cksum)
1960 1962                          cksum = ~(adj - cksum) & 0xFFFF;
1961 1963                  else
1962 1964                          cksum -= adj;
1963 1965                  cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1964 1966                  cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1965 1967                  if (!(~cksum & 0xFFFF))
1966 1968                          return (B_TRUE);
1967 1969  
1968 1970                  ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1969 1971                  return (B_FALSE);
1970 1972          }
1971 1973          return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1972 1974  }
1973 1975  
1974 1976  
1975 1977  /*
1976 1978   * Handle fanout of received packets.
1977 1979   * Unicast packets that are looped back (from ire_send_local_v6) and packets
1978 1980   * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
1979 1981   *
1980 1982   * IPQoS Notes
1981 1983   * Before sending it to the client, invoke IPPF processing. Policy processing
1982 1984   * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
1983 1985   */
1984 1986  void
1985 1987  ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1986 1988  {
1987 1989          ill_t           *ill = ira->ira_ill;
1988 1990          iaflags_t       iraflags = ira->ira_flags;
1989 1991          ip_stack_t      *ipst = ill->ill_ipst;
1990 1992          uint8_t         protocol;
1991 1993          conn_t          *connp;
1992 1994  #define rptr    ((uchar_t *)ip6h)
1993 1995          uint_t          ip_hdr_length;
1994 1996          uint_t          min_ulp_header_length;
1995 1997          int             offset;
1996 1998          ssize_t         len;
1997 1999          netstack_t      *ns = ipst->ips_netstack;
1998 2000          ipsec_stack_t   *ipss = ns->netstack_ipsec;
1999 2001          ill_t           *rill = ira->ira_rill;
2000 2002  
2001 2003          ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
2002 2004  
2003 2005          /*
2004 2006           * We repeat this as we parse over destination options header and
2005 2007           * fragment headers (earlier we've handled any hop-by-hop options
2006 2008           * header.)
2007 2009           * We update ira_protocol and ira_ip_hdr_length as we skip past
2008 2010           * the intermediate headers; they already point past any
2009 2011           * hop-by-hop header.
2010 2012           */
2011 2013  repeat:
2012 2014          protocol = ira->ira_protocol;
2013 2015          ip_hdr_length = ira->ira_ip_hdr_length;
2014 2016  
2015 2017          /*
2016 2018           * Time for IPP once we've done reassembly and IPsec.
2017 2019           * We skip this for loopback packets since we don't do IPQoS
2018 2020           * on loopback.
2019 2021           */
2020 2022          if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
2021 2023              !(iraflags & IRAF_LOOPBACK) &&
2022 2024              (protocol != IPPROTO_ESP || protocol != IPPROTO_AH ||
2023 2025              protocol != IPPROTO_DSTOPTS || protocol != IPPROTO_ROUTING ||
2024 2026              protocol != IPPROTO_FRAGMENT)) {
2025 2027                  /*
2026 2028                   * Use the interface on which the packet arrived - not where
2027 2029                   * the IP address is hosted.
2028 2030                   */
2029 2031                  /* ip_process translates an IS_UNDER_IPMP */
2030 2032                  mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
2031 2033                  if (mp == NULL) {
2032 2034                          /* ip_drop_packet and MIB done */
2033 2035                          return;
2034 2036                  }
2035 2037          }
2036 2038  
2037 2039          /* Determine the minimum required size of the upper-layer header */
2038 2040          /* Need to do this for at least the set of ULPs that TX handles. */
2039 2041          switch (protocol) {
2040 2042          case IPPROTO_TCP:
2041 2043                  min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
2042 2044                  break;
2043 2045          case IPPROTO_SCTP:
2044 2046                  min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
2045 2047                  break;
2046 2048          case IPPROTO_UDP:
2047 2049                  min_ulp_header_length = UDPH_SIZE;
2048 2050                  break;
2049 2051          case IPPROTO_ICMP:
2050 2052          case IPPROTO_ICMPV6:
2051 2053                  min_ulp_header_length = ICMPH_SIZE;
2052 2054                  break;
2053 2055          case IPPROTO_FRAGMENT:
2054 2056          case IPPROTO_DSTOPTS:
2055 2057          case IPPROTO_ROUTING:
2056 2058                  min_ulp_header_length = MIN_EHDR_LEN;
2057 2059                  break;
2058 2060          default:
2059 2061                  min_ulp_header_length = 0;
2060 2062                  break;
2061 2063          }
2062 2064          /* Make sure we have the min ULP header length */
2063 2065          len = mp->b_wptr - rptr;
2064 2066          if (len < ip_hdr_length + min_ulp_header_length) {
2065 2067                  if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length)
2066 2068                          goto pkt_too_short;
2067 2069  
2068 2070                  IP6_STAT(ipst, ip6_recv_pullup);
2069 2071                  ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
2070 2072                      ira);
2071 2073                  if (ip6h == NULL)
2072 2074                          goto discard;
2073 2075                  len = mp->b_wptr - rptr;
2074 2076          }
2075 2077  
2076 2078          /*
2077 2079           * If trusted extensions then determine the zoneid and TX specific
2078 2080           * ira_flags.
2079 2081           */
2080 2082          if (iraflags & IRAF_SYSTEM_LABELED) {
2081 2083                  /* This can update ira->ira_flags and ira->ira_zoneid */
2082 2084                  ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira);
2083 2085                  iraflags = ira->ira_flags;
2084 2086          }
2085 2087  
2086 2088  
2087 2089          /* Verify ULP checksum. Handles TCP, UDP, and SCTP */
2088 2090          if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
2089 2091                  if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) {
2090 2092                          /* Bad checksum. Stats are already incremented */
2091 2093                          ip_drop_input("Bad ULP checksum", mp, ill);
2092 2094                          freemsg(mp);
2093 2095                          return;
2094 2096                  }
2095 2097                  /* IRAF_SCTP_CSUM_ERR could have been set */
2096 2098                  iraflags = ira->ira_flags;
2097 2099          }
2098 2100          switch (protocol) {
2099 2101          case IPPROTO_TCP:
2100 2102                  /* For TCP, discard multicast packets. */
2101 2103                  if (iraflags & IRAF_MULTIBROADCAST)
2102 2104                          goto discard;
2103 2105  
2104 2106                  /* First mblk contains IP+TCP headers per above check */
2105 2107                  ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);
2106 2108  
2107 2109                  /* TCP options present? */
2108 2110                  offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4;
2109 2111                  if (offset != 5) {
2110 2112                          if (offset < 5)
2111 2113                                  goto discard;
2112 2114  
2113 2115                          /*
2114 2116                           * There must be TCP options.
2115 2117                           * Make sure we can grab them.
2116 2118                           */
2117 2119                          offset <<= 2;
2118 2120                          offset += ip_hdr_length;
2119 2121                          if (len < offset) {
2120 2122                                  if (ira->ira_pktlen < offset)
2121 2123                                          goto pkt_too_short;
2122 2124  
2123 2125                                  IP6_STAT(ipst, ip6_recv_pullup);
2124 2126                                  ip6h = ip_pullup(mp, offset, ira);
2125 2127                                  if (ip6h == NULL)
2126 2128                                          goto discard;
2127 2129                                  len = mp->b_wptr - rptr;
2128 2130                          }
2129 2131                  }
2130 2132  
2131 2133                  /*
2132 2134                   * Pass up a squeue hint to tcp.
2133 2135                   * If ira_sqp is already set (this is loopback) we leave it
2134 2136                   * alone.
2135 2137                   */
2136 2138                  if (ira->ira_sqp == NULL) {
2137 2139                          ira->ira_sqp = ip_squeue_get(ira->ira_ring);
2138 2140                  }
2139 2141  
2140 2142                  /* Look for AF_INET or AF_INET6 that matches */
2141 2143                  connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length,
2142 2144                      ira, ipst);
2143 2145                  if (connp == NULL) {
2144 2146                          /* Send the TH_RST */
2145 2147                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2146 2148                          tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2147 2149                          return;
2148 2150                  }
2149 2151                  if (connp->conn_incoming_ifindex != 0 &&
2150 2152                      connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2151 2153                          CONN_DEC_REF(connp);
2152 2154  
2153 2155                          /* Send the TH_RST */
2154 2156                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2155 2157                          tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2156 2158                          return;
2157 2159                  }
2158 2160                  if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2159 2161                      (iraflags & IRAF_IPSEC_SECURE)) {
2160 2162                          mp = ipsec_check_inbound_policy(mp, connp,
2161 2163                              NULL, ip6h, ira);
2162 2164                          if (mp == NULL) {
2163 2165                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2164 2166                                  /* Note that mp is NULL */
2165 2167                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
2166 2168                                  CONN_DEC_REF(connp);
2167 2169                                  return;
2168 2170                          }
2169 2171                  }
2170 2172                  /* Found a client; up it goes */
2171 2173                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2172 2174                  ira->ira_ill = ira->ira_rill = NULL;
2173 2175                  if (!IPCL_IS_TCP(connp)) {
2174 2176                          /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
2175 2177                          (connp->conn_recv)(connp, mp, NULL, ira);
2176 2178                          CONN_DEC_REF(connp);
2177 2179                          ira->ira_ill = ill;
2178 2180                          ira->ira_rill = rill;
2179 2181                          return;
2180 2182                  }
2181 2183  
2182 2184                  /*
2183 2185                   * We do different processing whether called from
2184 2186                   * ip_accept_tcp and we match the target, don't match
2185 2187                   * the target, and when we are called by ip_input.
2186 2188                   */
2187 2189                  if (iraflags & IRAF_TARGET_SQP) {
2188 2190                          if (ira->ira_target_sqp == connp->conn_sqp) {
2189 2191                                  mblk_t  *attrmp;
2190 2192  
2191 2193                                  attrmp = ip_recv_attr_to_mblk(ira);
2192 2194                                  if (attrmp == NULL) {
2193 2195                                          BUMP_MIB(ill->ill_ip_mib,
2194 2196                                              ipIfStatsInDiscards);
2195 2197                                          ip_drop_input("ipIfStatsInDiscards",
2196 2198                                              mp, ill);
2197 2199                                          freemsg(mp);
2198 2200                                          CONN_DEC_REF(connp);
2199 2201                                  } else {
2200 2202                                          SET_SQUEUE(attrmp, connp->conn_recv,
2201 2203                                              connp);
2202 2204                                          attrmp->b_cont = mp;
2203 2205                                          ASSERT(ira->ira_target_sqp_mp == NULL);
2204 2206                                          ira->ira_target_sqp_mp = attrmp;
2205 2207                                          /*
2206 2208                                           * Conn ref release when drained from
2207 2209                                           * the squeue.
2208 2210                                           */
2209 2211                                  }
2210 2212                          } else {
2211 2213                                  SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2212 2214                                      connp->conn_recv, connp, ira, SQ_FILL,
2213 2215                                      SQTAG_IP6_TCP_INPUT);
2214 2216                          }
2215 2217                  } else {
2216 2218                          SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
2217 2219                              connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
2218 2220                  }
2219 2221                  ira->ira_ill = ill;
2220 2222                  ira->ira_rill = rill;
2221 2223                  return;
2222 2224  
2223 2225          case IPPROTO_SCTP: {
2224 2226                  sctp_hdr_t      *sctph;
2225 2227                  uint32_t        ports;  /* Source and destination ports */
2226 2228                  sctp_stack_t    *sctps = ipst->ips_netstack->netstack_sctp;
2227 2229  
2228 2230                  /* For SCTP, discard multicast packets. */
2229 2231                  if (iraflags & IRAF_MULTIBROADCAST)
2230 2232                          goto discard;
2231 2233  
2232 2234                  /*
2233 2235                   * Since there is no SCTP h/w cksum support yet, just
2234 2236                   * clear the flag.
2235 2237                   */
2236 2238                  DB_CKSUMFLAGS(mp) = 0;
2237 2239  
2238 2240                  /* Length ensured above */
2239 2241                  ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
2240 2242                  sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);
2241 2243  
2242 2244                  /* get the ports */
2243 2245                  ports = *(uint32_t *)&sctph->sh_sport;
2244 2246  
2245 2247                  if (iraflags & IRAF_SCTP_CSUM_ERR) {
2246 2248                          /*
2247 2249                           * No potential sctp checksum errors go to the Sun
2248 2250                           * sctp stack however they might be Adler-32 summed
2249 2251                           * packets a userland stack bound to a raw IP socket
2250 2252                           * could reasonably use. Note though that Adler-32 is
2251 2253                           * a long deprecated algorithm and customer sctp
2252 2254                           * networks should eventually migrate to CRC-32 at
2253 2255                           * which time this facility should be removed.
2254 2256                           */
2255 2257                          ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2256 2258                          return;
2257 2259                  }
2258 2260                  connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports,
2259 2261                      ira, mp, sctps, sctph);
2260 2262                  if (connp == NULL) {
2261 2263                          /* Check for raw socket or OOTB handling */
2262 2264                          ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2263 2265                          return;
2264 2266                  }
2265 2267                  if (connp->conn_incoming_ifindex != 0 &&
2266 2268                      connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2267 2269                          CONN_DEC_REF(connp);
2268 2270  
2269 2271                          /* Check for raw socket or OOTB handling */
2270 2272                          ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2271 2273                          return;
2272 2274                  }
2273 2275  
2274 2276                  /* Found a client; up it goes */
2275 2277                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2276 2278                  sctp_input(connp, NULL, ip6h, mp, ira);
2277 2279                  /* sctp_input does a rele of the sctp_t */
2278 2280                  return;
2279 2281          }
2280 2282  
2281 2283          case IPPROTO_UDP:
2282 2284                  /* First mblk contains IP+UDP headers as checked above */
2283 2285                  ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);
2284 2286  
2285 2287                  if (iraflags & IRAF_MULTIBROADCAST) {
2286 2288                          uint16_t *up;   /* Pointer to ports in ULP header */
2287 2289  
2288 2290                          up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
2289 2291  
2290 2292                          ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira);
2291 2293                          return;
2292 2294                  }
2293 2295  
2294 2296                  /* Look for AF_INET or AF_INET6 that matches */
2295 2297                  connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length,
2296 2298                      ira, ipst);
2297 2299                  if (connp == NULL) {
2298 2300          no_udp_match:
2299 2301                          if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].
2300 2302                              connf_head != NULL) {
2301 2303                                  ASSERT(ira->ira_protocol == IPPROTO_UDP);
2302 2304                                  ip_fanout_proto_v6(mp, ip6h, ira);
2303 2305                          } else {
2304 2306                                  ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2305 2307                                      ICMP6_DST_UNREACH_NOPORT, ira);
2306 2308                          }
2307 2309                          return;
2308 2310  
2309 2311                  }
2310 2312                  if (connp->conn_incoming_ifindex != 0 &&
2311 2313                      connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2312 2314                          CONN_DEC_REF(connp);
2313 2315                          goto no_udp_match;
2314 2316                  }
2315 2317                  if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
2316 2318                      !canputnext(connp->conn_rq)) {
2317 2319                          CONN_DEC_REF(connp);
2318 2320                          BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
2319 2321                          ip_drop_input("udpIfStatsInOverflows", mp, ill);
2320 2322                          freemsg(mp);
2321 2323                          return;
2322 2324                  }
2323 2325                  if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2324 2326                      (iraflags & IRAF_IPSEC_SECURE)) {
2325 2327                          mp = ipsec_check_inbound_policy(mp, connp,
2326 2328                              NULL, ip6h, ira);
2327 2329                          if (mp == NULL) {
2328 2330                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2329 2331                                  /* Note that mp is NULL */
2330 2332                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
2331 2333                                  CONN_DEC_REF(connp);
2332 2334                                  return;
2333 2335                          }
2334 2336                  }
2335 2337  
2336 2338                  /* Found a client; up it goes */
2337 2339                  IP6_STAT(ipst, ip6_udp_fannorm);
2338 2340                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2339 2341                  ira->ira_ill = ira->ira_rill = NULL;
2340 2342                  (connp->conn_recv)(connp, mp, NULL, ira);
2341 2343                  CONN_DEC_REF(connp);
2342 2344                  ira->ira_ill = ill;
2343 2345                  ira->ira_rill = rill;
2344 2346                  return;
2345 2347          default:
2346 2348                  break;
2347 2349          }
2348 2350  
2349 2351          /*
2350 2352           * Clear hardware checksumming flag as it is currently only
2351 2353           * used by TCP and UDP.
2352 2354           */
2353 2355          DB_CKSUMFLAGS(mp) = 0;
2354 2356  
2355 2357          switch (protocol) {
2356 2358          case IPPROTO_ICMPV6:
2357 2359                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
2358 2360  
2359 2361                  /* Check variable for testing applications */
2360 2362                  if (ipst->ips_ipv6_drop_inbound_icmpv6) {
2361 2363                          ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill);
2362 2364                          freemsg(mp);
2363 2365                          return;
2364 2366                  }
2365 2367                  /*
2366 2368                   * We need to accommodate icmp messages coming in clear
2367 2369                   * until we get everything secure from the wire. If
2368 2370                   * icmp_accept_clear_messages is zero we check with
2369 2371                   * the global policy and act accordingly. If it is
2370 2372                   * non-zero, we accept the message without any checks.
2371 2373                   * But *this does not mean* that this will be delivered
2372 2374                   * to RAW socket clients. By accepting we might send
2373 2375                   * replies back, change our MTU value etc.,
2374 2376                   * but delivery to the ULP/clients depends on their
2375 2377                   * policy dispositions.
2376 2378                   */
2377 2379                  if (ipst->ips_icmp_accept_clear_messages == 0) {
2378 2380                          mp = ipsec_check_global_policy(mp, NULL,
2379 2381                              NULL, ip6h, ira, ns);
2380 2382                          if (mp == NULL)
2381 2383                                  return;
2382 2384                  }
2383 2385  
2384 2386                  /*
2385 2387                   * On a labeled system, we have to check whether the zone
2386 2388                   * itself is permitted to receive raw traffic.
2387 2389                   */
2388 2390                  if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2389 2391                          if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2390 2392                                  BUMP_MIB(ill->ill_icmp6_mib,
2391 2393                                      ipv6IfIcmpInErrors);
2392 2394                                  ip_drop_input("tsol_can_accept_raw", mp, ill);
2393 2395                                  freemsg(mp);
2394 2396                                  return;
2395 2397                          }
2396 2398                  }
2397 2399  
2398 2400                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2399 2401                  mp = icmp_inbound_v6(mp, ira);
2400 2402                  if (mp == NULL) {
2401 2403                          /* No need to pass to RAW sockets */
2402 2404                          return;
2403 2405                  }
2404 2406                  break;
2405 2407  
2406 2408          case IPPROTO_DSTOPTS: {
2407 2409                  ip6_dest_t      *desthdr;
2408 2410                  uint_t          ehdrlen;
2409 2411                  uint8_t         *optptr;
2410 2412  
2411 2413                  /* We already check for MIN_EHDR_LEN above */
2412 2414  
2413 2415                  /* Check if AH is present and needs to be processed. */
2414 2416                  mp = ipsec_early_ah_v6(mp, ira);
2415 2417                  if (mp == NULL)
2416 2418                          return;
2417 2419  
2418 2420                  /*
2419 2421                   * Reinitialize pointers, as ipsec_early_ah_v6() does
2420 2422                   * complete pullups.  We don't have to do more pullups
2421 2423                   * as a result.
2422 2424                   */
2423 2425                  ip6h = (ip6_t *)mp->b_rptr;
2424 2426  
2425 2427                  if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2426 2428                          goto pkt_too_short;
2427 2429  
2428 2430                  if (mp->b_cont != NULL &&
2429 2431                      rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2430 2432                          ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2431 2433                          if (ip6h == NULL)
2432 2434                                  goto discard;
2433 2435                  }
2434 2436                  desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2435 2437                  ehdrlen = 8 * (desthdr->ip6d_len + 1);
2436 2438                  if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2437 2439                          goto pkt_too_short;
2438 2440                  if (mp->b_cont != NULL &&
2439 2441                      rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2440 2442                          ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2441 2443                          if (ip6h == NULL)
2442 2444                                  goto discard;
2443 2445  
2444 2446                          desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2445 2447                  }
2446 2448                  optptr = (uint8_t *)&desthdr[1];
2447 2449  
2448 2450                  /*
2449 2451                   * Update ira_ip_hdr_length to skip the destination header
2450 2452                   * when we repeat.
2451 2453                   */
2452 2454                  ira->ira_ip_hdr_length += ehdrlen;
2453 2455  
2454 2456                  ira->ira_protocol = desthdr->ip6d_nxt;
2455 2457  
2456 2458                  /*
2457 2459                   * Note: XXX This code does not seem to make
2458 2460                   * distinction between Destination Options Header
2459 2461                   * being before/after Routing Header which can
2460 2462                   * happen if we are at the end of source route.
2461 2463                   * This may become significant in future.
2462 2464                   * (No real significant Destination Options are
2463 2465                   * defined/implemented yet ).
2464 2466                   */
2465 2467                  switch (ip_process_options_v6(mp, ip6h, optptr,
2466 2468                      ehdrlen - 2, IPPROTO_DSTOPTS, ira)) {
2467 2469                  case -1:
2468 2470                          /*
2469 2471                           * Packet has been consumed and any needed
2470 2472                           * ICMP errors sent.
2471 2473                           */
2472 2474                          return;
2473 2475                  case 0:
2474 2476                          /* No action needed  continue */
2475 2477                          break;
2476 2478                  case 1:
2477 2479                          /*
2478 2480                           * Unexpected return value
2479 2481                           * (Router alert is a Hop-by-Hop option)
2480 2482                           */
2481 2483  #ifdef DEBUG
2482 2484                          panic("ip_fanout_v6: router "
2483 2485                              "alert hbh opt indication in dest opt");
2484 2486                          /*NOTREACHED*/
2485 2487  #else
2486 2488                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2487 2489                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
2488 2490                          freemsg(mp);
2489 2491                          return;
2490 2492  #endif
2491 2493                  }
2492 2494                  goto repeat;
2493 2495          }
2494 2496          case IPPROTO_FRAGMENT: {
2495 2497                  ip6_frag_t *fraghdr;
2496 2498  
2497 2499                  if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t))
2498 2500                          goto pkt_too_short;
2499 2501  
2500 2502                  if (mp->b_cont != NULL &&
2501 2503                      rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) {
2502 2504                          ip6h = ip_pullup(mp,
2503 2505                              ip_hdr_length + sizeof (ip6_frag_t), ira);
2504 2506                          if (ip6h == NULL)
2505 2507                                  goto discard;
2506 2508                  }
2507 2509  
2508 2510                  fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length);
2509 2511                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds);
2510 2512  
2511 2513                  /*
2512 2514                   * Invoke the CGTP (multirouting) filtering module to
2513 2515                   * process the incoming packet. Packets identified as
2514 2516                   * duplicates must be discarded. Filtering is active
2515 2517                   * only if the ip_cgtp_filter ndd variable is
2516 2518                   * non-zero.
2517 2519                   */
2518 2520                  if (ipst->ips_ip_cgtp_filter &&
2519 2521                      ipst->ips_ip_cgtp_filter_ops != NULL) {
2520 2522                          int cgtp_flt_pkt;
2521 2523                          netstackid_t stackid;
2522 2524  
2523 2525                          stackid = ipst->ips_netstack->netstack_stackid;
2524 2526  
2525 2527                          /*
2526 2528                           * CGTP and IPMP are mutually exclusive so
2527 2529                           * phyint_ifindex is fine here.
2528 2530                           */
2529 2531                          cgtp_flt_pkt =
2530 2532                              ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6(
2531 2533                              stackid, ill->ill_phyint->phyint_ifindex,
2532 2534                              ip6h, fraghdr);
2533 2535                          if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
2534 2536                                  ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
2535 2537                                  freemsg(mp);
2536 2538                                  return;
2537 2539                          }
2538 2540                  }
2539 2541  
2540 2542                  /*
2541 2543                   * Update ip_hdr_length to skip the frag header
2542 2544                   * ip_input_fragment_v6 will determine the extension header
2543 2545                   * prior to the fragment header and update its nexthdr value,
2544 2546                   * and also set ira_protocol to the nexthdr that follows the
2545 2547                   * completed fragment.
2546 2548                   */
2547 2549                  ip_hdr_length += sizeof (ip6_frag_t);
2548 2550  
2549 2551                  /*
2550 2552                   * Make sure we have ira_l2src before we lose the original
2551 2553                   * mblk
2552 2554                   */
2553 2555                  if (!(ira->ira_flags & IRAF_L2SRC_SET))
2554 2556                          ip_setl2src(mp, ira, ira->ira_rill);
2555 2557  
2556 2558                  mp = ip_input_fragment_v6(mp, ip6h, fraghdr,
2557 2559                      ira->ira_pktlen - ip_hdr_length, ira);
2558 2560                  if (mp == NULL) {
2559 2561                          /* Reassembly is still pending */
2560 2562                          return;
2561 2563                  }
2562 2564                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs);
2563 2565  
2564 2566                  /*
2565 2567                   * The mblk chain has the frag header removed and
2566 2568                   * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the
2567 2569                   * IP header has been updated to reflect the result.
2568 2570                   */
2569 2571                  ip6h = (ip6_t *)mp->b_rptr;
2570 2572                  ip_hdr_length = ira->ira_ip_hdr_length;
2571 2573                  goto repeat;
2572 2574          }
2573 2575          case IPPROTO_HOPOPTS:
2574 2576                  /*
2575 2577                   * Illegal header sequence.
2576 2578                   * (Hop-by-hop headers are processed above
2577 2579                   *  and required to immediately follow IPv6 header)
2578 2580                   */
2579 2581                  ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2580 2582                  icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2581 2583                  return;
2582 2584  
2583 2585          case IPPROTO_ROUTING: {
2584 2586                  uint_t ehdrlen;
2585 2587                  ip6_rthdr_t *rthdr;
2586 2588  
2587 2589                  /* Check if AH is present and needs to be processed. */
2588 2590                  mp = ipsec_early_ah_v6(mp, ira);
2589 2591                  if (mp == NULL)
2590 2592                          return;
2591 2593  
2592 2594                  /*
2593 2595                   * Reinitialize pointers, as ipsec_early_ah_v6() does
2594 2596                   * complete pullups.  We don't have to do more pullups
2595 2597                   * as a result.
2596 2598                   */
2597 2599                  ip6h = (ip6_t *)mp->b_rptr;
2598 2600  
2599 2601                  if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2600 2602                          goto pkt_too_short;
2601 2603  
2602 2604                  if (mp->b_cont != NULL &&
2603 2605                      rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2604 2606                          ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2605 2607                          if (ip6h == NULL)
2606 2608                                  goto discard;
2607 2609                  }
2608 2610                  rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2609 2611                  protocol = ira->ira_protocol = rthdr->ip6r_nxt;
2610 2612                  ehdrlen = 8 * (rthdr->ip6r_len + 1);
2611 2613                  if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2612 2614                          goto pkt_too_short;
2613 2615                  if (mp->b_cont != NULL &&
2614 2616                      rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2615 2617                          ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2616 2618                          if (ip6h == NULL)
2617 2619                                  goto discard;
2618 2620                          rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2619 2621                  }
2620 2622                  if (rthdr->ip6r_segleft != 0) {
2621 2623                          /* Not end of source route */
2622 2624                          if (ira->ira_flags &
2623 2625                              (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
2624 2626                                  BUMP_MIB(ill->ill_ip_mib,
2625 2627                                      ipIfStatsForwProhibits);
2626 2628                                  ip_drop_input("ipIfStatsInForwProhibits",
2627 2629                                      mp, ill);
2628 2630                                  freemsg(mp);
2629 2631                                  return;
2630 2632                          }
2631 2633                          ip_process_rthdr(mp, ip6h, rthdr, ira);
2632 2634                          return;
2633 2635                  }
2634 2636                  ira->ira_ip_hdr_length += ehdrlen;
2635 2637                  goto repeat;
2636 2638          }
2637 2639  
2638 2640          case IPPROTO_AH:
2639 2641          case IPPROTO_ESP: {
2640 2642                  /*
2641 2643                   * Fast path for AH/ESP.
2642 2644                   */
2643 2645                  netstack_t *ns = ipst->ips_netstack;
2644 2646                  ipsec_stack_t *ipss = ns->netstack_ipsec;
2645 2647  
2646 2648                  IP_STAT(ipst, ipsec_proto_ahesp);
2647 2649  
2648 2650                  if (!ipsec_loaded(ipss)) {
2649 2651                          ip_proto_not_sup(mp, ira);
2650 2652                          return;
2651 2653                  }
2652 2654  
2653 2655                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2654 2656                  /* select inbound SA and have IPsec process the pkt */
2655 2657                  if (protocol == IPPROTO_ESP) {
2656 2658                          esph_t *esph;
2657 2659  
2658 2660                          mp = ipsec_inbound_esp_sa(mp, ira, &esph);
2659 2661                          if (mp == NULL)
2660 2662                                  return;
2661 2663  
2662 2664                          ASSERT(esph != NULL);
2663 2665                          ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2664 2666                          ASSERT(ira->ira_ipsec_esp_sa != NULL);
2665 2667                          ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);
2666 2668  
2667 2669                          mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
2668 2670                              ira);
2669 2671                  } else {
2670 2672                          ah_t *ah;
2671 2673  
2672 2674                          mp = ipsec_inbound_ah_sa(mp, ira, &ah);
2673 2675                          if (mp == NULL)
2674 2676                                  return;
2675 2677  
2676 2678                          ASSERT(ah != NULL);
2677 2679                          ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2678 2680                          ASSERT(ira->ira_ipsec_ah_sa != NULL);
2679 2681                          ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
2680 2682                          mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
2681 2683                              ira);
2682 2684                  }
2683 2685  
2684 2686                  if (mp == NULL) {
2685 2687                          /*
2686 2688                           * Either it failed or is pending. In the former case
2687 2689                           * ipIfStatsInDiscards was increased.
2688 2690                           */
2689 2691                          return;
2690 2692                  }
2691 2693                  /* we're done with IPsec processing, send it up */
2692 2694                  ip_input_post_ipsec(mp, ira);
2693 2695                  return;
2694 2696          }
2695 2697          case IPPROTO_NONE:
2696 2698                  /* All processing is done. Count as "delivered". */
2697 2699                  freemsg(mp);
2698 2700                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2699 2701                  return;
2700 2702  
2701 2703          case IPPROTO_ENCAP:
2702 2704          case IPPROTO_IPV6:
2703 2705                  /* iptun will verify trusted label */
2704 2706                  connp = ipcl_classify_v6(mp, protocol, ip_hdr_length,
2705 2707                      ira, ipst);
2706 2708                  if (connp != NULL) {
2707 2709                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2708 2710                          ira->ira_ill = ira->ira_rill = NULL;
2709 2711                          connp->conn_recv(connp, mp, NULL, ira);
2710 2712                          CONN_DEC_REF(connp);
2711 2713                          ira->ira_ill = ill;
2712 2714                          ira->ira_rill = rill;
2713 2715                          return;
2714 2716                  }
2715 2717                  /* FALLTHRU */
2716 2718          default:
2717 2719                  /*
2718 2720                   * On a labeled system, we have to check whether the zone
2719 2721                   * itself is permitted to receive raw traffic.
2720 2722                   */
2721 2723                  if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2722 2724                          if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2723 2725                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2724 2726                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
2725 2727                                  freemsg(mp);
2726 2728                                  return;
2727 2729                          }
2728 2730                  }
2729 2731                  break;
2730 2732          }
2731 2733  
2732 2734          /*
2733 2735           * The above input functions may have returned the pulled up message.
2734 2736           * So ip6h needs to be reinitialized.
2735 2737           */
2736 2738          ip6h = (ip6_t *)mp->b_rptr;
2737 2739          ira->ira_protocol = protocol;
2738 2740          if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) {
2739 2741                  /* No user-level listener for these packets */
2740 2742                  ip_proto_not_sup(mp, ira);
2741 2743                  return;
2742 2744          }
2743 2745  
2744 2746          /*
2745 2747           * Handle fanout to raw sockets.  There
2746 2748           * can be more than one stream bound to a particular
2747 2749           * protocol.  When this is the case, each one gets a copy
2748 2750           * of any incoming packets.
2749 2751           */
2750 2752          ASSERT(ira->ira_protocol == protocol);
2751 2753          ip_fanout_proto_v6(mp, ip6h, ira);
2752 2754          return;
2753 2755  
2754 2756  pkt_too_short:
2755 2757          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
2756 2758          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
2757 2759          freemsg(mp);
2758 2760          return;
2759 2761  
2760 2762  discard:
2761 2763          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2762 2764          ip_drop_input("ipIfStatsInDiscards", mp, ill);
2763 2765          freemsg(mp);
2764 2766  #undef rptr
2765 2767  }
  
    | 
      ↓ open down ↓ | 
    1970 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX