Print this page
    
8903 IPPF paths have bad logic
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: John Levon <levon@movementarian.org>
Reviewed by: Gergő Doma <domag02@gmail.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ip/ip6_input.c
          +++ new/usr/src/uts/common/inet/ip/ip6_input.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved
  24   24   *
  25   25   * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  26   26   */
  27   27  /* Copyright (c) 1990 Mentat Inc. */
  28   28  
  29   29  #include <sys/types.h>
  30   30  #include <sys/stream.h>
  31   31  #include <sys/dlpi.h>
  32   32  #include <sys/stropts.h>
  33   33  #include <sys/sysmacros.h>
  34   34  #include <sys/strsubr.h>
  35   35  #include <sys/strlog.h>
  36   36  #include <sys/strsun.h>
  37   37  #include <sys/zone.h>
  38   38  #define _SUN_TPI_VERSION 2
  39   39  #include <sys/tihdr.h>
  40   40  #include <sys/xti_inet.h>
  41   41  #include <sys/ddi.h>
  42   42  #include <sys/sunddi.h>
  43   43  #include <sys/cmn_err.h>
  44   44  #include <sys/debug.h>
  45   45  #include <sys/kobj.h>
  46   46  #include <sys/modctl.h>
  47   47  #include <sys/atomic.h>
  48   48  #include <sys/policy.h>
  49   49  #include <sys/priv.h>
  50   50  
  51   51  #include <sys/systm.h>
  52   52  #include <sys/param.h>
  53   53  #include <sys/kmem.h>
  54   54  #include <sys/sdt.h>
  55   55  #include <sys/socket.h>
  56   56  #include <sys/vtrace.h>
  57   57  #include <sys/isa_defs.h>
  58   58  #include <sys/mac.h>
  59   59  #include <net/if.h>
  60   60  #include <net/if_arp.h>
  61   61  #include <net/route.h>
  62   62  #include <sys/sockio.h>
  63   63  #include <netinet/in.h>
  64   64  #include <net/if_dl.h>
  65   65  
  66   66  #include <inet/common.h>
  67   67  #include <inet/mi.h>
  68   68  #include <inet/mib2.h>
  69   69  #include <inet/nd.h>
  70   70  #include <inet/arp.h>
  71   71  #include <inet/snmpcom.h>
  72   72  #include <inet/kstatcom.h>
  73   73  
  74   74  #include <netinet/igmp_var.h>
  75   75  #include <netinet/ip6.h>
  76   76  #include <netinet/icmp6.h>
  77   77  #include <netinet/sctp.h>
  78   78  
  79   79  #include <inet/ip.h>
  80   80  #include <inet/ip_impl.h>
  81   81  #include <inet/ip6.h>
  82   82  #include <inet/ip6_asp.h>
  83   83  #include <inet/optcom.h>
  84   84  #include <inet/tcp.h>
  85   85  #include <inet/tcp_impl.h>
  86   86  #include <inet/ip_multi.h>
  87   87  #include <inet/ip_if.h>
  88   88  #include <inet/ip_ire.h>
  89   89  #include <inet/ip_ftable.h>
  90   90  #include <inet/ip_rts.h>
  91   91  #include <inet/ip_ndp.h>
  92   92  #include <inet/ip_listutils.h>
  93   93  #include <netinet/igmp.h>
  94   94  #include <netinet/ip_mroute.h>
  95   95  #include <inet/ipp_common.h>
  96   96  
  97   97  #include <net/pfkeyv2.h>
  98   98  #include <inet/sadb.h>
  99   99  #include <inet/ipsec_impl.h>
 100  100  #include <inet/ipdrop.h>
 101  101  #include <inet/ip_netinfo.h>
 102  102  #include <inet/ilb_ip.h>
 103  103  #include <sys/squeue_impl.h>
 104  104  #include <sys/squeue.h>
 105  105  
 106  106  #include <sys/ethernet.h>
 107  107  #include <net/if_types.h>
 108  108  #include <sys/cpuvar.h>
 109  109  
 110  110  #include <ipp/ipp.h>
 111  111  #include <ipp/ipp_impl.h>
 112  112  #include <ipp/ipgpc/ipgpc.h>
 113  113  
 114  114  #include <sys/pattr.h>
 115  115  #include <inet/ipclassifier.h>
 116  116  #include <inet/sctp_ip.h>
 117  117  #include <inet/sctp/sctp_impl.h>
 118  118  #include <inet/udp_impl.h>
 119  119  #include <sys/sunddi.h>
 120  120  
 121  121  #include <sys/tsol/label.h>
 122  122  #include <sys/tsol/tnet.h>
 123  123  
 124  124  #include <sys/clock_impl.h>     /* For LBOLT_FASTPATH{,64} */
 125  125  
 126  126  #ifdef  DEBUG
 127  127  extern boolean_t skip_sctp_cksum;
 128  128  #endif  /* DEBUG */
 129  129  
 130  130  static void     ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);
 131  131  
 132  132  static void     ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
 133  133      ip_recv_attr_t *);
 134  134  
 135  135  #pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)
 136  136  
 137  137  /*
 138  138   * Direct read side procedure capable of dealing with chains. GLDv3 based
 139  139   * drivers call this function directly with mblk chains while STREAMS
 140  140   * read side procedure ip_rput() calls this for a single packet with ip_ring
 141  141   * set to NULL to process one packet at a time.
 142  142   *
 143  143   * The ill will always be valid if this function is called directly from
 144  144   * the driver.
 145  145   *
 146  146   * If ip_input_v6() is called from GLDv3:
 147  147   *
 148  148   *   - This must be a non-VLAN IP stream.
 149  149   *   - 'mp' is either an untagged or a special priority-tagged packet.
 150  150   *   - Any VLAN tag that was in the MAC header has been stripped.
 151  151   *
 152  152   * If the IP header in a packet is not 32-bit aligned, every message in the
 153  153   * chain will be aligned before further operations. This is required on the
 154  154   * SPARC platform.
 155  155   */
 156  156  void
 157  157  ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
 158  158      struct mac_header_info_s *mhip)
 159  159  {
 160  160          (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL,
 161  161              NULL);      /* target_sqp, last, cnt: ip_accept_tcp_v6 only */
 162  162  }
 163  163  
 164  164  /*
 165  165   * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
 166  166   * a chain of packets in the poll mode. The packets have gone through the
 167  167   * data link processing but not IP processing. For performance and latency
 168  168   * reasons, the squeue wants to process the chain in line instead of feeding
 169  169   * it back via ip_input path.
 170  170   *
 171  171   * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v6
 172  172   * will pass back any TCP packets matching the target sqp to
 173  173   * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
 174  174   * ip_input_v6 and ip_fanout_v6 as normal.
 175  175   * The TCP packets that match the target squeue are returned to the caller
 176  176   * as a b_next chain after each packet has been prepended with an mblk
 177  177   * from ip_recv_attr_to_mblk.
 178  178   */
 179  179  mblk_t *
 180  180  ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
 181  181      mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
 182  182  {
 183  183          return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp,
 184  184              last, cnt));        /* mhip is NULL: only ip_input_v6 has one */
 185  185  }
 186  186  
 187  187  /*
 188  188   * Common per-packet receive loop used by ip_input_v6 and ip_accept_tcp_v6.
 189  189   * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
 190  190   * only used by ip_input_v6. Returns the accepted b_next chain, or NULL.
 191  191   */
 192  192  mblk_t *
 193  193  ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
 194  194      struct mac_header_info_s *mhip, squeue_t *target_sqp,
 195  195      mblk_t **last, uint_t *cnt)
 196  196  {
 197  197          mblk_t          *mp;
 198  198          ip6_t           *ip6h;
 199  199          ip_recv_attr_t  iras;   /* Receive attributes */
 200  200          rtc_t           rtc;
 201  201          iaflags_t       chain_flags = 0;        /* Fixed for chain */
 202  202          mblk_t          *ahead = NULL;  /* Accepted head */
 203  203          mblk_t          *atail = NULL;  /* Accepted tail */
 204  204          uint_t          acnt = 0;       /* Accepted count */
 205  205  
 206  206          ASSERT(mp_chain != NULL);
 207  207          ASSERT(ill != NULL);
 208  208  
 209  209          /* These ones do not change as we loop over packets */
 210  210          iras.ira_ill = iras.ira_rill = ill;
 211  211          iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
 212  212          iras.ira_rifindex = iras.ira_ruifindex;
 213  213          iras.ira_sqp = NULL;
 214  214          iras.ira_ring = ip_ring;
 215  215          /* For ECMP and outbound transmit ring selection */
 216  216          iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);
 217  217  
 218  218          iras.ira_target_sqp = target_sqp;
 219  219          iras.ira_target_sqp_mp = NULL;
 220  220          if (target_sqp != NULL)
 221  221                  chain_flags |= IRAF_TARGET_SQP;
 222  222  
 223  223          /*
 224  224           * We try to have a mhip pointer when possible, but
 225  225           * it might be NULL in some cases. In those cases we
 226  226           * have to assume unicast.
 227  227           */
 228  228          iras.ira_mhip = mhip;
 229  229          iras.ira_flags = 0;
 230  230          if (mhip != NULL) {
 231  231                  switch (mhip->mhi_dsttype) {
 232  232                  case MAC_ADDRTYPE_MULTICAST :
 233  233                          chain_flags |= IRAF_L2DST_MULTICAST;
 234  234                          break;
 235  235                  case MAC_ADDRTYPE_BROADCAST :
 236  236                          chain_flags |= IRAF_L2DST_BROADCAST;
 237  237                          break;
 238  238                  }
 239  239          }
 240  240  
 241  241          /*
 242  242           * Initialize the one-element route cache.
 243  243           *
 244  244           * We do ire caching from one iteration to
 245  245           * another. In the event the packet chain contains
 246  246           * all packets from the same dst, this caching saves
 247  247           * an ire_route_recursive for each of the succeeding
 248  248           * packets in a packet chain.
 249  249           */
 250  250          rtc.rtc_ire = NULL;
 251  251          rtc.rtc_ip6addr = ipv6_all_zeros;
 252  252  
 253  253          /* Loop over b_next, detaching each packet before processing it */
 254  254          for (mp = mp_chain; mp != NULL; mp = mp_chain) {
 255  255                  mp_chain = mp->b_next;
 256  256                  mp->b_next = NULL;
 257  257  
 258  258                  /*
 259  259                   * if db_ref > 1 then copymsg and free original. Packet
 260  260                   * may be changed and we do not want the other entity
 261  261                   * who has a reference to this message to trip over the
 262  262                   * changes. This is a blind change because trying to
 263  263                   * catch all places that might change the packet is too
 264  264                   * difficult.
 265  265                   *
 266  266                   * This corresponds to the fast path case, where we have
 267  267                   * a chain of M_DATA mblks.  We check the db_ref count
 268  268                   * of only the 1st data block in the mblk chain. There
 269  269                   * doesn't seem to be a reason why a device driver would
 270  270                   * send up data with varying db_ref counts in the mblk
 271  271                   * chain. In any case the Fast path is a private
 272  272                   * interface, and our drivers don't do such a thing.
 273  273                   * Given the above assumption, there is no need to walk
 274  274                   * down the entire mblk chain (which could have a
 275  275                   * potential performance problem)
 276  276                   *
 277  277                   * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
 278  278                   * to here because of exclusive ip stacks and vnics.
 279  279                   * Packets transmitted from exclusive stack over vnic
 280  280                   * can have db_ref > 1 and when it gets looped back to
 281  281                   * another vnic in a different zone, you have ip_input()
 282  282                   * getting dblks with db_ref > 1. So if someone
 283  283                   * complains of TCP performance under this scenario,
 284  284                   * take a serious look here on the impact of copymsg().
 285  285                   */
 286  286                  if (DB_REF(mp) > 1) {
 287  287                          if ((mp = ip_fix_dbref(mp, &iras)) == NULL)
 288  288                                  continue;       /* NOTE(review): ira_mhip is not cleared here, unlike the drops below - confirm */
 289  289                  }
 290  290  
 291  291                  /*
 292  292                   * IP header ptr not aligned?
 293  293                   * OR IP header not complete in first mblk
 294  294                   */
 295  295                  ip6h = (ip6_t *)mp->b_rptr;
 296  296                  if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) {
 297  297                          mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras);
 298  298                          if (mp == NULL)
 299  299                                  continue;       /* NOTE(review): ira_mhip is not cleared here either - confirm */
 300  300                          ip6h = (ip6_t *)mp->b_rptr;
 301  301                  }
 302  302  
 303  303                  /* Protect against a mix of Ethertypes and IP versions */
 304  304                  if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
 305  305                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 306  306                          ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
 307  307                          freemsg(mp);
 308  308                          /* mhip might point into 1st packet in the chain. */
 309  309                          iras.ira_mhip = NULL;
 310  310                          continue;
 311  311                  }
 312  312  
 313  313                  /*
 314  314                   * Check for Martian addrs; we have to explicitly
 315  315                   * test for zero dst since this is also used as
 316  316                   * an indication that the rtc is not used.
 317  317                   */
 318  318                  if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) {
 319  319                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 320  320                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 321  321                          freemsg(mp);
 322  322                          /* mhip might point into 1st packet in the chain. */
 323  323                          iras.ira_mhip = NULL;
 324  324                          continue;
 325  325                  }
 326  326                  /*
 327  327                   * Keep L2SRC from a previous packet in chain since mhip
 328  328                   * might point into an earlier packet in the chain.
 329  329                   */
 330  330                  chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET);
 331  331  
 332  332                  /* Per-packet attributes: reset for every packet in the chain */
 333  333                  iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags;
 334  334                  iras.ira_free_flags = 0;
 335  335                  iras.ira_cred = NULL;
 336  336                  iras.ira_cpid = NOPID;
 337  337                  iras.ira_tsl = NULL;
 338  338                  iras.ira_zoneid = ALL_ZONES;    /* Default for forwarding */
 339  339  
 340  340                  /*
 341  341                   * We must count all incoming packets, even if they end
 342  342                   * up being dropped later on. Defer counting bytes until
 343  343                   * we have the whole IP header in first mblk.
 344  344                   */
 345  345                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
 346  346  
 347  347                  iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 348  348                  UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
 349  349                      iras.ira_pktlen);
 350  350  
 351  351                  /*
 352  352                   * Call one of:
 353  353                   *      ill_input_full_v6
 354  354                   *      ill_input_short_v6
 355  355                   * The former is used in the case of TX. See ill_set_inputfn().
 356  356                   */
 357  357                  (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
 358  358  
 359  359                  /* Any references to clean up? No hold on ira_ill */
 360  360                  if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
 361  361                          ira_cleanup(&iras, B_FALSE);
 362  362  
 363  363                  if (iras.ira_target_sqp_mp != NULL) {
 364  364                          /* Better be called from ip_accept_tcp */
 365  365                          ASSERT(target_sqp != NULL);
 366  366  
 367  367                          /* Found one packet to accept */
 368  368                          mp = iras.ira_target_sqp_mp;
 369  369                          iras.ira_target_sqp_mp = NULL;
 370  370                          ASSERT(ip_recv_attr_is_mblk(mp));
 371  371  
 372  372                          /* Append to the accepted b_next chain */
 373  373                          if (atail != NULL)
 374  374                                  atail->b_next = mp;
 375  375                          else
 376  376                                  ahead = mp;
 377  377                          atail = mp;
 378  378                          acnt++;
 379  379                          mp = NULL;
 380  380                  }
 381  381                  /* mhip might point into 1st packet in the chain. */
 382  382                  iras.ira_mhip = NULL;
 383  383          }
 384  384          /* Any remaining references to the route cache? */
 385  385          if (rtc.rtc_ire != NULL) {
 386  386                  ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
 387  387                  ire_refrele(rtc.rtc_ire);
 388  388          }
 389  389  
 390  390          if (ahead != NULL) {
 391  391                  /* Better be called from ip_accept_tcp */
 392  392                  ASSERT(target_sqp != NULL);
 393  393                  *last = atail;
 394  394                  *cnt = acnt;
 395  395                  return (ahead);
 396  396          }
 397  397  
 398  398          return (NULL);
 399  399  }
 398  398  
 399  399  /*
 400  400   * This input function is used when
 401  401   *  - is_system_labeled()
 402  402   *
 403  403   * Note that for IPv6 CGTP filtering is handled only when receiving fragment
 404  404   * headers, and RSVP uses router alert options, thus we don't need anything
 405  405   * extra for them.
 406  406   */
 407  407  void
 408  408  ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 409  409      ip_recv_attr_t *ira, rtc_t *rtc)
 410  410  {
 411  411          ip6_t           *ip6h = (ip6_t *)iph_arg;
 412  412          in6_addr_t      *nexthop = (in6_addr_t *)nexthop_arg;
 413  413          ill_t           *ill = ira->ira_ill;
 414  414  
 415  415          ASSERT(ira->ira_tsl == NULL);
 416  416  
 417  417          /*
 418  418           * Attach any necessary label information to
 419  419           * this packet
 420  420           */
 421  421          if (is_system_labeled()) {
 422  422                  ira->ira_flags |= IRAF_SYSTEM_LABELED;
 423  423  
 424  424                  /*
 425  425                   * This updates ira_cred, ira_tsl and ira_free_flags based
 426  426                   * on the label.
 427  427                   */
 428  428                  if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) {
 429  429                          if (ip6opt_ls != 0)
 430  430                                  ip0dbg(("tsol_get_pkt_label v6 failed\n"));
 431  431                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 432  432                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
 433  433                          freemsg(mp);
 434  434                          return;
 435  435                  }
 436  436                  /* Note that ira_tsl can be NULL here. */
 437  437  
 438  438                  /* tsol_get_pkt_label sometimes does pullupmsg; reload ip6h */
 439  439                  ip6h = (ip6_t *)mp->b_rptr;
 440  440          }
 441  441          ill_input_short_v6(mp, ip6h, nexthop, ira, rtc);
 442  442  }
 443  443  
 444  444  /*
 445  445   * Check for IPv6 addresses that should not appear on the wire
 446  446   * as either source or destination; returns B_TRUE for a bad address.
 447  447   * If we ever implement Stateless IPv6 Translators (SIIT) we'd have
 448  448   * to revisit the IPv4-mapped part.
 449  449   */
 450  450  static boolean_t
 451  451  ip6_bad_address(in6_addr_t *addr, boolean_t is_src)
 452  452  {
 453  453          if (IN6_IS_ADDR_V4MAPPED(addr)) {
 454  454                  ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr"));
 455  455                  return (B_TRUE);
 456  456          }
 457  457          if (IN6_IS_ADDR_LOOPBACK(addr)) {
 458  458                  ip1dbg(("ip_input_v6: pkt with loopback addr"));
 459  459                  return (B_TRUE);
 460  460          }
 461  461          if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) {
 462  462                  /*
 463  463                   * Having :: as the source is OK: it is used for DAD.
 464  464                   */
 465  465                  ip1dbg(("ip_input_v6: pkt with unspecified addr"));
 466  466                  return (B_TRUE);
 467  467          }
 468  468          return (B_FALSE);
 469  469  }
 470  470  
 471  471  /*
 472  472   * Routing lookup for IPv6 link-locals.
 473  473   * First we look on the inbound interface, then we check for IPMP and
 474  474   * look on the upper interface.
 475  475   * We update ira_ruifindex once we retry the lookup on the upper interface.
 476  476   */
 477  477  static ire_t *
 478  478  ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira,
 479  479      uint_t irr_flags, ip_stack_t *ipst)
 480  480  {
 481  481          int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL;
 482  482          ire_t *ire;
 483  483  
 484  484          ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop));
 485  485          ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
 486  486              match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 487  487          if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
 488  488              !IS_UNDER_IPMP(ill))
 489  489                  return (ire);
 490  490  
 491  491          /*
 492  492           * When we are using IPMP we need to look for an IRE on both the
 493  493           * under and upper interfaces since there are different
 494  494           * link-local addresses for the under and upper.
 495  495           */
 496  496          ill = ipmp_ill_hold_ipmp_ill(ill);
 497  497          if (ill == NULL)
 498  498                  return (ire);   /* no upper ill obtained; keep first result */
 499  499  
 500  500          ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
 501  501  
 502  502          ire_refrele(ire);
 503  503          ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
 504  504              match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 505  505          ill_refrele(ill);       /* presumably drops the hold taken above */
 506  506          return (ire);
 507  507  }
 508  508  
 509  509  /*
 510  510   * This is the tail-end of the full receive side packet handling.
 511  511   * It can be used directly when the configuration is simple.
 512  512   */
 513  513  void
 514  514  ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
 515  515      ip_recv_attr_t *ira, rtc_t *rtc)
 516  516  {
 517  517          ire_t           *ire;
 518  518          ill_t           *ill = ira->ira_ill;
 519  519          ip_stack_t      *ipst = ill->ill_ipst;
 520  520          uint_t          pkt_len;
 521  521          ssize_t         len;
 522  522          ip6_t           *ip6h = (ip6_t *)iph_arg;
 523  523          in6_addr_t      nexthop = *(in6_addr_t *)nexthop_arg;
 524  524          ilb_stack_t     *ilbs = ipst->ips_netstack->netstack_ilb;
 525  525          uint_t          irr_flags;
 526  526  #define rptr    ((uchar_t *)ip6h)
 527  527  
 528  528          ASSERT(DB_TYPE(mp) == M_DATA);
 529  529  
 530  530          /*
 531  531           * Check for source/dest being a bad address: loopback, any, or
 532  532           * v4mapped. All of them start with a 64 bits of zero.
 533  533           */
 534  534          if (ip6h->ip6_src.s6_addr32[0] == 0 &&
 535  535              ip6h->ip6_src.s6_addr32[1] == 0) {
 536  536                  if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
 537  537                          ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
 538  538                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 539  539                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 540  540                          freemsg(mp);
 541  541                          return;
 542  542                  }
 543  543          }
 544  544          if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
 545  545              ip6h->ip6_dst.s6_addr32[1] == 0) {
 546  546                  if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
 547  547                          ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
 548  548                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 549  549                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 550  550                          freemsg(mp);
 551  551                          return;
 552  552                  }
 553  553          }
 554  554  
 555  555          len = mp->b_wptr - rptr;
 556  556          pkt_len = ira->ira_pktlen;
 557  557  
 558  558          /* multiple mblk or too short */
 559  559          len -= pkt_len;
 560  560          if (len != 0) {
 561  561                  mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
 562  562                  if (mp == NULL)
 563  563                          return;
 564  564                  ip6h = (ip6_t *)mp->b_rptr;
 565  565          }
 566  566  
 567  567          DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
 568  568              ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
 569  569              int, 0);
 570  570          /*
 571  571           * The event for packets being received from a 'physical'
 572  572           * interface is placed after validation of the source and/or
 573  573           * destination address as being local so that packets can be
 574  574           * redirected to loopback addresses using ipnat.
 575  575           */
 576  576          DTRACE_PROBE4(ip6__physical__in__start,
 577  577              ill_t *, ill, ill_t *, NULL,
 578  578              ip6_t *, ip6h, mblk_t *, mp);
 579  579  
 580  580          if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
 581  581                  int     ll_multicast = 0;
 582  582                  int     error;
 583  583                  in6_addr_t orig_dst = ip6h->ip6_dst;
 584  584  
 585  585                  if (ira->ira_flags & IRAF_L2DST_MULTICAST)
 586  586                          ll_multicast = HPE_MULTICAST;
 587  587                  else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
 588  588                          ll_multicast = HPE_BROADCAST;
 589  589  
 590  590                  FW_HOOKS6(ipst->ips_ip6_physical_in_event,
 591  591                      ipst->ips_ipv6firewall_physical_in,
 592  592                      ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);
 593  593  
 594  594                  DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);
 595  595  
 596  596                  if (mp == NULL)
 597  597                          return;
 598  598  
 599  599                  /* The length could have changed */
 600  600                  ip6h = (ip6_t *)mp->b_rptr;
 601  601                  ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 602  602                  pkt_len = ira->ira_pktlen;
 603  603  
 604  604                  /*
 605  605                   * In case the destination changed we override any previous
 606  606                   * change to nexthop.
 607  607                   */
 608  608                  if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
 609  609                          nexthop = ip6h->ip6_dst;
 610  610  
 611  611                  if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
 612  612                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 613  613                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
 614  614                          freemsg(mp);
 615  615                          return;
 616  616                  }
 617  617  
 618  618          }
 619  619  
 620  620          if (ipst->ips_ip6_observe.he_interested) {
 621  621                  zoneid_t dzone;
 622  622  
 623  623                  /*
 624  624                   * On the inbound path the src zone will be unknown as
 625  625                   * this packet has come from the wire.
 626  626                   */
 627  627                  dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
 628  628                  ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
 629  629          }
 630  630  
 631  631          if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
 632  632              IPV6_DEFAULT_VERS_AND_FLOW) {
 633  633                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
 634  634                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
 635  635                  ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
 636  636                  freemsg(mp);
 637  637                  return;
 638  638          }
 639  639  
 640  640          /*
 641  641           * For IPv6 we update ira_ip_hdr_length and ira_protocol as
 642  642           * we parse the headers, starting with the hop-by-hop options header.
 643  643           */
 644  644          ira->ira_ip_hdr_length = IPV6_HDR_LEN;
 645  645          if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
 646  646                  ip6_hbh_t       *hbhhdr;
 647  647                  uint_t          ehdrlen;
 648  648                  uint8_t         *optptr;
 649  649  
 650  650                  if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
 651  651                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 652  652                          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 653  653                          freemsg(mp);
 654  654                          return;
 655  655                  }
 656  656                  if (mp->b_cont != NULL &&
 657  657                      rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
 658  658                          ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
 659  659                          if (ip6h == NULL) {
 660  660                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 661  661                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
 662  662                                  freemsg(mp);
 663  663                                  return;
 664  664                          }
 665  665                  }
 666  666                  hbhhdr = (ip6_hbh_t *)&ip6h[1];
 667  667                  ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
 668  668  
 669  669                  if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
 670  670                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 671  671                          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 672  672                          freemsg(mp);
 673  673                          return;
 674  674                  }
 675  675                  if (mp->b_cont != NULL &&
 676  676                      rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
 677  677                          ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
 678  678                          if (ip6h == NULL) {
 679  679                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 680  680                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
 681  681                                  freemsg(mp);
 682  682                                  return;
 683  683                          }
 684  684                          hbhhdr = (ip6_hbh_t *)&ip6h[1];
 685  685                  }
 686  686  
 687  687                  /*
 688  688                   * Update ira_ip_hdr_length to skip the hop-by-hop header
 689  689                   * once we get to ip_fanout_v6
 690  690                   */
 691  691                  ira->ira_ip_hdr_length += ehdrlen;
 692  692                  ira->ira_protocol = hbhhdr->ip6h_nxt;
 693  693  
 694  694                  optptr = (uint8_t *)&hbhhdr[1];
 695  695                  switch (ip_process_options_v6(mp, ip6h, optptr,
 696  696                      ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
 697  697                  case -1:
 698  698                          /*
 699  699                           * Packet has been consumed and any
 700  700                           * needed ICMP messages sent.
 701  701                           */
 702  702                          return;
 703  703                  case 0:
 704  704                          /* no action needed */
 705  705                          break;
 706  706                  case 1:
 707  707                          /*
 708  708                           * Known router alert. Make us handle it as local
 709  709                           * by setting the nexthop to be the all-host multicast
 710  710                           * address, and skip multicast membership filter by
 711  711                           * marking as a router alert.
 712  712                           */
 713  713                          ira->ira_flags |= IRAF_ROUTER_ALERT;
 714  714                          nexthop = ipv6_all_hosts_mcast;
 715  715                          break;
 716  716                  }
 717  717          }
 718  718  
 719  719          /*
 720  720           * Here we check to see if the machine is set up as an
 721  721           * L3 load balancer and if the incoming packet is for a VIP
 722  722           *
 723  723           * Check the following:
 724  724           * - there is at least a rule
 725  725           * - protocol of the packet is supported
 726  726           *
 727  727           * We don't load balance IPv6 link-locals.
 728  728           */
 729  729          if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
 730  730              !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 731  731                  in6_addr_t      lb_dst;
 732  732                  int             lb_ret;
 733  733  
 734  734                  /* For convenience, we just pull up the mblk. */
 735  735                  if (mp->b_cont != NULL) {
 736  736                          if (pullupmsg(mp, -1) == 0) {
 737  737                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 738  738                                  ip_drop_input("ipIfStatsInDiscards - pullupmsg",
 739  739                                      mp, ill);
 740  740                                  freemsg(mp);
 741  741                                  return;
 742  742                          }
 743  743                          ip6h = (ip6_t *)mp->b_rptr;
 744  744                  }
 745  745                  lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
 746  746                      (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
 747  747                  if (lb_ret == ILB_DROPPED) {
 748  748                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 749  749                          ip_drop_input("ILB_DROPPED", mp, ill);
 750  750                          freemsg(mp);
 751  751                          return;
 752  752                  }
 753  753                  if (lb_ret == ILB_BALANCED) {
 754  754                          /* Set the dst to that of the chosen server */
 755  755                          nexthop = lb_dst;
 756  756                          DB_CKSUMFLAGS(mp) = 0;
 757  757                  }
 758  758          }
 759  759  
 760  760          if (ill->ill_flags & ILLF_ROUTER)
 761  761                  irr_flags = IRR_ALLOCATE;
 762  762          else
 763  763                  irr_flags = IRR_NONE;
 764  764  
 765  765          /* Can not use route cache with TX since the labels can differ */
 766  766          if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
 767  767                  if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 768  768                          ire = ire_multicast(ill);
 769  769                  } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 770  770                          ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 771  771                              ipst);
 772  772                  } else {
 773  773                          /* Match destination and label */
 774  774                          ire = ire_route_recursive_v6(&nexthop, 0, NULL,
 775  775                              ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
 776  776                              irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
 777  777                              NULL);
 778  778                  }
 779  779                  /* Update the route cache so we do the ire_refrele */
 780  780                  ASSERT(ire != NULL);
 781  781                  if (rtc->rtc_ire != NULL)
 782  782                          ire_refrele(rtc->rtc_ire);
 783  783                  rtc->rtc_ire = ire;
 784  784                  rtc->rtc_ip6addr = nexthop;
 785  785          } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr) &&
 786  786              rtc->rtc_ire != NULL) {
 787  787                  /* Use the route cache */
 788  788                  ire = rtc->rtc_ire;
 789  789          } else {
 790  790                  /* Update the route cache */
 791  791                  if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
 792  792                          ire = ire_multicast(ill);
 793  793                  } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
 794  794                          ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
 795  795                              ipst);
 796  796                  } else {
 797  797                          ire = ire_route_recursive_dstonly_v6(&nexthop,
 798  798                              irr_flags, ira->ira_xmit_hint, ipst);
 799  799                  }
 800  800                  ASSERT(ire != NULL);
 801  801                  if (rtc->rtc_ire != NULL)
 802  802                          ire_refrele(rtc->rtc_ire);
 803  803                  rtc->rtc_ire = ire;
 804  804                  rtc->rtc_ip6addr = nexthop;
 805  805          }
 806  806  
 807  807          ire->ire_ib_pkt_count++;
 808  808  
 809  809          /*
 810  810           * Based on ire_type and ire_flags call one of:
 811  811           *      ire_recv_local_v6 - for IRE_LOCAL
 812  812           *      ire_recv_loopback_v6 - for IRE_LOOPBACK
 813  813           *      ire_recv_multirt_v6 - if RTF_MULTIRT
 814  814           *      ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
 815  815           *      ire_recv_multicast_v6 - for IRE_MULTICAST
 816  816           *      ire_recv_noaccept_v6 - for ire_noaccept ones
 817  817           *      ire_recv_forward_v6 - for the rest.
 818  818           */
 819  819  
 820  820          (*ire->ire_recvfn)(ire, mp, ip6h, ira);
 821  821  }
 822  822  #undef rptr
 823  823  
 824  824  /*
 825  825   * ire_recvfn for IREs that need forwarding
            *
            * Arguments:
            *      ire     - IRE on whose behalf we are called; it supplies the
            *                cached nce and the route MTU metric
            *      mp      - the packet
            *      iph_arg - the packet's IPv6 header (used as an ip6_t *)
            *      ira     - receive attributes; ira_ill is the arriving interface
            *
            * The packet is dropped here (MIB counter bumped, freemsg) when:
            *  - it arrived as a link-layer multicast or broadcast,
            *  - the arriving ill or the outgoing ill does not have ILLF_ROUTER,
            *  - no nce can be obtained for the ire,
            *  - the source address fails the ips_src_check test,
            *  - a link-local source or destination would cross to another link,
            *  - tsol_ip_forward() rejects it on a labeled system.
            * IPPF processing and the firewall forwarding hooks can also leave mp
            * NULL, in which case we simply return.
            *
            * If the ire belongs to a specific non-global zone the lookup is redone
            * for GLOBAL_ZONEID and the packet is handed to that ire's ire_recvfn.
            *
            * Otherwise the packet goes to ip_forward_xmit_v6().
            *
            * A hold is taken on the nce; every return after that point drops it
            * with nce_refrele().
 826  826   */
 827  827  void
 828  828  ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
 829  829  {
 830  830          ip6_t           *ip6h = (ip6_t *)iph_arg;
 831  831          ill_t           *ill = ira->ira_ill;
 832  832          ip_stack_t      *ipst = ill->ill_ipst;
 833  833          iaflags_t       iraflags = ira->ira_flags;
 834  834          ill_t           *dst_ill;
 835  835          nce_t           *nce;
 836  836          uint32_t        added_tx_len;
 837  837          uint32_t        mtu, iremtu;
 838  838  
                   /* Never forward what was a link-layer multicast or broadcast */
 839  839          if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
 840  840                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 841  841                  ip_drop_input("l2 multicast not forwarded", mp, ill);
 842  842                  freemsg(mp);
 843  843                  return;
 844  844          }
 845  845  
                   /* The arriving interface must have forwarding enabled */
 846  846          if (!(ill->ill_flags & ILLF_ROUTER)) {
 847  847                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 848  848                  ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 849  849                  freemsg(mp);
 850  850                  return;
 851  851          }
 852  852  
 853  853          /*
 854  854           * Either ire_nce_capable or ire_dep_parent would be set for the IRE
 855  855           * when it is found by ire_route_recursive, but that some other thread
 856  856           * could have changed the routes with the effect of clearing
 857  857           * ire_dep_parent. In that case we'd end up dropping the packet, or
 858  858           * finding a new nce below.
 859  859           * Get, allocate, or update the nce.
 860  860           * We get a refhold on ire_nce_cache as a result of this to avoid races
 861  861           * where ire_nce_cache is deleted.
 862  862           *
 863  863           * This ensures that we don't forward if the interface is down since
 864  864           * ipif_down removes all the nces.
 865  865           */
 866  866          mutex_enter(&ire->ire_lock);
 867  867          nce = ire->ire_nce_cache;
 868  868          if (nce == NULL) {
 869  869                  /* Not yet set up - try to set one up */
                           /* ire_lock is dropped around the ire_revalidate_nce() call */
 870  870                  mutex_exit(&ire->ire_lock);
 871  871                  (void) ire_revalidate_nce(ire);
 872  872                  mutex_enter(&ire->ire_lock);
 873  873                  nce = ire->ire_nce_cache;
 874  874                  if (nce == NULL) {
 875  875                          mutex_exit(&ire->ire_lock);
 876  876                          /* The ire_dep_parent chain went bad, or no memory */
 877  877                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 878  878                          ip_drop_input("No ire_dep_parent", mp, ill);
 879  879                          freemsg(mp);
 880  880                          return;
 881  881                  }
 882  882          }
                   /* Take our own hold while still under ire_lock */
 883  883          nce_refhold(nce);
 884  884          mutex_exit(&ire->ire_lock);
 885  885  
                   /*
                    * Swap a condemned nce for whatever ire_handle_condemned_nce()
                    * provides; our hold on the old one is dropped either way.
                    */
 886  886          if (nce->nce_is_condemned) {
 887  887                  nce_t *nce1;
 888  888  
 889  889                  nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE);
 890  890                  nce_refrele(nce);
 891  891                  if (nce1 == NULL) {
 892  892                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 893  893                          ip_drop_input("No nce", mp, ill);
 894  894                          freemsg(mp);
 895  895                          return;
 896  896                  }
 897  897                  nce = nce1;
 898  898          }
 899  899          dst_ill = nce->nce_ill;
 900  900  
 901  901          /*
 902  902           * Unless we are forwarding, drop the packet.
 903  903           * Unlike IPv4 we don't allow source routed packets out the same
 904  904           * interface when we are not a router.
 905  905           * Note that ill_forward_set() will set the ILLF_ROUTER on
 906  906           * all the group members when it gets an ipmp-ill or under-ill.
 907  907           */
 908  908          if (!(dst_ill->ill_flags & ILLF_ROUTER)) {
 909  909                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 910  910                  ip_drop_input("ipIfStatsForwProhibits", mp, ill);
 911  911                  freemsg(mp);
 912  912                  nce_refrele(nce);
 913  913                  return;
 914  914          }
 915  915  
 916  916          if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
                           /*
                            * This ire will not be used after all: take back its
                            * inbound packet count and count the replacement ire
                            * found below instead.
                            */
 917  917                  ire->ire_ib_pkt_count--;
 918  918                  /*
 919  919                   * Should only use IREs that are visible from the
 920  920                   * global zone for forwarding.
 921  921                   * For IPv6 any source route would have already been
 922  922                   * advanced in ip_fanout_v6
 923  923                   */
 924  924                  ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL,
 925  925                      GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR,
 926  926                      (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
 927  927                      ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
 928  928                  ire->ire_ib_pkt_count++;
 929  929                  (*ire->ire_recvfn)(ire, mp, ip6h, ira);
                           /* Done with the ire looked up above and with our nce hold */
 930  930                  ire_refrele(ire);
 931  931                  nce_refrele(nce);
 932  932                  return;
 933  933          }
 934  934          /*
 935  935           * ipIfStatsHCInForwDatagrams should only be incremented if there
 936  936           * will be an attempt to forward the packet, which is why we
 937  937           * increment after the above condition has been checked.
 938  938           */
 939  939          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
 940  940  
 941  941          /* Initiate Read side IPPF processing */
 942  942          if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
 943  943                  /* ip_process translates an IS_UNDER_IPMP */
 944  944                  mp = ip_process(IPP_FWD_IN, mp, ill, ill);
 945  945                  if (mp == NULL) {
 946  946                          /* ip_drop_packet and MIB done */
 947  947                          ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred "
 948  948                              "during IPPF processing\n"));
 949  949                          nce_refrele(nce);
 950  950                          return;
 951  951                  }
 952  952          }
 953  953  
 954  954          DTRACE_PROBE4(ip6__forwarding__start,
 955  955              ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp);
 956  956  
 957  957          if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
 958  958                  int     error;
 959  959  
 960  960                  FW_HOOKS(ipst->ips_ip6_forwarding_event,
 961  961                      ipst->ips_ipv6firewall_forwarding,
 962  962                      ill, dst_ill, ip6h, mp, mp, 0, ipst, error);
 963  963  
 964  964                  DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);
 965  965  
                           /* A NULL mp here means the hooks kept or dropped the packet */
 966  966                  if (mp == NULL) {
 967  967                          nce_refrele(nce);
 968  968                          return;
 969  969                  }
 970  970                  /*
 971  971                   * Even if the destination was changed by the filter we use the
 972  972                   * forwarding decision that was made based on the address
 973  973                   * in ip_input.
 974  974                   */
 975  975  
 976  976                  /* Might have changed */
 977  977                  ip6h = (ip6_t *)mp->b_rptr;
 978  978                  ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 979  979          }
 980  980  
 981  981          /* Packet is being forwarded. Turning off hwcksum flag. */
 982  982          DB_CKSUMFLAGS(mp) = 0;
 983  983  
 984  984          /*
 985  985           * Per RFC 3513 section 2.5.2, we must not forward packets with
 986  986           * an unspecified source address.
 987  987           * The loopback address check for both src and dst has already
 988  988           * been checked in ip_input_v6
 989  989           * In the future one can envision adding RPF checks using number 3.
                    * Values of ips_src_check other than 1 and 2 perform no check here.
 990  990           */
 991  991          switch (ipst->ips_src_check) {
 992  992          case 0:
 993  993                  break;
 994  994          case 1:
 995  995          case 2:
 996  996                  if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) ||
 997  997                      IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
 998  998                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
 999  999                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1000 1000                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1001 1001                          nce_refrele(nce);
1002 1002                          freemsg(mp);
1003 1003                          return;
1004 1004                  }
1005 1005                  break;
1006 1006          }
1007 1007  
1008 1008          /*
1009 1009           * Check to see if we're forwarding the packet to a
1010 1010           * different link from which it came.  If so, check the
1011 1011           * source and destination addresses since routers must not
1012 1012           * forward any packets with link-local source or
1013 1013           * destination addresses to other links.  Otherwise (if
1014 1014           * we're forwarding onto the same link), conditionally send
1015 1015           * a redirect message.
1016 1016           */
1017 1017          if (!IS_ON_SAME_LAN(dst_ill, ill)) {
1018 1018                  if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
1019 1019                      IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
1020 1020                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1021 1021                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1022 1022                          freemsg(mp);
1023 1023                          nce_refrele(nce);
1024 1024                          return;
1025 1025                  }
1026 1026                  /* TBD add site-local check at site boundary? */
1027 1027          } else if (ipst->ips_ipv6_send_redirects) {
1028 1028                  ip_send_potential_redirect_v6(mp, ip6h, ire, ira);
1029 1029          }
1030 1030  
                   /* Stays 0 unless labeling below makes the packet larger */
1031 1031          added_tx_len = 0;
1032 1032          if (iraflags & IRAF_SYSTEM_LABELED) {
1033 1033                  mblk_t          *mp1;
1034 1034                  uint32_t        old_pkt_len = ira->ira_pktlen;
1035 1035  
1036 1036                  /*
1037 1037                   * Check if it can be forwarded and add/remove
1038 1038                   * CIPSO options as needed.
1039 1039                   */
1040 1040                  if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
1041 1041                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1042 1042                          ip_drop_input("tsol_ip_forward", mp, ill);
1043 1043                          freemsg(mp);
1044 1044                          nce_refrele(nce);
1045 1045                          return;
1046 1046                  }
1047 1047                  /*
1048 1048                   * Size may have changed. Remember amount added in case
1049 1049                   * ip_fragment needs to send an ICMP too big.
1050 1050                   */
1051 1051                  mp = mp1;
1052 1052                  ip6h = (ip6_t *)mp->b_rptr;
1053 1053                  ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
1054 1054                  ira->ira_ip_hdr_length = IPV6_HDR_LEN;
1055 1055                  if (ira->ira_pktlen > old_pkt_len)
1056 1056                          added_tx_len = ira->ira_pktlen - old_pkt_len;
1057 1057          }
1058 1058  
                   /*
                    * Effective MTU: the outgoing ill's MTU, lowered by the route's
                    * MTU metric when that metric is non-zero and smaller.
                    */
1059 1059          mtu = dst_ill->ill_mtu;
1060 1060          if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
1061 1061                  mtu = iremtu;
1062 1062          ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len);
1063 1063          nce_refrele(nce);
1064 1064          return;
1065 1065  
1066 1066  }
1067 1067  
1068 1068  /*
1069 1069   * Used for sending out unicast and multicast packets that are
1070 1070   * forwarded.
            *
            * Arguments:
            *      nce          - neighbor entry to transmit through; nce_ill is the
            *                     outgoing interface. The caller's reference is used
            *                     and is not released here.
            *      mp           - the packet
            *      ip6h         - IPv6 header of the packet; its hop limit is
            *                     decremented here
            *      ira          - receive attributes (flags, packet length, arriving
            *                     ill, transmit hint)
            *      mtu          - largest packet length that may be sent
            *      added_tx_len - only passed on to tsol_pmtu_adjust() when a labeled
            *                     packet exceeds mtu
            *
            * The packet is not sent when the hop limit is exhausted (mp goes to
            * icmp_time_exceeded_v6()), when write-side IPPF processing leaves mp
            * NULL, or when ira_pktlen exceeds mtu (mp goes to icmp_pkt2big_v6()).
            * Otherwise it is handed to ip_postfrag_loopcheck() or ip_xmit().
1071 1071   */
1072 1072  void
1073 1073  ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira,
1074 1074      uint32_t mtu, uint32_t added_tx_len)
1075 1075  {
1076 1076          ill_t           *dst_ill = nce->nce_ill;
1077 1077          uint32_t        pkt_len;
1078 1078          iaflags_t       iraflags = ira->ira_flags;
1079 1079          ip_stack_t      *ipst = dst_ill->ill_ipst;
1080 1080  
                   /*
                    * Post-decrement: a packet that arrived with a hop limit of 0 or 1
                    * cannot be forwarded. Note that the field has already been
                    * decremented in the packet handed to icmp_time_exceeded_v6().
                    * The drop is counted against the arriving ill.
                    */
1081 1081          if (ip6h->ip6_hops-- <= 1) {
1082 1082                  BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1083 1083                  ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill);
1084 1084                  icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE,
1085 1085                      ira);
1086 1086                  return;
1087 1087          }
1088 1088  
1089 1089          /* Initiate Write side IPPF processing before any fragmentation */
1090 1090          if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
1091 1091                  /* ip_process translates an IS_UNDER_IPMP */
1092 1092                  mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
1093 1093                  if (mp == NULL) {
1094 1094                          /* ip_drop_packet and MIB done */
                                   /*
                                    * Name this function in the message so that a
                                    * write-side (IPP_FWD_OUT) drop can be told apart
                                    * from the read-side drop in ire_recv_forward_v6.
                                    */
1095 1095                          ip2dbg(("ip_forward_xmit_v6: pkt dropped/deferred"
1096 1096                              " during IPPF processing\n"));
1097 1097                          return;
1098 1098                  }
1099 1099          }
1100 1100  
1101 1101          pkt_len = ira->ira_pktlen;
1102 1102  
1103 1103          BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
1104 1104  
                   /* Too big for the path: report it with an ICMPv6 packet too big */
1105 1105          if (pkt_len > mtu) {
1106 1106                  BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
1107 1107                  ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
1108 1108                  if (iraflags & IRAF_SYSTEM_LABELED) {
1109 1109                          /*
1110 1110                           * Remove any CIPSO option added by
1111 1111                           * tsol_ip_forward, and make sure we report
1112 1112                           * a path MTU so that there
1113 1113                           * is room to add such a CIPSO option for future
1114 1114                           * packets.
1115 1115                           */
1116 1116                          mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6);
1117 1117                  }
1118 1118                  icmp_pkt2big_v6(mp, mtu, B_TRUE, ira);
1119 1119                  return;
1120 1120          }
1121 1121  
                   /* The cached length must agree with the header's payload length */
1122 1122          ASSERT(pkt_len ==
1123 1123              ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN);
1124 1124  
1125 1125          if (iraflags & IRAF_LOOPBACK_COPY) {
1126 1126                  /*
1127 1127                   * IXAF_NO_LOOP_ZONEID is not set hence 6th arg
1128 1128                   * is don't care
1129 1129                   */
1130 1130                  (void) ip_postfrag_loopcheck(mp, nce,
1131 1131                      (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL),
1132 1132                      pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1133 1133          } else {
1134 1134                  (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL,
1135 1135                      pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1136 1136          }
1137 1137  }
1138 1138  
1139 1139  /*
1140 1140   * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
1141 1141   * which is what ire_route_recursive returns when there is no matching ire.
1142 1142   * Send ICMP unreachable unless blackhole.
            *
            * Arguments:
            *      ire     - the reject/blackhole/noroute IRE; only ire_flags is read
            *      mp      - the packet
            *      iph_arg - the packet's IPv6 header (used as an ip6_t *)
            *      ira     - receive attributes; ira_ill is the arriving interface
            *
            * Packets that would not have been forwarded anyway (link-layer
            * multicast/broadcast, or arriving ill without ILLF_ROUTER) are dropped
            * and counted as ipIfStatsForwProhibits, with no routing message and no
            * ICMP error.
1143 1143   */
1144 1144  void
1145 1145  ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1146 1146  {
1147 1147          ip6_t           *ip6h = (ip6_t *)iph_arg;
1148 1148          ill_t           *ill = ira->ira_ill;
1149 1149          ip_stack_t      *ipst = ill->ill_ipst;
1150 1150  
1151 1151          /* Would we have forwarded this packet if we had a route? */
1152 1152          if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
1153 1153                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1154 1154                  ip_drop_input("l2 multicast not forwarded", mp, ill);
1155 1155                  freemsg(mp);
1156 1156                  return;
1157 1157          }
1158 1158  
1159 1159          if (!(ill->ill_flags & ILLF_ROUTER)) {
1160 1160                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1161 1161                  ip_drop_input("ipIfStatsForwProhibits", mp, ill);
1162 1162                  freemsg(mp);
1163 1163                  return;
1164 1164          }
1165 1165          /*
1166 1166           * If we had a route this could have been forwarded. Count as such.
1167 1167           *
1168 1168           * ipIfStatsHCInForwDatagrams should only be incremented if there
1169 1169           * will be an attempt to forward the packet, which is why we
1170 1170           * increment after the above condition has been checked.
1171 1171           */
1172 1172          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
1173 1173  
1174 1174          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1175 1175  
                   /* Issue an RTM_MISS routing message for the packet's destination */
1176 1176          ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
1177 1177              ipst);
1178 1178  
1179 1179          if (ire->ire_flags & RTF_BLACKHOLE) {
                           /* Blackhole: discard without telling the sender */
1180 1180                  ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
1181 1181                  freemsg(mp);
1182 1182          } else {
1183 1183                  ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);
1184 1184  
                           /*
                            * mp is handed to icmp_unreachable_v6() and is not freed
                            * here (presumably that function consumes it - confirm).
                            */
1185 1185                  icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE,
1186 1186                      ira);
1187 1187          }
1188 1188  }
1189 1189  
1190 1190  /*
1191 1191   * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
1192 1192   * VRRP when in noaccept mode.
1193 1193   * We silently drop packets except for Neighbor Solicitations and
1194 1194   * Neighbor Advertisements.
            *
            * Arguments:
            *      ire     - the noaccept IRE; passed through to ire_recv_local_v6()
            *      mp      - the packet
            *      iph_arg - the packet's IPv6 header (used as an ip6_t *)
            *      ira     - receive attributes; ira_ip_hdr_length locates the ICMPv6
            *                header and ira_pktlen bounds the pullup
            *
            * Accepted packets are handed to ire_recv_local_v6(); everything else
            * is freed here.
1195 1195   */
1196 1196  void
1197 1197  ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1198 1198      ip_recv_attr_t *ira)
1199 1199  {
1200 1200          ip6_t           *ip6h = (ip6_t *)iph_arg;
1201 1201          ill_t           *ill = ira->ira_ill;
1202 1202          icmp6_t         *icmp6;
1203 1203          int             ip_hdr_length;
1204 1204  
                   /*
                    * NOTE(review): this tests the next-header field of the fixed IPv6
                    * header, not ira->ira_protocol, so an ICMPv6 message preceded by
                    * an extension header would be dropped here - confirm intended.
                    */
1205 1205          if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
1206 1206                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1207 1207                  ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1208 1208                  freemsg(mp);
1209 1209                  return;
1210 1210          }
1211 1211          ip_hdr_length = ira->ira_ip_hdr_length;
                   /* Make sure the minimal ICMPv6 header is in the first mblk */
1212 1212          if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
                           /* The whole packet is too short to hold an ICMPv6 header */
1213 1213                  if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
1214 1214                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
1215 1215                          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
1216 1216                          freemsg(mp);
1217 1217                          return;
1218 1218                  }
1219 1219                  ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
1220 1220                  if (ip6h == NULL) {
                                   /*
                                    * NOTE(review): unlike the other drops in this
                                    * function, this one is counted in the ICMPv6 MIB
                                    * and has no ip_drop_input() call.
                                    */
1221 1221                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1222 1222                          freemsg(mp);
1223 1223                          return;
1224 1224                  }
1225 1225          }
                   /* Located from mp->b_rptr, so valid whether or not we pulled up */
1226 1226          icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
1227 1227  
1228 1228          if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT &&
1229 1229              icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) {
1230 1230                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1231 1231                  ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1232 1232                  freemsg(mp);
1233 1233                  return;
1234 1234          }
                   /* NS or NA: process it as an ordinary local delivery */
1235 1235          ire_recv_local_v6(ire, mp, ip6h, ira);
1236 1236  }
1237 1237  
1238 1238  /*
1239 1239   * ire_recvfn for IRE_MULTICAST.
1240 1240   */
1241 1241  void
1242 1242  ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1243 1243      ip_recv_attr_t *ira)
1244 1244  {
1245 1245          ip6_t           *ip6h = (ip6_t *)iph_arg;
1246 1246          ill_t           *ill = ira->ira_ill;
1247 1247  
1248 1248          ASSERT(ire->ire_ill == ira->ira_ill);
1249 1249  
1250 1250          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
1251 1251          UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen);
1252 1252  
1253 1253          /* Tag for higher-level protocols */
1254 1254          ira->ira_flags |= IRAF_MULTICAST;
1255 1255  
1256 1256          /*
1257 1257           * So that we don't end up with dups, only one ill in an IPMP group is
1258 1258           * nominated to receive multicast traffic.
1259 1259           * If we have no cast_ill we are liberal and accept everything.
1260 1260           */
1261 1261          if (IS_UNDER_IPMP(ill)) {
1262 1262                  ip_stack_t      *ipst = ill->ill_ipst;
1263 1263  
1264 1264                  /* For an under ill_grp can change under lock */
1265 1265                  rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1266 1266                  if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
1267 1267                      ill->ill_grp->ig_cast_ill != NULL) {
1268 1268                          rw_exit(&ipst->ips_ill_g_lock);
1269 1269                          ip_drop_input("not on cast ill", mp, ill);
1270 1270                          freemsg(mp);
1271 1271                          return;
1272 1272                  }
1273 1273                  rw_exit(&ipst->ips_ill_g_lock);
1274 1274                  /*
1275 1275                   * We switch to the upper ill so that mrouter and hasmembers
1276 1276                   * can operate on upper here and in ip_input_multicast.
1277 1277                   */
1278 1278                  ill = ipmp_ill_hold_ipmp_ill(ill);
1279 1279                  if (ill != NULL) {
1280 1280                          ASSERT(ill != ira->ira_ill);
1281 1281                          ASSERT(ire->ire_ill == ira->ira_ill);
1282 1282                          ira->ira_ill = ill;
1283 1283                          ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1284 1284                  } else {
1285 1285                          ill = ira->ira_ill;
1286 1286                  }
1287 1287          }
1288 1288  
1289 1289  #ifdef notdef
1290 1290          /*
1291 1291           * Check if we are a multicast router - send ip_mforward a copy of
1292 1292           * the packet.
1293 1293           * Due to mroute_decap tunnels we consider forwarding packets even if
1294 1294           * mrouted has not joined the allmulti group on this interface.
1295 1295           */
1296 1296          if (ipst->ips_ip_g_mrouter) {
1297 1297                  int retval;
1298 1298  
1299 1299                  /*
1300 1300                   * Clear the indication that this may have hardware
1301 1301                   * checksum as we are not using it for forwarding.
1302 1302                   */
1303 1303                  DB_CKSUMFLAGS(mp) = 0;
1304 1304  
1305 1305                  /*
1306 1306                   * ip_mforward helps us make these distinctions: If received
1307 1307                   * on tunnel and not IGMP, then drop.
1308 1308                   * If IGMP packet, then don't check membership
1309 1309                   * If received on a phyint and IGMP or PIM, then
1310 1310                   * don't check membership
1311 1311                   */
1312 1312                  retval = ip_mforward_v6(mp, ira);
1313 1313                  /* ip_mforward updates mib variables if needed */
1314 1314  
1315 1315                  switch (retval) {
1316 1316                  case 0:
1317 1317                          /*
1318 1318                           * pkt is okay and arrived on phyint.
1319 1319                           */
1320 1320                          break;
1321 1321                  case -1:
1322 1322                          /* pkt is mal-formed, toss it */
1323 1323                          freemsg(mp);
1324 1324                          goto done;
1325 1325                  case 1:
1326 1326                          /*
1327 1327                           * pkt is okay and arrived on a tunnel
1328 1328                           *
1329 1329                           * If we are running a multicast router
1330 1330                           * we need to see all mld packets, which
1331 1331                           * are marked with router alerts.
1332 1332                           */
1333 1333                          if (ira->ira_flags & IRAF_ROUTER_ALERT)
1334 1334                                  goto forus;
1335 1335                          ip_drop_input("Multicast on tunnel ignored", mp, ill);
1336 1336                          freemsg(mp);
1337 1337                          goto done;
1338 1338                  }
1339 1339          }
1340 1340  #endif /* notdef */
1341 1341  
1342 1342          /*
1343 1343           * If this was a router alert we skip the group membership check.
1344 1344           */
1345 1345          if (ira->ira_flags & IRAF_ROUTER_ALERT)
1346 1346                  goto forus;
1347 1347  
1348 1348          /*
1349 1349           * Check if we have members on this ill. This is not necessary for
1350 1350           * correctness because even if the NIC/GLD had a leaky filter, we
1351 1351           * filter before passing to each conn_t.
1352 1352           */
1353 1353          if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) {
1354 1354                  /*
1355 1355                   * Nobody interested
1356 1356                   *
1357 1357                   * This might just be caused by the fact that
1358 1358                   * multiple IP Multicast addresses map to the same
1359 1359                   * link layer multicast - no need to increment counter!
1360 1360                   */
1361 1361                  ip_drop_input("Multicast with no members", mp, ill);
1362 1362                  freemsg(mp);
1363 1363                  goto done;
1364 1364          }
1365 1365  forus:
1366 1366          ip2dbg(("ire_recv_multicast_v6: multicast for us\n"));
1367 1367  
1368 1368          /*
1369 1369           * After reassembly and IPsec we will need to duplicate the
1370 1370           * multicast packet for all matching zones on the ill.
1371 1371           */
1372 1372          ira->ira_zoneid = ALL_ZONES;
1373 1373  
1374 1374          /* Reassemble on the ill on which the packet arrived */
1375 1375          ip_input_local_v6(ire, mp, ip6h, ira);
1376 1376  done:
1377 1377          if (ill != ire->ire_ill) {
1378 1378                  ill_refrele(ill);
1379 1379                  ira->ira_ill = ire->ire_ill;
1380 1380                  ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
1381 1381          }
1382 1382  }
1383 1383  
1384 1384  /*
1385 1385   * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
1386 1386   * Drop packets since we don't forward out multirt routes.
1387 1387   */
1388 1388  /* ARGSUSED */
1389 1389  void
1390 1390  ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1391 1391  {
1392 1392          ill_t           *ill = ira->ira_ill;
1393 1393  
1394 1394          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1395 1395          ip_drop_input("Not forwarding out MULTIRT", mp, ill);
1396 1396          freemsg(mp);
1397 1397  }
1398 1398  
1399 1399  /*
1400 1400   * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
1401 1401   * has rewritten the packet to have a loopback destination address (We
1402 1402   * filter out packet with a loopback destination from arriving over the wire).
1403 1403   * We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
1404 1404   */
1405 1405  void
1406 1406  ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1407 1407  {
1408 1408          ip6_t           *ip6h = (ip6_t *)iph_arg;
1409 1409          ill_t           *ill = ira->ira_ill;
1410 1410          ill_t           *ire_ill = ire->ire_ill;
1411 1411  
1412 1412          ira->ira_zoneid = GLOBAL_ZONEID;
1413 1413  
1414 1414          /* Switch to the lo0 ill for further processing  */
1415 1415          if (ire_ill != ill) {
1416 1416                  /*
1417 1417                   * Update ira_ill to be the ILL on which the IP address
1418 1418                   * is hosted.
1419 1419                   * No need to hold the ill since we have a hold on the ire
1420 1420                   */
1421 1421                  ASSERT(ira->ira_ill == ira->ira_rill);
1422 1422                  ira->ira_ill = ire_ill;
1423 1423  
1424 1424                  ip_input_local_v6(ire, mp, ip6h, ira);
1425 1425  
1426 1426                  /* Restore */
1427 1427                  ASSERT(ira->ira_ill == ire_ill);
1428 1428                  ira->ira_ill = ill;
1429 1429                  return;
1430 1430  
1431 1431          }
1432 1432          ip_input_local_v6(ire, mp, ip6h, ira);
1433 1433  }
1434 1434  
1435 1435  /*
1436 1436   * ire_recvfn for IRE_LOCAL.
1437 1437   */
1438 1438  void
1439 1439  ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1440 1440  {
1441 1441          ip6_t           *ip6h = (ip6_t *)iph_arg;
1442 1442          ill_t           *ill = ira->ira_ill;
1443 1443          ill_t           *ire_ill = ire->ire_ill;
1444 1444  
1445 1445          /* Make a note for DAD that this address is in use */
1446 1446          ire->ire_last_used_time = LBOLT_FASTPATH;
1447 1447  
1448 1448          /* Only target the IRE_LOCAL with the right zoneid. */
1449 1449          ira->ira_zoneid = ire->ire_zoneid;
1450 1450  
1451 1451          /*
1452 1452           * If the packet arrived on the wrong ill, we check that
1453 1453           * this is ok.
1454 1454           * If it is, then we ensure that we do the reassembly on
1455 1455           * the ill on which the address is hosted. We keep ira_rill as
1456 1456           * the one on which the packet arrived, so that IP_PKTINFO and
1457 1457           * friends can report this.
1458 1458           */
1459 1459          if (ire_ill != ill) {
1460 1460                  ire_t *new_ire;
1461 1461  
1462 1462                  new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill);
1463 1463                  if (new_ire == NULL) {
1464 1464                          /* Drop packet */
1465 1465                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1466 1466                          ip_drop_input("ipIfStatsInForwProhibits", mp, ill);
1467 1467                          freemsg(mp);
1468 1468                          return;
1469 1469                  }
1470 1470                  /*
1471 1471                   * Update ira_ill to be the ILL on which the IP address
1472 1472                   * is hosted. No need to hold the ill since we have a
1473 1473                   * hold on the ire. Note that we do the switch even if
1474 1474                   * new_ire == ire (for IPMP, ire would be the one corresponding
1475 1475                   * to the IPMP ill).
1476 1476                   */
1477 1477                  ASSERT(ira->ira_ill == ira->ira_rill);
1478 1478                  ira->ira_ill = new_ire->ire_ill;
1479 1479  
1480 1480                  /* ira_ruifindex tracks the upper for ira_rill */
1481 1481                  if (IS_UNDER_IPMP(ill))
1482 1482                          ira->ira_ruifindex = ill_get_upper_ifindex(ill);
1483 1483  
1484 1484                  ip_input_local_v6(new_ire, mp, ip6h, ira);
1485 1485  
1486 1486                  /* Restore */
1487 1487                  ASSERT(ira->ira_ill == new_ire->ire_ill);
1488 1488                  ira->ira_ill = ill;
1489 1489                  ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1490 1490  
1491 1491                  if (new_ire != ire)
1492 1492                          ire_refrele(new_ire);
1493 1493                  return;
1494 1494          }
1495 1495  
1496 1496          ip_input_local_v6(ire, mp, ip6h, ira);
1497 1497  }
1498 1498  
1499 1499  /*
1500 1500   * Common function for packets arriving for the host. Handles
1501 1501   * checksum verification, reassembly checks, etc.
1502 1502   */
1503 1503  static void
1504 1504  ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1505 1505  {
1506 1506          iaflags_t       iraflags = ira->ira_flags;
1507 1507  
1508 1508          /*
1509 1509           * For multicast we need some extra work before
1510 1510           * we call ip_fanout_v6(), since in the case of shared-IP zones
1511 1511           * we need to pretend that a packet arrived for each zoneid.
1512 1512           */
1513 1513          if (iraflags & IRAF_MULTICAST) {
1514 1514                  ip_input_multicast_v6(ire, mp, ip6h, ira);
1515 1515                  return;
1516 1516          }
1517 1517          ip_fanout_v6(mp, ip6h, ira);
1518 1518  }
1519 1519  
1520 1520  /*
1521 1521   * Handle multiple zones which want to receive the same multicast packets
1522 1522   * on this ill by delivering a packet to each of them.
1523 1523   *
1524 1524   * Note that for packets delivered to transports we could instead do this
1525 1525   * as part of the fanout code, but since we need to handle icmp_inbound
1526 1526   * it is simpler to have multicast work the same as IPv4 broadcast.
1527 1527   *
1528 1528   * The ip_fanout matching for multicast matches based on ilm independent of
1529 1529   * zoneid since the zoneid restriction is applied when joining a multicast
1530 1530   * group.
1531 1531   */
1532 1532  /* ARGSUSED */
1533 1533  static void
1534 1534  ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1535 1535  {
1536 1536          ill_t           *ill = ira->ira_ill;
1537 1537          iaflags_t       iraflags = ira->ira_flags;
1538 1538          ip_stack_t      *ipst = ill->ill_ipst;
1539 1539          netstack_t      *ns = ipst->ips_netstack;
1540 1540          zoneid_t        zoneid;
1541 1541          mblk_t          *mp1;
1542 1542          ip6_t           *ip6h1;
1543 1543          uint_t          ira_pktlen = ira->ira_pktlen;
1544 1544          uint16_t        ira_ip_hdr_length = ira->ira_ip_hdr_length;
1545 1545  
1546 1546          /* ire_recv_multicast has switched to the upper ill for IPMP */
1547 1547          ASSERT(!IS_UNDER_IPMP(ill));
1548 1548  
1549 1549          /*
1550 1550           * If we don't have more than one shared-IP zone, or if
1551 1551           * there are no members in anything but the global zone,
1552 1552           * then just set the zoneid and proceed.
1553 1553           */
1554 1554          if (ns->netstack_numzones == 1 ||
1555 1555              !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst,
1556 1556              GLOBAL_ZONEID)) {
1557 1557                  ira->ira_zoneid = GLOBAL_ZONEID;
1558 1558  
1559 1559                  /* If sender didn't want this zone to receive it, drop */
1560 1560                  if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1561 1561                      ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1562 1562                          ip_drop_input("Multicast but wrong zoneid", mp, ill);
1563 1563                          freemsg(mp);
1564 1564                          return;
1565 1565                  }
1566 1566                  ip_fanout_v6(mp, ip6h, ira);
1567 1567                  return;
1568 1568          }
1569 1569  
1570 1570          /*
1571 1571           * Here we loop over all zoneids that have members in the group
1572 1572           * and deliver a packet to ip_fanout for each zoneid.
1573 1573           *
1574 1574           * First find any members in the lowest numeric zoneid by looking for
1575 1575           * first zoneid larger than -1 (ALL_ZONES).
1576 1576           * We terminate the loop when we receive -1 (ALL_ZONES).
1577 1577           */
1578 1578          zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES);
1579 1579          for (; zoneid != ALL_ZONES;
1580 1580              zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) {
1581 1581                  /*
1582 1582                   * Avoid an extra copymsg/freemsg by skipping global zone here
1583 1583                   * and doing that at the end.
1584 1584                   */
1585 1585                  if (zoneid == GLOBAL_ZONEID)
1586 1586                          continue;
1587 1587  
1588 1588                  ira->ira_zoneid = zoneid;
1589 1589  
1590 1590                  /* If sender didn't want this zone to receive it, skip */
1591 1591                  if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1592 1592                      ira->ira_no_loop_zoneid == ira->ira_zoneid)
1593 1593                          continue;
1594 1594  
1595 1595                  mp1 = copymsg(mp);
1596 1596                  if (mp1 == NULL) {
1597 1597                          /* Failed to deliver to one zone */
1598 1598                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1599 1599                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
1600 1600                          continue;
1601 1601                  }
1602 1602                  ip6h1 = (ip6_t *)mp1->b_rptr;
1603 1603                  ip_fanout_v6(mp1, ip6h1, ira);
1604 1604                  /*
1605 1605                   * IPsec might have modified ira_pktlen and ira_ip_hdr_length
1606 1606                   * so we restore them for a potential next iteration
1607 1607                   */
1608 1608                  ira->ira_pktlen = ira_pktlen;
1609 1609                  ira->ira_ip_hdr_length = ira_ip_hdr_length;
1610 1610          }
1611 1611  
1612 1612          /* Do the main ire */
1613 1613          ira->ira_zoneid = GLOBAL_ZONEID;
1614 1614          /* If sender didn't want this zone to receive it, drop */
1615 1615          if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1616 1616              ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1617 1617                  ip_drop_input("Multicast but wrong zoneid", mp, ill);
1618 1618                  freemsg(mp);
1619 1619          } else {
1620 1620                  ip_fanout_v6(mp, ip6h, ira);
1621 1621          }
1622 1622  }
1623 1623  
1624 1624  
1625 1625  /*
1626 1626   * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions
1627 1627   * is in use. Updates ira_zoneid and ira_flags as a result.
1628 1628   */
1629 1629  static void
1630 1630  ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length,
1631 1631      ip_recv_attr_t *ira)
1632 1632  {
1633 1633          uint16_t        *up;
1634 1634          uint16_t        lport;
1635 1635          zoneid_t        zoneid;
1636 1636  
1637 1637          ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED);
1638 1638  
1639 1639          /*
1640 1640           * If the packet is unlabeled we might allow read-down
1641 1641           * for MAC_EXEMPT. Below we clear this if it is a multi-level
1642 1642           * port (MLP).
1643 1643           * Note that ira_tsl can be NULL here.
1644 1644           */
1645 1645          if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED)
1646 1646                  ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE;
1647 1647  
1648 1648          if (ira->ira_zoneid != ALL_ZONES)
1649 1649                  return;
1650 1650  
1651 1651          ira->ira_flags |= IRAF_TX_SHARED_ADDR;
1652 1652  
1653 1653          up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
1654 1654          switch (protocol) {
1655 1655          case IPPROTO_TCP:
1656 1656          case IPPROTO_SCTP:
1657 1657          case IPPROTO_UDP:
1658 1658                  /* Caller ensures this */
1659 1659                  ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr);
1660 1660  
1661 1661                  /*
1662 1662                   * Only these transports support MLP.
1663 1663                   * We know their destination port numbers is in
1664 1664                   * the same place in the header.
1665 1665                   */
1666 1666                  lport = up[1];
1667 1667  
1668 1668                  /*
1669 1669                   * No need to handle exclusive-stack zones
1670 1670                   * since ALL_ZONES only applies to the shared IP instance.
1671 1671                   */
1672 1672                  zoneid = tsol_mlp_findzone(protocol, lport);
1673 1673                  /*
1674 1674                   * If no shared MLP is found, tsol_mlp_findzone returns
1675 1675                   * ALL_ZONES.  In that case, we assume it's SLP, and
1676 1676                   * search for the zone based on the packet label.
1677 1677                   *
1678 1678                   * If there is such a zone, we prefer to find a
1679 1679                   * connection in it.  Otherwise, we look for a
1680 1680                   * MAC-exempt connection in any zone whose label
1681 1681                   * dominates the default label on the packet.
1682 1682                   */
1683 1683                  if (zoneid == ALL_ZONES)
1684 1684                          zoneid = tsol_attr_to_zoneid(ira);
1685 1685                  else
1686 1686                          ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE;
1687 1687                  break;
1688 1688          default:
1689 1689                  /* Handle shared address for other protocols */
1690 1690                  zoneid = tsol_attr_to_zoneid(ira);
1691 1691                  break;
1692 1692          }
1693 1693          ira->ira_zoneid = zoneid;
1694 1694  }
1695 1695  
1696 1696  /*
1697 1697   * Increment checksum failure statistics
1698 1698   */
1699 1699  static void
1700 1700  ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill)
1701 1701  {
1702 1702          ip_stack_t      *ipst = ill->ill_ipst;
1703 1703  
1704 1704          switch (protocol) {
1705 1705          case IPPROTO_TCP:
1706 1706                  BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
1707 1707  
1708 1708                  if (hck_flags & HCK_FULLCKSUM)
1709 1709                          IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err);
1710 1710                  else if (hck_flags & HCK_PARTIALCKSUM)
1711 1711                          IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err);
1712 1712                  else
1713 1713                          IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err);
1714 1714                  break;
1715 1715          case IPPROTO_UDP:
1716 1716                  BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1717 1717                  if (hck_flags & HCK_FULLCKSUM)
1718 1718                          IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err);
1719 1719                  else if (hck_flags & HCK_PARTIALCKSUM)
1720 1720                          IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err);
1721 1721                  else
1722 1722                          IP6_STAT(ipst, ip6_udp_in_sw_cksum_err);
1723 1723                  break;
1724 1724          case IPPROTO_ICMPV6:
1725 1725                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
1726 1726                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1727 1727                  break;
1728 1728          default:
1729 1729                  ASSERT(0);
1730 1730                  break;
1731 1731          }
1732 1732  }
1733 1733  
1734 1734  /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */
1735 1735  uint32_t
1736 1736  ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira)
1737 1737  {
1738 1738          uint_t          ulp_len;
1739 1739          uint32_t        cksum;
1740 1740          uint8_t         protocol = ira->ira_protocol;
1741 1741          uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1742 1742  
1743 1743  #define iphs    ((uint16_t *)ip6h)
1744 1744  
1745 1745          switch (protocol) {
1746 1746          case IPPROTO_TCP:
1747 1747                  ulp_len = ira->ira_pktlen - ip_hdr_length;
1748 1748  
1749 1749                  /* Protocol and length */
1750 1750                  cksum = htons(ulp_len) + IP_TCP_CSUM_COMP;
1751 1751                  /* IP addresses */
1752 1752                  cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1753 1753                      iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1754 1754                      iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1755 1755                      iphs[16] + iphs[17] + iphs[18] + iphs[19];
1756 1756                  break;
1757 1757  
1758 1758          case IPPROTO_UDP: {
1759 1759                  udpha_t         *udpha;
1760 1760  
1761 1761                  udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1762 1762  
1763 1763                  /* Protocol and length */
1764 1764                  cksum = udpha->uha_length + IP_UDP_CSUM_COMP;
1765 1765                  /* IP addresses */
1766 1766                  cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1767 1767                      iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1768 1768                      iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1769 1769                      iphs[16] + iphs[17] + iphs[18] + iphs[19];
1770 1770                  break;
1771 1771          }
1772 1772          case IPPROTO_ICMPV6:
1773 1773                  ulp_len = ira->ira_pktlen - ip_hdr_length;
1774 1774  
1775 1775                  /* Protocol and length */
1776 1776                  cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP;
1777 1777                  /* IP addresses */
1778 1778                  cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1779 1779                      iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1780 1780                      iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1781 1781                      iphs[16] + iphs[17] + iphs[18] + iphs[19];
1782 1782                  break;
1783 1783          default:
1784 1784                  cksum = 0;
1785 1785                  break;
1786 1786          }
1787 1787  #undef  iphs
1788 1788          return (cksum);
1789 1789  }
1790 1790  
1791 1791  
1792 1792  /*
1793 1793   * Software verification of the ULP checksums.
1794 1794   * Returns B_TRUE if ok.
1795 1795   * Increments statistics of failed.
1796 1796   */
1797 1797  static boolean_t
1798 1798  ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1799 1799  {
1800 1800          ip_stack_t      *ipst = ira->ira_ill->ill_ipst;
1801 1801          uint32_t        cksum;
1802 1802          uint8_t         protocol = ira->ira_protocol;
1803 1803          uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1804 1804  
1805 1805          IP6_STAT(ipst, ip6_in_sw_cksum);
1806 1806  
1807 1807          ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
1808 1808              protocol == IPPROTO_ICMPV6);
1809 1809  
1810 1810          cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1811 1811          cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1812 1812          if (cksum == 0)
1813 1813                  return (B_TRUE);
1814 1814  
1815 1815          ip_input_cksum_err_v6(protocol, 0, ira->ira_ill);
1816 1816          return (B_FALSE);
1817 1817  }
1818 1818  
1819 1819  /*
1820 1820   * Verify the ULP checksums.
1821 1821   * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
1822 1822   * algorithm.
1823 1823   * Increments statistics if failed.
1824 1824   */
1825 1825  static boolean_t
1826 1826  ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h,
1827 1827      ip_recv_attr_t *ira)
1828 1828  {
1829 1829          ill_t           *ill = ira->ira_rill;
1830 1830          uint16_t        hck_flags;
1831 1831          uint32_t        cksum;
1832 1832          mblk_t          *mp1;
1833 1833          uint_t          len;
1834 1834          uint8_t         protocol = ira->ira_protocol;
1835 1835          uint16_t        ip_hdr_length = ira->ira_ip_hdr_length;
1836 1836  
1837 1837  
1838 1838          switch (protocol) {
1839 1839          case IPPROTO_TCP:
1840 1840          case IPPROTO_ICMPV6:
1841 1841                  break;
1842 1842  
1843 1843          case IPPROTO_UDP: {
1844 1844                  udpha_t         *udpha;
1845 1845  
1846 1846                  udpha = (udpha_t  *)((uchar_t *)ip6h + ip_hdr_length);
1847 1847                  /*
1848 1848                   *  Before going through the regular checksum
1849 1849                   *  calculation, make sure the received checksum
1850 1850                   *  is non-zero. RFC 2460 says, a 0x0000 checksum
1851 1851                   *  in a UDP packet (within IPv6 packet) is invalid
1852 1852                   *  and should be replaced by 0xffff. This makes
1853 1853                   *  sense as regular checksum calculation will
1854 1854                   *  pass for both the cases i.e. 0x0000 and 0xffff.
1855 1855                   *  Removing one of the case makes error detection
1856 1856                   *  stronger.
1857 1857                   */
1858 1858                  if (udpha->uha_checksum == 0) {
1859 1859                          /* 0x0000 checksum is invalid */
1860 1860                          BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1861 1861                          return (B_FALSE);
1862 1862                  }
1863 1863                  break;
1864 1864          }
1865 1865          case IPPROTO_SCTP: {
1866 1866                  sctp_hdr_t      *sctph;
1867 1867                  uint32_t        pktsum;
1868 1868  
1869 1869                  sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length);
1870 1870  #ifdef  DEBUG
1871 1871                  if (skip_sctp_cksum)
1872 1872                          return (B_TRUE);
1873 1873  #endif
1874 1874                  pktsum = sctph->sh_chksum;
1875 1875                  sctph->sh_chksum = 0;
1876 1876                  cksum = sctp_cksum(mp, ip_hdr_length);
1877 1877                  sctph->sh_chksum = pktsum;
1878 1878                  if (cksum == pktsum)
1879 1879                          return (B_TRUE);
1880 1880  
1881 1881                  /*
1882 1882                   * Defer until later whether a bad checksum is ok
1883 1883                   * in order to allow RAW sockets to use Adler checksum
1884 1884                   * with SCTP.
1885 1885                   */
1886 1886                  ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
1887 1887                  return (B_TRUE);
1888 1888          }
1889 1889  
1890 1890          default:
1891 1891                  /* No ULP checksum to verify. */
1892 1892                  return (B_TRUE);
1893 1893          }
1894 1894  
1895 1895          /*
1896 1896           * Revert to software checksum calculation if the interface
1897 1897           * isn't capable of checksum offload.
1898 1898           * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
1899 1899           * Note: IRAF_NO_HW_CKSUM is not currently used.
1900 1900           */
1901 1901          ASSERT(!IS_IPMP(ill));
1902 1902          if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1903 1903              !dohwcksum) {
1904 1904                  return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1905 1905          }
1906 1906  
1907 1907          /*
1908 1908           * We apply this for all ULP protocols. Does the HW know to
1909 1909           * not set the flags for SCTP and other protocols.
1910 1910           */
1911 1911  
1912 1912          hck_flags = DB_CKSUMFLAGS(mp);
1913 1913  
1914 1914          if (hck_flags & HCK_FULLCKSUM_OK) {
1915 1915                  /*
1916 1916                   * Hardware has already verified the checksum.
1917 1917                   */
1918 1918                  return (B_TRUE);
1919 1919          }
1920 1920  
1921 1921          if (hck_flags & HCK_FULLCKSUM) {
1922 1922                  /*
1923 1923                   * Full checksum has been computed by the hardware
1924 1924                   * and has been attached.  If the driver wants us to
1925 1925                   * verify the correctness of the attached value, in
1926 1926                   * order to protect against faulty hardware, compare
1927 1927                   * it against -0 (0xFFFF) to see if it's valid.
1928 1928                   */
1929 1929                  cksum = DB_CKSUM16(mp);
1930 1930                  if (cksum == 0xFFFF)
1931 1931                          return (B_TRUE);
1932 1932                  ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1933 1933                  return (B_FALSE);
1934 1934          }
1935 1935  
1936 1936          mp1 = mp->b_cont;
1937 1937          if ((hck_flags & HCK_PARTIALCKSUM) &&
1938 1938              (mp1 == NULL || mp1->b_cont == NULL) &&
1939 1939              ip_hdr_length >= DB_CKSUMSTART(mp) &&
1940 1940              ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
1941 1941                  uint32_t        adj;
1942 1942                  uchar_t         *cksum_start;
1943 1943  
1944 1944                  cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1945 1945  
1946 1946                  cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp));
1947 1947  
1948 1948                  /*
1949 1949                   * Partial checksum has been calculated by hardware
1950 1950                   * and attached to the packet; in addition, any
1951 1951                   * prepended extraneous data is even byte aligned,
1952 1952                   * and there are at most two mblks associated with
1953 1953                   * the packet.  If any such data exists, we adjust
1954 1954                   * the checksum; also take care any postpended data.
1955 1955                   */
1956 1956                  IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
1957 1957                  /*
1958 1958                   * One's complement subtract extraneous checksum
1959 1959                   */
1960 1960                  cksum += DB_CKSUM16(mp);
1961 1961                  if (adj >= cksum)
1962 1962                          cksum = ~(adj - cksum) & 0xFFFF;
1963 1963                  else
1964 1964                          cksum -= adj;
1965 1965                  cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1966 1966                  cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1967 1967                  if (!(~cksum & 0xFFFF))
1968 1968                          return (B_TRUE);
1969 1969  
1970 1970                  ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1971 1971                  return (B_FALSE);
1972 1972          }
1973 1973          return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1974 1974  }
1975 1975  
1976 1976  
1977 1977  /*
1978 1978   * Handle fanout of received packets.
1979 1979   * Unicast packets that are looped back (from ire_send_local_v6) and packets
1980 1980   * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
1981 1981   *
1982 1982   * IPQoS Notes
1983 1983   * Before sending it to the client, invoke IPPF processing. Policy processing
1984 1984   * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
1985 1985   */
1986 1986  void
1987 1987  ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1988 1988  {
1989 1989          ill_t           *ill = ira->ira_ill;
1990 1990          iaflags_t       iraflags = ira->ira_flags;
1991 1991          ip_stack_t      *ipst = ill->ill_ipst;
1992 1992          uint8_t         protocol;
1993 1993          conn_t          *connp;
1994 1994  #define rptr    ((uchar_t *)ip6h)
1995 1995          uint_t          ip_hdr_length;
1996 1996          uint_t          min_ulp_header_length;
1997 1997          int             offset;
1998 1998          ssize_t         len;
1999 1999          netstack_t      *ns = ipst->ips_netstack;
2000 2000          ipsec_stack_t   *ipss = ns->netstack_ipsec;
2001 2001          ill_t           *rill = ira->ira_rill;
2002 2002  
2003 2003          ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
2004 2004  
2005 2005          /*
2006 2006           * We repeat this as we parse over destination options header and
2007 2007           * fragment headers (earlier we've handled any hop-by-hop options
2008 2008           * header.)
2009 2009           * We update ira_protocol and ira_ip_hdr_length as we skip past
2010 2010           * the intermediate headers; they already point past any
2011 2011           * hop-by-hop header.
2012 2012           */
2013 2013  repeat:
  
    | 
      ↓ open down ↓ | 
    2013 lines elided | 
    
      ↑ open up ↑ | 
  
2014 2014          protocol = ira->ira_protocol;
2015 2015          ip_hdr_length = ira->ira_ip_hdr_length;
2016 2016  
2017 2017          /*
2018 2018           * Time for IPP once we've done reassembly and IPsec.
2019 2019           * We skip this for loopback packets since we don't do IPQoS
2020 2020           * on loopback.
2021 2021           */
2022 2022          if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
2023 2023              !(iraflags & IRAF_LOOPBACK) &&
2024      -            (protocol != IPPROTO_ESP || protocol != IPPROTO_AH ||
2025      -            protocol != IPPROTO_DSTOPTS || protocol != IPPROTO_ROUTING ||
     2024 +            (protocol != IPPROTO_ESP && protocol != IPPROTO_AH &&
     2025 +            protocol != IPPROTO_DSTOPTS && protocol != IPPROTO_ROUTING &&
2026 2026              protocol != IPPROTO_FRAGMENT)) {
2027 2027                  /*
2028 2028                   * Use the interface on which the packet arrived - not where
2029 2029                   * the IP address is hosted.
2030 2030                   */
2031 2031                  /* ip_process translates an IS_UNDER_IPMP */
2032 2032                  mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
2033 2033                  if (mp == NULL) {
2034 2034                          /* ip_drop_packet and MIB done */
2035 2035                          return;
2036 2036                  }
2037 2037          }
2038 2038  
2039 2039          /* Determine the minimum required size of the upper-layer header */
2040 2040          /* Need to do this for at least the set of ULPs that TX handles. */
2041 2041          switch (protocol) {
2042 2042          case IPPROTO_TCP:
2043 2043                  min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
2044 2044                  break;
2045 2045          case IPPROTO_SCTP:
2046 2046                  min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
2047 2047                  break;
2048 2048          case IPPROTO_UDP:
2049 2049                  min_ulp_header_length = UDPH_SIZE;
2050 2050                  break;
2051 2051          case IPPROTO_ICMP:
2052 2052          case IPPROTO_ICMPV6:
2053 2053                  min_ulp_header_length = ICMPH_SIZE;
2054 2054                  break;
2055 2055          case IPPROTO_FRAGMENT:
2056 2056          case IPPROTO_DSTOPTS:
2057 2057          case IPPROTO_ROUTING:
2058 2058                  min_ulp_header_length = MIN_EHDR_LEN;
2059 2059                  break;
2060 2060          default:
2061 2061                  min_ulp_header_length = 0;
2062 2062                  break;
2063 2063          }
2064 2064          /* Make sure we have the min ULP header length */
2065 2065          len = mp->b_wptr - rptr;
2066 2066          if (len < ip_hdr_length + min_ulp_header_length) {
2067 2067                  if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length)
2068 2068                          goto pkt_too_short;
2069 2069  
2070 2070                  IP6_STAT(ipst, ip6_recv_pullup);
2071 2071                  ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
2072 2072                      ira);
2073 2073                  if (ip6h == NULL)
2074 2074                          goto discard;
2075 2075                  len = mp->b_wptr - rptr;
2076 2076          }
2077 2077  
2078 2078          /*
2079 2079           * If trusted extensions then determine the zoneid and TX specific
2080 2080           * ira_flags.
2081 2081           */
2082 2082          if (iraflags & IRAF_SYSTEM_LABELED) {
2083 2083                  /* This can update ira->ira_flags and ira->ira_zoneid */
2084 2084                  ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira);
2085 2085                  iraflags = ira->ira_flags;
2086 2086          }
2087 2087  
2088 2088  
2089 2089          /* Verify ULP checksum. Handles TCP, UDP, and SCTP */
2090 2090          if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
2091 2091                  if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) {
2092 2092                          /* Bad checksum. Stats are already incremented */
2093 2093                          ip_drop_input("Bad ULP checksum", mp, ill);
2094 2094                          freemsg(mp);
2095 2095                          return;
2096 2096                  }
2097 2097                  /* IRAF_SCTP_CSUM_ERR could have been set */
2098 2098                  iraflags = ira->ira_flags;
2099 2099          }
2100 2100          switch (protocol) {
2101 2101          case IPPROTO_TCP:
2102 2102                  /* For TCP, discard multicast packets. */
2103 2103                  if (iraflags & IRAF_MULTIBROADCAST)
2104 2104                          goto discard;
2105 2105  
2106 2106                  /* First mblk contains IP+TCP headers per above check */
2107 2107                  ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);
2108 2108  
2109 2109                  /* TCP options present? */
2110 2110                  offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4;
2111 2111                  if (offset != 5) {
2112 2112                          if (offset < 5)
2113 2113                                  goto discard;
2114 2114  
2115 2115                          /*
2116 2116                           * There must be TCP options.
2117 2117                           * Make sure we can grab them.
2118 2118                           */
2119 2119                          offset <<= 2;
2120 2120                          offset += ip_hdr_length;
2121 2121                          if (len < offset) {
2122 2122                                  if (ira->ira_pktlen < offset)
2123 2123                                          goto pkt_too_short;
2124 2124  
2125 2125                                  IP6_STAT(ipst, ip6_recv_pullup);
2126 2126                                  ip6h = ip_pullup(mp, offset, ira);
2127 2127                                  if (ip6h == NULL)
2128 2128                                          goto discard;
2129 2129                                  len = mp->b_wptr - rptr;
2130 2130                          }
2131 2131                  }
2132 2132  
2133 2133                  /*
2134 2134                   * Pass up a squeue hint to tcp.
2135 2135                   * If ira_sqp is already set (this is loopback) we leave it
2136 2136                   * alone.
2137 2137                   */
2138 2138                  if (ira->ira_sqp == NULL) {
2139 2139                          ira->ira_sqp = ip_squeue_get(ira->ira_ring);
2140 2140                  }
2141 2141  
2142 2142                  /* Look for AF_INET or AF_INET6 that matches */
2143 2143                  connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length,
2144 2144                      ira, ipst);
2145 2145                  if (connp == NULL) {
2146 2146                          /* Send the TH_RST */
2147 2147                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2148 2148                          tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2149 2149                          return;
2150 2150                  }
2151 2151                  if (connp->conn_incoming_ifindex != 0 &&
2152 2152                      connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2153 2153                          CONN_DEC_REF(connp);
2154 2154  
2155 2155                          /* Send the TH_RST */
2156 2156                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2157 2157                          tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2158 2158                          return;
2159 2159                  }
2160 2160                  if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2161 2161                      (iraflags & IRAF_IPSEC_SECURE)) {
2162 2162                          mp = ipsec_check_inbound_policy(mp, connp,
2163 2163                              NULL, ip6h, ira);
2164 2164                          if (mp == NULL) {
2165 2165                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2166 2166                                  /* Note that mp is NULL */
2167 2167                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
2168 2168                                  CONN_DEC_REF(connp);
2169 2169                                  return;
2170 2170                          }
2171 2171                  }
2172 2172                  /* Found a client; up it goes */
2173 2173                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2174 2174                  ira->ira_ill = ira->ira_rill = NULL;
2175 2175                  if (!IPCL_IS_TCP(connp)) {
2176 2176                          /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
2177 2177                          (connp->conn_recv)(connp, mp, NULL, ira);
2178 2178                          CONN_DEC_REF(connp);
2179 2179                          ira->ira_ill = ill;
2180 2180                          ira->ira_rill = rill;
2181 2181                          return;
2182 2182                  }
2183 2183  
2184 2184                  /*
2185 2185                   * We do different processing whether called from
2186 2186                   * ip_accept_tcp and we match the target, don't match
2187 2187                   * the target, and when we are called by ip_input.
2188 2188                   */
2189 2189                  if (iraflags & IRAF_TARGET_SQP) {
2190 2190                          if (ira->ira_target_sqp == connp->conn_sqp) {
2191 2191                                  mblk_t  *attrmp;
2192 2192  
2193 2193                                  attrmp = ip_recv_attr_to_mblk(ira);
2194 2194                                  if (attrmp == NULL) {
2195 2195                                          BUMP_MIB(ill->ill_ip_mib,
2196 2196                                              ipIfStatsInDiscards);
2197 2197                                          ip_drop_input("ipIfStatsInDiscards",
2198 2198                                              mp, ill);
2199 2199                                          freemsg(mp);
2200 2200                                          CONN_DEC_REF(connp);
2201 2201                                  } else {
2202 2202                                          SET_SQUEUE(attrmp, connp->conn_recv,
2203 2203                                              connp);
2204 2204                                          attrmp->b_cont = mp;
2205 2205                                          ASSERT(ira->ira_target_sqp_mp == NULL);
2206 2206                                          ira->ira_target_sqp_mp = attrmp;
2207 2207                                          /*
2208 2208                                           * Conn ref release when drained from
2209 2209                                           * the squeue.
2210 2210                                           */
2211 2211                                  }
2212 2212                          } else {
2213 2213                                  SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2214 2214                                      connp->conn_recv, connp, ira, SQ_FILL,
2215 2215                                      SQTAG_IP6_TCP_INPUT);
2216 2216                          }
2217 2217                  } else {
2218 2218                          SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
2219 2219                              connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
2220 2220                  }
2221 2221                  ira->ira_ill = ill;
2222 2222                  ira->ira_rill = rill;
2223 2223                  return;
2224 2224  
2225 2225          case IPPROTO_SCTP: {
2226 2226                  sctp_hdr_t      *sctph;
2227 2227                  uint32_t        ports;  /* Source and destination ports */
2228 2228                  sctp_stack_t    *sctps = ipst->ips_netstack->netstack_sctp;
2229 2229  
2230 2230                  /* For SCTP, discard multicast packets. */
2231 2231                  if (iraflags & IRAF_MULTIBROADCAST)
2232 2232                          goto discard;
2233 2233  
2234 2234                  /*
2235 2235                   * Since there is no SCTP h/w cksum support yet, just
2236 2236                   * clear the flag.
2237 2237                   */
2238 2238                  DB_CKSUMFLAGS(mp) = 0;
2239 2239  
2240 2240                  /* Length ensured above */
2241 2241                  ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
2242 2242                  sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);
2243 2243  
2244 2244                  /* get the ports */
2245 2245                  ports = *(uint32_t *)&sctph->sh_sport;
2246 2246  
2247 2247                  if (iraflags & IRAF_SCTP_CSUM_ERR) {
2248 2248                          /*
2249 2249                           * No potential sctp checksum errors go to the Sun
2250 2250                           * sctp stack however they might be Adler-32 summed
2251 2251                           * packets a userland stack bound to a raw IP socket
2252 2252                           * could reasonably use. Note though that Adler-32 is
2253 2253                           * a long deprecated algorithm and customer sctp
2254 2254                           * networks should eventually migrate to CRC-32 at
2255 2255                           * which time this facility should be removed.
2256 2256                           */
2257 2257                          ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2258 2258                          return;
2259 2259                  }
2260 2260                  connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports,
2261 2261                      ira, mp, sctps, sctph);
2262 2262                  if (connp == NULL) {
2263 2263                          /* Check for raw socket or OOTB handling */
2264 2264                          ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2265 2265                          return;
2266 2266                  }
2267 2267                  if (connp->conn_incoming_ifindex != 0 &&
2268 2268                      connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2269 2269                          CONN_DEC_REF(connp);
2270 2270  
2271 2271                          /* Check for raw socket or OOTB handling */
2272 2272                          ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2273 2273                          return;
2274 2274                  }
2275 2275  
2276 2276                  /* Found a client; up it goes */
2277 2277                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2278 2278                  sctp_input(connp, NULL, ip6h, mp, ira);
2279 2279                  /* sctp_input does a rele of the sctp_t */
2280 2280                  return;
2281 2281          }
2282 2282  
2283 2283          case IPPROTO_UDP:
2284 2284                  /* First mblk contains IP+UDP headers as checked above */
2285 2285                  ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);
2286 2286  
2287 2287                  if (iraflags & IRAF_MULTIBROADCAST) {
2288 2288                          uint16_t *up;   /* Pointer to ports in ULP header */
2289 2289  
2290 2290                          up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
2291 2291  
2292 2292                          ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira);
2293 2293                          return;
2294 2294                  }
2295 2295  
2296 2296                  /* Look for AF_INET or AF_INET6 that matches */
2297 2297                  connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length,
2298 2298                      ira, ipst);
2299 2299                  if (connp == NULL) {
2300 2300          no_udp_match:
2301 2301                          if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].
2302 2302                              connf_head != NULL) {
2303 2303                                  ASSERT(ira->ira_protocol == IPPROTO_UDP);
2304 2304                                  ip_fanout_proto_v6(mp, ip6h, ira);
2305 2305                          } else {
2306 2306                                  ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2307 2307                                      ICMP6_DST_UNREACH_NOPORT, ira);
2308 2308                          }
2309 2309                          return;
2310 2310  
2311 2311                  }
2312 2312                  if (connp->conn_incoming_ifindex != 0 &&
2313 2313                      connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2314 2314                          CONN_DEC_REF(connp);
2315 2315                          goto no_udp_match;
2316 2316                  }
2317 2317                  if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
2318 2318                      !canputnext(connp->conn_rq)) {
2319 2319                          CONN_DEC_REF(connp);
2320 2320                          BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
2321 2321                          ip_drop_input("udpIfStatsInOverflows", mp, ill);
2322 2322                          freemsg(mp);
2323 2323                          return;
2324 2324                  }
2325 2325                  if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2326 2326                      (iraflags & IRAF_IPSEC_SECURE)) {
2327 2327                          mp = ipsec_check_inbound_policy(mp, connp,
2328 2328                              NULL, ip6h, ira);
2329 2329                          if (mp == NULL) {
2330 2330                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2331 2331                                  /* Note that mp is NULL */
2332 2332                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
2333 2333                                  CONN_DEC_REF(connp);
2334 2334                                  return;
2335 2335                          }
2336 2336                  }
2337 2337  
2338 2338                  /* Found a client; up it goes */
2339 2339                  IP6_STAT(ipst, ip6_udp_fannorm);
2340 2340                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2341 2341                  ira->ira_ill = ira->ira_rill = NULL;
2342 2342                  (connp->conn_recv)(connp, mp, NULL, ira);
2343 2343                  CONN_DEC_REF(connp);
2344 2344                  ira->ira_ill = ill;
2345 2345                  ira->ira_rill = rill;
2346 2346                  return;
2347 2347          default:
2348 2348                  break;
2349 2349          }
2350 2350  
2351 2351          /*
2352 2352           * Clear hardware checksumming flag as it is currently only
2353 2353           * used by TCP and UDP.
2354 2354           */
2355 2355          DB_CKSUMFLAGS(mp) = 0;
2356 2356  
2357 2357          switch (protocol) {
2358 2358          case IPPROTO_ICMPV6:
2359 2359                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
2360 2360  
2361 2361                  /* Check variable for testing applications */
2362 2362                  if (ipst->ips_ipv6_drop_inbound_icmpv6) {
2363 2363                          ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill);
2364 2364                          freemsg(mp);
2365 2365                          return;
2366 2366                  }
2367 2367                  /*
2368 2368                   * We need to accommodate icmp messages coming in clear
2369 2369                   * until we get everything secure from the wire. If
2370 2370                   * icmp_accept_clear_messages is zero we check with
2371 2371                   * the global policy and act accordingly. If it is
2372 2372                   * non-zero, we accept the message without any checks.
2373 2373                   * But *this does not mean* that this will be delivered
2374 2374                   * to RAW socket clients. By accepting we might send
2375 2375                   * replies back, change our MTU value etc.,
2376 2376                   * but delivery to the ULP/clients depends on their
2377 2377                   * policy dispositions.
2378 2378                   */
2379 2379                  if (ipst->ips_icmp_accept_clear_messages == 0) {
2380 2380                          mp = ipsec_check_global_policy(mp, NULL,
2381 2381                              NULL, ip6h, ira, ns);
2382 2382                          if (mp == NULL)
2383 2383                                  return;
2384 2384                  }
2385 2385  
2386 2386                  /*
2387 2387                   * On a labeled system, we have to check whether the zone
2388 2388                   * itself is permitted to receive raw traffic.
2389 2389                   */
2390 2390                  if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2391 2391                          if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2392 2392                                  BUMP_MIB(ill->ill_icmp6_mib,
2393 2393                                      ipv6IfIcmpInErrors);
2394 2394                                  ip_drop_input("tsol_can_accept_raw", mp, ill);
2395 2395                                  freemsg(mp);
2396 2396                                  return;
2397 2397                          }
2398 2398                  }
2399 2399  
2400 2400                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2401 2401                  mp = icmp_inbound_v6(mp, ira);
2402 2402                  if (mp == NULL) {
2403 2403                          /* No need to pass to RAW sockets */
2404 2404                          return;
2405 2405                  }
2406 2406                  break;
2407 2407  
2408 2408          case IPPROTO_DSTOPTS: {
2409 2409                  ip6_dest_t      *desthdr;
2410 2410                  uint_t          ehdrlen;
2411 2411                  uint8_t         *optptr;
2412 2412  
2413 2413                  /* We already check for MIN_EHDR_LEN above */
2414 2414  
2415 2415                  /* Check if AH is present and needs to be processed. */
2416 2416                  mp = ipsec_early_ah_v6(mp, ira);
2417 2417                  if (mp == NULL)
2418 2418                          return;
2419 2419  
2420 2420                  /*
2421 2421                   * Reinitialize pointers, as ipsec_early_ah_v6() does
2422 2422                   * complete pullups.  We don't have to do more pullups
2423 2423                   * as a result.
2424 2424                   */
2425 2425                  ip6h = (ip6_t *)mp->b_rptr;
2426 2426  
2427 2427                  if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2428 2428                          goto pkt_too_short;
2429 2429  
2430 2430                  if (mp->b_cont != NULL &&
2431 2431                      rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2432 2432                          ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2433 2433                          if (ip6h == NULL)
2434 2434                                  goto discard;
2435 2435                  }
2436 2436                  desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2437 2437                  ehdrlen = 8 * (desthdr->ip6d_len + 1);
2438 2438                  if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2439 2439                          goto pkt_too_short;
2440 2440                  if (mp->b_cont != NULL &&
2441 2441                      rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2442 2442                          ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2443 2443                          if (ip6h == NULL)
2444 2444                                  goto discard;
2445 2445  
2446 2446                          desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2447 2447                  }
2448 2448                  optptr = (uint8_t *)&desthdr[1];
2449 2449  
2450 2450                  /*
2451 2451                   * Update ira_ip_hdr_length to skip the destination header
2452 2452                   * when we repeat.
2453 2453                   */
2454 2454                  ira->ira_ip_hdr_length += ehdrlen;
2455 2455  
2456 2456                  ira->ira_protocol = desthdr->ip6d_nxt;
2457 2457  
2458 2458                  /*
2459 2459                   * Note: XXX This code does not seem to make
2460 2460                   * distinction between Destination Options Header
2461 2461                   * being before/after Routing Header which can
2462 2462                   * happen if we are at the end of source route.
2463 2463                   * This may become significant in future.
2464 2464                   * (No real significant Destination Options are
2465 2465                   * defined/implemented yet ).
2466 2466                   */
2467 2467                  switch (ip_process_options_v6(mp, ip6h, optptr,
2468 2468                      ehdrlen - 2, IPPROTO_DSTOPTS, ira)) {
2469 2469                  case -1:
2470 2470                          /*
2471 2471                           * Packet has been consumed and any needed
2472 2472                           * ICMP errors sent.
2473 2473                           */
2474 2474                          return;
2475 2475                  case 0:
2476 2476                          /* No action needed  continue */
2477 2477                          break;
2478 2478                  case 1:
2479 2479                          /*
2480 2480                           * Unexpected return value
2481 2481                           * (Router alert is a Hop-by-Hop option)
2482 2482                           */
2483 2483  #ifdef DEBUG
2484 2484                          panic("ip_fanout_v6: router "
2485 2485                              "alert hbh opt indication in dest opt");
2486 2486                          /*NOTREACHED*/
2487 2487  #else
2488 2488                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2489 2489                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
2490 2490                          freemsg(mp);
2491 2491                          return;
2492 2492  #endif
2493 2493                  }
2494 2494                  goto repeat;
2495 2495          }
2496 2496          case IPPROTO_FRAGMENT: {
2497 2497                  ip6_frag_t *fraghdr;
2498 2498  
2499 2499                  if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t))
2500 2500                          goto pkt_too_short;
2501 2501  
2502 2502                  if (mp->b_cont != NULL &&
2503 2503                      rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) {
2504 2504                          ip6h = ip_pullup(mp,
2505 2505                              ip_hdr_length + sizeof (ip6_frag_t), ira);
2506 2506                          if (ip6h == NULL)
2507 2507                                  goto discard;
2508 2508                  }
2509 2509  
2510 2510                  fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length);
2511 2511                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds);
2512 2512  
2513 2513                  /*
2514 2514                   * Invoke the CGTP (multirouting) filtering module to
2515 2515                   * process the incoming packet. Packets identified as
2516 2516                   * duplicates must be discarded. Filtering is active
2517 2517                   * only if the ip_cgtp_filter ndd variable is
2518 2518                   * non-zero.
2519 2519                   */
2520 2520                  if (ipst->ips_ip_cgtp_filter &&
2521 2521                      ipst->ips_ip_cgtp_filter_ops != NULL) {
2522 2522                          int cgtp_flt_pkt;
2523 2523                          netstackid_t stackid;
2524 2524  
2525 2525                          stackid = ipst->ips_netstack->netstack_stackid;
2526 2526  
2527 2527                          /*
2528 2528                           * CGTP and IPMP are mutually exclusive so
2529 2529                           * phyint_ifindex is fine here.
2530 2530                           */
2531 2531                          cgtp_flt_pkt =
2532 2532                              ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6(
2533 2533                              stackid, ill->ill_phyint->phyint_ifindex,
2534 2534                              ip6h, fraghdr);
2535 2535                          if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
2536 2536                                  ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
2537 2537                                  freemsg(mp);
2538 2538                                  return;
2539 2539                          }
2540 2540                  }
2541 2541  
2542 2542                  /*
2543 2543                   * Update ip_hdr_length to skip the frag header
2544 2544                   * ip_input_fragment_v6 will determine the extension header
2545 2545                   * prior to the fragment header and update its nexthdr value,
2546 2546                   * and also set ira_protocol to the nexthdr that follows the
2547 2547                   * completed fragment.
2548 2548                   */
2549 2549                  ip_hdr_length += sizeof (ip6_frag_t);
2550 2550  
2551 2551                  /*
2552 2552                   * Make sure we have ira_l2src before we lose the original
2553 2553                   * mblk
2554 2554                   */
2555 2555                  if (!(ira->ira_flags & IRAF_L2SRC_SET))
2556 2556                          ip_setl2src(mp, ira, ira->ira_rill);
2557 2557  
2558 2558                  mp = ip_input_fragment_v6(mp, ip6h, fraghdr,
2559 2559                      ira->ira_pktlen - ip_hdr_length, ira);
2560 2560                  if (mp == NULL) {
2561 2561                          /* Reassembly is still pending */
2562 2562                          return;
2563 2563                  }
2564 2564                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs);
2565 2565  
2566 2566                  /*
2567 2567                   * The mblk chain has the frag header removed and
2568 2568                   * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the
2569 2569                   * IP header has been updated to reflect the result.
2570 2570                   */
2571 2571                  ip6h = (ip6_t *)mp->b_rptr;
2572 2572                  ip_hdr_length = ira->ira_ip_hdr_length;
2573 2573                  goto repeat;
2574 2574          }
2575 2575          case IPPROTO_HOPOPTS:
2576 2576                  /*
2577 2577                   * Illegal header sequence.
2578 2578                   * (Hop-by-hop headers are processed above
2579 2579                   *  and required to immediately follow IPv6 header)
2580 2580                   */
2581 2581                  ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2582 2582                  icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2583 2583                  return;
2584 2584  
2585 2585          case IPPROTO_ROUTING: {
2586 2586                  uint_t ehdrlen;
2587 2587                  ip6_rthdr_t *rthdr;
2588 2588  
2589 2589                  /* Check if AH is present and needs to be processed. */
2590 2590                  mp = ipsec_early_ah_v6(mp, ira);
2591 2591                  if (mp == NULL)
2592 2592                          return;
2593 2593  
2594 2594                  /*
2595 2595                   * Reinitialize pointers, as ipsec_early_ah_v6() does
2596 2596                   * complete pullups.  We don't have to do more pullups
2597 2597                   * as a result.
2598 2598                   */
2599 2599                  ip6h = (ip6_t *)mp->b_rptr;
2600 2600  
2601 2601                  if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2602 2602                          goto pkt_too_short;
2603 2603  
2604 2604                  if (mp->b_cont != NULL &&
2605 2605                      rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2606 2606                          ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2607 2607                          if (ip6h == NULL)
2608 2608                                  goto discard;
2609 2609                  }
2610 2610                  rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2611 2611                  protocol = ira->ira_protocol = rthdr->ip6r_nxt;
2612 2612                  ehdrlen = 8 * (rthdr->ip6r_len + 1);
2613 2613                  if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2614 2614                          goto pkt_too_short;
2615 2615                  if (mp->b_cont != NULL &&
2616 2616                      rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2617 2617                          ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2618 2618                          if (ip6h == NULL)
2619 2619                                  goto discard;
2620 2620                          rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2621 2621                  }
2622 2622                  if (rthdr->ip6r_segleft != 0) {
2623 2623                          /* Not end of source route */
2624 2624                          if (ira->ira_flags &
2625 2625                              (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
2626 2626                                  BUMP_MIB(ill->ill_ip_mib,
2627 2627                                      ipIfStatsForwProhibits);
2628 2628                                  ip_drop_input("ipIfStatsInForwProhibits",
2629 2629                                      mp, ill);
2630 2630                                  freemsg(mp);
2631 2631                                  return;
2632 2632                          }
2633 2633                          ip_process_rthdr(mp, ip6h, rthdr, ira);
2634 2634                          return;
2635 2635                  }
2636 2636                  ira->ira_ip_hdr_length += ehdrlen;
2637 2637                  goto repeat;
2638 2638          }
2639 2639  
2640 2640          case IPPROTO_AH:
2641 2641          case IPPROTO_ESP: {
2642 2642                  /*
2643 2643                   * Fast path for AH/ESP.
2644 2644                   */
2645 2645                  netstack_t *ns = ipst->ips_netstack;
2646 2646                  ipsec_stack_t *ipss = ns->netstack_ipsec;
2647 2647  
2648 2648                  IP_STAT(ipst, ipsec_proto_ahesp);
2649 2649  
2650 2650                  if (!ipsec_loaded(ipss)) {
2651 2651                          ip_proto_not_sup(mp, ira);
2652 2652                          return;
2653 2653                  }
2654 2654  
2655 2655                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2656 2656                  /* select inbound SA and have IPsec process the pkt */
2657 2657                  if (protocol == IPPROTO_ESP) {
2658 2658                          esph_t *esph;
2659 2659  
2660 2660                          mp = ipsec_inbound_esp_sa(mp, ira, &esph);
2661 2661                          if (mp == NULL)
2662 2662                                  return;
2663 2663  
2664 2664                          ASSERT(esph != NULL);
2665 2665                          ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2666 2666                          ASSERT(ira->ira_ipsec_esp_sa != NULL);
2667 2667                          ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);
2668 2668  
2669 2669                          mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
2670 2670                              ira);
2671 2671                  } else {
2672 2672                          ah_t *ah;
2673 2673  
2674 2674                          mp = ipsec_inbound_ah_sa(mp, ira, &ah);
2675 2675                          if (mp == NULL)
2676 2676                                  return;
2677 2677  
2678 2678                          ASSERT(ah != NULL);
2679 2679                          ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2680 2680                          ASSERT(ira->ira_ipsec_ah_sa != NULL);
2681 2681                          ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
2682 2682                          mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
2683 2683                              ira);
2684 2684                  }
2685 2685  
2686 2686                  if (mp == NULL) {
2687 2687                          /*
2688 2688                           * Either it failed or is pending. In the former case
2689 2689                           * ipIfStatsInDiscards was increased.
2690 2690                           */
2691 2691                          return;
2692 2692                  }
2693 2693                  /* we're done with IPsec processing, send it up */
2694 2694                  ip_input_post_ipsec(mp, ira);
2695 2695                  return;
2696 2696          }
2697 2697          case IPPROTO_NONE:
2698 2698                  /* All processing is done. Count as "delivered". */
2699 2699                  freemsg(mp);
2700 2700                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2701 2701                  return;
2702 2702  
2703 2703          case IPPROTO_ENCAP:
2704 2704          case IPPROTO_IPV6:
2705 2705                  /* iptun will verify trusted label */
2706 2706                  connp = ipcl_classify_v6(mp, protocol, ip_hdr_length,
2707 2707                      ira, ipst);
2708 2708                  if (connp != NULL) {
2709 2709                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2710 2710                          ira->ira_ill = ira->ira_rill = NULL;
2711 2711                          connp->conn_recv(connp, mp, NULL, ira);
2712 2712                          CONN_DEC_REF(connp);
2713 2713                          ira->ira_ill = ill;
2714 2714                          ira->ira_rill = rill;
2715 2715                          return;
2716 2716                  }
2717 2717                  /* FALLTHRU */
2718 2718          default:
2719 2719                  /*
2720 2720                   * On a labeled system, we have to check whether the zone
2721 2721                   * itself is permitted to receive raw traffic.
2722 2722                   */
2723 2723                  if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2724 2724                          if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2725 2725                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2726 2726                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
2727 2727                                  freemsg(mp);
2728 2728                                  return;
2729 2729                          }
2730 2730                  }
2731 2731                  break;
2732 2732          }
2733 2733  
2734 2734          /*
2735 2735           * The above input functions may have returned the pulled up message.
2736 2736           * So ip6h need to be reinitialized.
2737 2737           */
2738 2738          ip6h = (ip6_t *)mp->b_rptr;
2739 2739          ira->ira_protocol = protocol;
2740 2740          if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) {
2741 2741                  /* No user-level listener for these packets */
2742 2742                  ip_proto_not_sup(mp, ira);
2743 2743                  return;
2744 2744          }
2745 2745  
2746 2746          /*
2747 2747           * Handle fanout to raw sockets.  There
2748 2748           * can be more than one stream bound to a particular
2749 2749           * protocol.  When this is the case, each one gets a copy
2750 2750           * of any incoming packets.
2751 2751           */
2752 2752          ASSERT(ira->ira_protocol == protocol);
2753 2753          ip_fanout_proto_v6(mp, ip6h, ira);
2754 2754          return;
2755 2755  
2756 2756  pkt_too_short:
2757 2757          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
2758 2758          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
2759 2759          freemsg(mp);
2760 2760          return;
2761 2761  
2762 2762  discard:
2763 2763          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2764 2764          ip_drop_input("ipIfStatsInDiscards", mp, ill);
2765 2765          freemsg(mp);
2766 2766  #undef rptr
2767 2767  }
  
    | 
      ↓ open down ↓ | 
    732 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX