Print this page
    
Don't create DCE for bad MTU.
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ip/ip6.c
          +++ new/usr/src/uts/common/inet/ip/ip6.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 1990 Mentat Inc.
  24   24   * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/stream.h>
  29   29  #include <sys/dlpi.h>
  30   30  #include <sys/stropts.h>
  31   31  #include <sys/sysmacros.h>
  32   32  #include <sys/strsun.h>
  33   33  #include <sys/strlog.h>
  34   34  #include <sys/strsubr.h>
  35   35  #define _SUN_TPI_VERSION        2
  36   36  #include <sys/tihdr.h>
  37   37  #include <sys/ddi.h>
  38   38  #include <sys/sunddi.h>
  39   39  #include <sys/cmn_err.h>
  40   40  #include <sys/debug.h>
  41   41  #include <sys/sdt.h>
  42   42  #include <sys/kobj.h>
  43   43  #include <sys/zone.h>
  44   44  #include <sys/neti.h>
  45   45  #include <sys/hook.h>
  46   46  
  47   47  #include <sys/kmem.h>
  48   48  #include <sys/systm.h>
  49   49  #include <sys/param.h>
  50   50  #include <sys/socket.h>
  51   51  #include <sys/vtrace.h>
  52   52  #include <sys/isa_defs.h>
  53   53  #include <sys/atomic.h>
  54   54  #include <sys/policy.h>
  55   55  #include <sys/mac.h>
  56   56  #include <net/if.h>
  57   57  #include <net/if_types.h>
  58   58  #include <net/route.h>
  59   59  #include <net/if_dl.h>
  60   60  #include <sys/sockio.h>
  61   61  #include <netinet/in.h>
  62   62  #include <netinet/ip6.h>
  63   63  #include <netinet/icmp6.h>
  64   64  #include <netinet/sctp.h>
  65   65  
  66   66  #include <inet/common.h>
  67   67  #include <inet/mi.h>
  68   68  #include <inet/optcom.h>
  69   69  #include <inet/mib2.h>
  70   70  #include <inet/nd.h>
  71   71  #include <inet/arp.h>
  72   72  
  73   73  #include <inet/ip.h>
  74   74  #include <inet/ip_impl.h>
  75   75  #include <inet/ip6.h>
  76   76  #include <inet/ip6_asp.h>
  77   77  #include <inet/tcp.h>
  78   78  #include <inet/tcp_impl.h>
  79   79  #include <inet/udp_impl.h>
  80   80  #include <inet/ipp_common.h>
  81   81  
  82   82  #include <inet/ip_multi.h>
  83   83  #include <inet/ip_if.h>
  84   84  #include <inet/ip_ire.h>
  85   85  #include <inet/ip_rts.h>
  86   86  #include <inet/ip_ndp.h>
  87   87  #include <net/pfkeyv2.h>
  88   88  #include <inet/sadb.h>
  89   89  #include <inet/ipsec_impl.h>
  90   90  #include <inet/iptun/iptun_impl.h>
  91   91  #include <inet/sctp_ip.h>
  92   92  #include <sys/pattr.h>
  93   93  #include <inet/ipclassifier.h>
  94   94  #include <inet/ipsecah.h>
  95   95  #include <inet/rawip_impl.h>
  96   96  #include <inet/rts_impl.h>
  97   97  #include <sys/squeue_impl.h>
  98   98  #include <sys/squeue.h>
  99   99  
 100  100  #include <sys/tsol/label.h>
 101  101  #include <sys/tsol/tnet.h>
 102  102  
 103  103  /* Temporary; for CR 6451644 work-around */
 104  104  #include <sys/ethernet.h>
 105  105  
 106  106  /*
 107  107   * Naming conventions:
 108  108   *      These rules should be judiciously applied
 109  109   *      if there is a need to identify something as IPv6 versus IPv4
 110  110   *      IPv6 functions will end with _v6 in the ip module.
 111  111   *      IPv6 functions will end with _ipv6 in the transport modules.
 112  112   *      IPv6 macros:
 113  113   *              Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 114  114   *              Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 115  115   *              And then there are ..V4_PART_OF_V6.
 116  116   *              The intent is that macros in the ip module end with _V6.
 117  117   *      IPv6 global variables will start with ipv6_
 118  118   *      IPv6 structures will start with ipv6
 119  119   *      IPv6 defined constants should start with IPV6_
 120  120   *              (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 121  121   */
 122  122  
 123  123  /*
 124  124   * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 125  125   * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 126  126   * from IANA. This mechanism will remain in effect until an official
 127  127   * number is obtained.
 128  128   */
 129  129  uchar_t ip6opt_ls;
 130  130  
           /*
            * Well-known IPv6 address constants. Each initializer supplies four
            * values; where those values are not byte-order neutral, a separate
            * initializer is selected with _BIG_ENDIAN.
            * NOTE(review): presumably this keeps the in-memory bytes in network
            * order on both kinds of host - confirm against in6_addr_t.
            */
 131  131  const in6_addr_t ipv6_all_ones =
 132  132          { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
 133  133  const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
 134  134  
 135  135  #ifdef  _BIG_ENDIAN
 136  136  const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
 137  137  #else   /* _BIG_ENDIAN */
 138  138  const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
 139  139  #endif  /* _BIG_ENDIAN */
 140  140  
 141  141  #ifdef  _BIG_ENDIAN
 142  142  const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
 143  143  #else  /* _BIG_ENDIAN */
 144  144  const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
 145  145  #endif /* _BIG_ENDIAN */
 146  146  
 147  147  #ifdef _BIG_ENDIAN
 148  148  const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
 149  149  #else  /* _BIG_ENDIAN */
 150  150  const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
 151  151  #endif /* _BIG_ENDIAN */
 152  152  
 153  153  #ifdef _BIG_ENDIAN
 154  154  const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
 155  155  #else  /* _BIG_ENDIAN */
 156  156  const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
 157  157  #endif /* _BIG_ENDIAN */
 158  158  
 159  159  #ifdef _BIG_ENDIAN
 160  160  const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
 161  161  #else  /* _BIG_ENDIAN */
 162  162  const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
 163  163  #endif /* _BIG_ENDIAN */
 164  164  
 165  165  #ifdef _BIG_ENDIAN
 166  166  const in6_addr_t ipv6_solicited_node_mcast =
 167  167                          { 0xff020000U, 0, 0x00000001U, 0xff000000U };
 168  168  #else  /* _BIG_ENDIAN */
 169  169  const in6_addr_t ipv6_solicited_node_mcast =
 170  170                          { 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
 171  171  #endif /* _BIG_ENDIAN */
 172  172  
           /* Forward declarations of file-local (static) helper functions. */
 173  173  static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
 174  174  static void     icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
 175  175  static void     icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
 176  176      ip_recv_attr_t *);
 177  177  static void     icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
 178  178      ip_recv_attr_t *);
 179  179  static void     icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
 180  180      in6_addr_t *, ip_recv_attr_t *);
 181  181  static void     icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
 182  182      ip_recv_attr_t *);
 183  183  static boolean_t        ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
 184  184  
 185  185  /*
 186  186   * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
 187  187   * If the ICMP message is consumed by IP, i.e., it should not be delivered
 188  188   * to any IPPROTO_ICMP raw sockets, then it returns NULL.
 189  189   * Likewise, if the ICMP error is malformed (too short, etc), then it
 190  190   * returns NULL. The caller uses this to determine whether or not to send
 191  191   * to raw sockets.
 192  192   *
 193  193   * All error messages are passed to the matching transport stream.
 194  194   *
 195  195   * See comment for icmp_inbound_v4() on how IPsec is handled.
            *
            * mp:  the received packet. Ownership passes to this function: the
            *      message is freed, handed to another routine, or returned.
            * ira: receive attributes; the receiving ill, the outer IP header
            *      length, the packet length and the flags are read from it.
            *
            * Returns the message the caller should deliver to raw sockets: mp
            * itself when IP does not process the message any further, a copy of
            * it when IP also processes the message, or NULL.
 196  196   */
 197  197  mblk_t *
 198  198  icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
 199  199  {
 200  200          icmp6_t         *icmp6;
 201  201          ip6_t           *ip6h;          /* Outer header */
 202  202          int             ip_hdr_length;  /* Outer header length */
 203  203          boolean_t       interested;
 204  204          ill_t           *ill = ira->ira_ill;
 205  205          ip_stack_t      *ipst = ill->ill_ipst;
 206  206          mblk_t          *mp_ret = NULL;
 207  207  
 208  208          ip6h = (ip6_t *)mp->b_rptr;
 209  209  
 210  210          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
 211  211  
 212  212          /* Check for Martian packets  */
 213  213          if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
 214  214                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 215  215                  ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
 216  216                  freemsg(mp);
 217  217                  return (NULL);
 218  218          }
 219  219  
 220  220          /* Make sure ira_l2src is set for ndp_input */
 221  221          if (!(ira->ira_flags & IRAF_L2SRC_SET))
 222  222                  ip_setl2src(mp, ira, ira->ira_rill);
 223  223  
 224  224          ip_hdr_length = ira->ira_ip_hdr_length;
                   /*
                    * The first mblk must hold the outer header plus the minimum ICMPv6
                    * header. If the packet as a whole is long enough, pull it up;
                    * otherwise it is truncated and dropped.
                    */
 225  225          if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
 226  226                  if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
 227  227                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 228  228                          ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 229  229                          freemsg(mp);
 230  230                          return (NULL);
 231  231                  }
 232  232                  ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
 233  233                  if (ip6h == NULL) {
 234  234                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
 235  235                          freemsg(mp);
 236  236                          return (NULL);
 237  237                  }
 238  238          }
 239  239  
 240  240          icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
 241  241          DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
 242  242          ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
 243  243              icmp6->icmp6_code));
 244  244  
 245  245          /*
 246  246           * We will set "interested" to "true" if we should pass a copy to
 247  247           * the transport i.e., if it is an error message.
 248  248           */
 249  249          interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
 250  250  
 251  251          switch (icmp6->icmp6_type) {
 252  252          case ICMP6_DST_UNREACH:
 253  253                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
 254  254                  if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
 255  255                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
 256  256                  break;
 257  257  
 258  258          case ICMP6_TIME_EXCEEDED:
 259  259                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
 260  260                  break;
 261  261  
 262  262          case ICMP6_PARAM_PROB:
 263  263                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
 264  264                  break;
 265  265  
 266  266          case ICMP6_PACKET_TOO_BIG:
 267  267                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
 268  268                  break;
 269  269  
 270  270          case ICMP6_ECHO_REQUEST:
 271  271                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
                           /* Multicast echo requests are answered only if enabled. */
 272  272                  if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
 273  273                      !ipst->ips_ipv6_resp_echo_mcast)
 274  274                          break;
 275  275  
 276  276                  /*
 277  277                   * We must have exclusive use of the mblk to convert it to
 278  278                   * a response.
 279  279                   * If not, we copy it.
 280  280                   */
 281  281                  if (mp->b_datap->db_ref > 1) {
 282  282                          mblk_t  *mp1;
 283  283  
 284  284                          mp1 = copymsg(mp);
 285  285                          if (mp1 == NULL) {
 286  286                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 287  287                                  ip_drop_input("ipIfStatsInDiscards - copymsg",
 288  288                                      mp, ill);
 289  289                                  freemsg(mp);
 290  290                                  return (NULL);
 291  291                          }
 292  292                          freemsg(mp);
 293  293                          mp = mp1;
                                   /* Re-derive the header pointers inside the copy. */
 294  294                          ip6h = (ip6_t *)mp->b_rptr;
 295  295                          icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
 296  296                  }
 297  297  
                           /* Turn the request into the reply in place and send it. */
 298  298                  icmp6->icmp6_type = ICMP6_ECHO_REPLY;
 299  299                  icmp_send_reply_v6(mp, ip6h, icmp6, ira);
 300  300                  return (NULL);
 301  301  
 302  302          case ICMP6_ECHO_REPLY:
 303  303                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
 304  304                  break;
 305  305  
 306  306          case ND_ROUTER_SOLICIT:
 307  307                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
 308  308                  break;
 309  309  
 310  310          case ND_ROUTER_ADVERT:
 311  311                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
 312  312                  break;
 313  313  
 314  314          case ND_NEIGHBOR_SOLICIT:
 315  315                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
 316  316                  ndp_input(mp, ira);
 317  317                  return (NULL);
 318  318  
 319  319          case ND_NEIGHBOR_ADVERT:
 320  320                  BUMP_MIB(ill->ill_icmp6_mib,
 321  321                      ipv6IfIcmpInNeighborAdvertisements);
 322  322                  ndp_input(mp, ira);
 323  323                  return (NULL);
 324  324  
 325  325          case ND_REDIRECT:
 326  326                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
 327  327  
 328  328                  if (ipst->ips_ipv6_ignore_redirect)
 329  329                          break;
 330  330  
 331  331                  /* We now allow a RAW socket to receive this. */
 332  332                  interested = B_TRUE;
 333  333                  break;
 334  334  
 335  335          /*
 336  336           * The next three icmp messages will be handled by MLD.
 337  337           * Pass all valid MLD packets up to any process(es)
 338  338           * listening on a raw ICMP socket.
 339  339           */
 340  340          case MLD_LISTENER_QUERY:
 341  341          case MLD_LISTENER_REPORT:
 342  342          case MLD_LISTENER_REDUCTION:
 343  343                  mp = mld_input(mp, ira);
 344  344                  return (mp);
 345  345          default:
 346  346                  break;
 347  347          }
 348  348          /*
 349  349           * See if there is an ICMP client to avoid an extra copymsg/freemsg
 350  350           * if there isn't one.
 351  351           */
 352  352          if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
 353  353                  /* If there is an ICMP client and we want one too, copy it. */
 354  354  
 355  355                  if (!interested) {
 356  356                          /* Caller will deliver to RAW sockets */
 357  357                          return (mp);
 358  358                  }
 359  359                  mp_ret = copymsg(mp);
 360  360                  if (mp_ret == NULL) {
                                   /*
                                    * No copy for the raw sockets; IP still processes
                                    * mp below and NULL is returned to the caller.
                                    */
 361  361                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 362  362                          ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
 363  363                  }
 364  364          } else if (!interested) {
 365  365                  /* Neither we nor raw sockets are interested. Drop packet now */
 366  366                  freemsg(mp);
 367  367                  return (NULL);
 368  368          }
 369  369  
 370  370          /*
 371  371           * ICMP error or redirect packet. Make sure we have enough of
 372  372           * the header and that db_ref == 1 since we might end up modifying
 373  373           * the packet.
 374  374           */
 375  375          if (mp->b_cont != NULL) {
                           /*
                            * NOTE(review): a length of -1 presumably asks ip_pullup to
                            * pull up the whole message - confirm against ip_pullup.
                            */
 376  376                  if (ip_pullup(mp, -1, ira) == NULL) {
 377  377                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 378  378                          ip_drop_input("ipIfStatsInDiscards - ip_pullup",
 379  379                              mp, ill);
 380  380                          freemsg(mp);
 381  381                          return (mp_ret);
 382  382                  }
 383  383          }
 384  384  
 385  385          if (mp->b_datap->db_ref > 1) {
 386  386                  mblk_t  *mp1;
 387  387  
 388  388                  mp1 = copymsg(mp);
 389  389                  if (mp1 == NULL) {
 390  390                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 391  391                          ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
 392  392                          freemsg(mp);
 393  393                          return (mp_ret);
 394  394                  }
 395  395                  freemsg(mp);
 396  396                  mp = mp1;
 397  397          }
 398  398  
 399  399          /*
 400  400           * In case mp has changed, verify the message before any further
 401  401           * processes.
 402  402           */
 403  403          ip6h = (ip6_t *)mp->b_rptr;
 404  404          icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
 405  405          if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
 406  406                  freemsg(mp);
 407  407                  return (mp_ret);
 408  408          }
 409  409  
 410  410          switch (icmp6->icmp6_type) {
 411  411          case ND_REDIRECT:
 412  412                  icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
 413  413                  break;
 414  414          case ICMP6_PACKET_TOO_BIG:
 415  415                  /* Update DCE and adjust MTU in icmp header if needed */
 416  416                  icmp_inbound_too_big_v6(icmp6, ira);
 417  417                  /* FALLTHRU */
 418  418          default:
 419  419                  icmp_inbound_error_fanout_v6(mp, icmp6, ira);
 420  420                  break;
 421  421          }
 422  422  
 423  423          return (mp_ret);
 424  424  }
 425  425  
 426  426  /*
 427  427   * Send an ICMP echo reply.
 428  428   * The caller has already updated the payload part of the packet.
 429  429   * We handle the ICMP checksum, IP source address selection and feed
 430  430   * the packet into ip_output_simple.
            *
            * mp:    the message being turned into the reply; it is modified in
            *        place and handed to ip_output_simple (or to ipsec_in_to_out,
            *        which consumes it on failure).
            * ip6h:  the outer IPv6 header within mp.
            * icmp6: the ICMPv6 header within mp.
            * ira:   receive attributes of the request; the zone, label, flags and
            *        receiving ill are used to build the transmit attributes.
 431  431   */
 432  432  static void
 433  433  icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
 434  434      ip_recv_attr_t *ira)
 435  435  {
 436  436          uint_t          ip_hdr_length = ira->ira_ip_hdr_length;
 437  437          ill_t           *ill = ira->ira_ill;
 438  438          ip_stack_t      *ipst = ill->ill_ipst;
 439  439          ip_xmit_attr_t  ixas;
 440  440          in6_addr_t      origsrc;
 441  441  
 442  442          /*
 443  443           * Remove any extension headers (do not reverse a source route)
 444  444           * and clear the flow id (keep traffic class for now).
 445  445           */
 446  446          if (ip_hdr_length != IPV6_HDR_LEN) {
 447  447                  int     i;
 448  448  
                           /*
                            * Slide the fixed IPv6 header up so that it ends where the
                            * extension headers ended. The bytes are copied last-first
                            * because the source and destination ranges may overlap.
                            */
 449  449                  for (i = 0; i < IPV6_HDR_LEN; i++) {
 450  450                          mp->b_rptr[ip_hdr_length - i - 1] =
 451  451                              mp->b_rptr[IPV6_HDR_LEN - i - 1];
 452  452                  }
 453  453                  mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
 454  454                  ip6h = (ip6_t *)mp->b_rptr;
 455  455                  ip6h->ip6_nxt = IPPROTO_ICMPV6;
                           /* The payload length no longer includes the removed headers. */
 456  456                  i = ntohs(ip6h->ip6_plen);
 457  457                  i -= (ip_hdr_length - IPV6_HDR_LEN);
 458  458                  ip6h->ip6_plen = htons(i);
 459  459                  ip_hdr_length = IPV6_HDR_LEN;
 460  460                  ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
 461  461          }
 462  462          ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
 463  463  
 464  464          /* Reverse the source and destination addresses. */
 465  465          origsrc = ip6h->ip6_src;
 466  466          ip6h->ip6_src = ip6h->ip6_dst;
 467  467          ip6h->ip6_dst = origsrc;
 468  468  
 469  469          /* set the hop limit */
 470  470          ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
 471  471  
 472  472          /*
 473  473           * Prepare for checksum by putting icmp length in the icmp
 474  474           * checksum field. The checksum is calculated in ip_output
 475  475           */
 476  476          icmp6->icmp6_cksum = ip6h->ip6_plen;
 477  477  
                   /* Build a transmit attribute structure on the stack for the reply. */
 478  478          bzero(&ixas, sizeof (ixas));
 479  479          ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
 480  480          ixas.ixa_zoneid = ira->ira_zoneid;
 481  481          ixas.ixa_cred = kcred;
 482  482          ixas.ixa_cpid = NOPID;
 483  483          ixas.ixa_tsl = ira->ira_tsl;    /* Behave as a multi-level responder */
 484  484          ixas.ixa_ifindex = 0;
 485  485          ixas.ixa_ipst = ipst;
 486  486          ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 487  487  
 488  488          if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
 489  489                  /*
 490  490                   * This packet should go out the same way as it
 491  491                   * came in i.e in clear, independent of the IPsec
 492  492                   * policy for transmitting packets.
 493  493                   */
 494  494                  ixas.ixa_flags |= IXAF_NO_IPSEC;
 495  495          } else {
 496  496                  if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
 497  497                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 498  498                          /* Note: mp already consumed and ip_drop_packet done */
 499  499                          return;
 500  500                  }
 501  501          }
 502  502  
 503  503          /* Was the destination (now source) link-local? Send out same group */
 504  504          if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
 505  505                  ixas.ixa_flags |= IXAF_SCOPEID_SET;
 506  506                  if (IS_UNDER_IPMP(ill))
 507  507                          ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
 508  508                  else
 509  509                          ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
 510  510          }
 511  511  
 512  512          if (ira->ira_flags & IRAF_MULTIBROADCAST) {
 513  513                  /*
 514  514                   * Not one of our addresses (IRE_LOCALs), thus we let
 515  515                   * ip_output_simple pick the source.
 516  516                   */
 517  517                  ip6h->ip6_src = ipv6_all_zeros;
 518  518                  ixas.ixa_flags |= IXAF_SET_SOURCE;
 519  519          }
 520  520  
 521  521          /* Should we send using dce_pmtu? */
 522  522          if (ipst->ips_ipv6_icmp_return_pmtu)
 523  523                  ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
 524  524  
                   /* The send result is deliberately ignored; then release ixas state. */
 525  525          (void) ip_output_simple(mp, &ixas);
 526  526          ixa_cleanup(&ixas);
 527  527  
 528  528  }
 529  529  
 530  530  /*
 531  531   * Verify an ICMP message that is either an ICMP error or a redirect packet.
 532  532   * The caller should have fully pulled up the message. If it's a redirect
 533  533   * packet, only basic checks on IP header will be done; otherwise, verify
 534  534   * the packet by looking at the included ULP header.
 535  535   *
 536  536   * Called before icmp_inbound_error_fanout_v6 is called.
            *
            * mp:    the pulled-up message; it is only inspected and is never freed
            *        here, so the caller frees it when B_FALSE is returned.
            * icmp6: the ICMPv6 header within mp.
            * ira:   receive attributes; only the receiving ill is taken from it
            *        directly, and it is passed on to the TCP verify callback.
            *
            * Returns B_TRUE if the message passed the checks. Returns B_FALSE
            * after bumping ipIfStatsInDiscards (bogus message) or
            * ipv6IfIcmpInErrors (truncated message).
 537  537   */
 538  538  static boolean_t
 539  539  icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
 540  540  {
 541  541          ill_t           *ill = ira->ira_ill;
 542  542          uint16_t        hdr_length;
 543  543          uint8_t         *nexthdrp;
 544  544          uint8_t         nexthdr;
 545  545          ip_stack_t      *ipst = ill->ill_ipst;
 546  546          conn_t          *connp;
 547  547          ip6_t           *ip6h;  /* Inner header */
 548  548  
                   /*
                    * What follows the icmp6_t header is treated as the inner IPv6
                    * header. This length check is applied to every message type.
                    * NOTE(review): that includes ND_REDIRECT, which the caller later
                    * interprets as an nd_redirect_t starting at icmp6 - confirm that
                    * measuring these bounds from the end of icmp6_t is intended.
                    */
 549  549          ip6h = (ip6_t *)&icmp6[1];
 550  550          if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
 551  551                  goto truncated;
 552  552  
 553  553          if (icmp6->icmp6_type == ND_REDIRECT) {
 554  554                  hdr_length = sizeof (nd_redirect_t);
 555  555          } else {
 556  556                  if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
 557  557                          goto discard_pkt;
 558  558                  hdr_length = IPV6_HDR_LEN;
 559  559          }
 560  560  
 561  561          if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
 562  562                  goto truncated;
 563  563  
 564  564          /*
 565  565           * Stop here for ND_REDIRECT.
 566  566           */
 567  567          if (icmp6->icmp6_type == ND_REDIRECT)
 568  568                  return (B_TRUE);
 569  569  
 570  570          /*
 571  571           * ICMP errors only.
 572  572           */
                   /* hdr_length is overwritten here along with nexthdrp. */
 573  573          if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
 574  574                  goto discard_pkt;
 575  575          nexthdr = *nexthdrp;
 576  576  
 577  577          /* Try to pass the ICMP message to clients who need it */
 578  578          switch (nexthdr) {
 579  579          case IPPROTO_UDP:
 580  580                  /*
 581  581                   * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
 582  582                   * transport header.
 583  583                   */
 584  584                  if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
 585  585                      mp->b_wptr)
 586  586                          goto truncated;
 587  587                  break;
 588  588          case IPPROTO_TCP: {
 589  589                  tcpha_t         *tcpha;
 590  590  
 591  591                  /*
 592  592                   * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
 593  593                   * transport header.
 594  594                   */
 595  595                  if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
 596  596                      mp->b_wptr)
 597  597                          goto truncated;
 598  598  
 599  599                  tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
 600  600                  /*
 601  601                   * With IPMP we need to match across group, which we do
 602  602                   * since we have the upper ill from ira_ill.
 603  603                   */
 604  604                  connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
 605  605                      ill->ill_phyint->phyint_ifindex, ipst);
 606  606                  if (connp == NULL)
 607  607                          goto discard_pkt;
 608  608  
                           /*
                            * Let the connection vet the error if it registered a
                            * callback. The conn reference is dropped on both outcomes.
                            */
 609  609                  if ((connp->conn_verifyicmp != NULL) &&
 610  610                      !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
 611  611                          CONN_DEC_REF(connp);
 612  612                          goto discard_pkt;
 613  613                  }
 614  614                  CONN_DEC_REF(connp);
 615  615                  break;
 616  616          }
 617  617          case IPPROTO_SCTP:
 618  618                  /*
 619  619                   * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
 620  620                   * transport header.
 621  621                   */
 622  622                  if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
 623  623                      mp->b_wptr)
 624  624                          goto truncated;
 625  625                  break;
 626  626          case IPPROTO_ESP:
 627  627          case IPPROTO_AH:
                           /* No additional check is made here for ESP or AH. */
 628  628                  break;
 629  629          case IPPROTO_ENCAP:
 630  630          case IPPROTO_IPV6: {
 631  631                  /* Look for self-encapsulated packets that caused an error */
 632  632                  ip6_t *in_ip6h;
 633  633  
                           /* Require a complete encapsulated IPv4 or IPv6 header. */
 634  634                  in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
 635  635                  if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
 636  636                      sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
 637  637                          goto truncated;
 638  638                  break;
 639  639          }
 640  640          default:
 641  641                  break;
 642  642          }
 643  643  
 644  644          return (B_TRUE);
 645  645  
 646  646  discard_pkt:
 647  647          /* Bogus ICMP error. */
 648  648          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 649  649          return (B_FALSE);
 650  650  
 651  651  truncated:
 652  652          /* We pulled up everything already. Must be truncated */
 653  653          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
 654  654          return (B_FALSE);
 655  655  }
 656  656  
 657  657  /*
 658  658   * Process received IPv6 ICMP Packet too big.
 659  659   * The caller is responsible for validating the packet before passing it in
 660  660   * and also to fanout the ICMP error to any matching transport conns. Assumes
 661  661   * the message has been fully pulled up.
 662  662   *
 663  663   * Before getting here, the caller has called icmp_inbound_verify_v6()
 664  664   * that should have verified with ULP to prevent undoing the changes we're
 665  665   * going to make to DCE. For example, TCP might have verified that the packet
 666  666   * which generated error is in the send window.
 667  667   *
 668  668   * In some cases modified this MTU in the ICMP header packet; the caller
 669  669   * should pass to the matching ULP after this returns.
 670  670   */
 671  671  static void
 672  672  icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
 673  673  {
 674  674          uint32_t        mtu;
 675  675          dce_t           *dce;
  
    | 
      ↓ open down ↓ | 
    675 lines elided | 
    
      ↑ open up ↑ | 
  
 676  676          ill_t           *ill = ira->ira_ill;    /* Upper ill if IPMP */
 677  677          ip_stack_t      *ipst = ill->ill_ipst;
 678  678          int             old_max_frag;
 679  679          in6_addr_t      final_dst;
 680  680          ip6_t           *ip6h;  /* Inner IP header */
 681  681  
 682  682          /* Caller has already pulled up everything. */
 683  683          ip6h = (ip6_t *)&icmp6[1];
 684  684          final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
 685  685  
      686 +        mtu = ntohl(icmp6->icmp6_mtu);
      687 +        if (mtu < IPV6_MIN_MTU) {
      688 +                /*
      689 +                 * RFC 8021 suggests to ignore messages where mtu is
      690 +                 * less than the IPv6 minimum.
      691 +                 */
      692 +                ip1dbg(("Received mtu less than IPv6 "
      693 +                    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
      694 +                DTRACE_PROBE1(icmp6__too__small__mtu, uint32_t, mtu);
      695 +                return;
      696 +        }
      697 +
 686  698          /*
 687  699           * For link local destinations matching simply on address is not
 688  700           * sufficient. Same link local addresses for different ILL's is
 689  701           * possible.
 690  702           */
 691  703          if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
 692  704                  dce = dce_lookup_and_add_v6(&final_dst,
 693  705                      ill->ill_phyint->phyint_ifindex, ipst);
 694  706          } else {
 695  707                  dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
 696  708          }
 697  709          if (dce == NULL) {
  
    | 
      ↓ open down ↓ | 
    2 lines elided | 
    
      ↑ open up ↑ | 
  
 698  710                  /* Couldn't add a unique one - ENOMEM */
 699  711                  if (ip_debug > 2) {
 700  712                          /* ip1dbg */
 701  713                          pr_addr_dbg("icmp_inbound_too_big_v6:"
 702  714                              "no dce for dst %s\n", AF_INET6,
 703  715                              &final_dst);
 704  716                  }
 705  717                  return;
 706  718          }
 707  719  
 708      -        mtu = ntohl(icmp6->icmp6_mtu);
 709      -
 710  720          mutex_enter(&dce->dce_lock);
 711  721          if (dce->dce_flags & DCEF_PMTU)
 712  722                  old_max_frag = dce->dce_pmtu;
 713  723          else if (IN6_IS_ADDR_MULTICAST(&final_dst))
 714  724                  old_max_frag = ill->ill_mc_mtu;
 715  725          else
 716  726                  old_max_frag = ill->ill_mtu;
 717  727  
 718      -        if (mtu >= IPV6_MIN_MTU) {
 719      -                ip1dbg(("Received mtu from router: %d\n", mtu));
 720      -                DTRACE_PROBE1(icmp6__received__mtu, uint32_t, mtu);
 721      -                dce->dce_pmtu = MIN(old_max_frag, mtu);
 722      -                icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
      728 +        ip1dbg(("Received mtu from router: %d\n", mtu));
      729 +        DTRACE_PROBE1(icmp6__received__mtu, uint32_t, mtu);
      730 +        dce->dce_pmtu = MIN(old_max_frag, mtu);
      731 +        icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
 723  732  
 724      -                /* We now have a PMTU for sure */
 725      -                dce->dce_flags |= DCEF_PMTU;
 726      -                dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
 727      -        } else {
 728      -                /*
 729      -                 * RFC 8021 suggests to ignore messages where mtu is
 730      -                 * less than the IPv6 minimum.
 731      -                 */
 732      -                ip1dbg(("Received mtu less than IPv6 "
 733      -                    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
 734      -                DTRACE_PROBE1(icmp6__too__small__mtu, uint32_t, mtu);
 735      -        }
      733 +        /* We now have a PMTU for sure */
      734 +        dce->dce_flags |= DCEF_PMTU;
      735 +        dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
 736  736  
 737  737          mutex_exit(&dce->dce_lock);
 738  738          /*
 739  739           * After dropping the lock the new value is visible to everyone.
 740  740           * Then we bump the generation number so any cached values reinspect
 741  741           * the dce_t.
 742  742           */
 743  743          dce_increment_generation(dce);
 744  744          dce_refrele(dce);
 745  745  }
 746  746  
 747  747  /*
 748  748   * Fanout received ICMPv6 error packets to the transports.
 749  749   * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
 750  750   *
 751  751   * The caller must have called icmp_inbound_verify_v6.
 752  752   */
 753  753  void
 754  754  icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
 755  755  {
 756  756          uint16_t        *up;    /* Pointer to ports in ULP header */
 757  757          uint32_t        ports;  /* reversed ports for fanout */
 758  758          ip6_t           rip6h;  /* With reversed addresses */
 759  759          ip6_t           *ip6h;  /* Inner IP header */
 760  760          uint16_t        hdr_length; /* Inner IP header length */
 761  761          uint8_t         *nexthdrp;
 762  762          uint8_t         nexthdr;
 763  763          tcpha_t         *tcpha;
 764  764          conn_t          *connp;
 765  765          ill_t           *ill = ira->ira_ill;    /* Upper in the case of IPMP */
 766  766          ip_stack_t      *ipst = ill->ill_ipst;
 767  767          ipsec_stack_t   *ipss = ipst->ips_netstack->netstack_ipsec;
 768  768  
 769  769          /* Caller has already pulled up everything. */
 770  770          ip6h = (ip6_t *)&icmp6[1];
 771  771          ASSERT(mp->b_cont == NULL);
 772  772          ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
 773  773  
 774  774          if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
 775  775                  goto drop_pkt;
 776  776          nexthdr = *nexthdrp;
 777  777          ira->ira_protocol = nexthdr;
 778  778  
 779  779          /*
 780  780           * We need a separate IP header with the source and destination
 781  781           * addresses reversed to do fanout/classification because the ip6h in
 782  782           * the ICMPv6 error is in the form we sent it out.
 783  783           */
 784  784          rip6h.ip6_src = ip6h->ip6_dst;
 785  785          rip6h.ip6_dst = ip6h->ip6_src;
 786  786          rip6h.ip6_nxt = nexthdr;
 787  787  
 788  788          /* Try to pass the ICMP message to clients who need it */
 789  789          switch (nexthdr) {
 790  790          case IPPROTO_UDP: {
 791  791                  /* Attempt to find a client stream based on port. */
 792  792                  up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
 793  793  
 794  794                  /* Note that we send error to all matches. */
 795  795                  ira->ira_flags |= IRAF_ICMP_ERROR;
 796  796                  ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
 797  797                  ira->ira_flags &= ~IRAF_ICMP_ERROR;
 798  798                  return;
 799  799          }
 800  800          case IPPROTO_TCP: {
 801  801                  /*
 802  802                   * Attempt to find a client stream based on port.
 803  803                   * Note that we do a reverse lookup since the header is
 804  804                   * in the form we sent it out.
 805  805                   */
 806  806                  tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
 807  807                  /*
 808  808                   * With IPMP we need to match across group, which we do
 809  809                   * since we have the upper ill from ira_ill.
 810  810                   */
 811  811                  connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
 812  812                      TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
 813  813                  if (connp == NULL) {
 814  814                          goto drop_pkt;
 815  815                  }
 816  816  
 817  817                  if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
 818  818                      (ira->ira_flags & IRAF_IPSEC_SECURE)) {
 819  819                          mp = ipsec_check_inbound_policy(mp, connp,
 820  820                              NULL, ip6h, ira);
 821  821                          if (mp == NULL) {
 822  822                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 823  823                                  /* Note that mp is NULL */
 824  824                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
 825  825                                  CONN_DEC_REF(connp);
 826  826                                  return;
 827  827                          }
 828  828                  }
 829  829  
 830  830                  ira->ira_flags |= IRAF_ICMP_ERROR;
 831  831                  if (IPCL_IS_TCP(connp)) {
 832  832                          SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
 833  833                              connp->conn_recvicmp, connp, ira, SQ_FILL,
 834  834                              SQTAG_TCP6_INPUT_ICMP_ERR);
 835  835                  } else {
 836  836                          /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
 837  837                          ill_t *rill = ira->ira_rill;
 838  838  
 839  839                          ira->ira_ill = ira->ira_rill = NULL;
 840  840                          (connp->conn_recv)(connp, mp, NULL, ira);
 841  841                          CONN_DEC_REF(connp);
 842  842                          ira->ira_ill = ill;
 843  843                          ira->ira_rill = rill;
 844  844                  }
 845  845                  ira->ira_flags &= ~IRAF_ICMP_ERROR;
 846  846                  return;
 847  847  
 848  848          }
 849  849          case IPPROTO_SCTP:
 850  850                  up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
 851  851                  /* Find a SCTP client stream for this packet. */
 852  852                  ((uint16_t *)&ports)[0] = up[1];
 853  853                  ((uint16_t *)&ports)[1] = up[0];
 854  854  
 855  855                  ira->ira_flags |= IRAF_ICMP_ERROR;
 856  856                  ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
 857  857                  ira->ira_flags &= ~IRAF_ICMP_ERROR;
 858  858                  return;
 859  859  
 860  860          case IPPROTO_ESP:
 861  861          case IPPROTO_AH:
 862  862                  if (!ipsec_loaded(ipss)) {
 863  863                          ip_proto_not_sup(mp, ira);
 864  864                          return;
 865  865                  }
 866  866  
 867  867                  if (nexthdr == IPPROTO_ESP)
 868  868                          mp = ipsecesp_icmp_error(mp, ira);
 869  869                  else
 870  870                          mp = ipsecah_icmp_error(mp, ira);
 871  871                  if (mp == NULL)
 872  872                          return;
 873  873  
 874  874                  /* Just in case ipsec didn't preserve the NULL b_cont */
 875  875                  if (mp->b_cont != NULL) {
 876  876                          if (!pullupmsg(mp, -1))
 877  877                                  goto drop_pkt;
 878  878                  }
 879  879  
 880  880                  /*
 881  881                   * If successful, the mp has been modified to not include
 882  882                   * the ESP/AH header so we can fanout to the ULP's icmp
 883  883                   * error handler.
 884  884                   */
 885  885                  if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
 886  886                          goto drop_pkt;
 887  887  
 888  888                  ip6h = (ip6_t *)mp->b_rptr;
 889  889                  /* Don't call hdr_length_v6() unless you have to. */
 890  890                  if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
 891  891                          hdr_length = ip_hdr_length_v6(mp, ip6h);
 892  892                  else
 893  893                          hdr_length = IPV6_HDR_LEN;
 894  894  
 895  895                  /* Verify the modified message before any further processing. */
 896  896                  icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
 897  897                  if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
 898  898                          freemsg(mp);
 899  899                          return;
 900  900                  }
 901  901  
 902  902                  icmp_inbound_error_fanout_v6(mp, icmp6, ira);
 903  903                  return;
 904  904  
 905  905          case IPPROTO_IPV6: {
 906  906                  /* Look for self-encapsulated packets that caused an error */
 907  907                  ip6_t *in_ip6h;
 908  908  
 909  909                  in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
 910  910  
 911  911                  if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
 912  912                      IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
 913  913                          /*
 914  914                           * Self-encapsulated case. As in the ipv4 case,
 915  915                           * we need to strip the 2nd IP header. Since mp
 916  916                           * is already pulled-up, we can simply bcopy
 917  917                           * the 3rd header + data over the 2nd header.
 918  918                           */
 919  919                          uint16_t unused_len;
 920  920  
 921  921                          /*
 922  922                           * Make sure we don't do recursion more than once.
 923  923                           */
 924  924                          if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
 925  925                              &unused_len, &nexthdrp) ||
 926  926                              *nexthdrp == IPPROTO_IPV6) {
 927  927                                  goto drop_pkt;
 928  928                          }
 929  929  
 930  930                          /*
 931  931                           * Copy the 3rd header + remaining data on top
 932  932                           * of the 2nd header.
 933  933                           */
 934  934                          bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
 935  935  
 936  936                          /*
 937  937                           * Subtract length of the 2nd header.
 938  938                           */
 939  939                          mp->b_wptr -= hdr_length;
 940  940  
 941  941                          ip6h = (ip6_t *)mp->b_rptr;
 942  942                          /* Don't call hdr_length_v6() unless you have to. */
 943  943                          if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
 944  944                                  hdr_length = ip_hdr_length_v6(mp, ip6h);
 945  945                          else
 946  946                                  hdr_length = IPV6_HDR_LEN;
 947  947  
 948  948                          /*
 949  949                           * Verify the modified message before any further
 950  950                           * processing.
 951  951                           */
 952  952                          icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
 953  953                          if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
 954  954                                  freemsg(mp);
 955  955                                  return;
 956  956                          }
 957  957  
 958  958                          /*
 959  959                           * Now recurse, and see what I _really_ should be
 960  960                           * doing here.
 961  961                           */
 962  962                          icmp_inbound_error_fanout_v6(mp, icmp6, ira);
 963  963                          return;
 964  964                  }
 965  965                  /* FALLTHRU */
 966  966          }
 967  967          case IPPROTO_ENCAP:
 968  968                  if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
 969  969                      &rip6h.ip6_dst, ipst)) != NULL) {
 970  970                          ira->ira_flags |= IRAF_ICMP_ERROR;
 971  971                          connp->conn_recvicmp(connp, mp, NULL, ira);
 972  972                          CONN_DEC_REF(connp);
 973  973                          ira->ira_flags &= ~IRAF_ICMP_ERROR;
 974  974                          return;
 975  975                  }
 976  976                  /*
 977  977                   * No IP tunnel is interested, fallthrough and see
 978  978                   * if a raw socket will want it.
 979  979                   */
 980  980                  /* FALLTHRU */
 981  981          default:
 982  982                  ira->ira_flags |= IRAF_ICMP_ERROR;
 983  983                  ASSERT(ira->ira_protocol == nexthdr);
 984  984                  ip_fanout_proto_v6(mp, &rip6h, ira);
 985  985                  ira->ira_flags &= ~IRAF_ICMP_ERROR;
 986  986                  return;
 987  987          }
 988  988          /* NOTREACHED */
 989  989  drop_pkt:
 990  990          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
 991  991          ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
 992  992          freemsg(mp);
 993  993  }
 994  994  
 995  995  /*
 996  996   * Process received IPv6 ICMP Redirect messages.
 997  997   * Assumes the caller has verified that the headers are in the pulled up mblk.
 998  998   * Consumes mp.
 999  999   */
1000 1000  /* ARGSUSED */
1001 1001  static void
1002 1002  icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1003 1003      ip_recv_attr_t *ira)
1004 1004  {
1005 1005          ire_t           *ire, *nire;
1006 1006          ire_t           *prev_ire = NULL;
1007 1007          ire_t           *redir_ire;
1008 1008          in6_addr_t      *src, *dst, *gateway;
1009 1009          nd_opt_hdr_t    *opt;
1010 1010          nce_t           *nce;
1011 1011          int             ncec_flags = 0;
1012 1012          int             err = 0;
1013 1013          boolean_t       redirect_to_router = B_FALSE;
1014 1014          int             len;
1015 1015          int             optlen;
1016 1016          ill_t           *ill = ira->ira_rill;
1017 1017          ill_t           *rill = ira->ira_rill;
1018 1018          ip_stack_t      *ipst = ill->ill_ipst;
1019 1019  
1020 1020          /*
1021 1021           * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1022 1022           * and make it be the IPMP upper to avoid being confused by a packet
1023 1023           * addressed to a unicast address on a different ill.
1024 1024           */
1025 1025          if (IS_UNDER_IPMP(rill)) {
1026 1026                  rill = ipmp_ill_hold_ipmp_ill(rill);
1027 1027                  if (rill == NULL) {
1028 1028                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1029 1029                          ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1030 1030                              mp, ill);
1031 1031                          freemsg(mp);
1032 1032                          return;
1033 1033                  }
1034 1034                  ASSERT(rill != ira->ira_rill);
1035 1035          }
1036 1036  
1037 1037          len = mp->b_wptr - (uchar_t *)rd;
1038 1038          src = &ip6h->ip6_src;
1039 1039          dst = &rd->nd_rd_dst;
1040 1040          gateway = &rd->nd_rd_target;
1041 1041  
1042 1042          /* Verify if it is a valid redirect */
1043 1043          if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1044 1044              (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1045 1045              (rd->nd_rd_code != 0) ||
1046 1046              (len < sizeof (nd_redirect_t)) ||
1047 1047              (IN6_IS_ADDR_V4MAPPED(dst)) ||
1048 1048              (IN6_IS_ADDR_MULTICAST(dst))) {
1049 1049                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1050 1050                  ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1051 1051                  goto fail_redirect;
1052 1052          }
1053 1053  
1054 1054          if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1055 1055              IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1056 1056                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1057 1057                  ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1058 1058                      mp, ill);
1059 1059                  goto fail_redirect;
1060 1060          }
1061 1061  
1062 1062          optlen = len - sizeof (nd_redirect_t);
1063 1063          if (optlen != 0) {
1064 1064                  if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1065 1065                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1066 1066                          ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1067 1067                              mp, ill);
1068 1068                          goto fail_redirect;
1069 1069                  }
1070 1070          }
1071 1071  
1072 1072          if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1073 1073                  redirect_to_router = B_TRUE;
1074 1074                  ncec_flags |= NCE_F_ISROUTER;
1075 1075          } else {
1076 1076                  gateway = dst;  /* Add nce for dst */
1077 1077          }
1078 1078  
1079 1079  
1080 1080          /*
1081 1081           * Verify that the IP source address of the redirect is
1082 1082           * the same as the current first-hop router for the specified
1083 1083           * ICMP destination address.
1084 1084           * Also, make sure we had a route for the dest in question and
1085 1085           * that route was pointing to the old gateway (the source of the
1086 1086           * redirect packet.)
1087 1087           * We do longest match and then compare ire_gateway_addr_v6 below.
1088 1088           */
1089 1089          prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1090 1090              ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1091 1091  
1092 1092          /*
1093 1093           * Check that
1094 1094           *      the redirect was not from ourselves
1095 1095           *      old gateway is still directly reachable
1096 1096           */
1097 1097          if (prev_ire == NULL ||
1098 1098              (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1099 1099              (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1100 1100              !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1101 1101                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1102 1102                  ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1103 1103                  goto fail_redirect;
1104 1104          }
1105 1105  
1106 1106          ASSERT(prev_ire->ire_ill != NULL);
1107 1107          if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1108 1108                  ncec_flags |= NCE_F_NONUD;
1109 1109  
1110 1110          opt = (nd_opt_hdr_t *)&rd[1];
1111 1111          opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1112 1112          if (opt != NULL) {
1113 1113                  err = nce_lookup_then_add_v6(rill,
1114 1114                      (uchar_t *)&opt[1],         /* Link layer address */
1115 1115                      rill->ill_phys_addr_length,
1116 1116                      gateway, ncec_flags, ND_STALE, &nce);
1117 1117                  switch (err) {
1118 1118                  case 0:
1119 1119                          nce_refrele(nce);
1120 1120                          break;
1121 1121                  case EEXIST:
1122 1122                          /*
1123 1123                           * Check to see if link layer address has changed and
1124 1124                           * process the ncec_state accordingly.
1125 1125                           */
1126 1126                          nce_process(nce->nce_common,
1127 1127                              (uchar_t *)&opt[1], 0, B_FALSE);
1128 1128                          nce_refrele(nce);
1129 1129                          break;
1130 1130                  default:
1131 1131                          ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1132 1132                              err));
1133 1133                          goto fail_redirect;
1134 1134                  }
1135 1135          }
1136 1136          if (redirect_to_router) {
1137 1137                  ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1138 1138  
1139 1139                  /*
1140 1140                   * Create a Route Association.  This will allow us to remember
1141 1141                   * a router told us to use the particular gateway.
1142 1142                   */
1143 1143                  ire = ire_create_v6(
1144 1144                      dst,
1145 1145                      &ipv6_all_ones,             /* mask */
1146 1146                      gateway,                    /* gateway addr */
1147 1147                      IRE_HOST,
1148 1148                      prev_ire->ire_ill,
1149 1149                      ALL_ZONES,
1150 1150                      (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1151 1151                      NULL,
1152 1152                      ipst);
1153 1153          } else {
1154 1154                  ipif_t *ipif;
1155 1155                  in6_addr_t gw;
1156 1156  
1157 1157                  /*
1158 1158                   * Just create an on link entry, i.e. interface route.
1159 1159                   * The gateway field is our link-local on the ill.
1160 1160                   */
1161 1161                  mutex_enter(&rill->ill_lock);
1162 1162                  for (ipif = rill->ill_ipif; ipif != NULL;
1163 1163                      ipif = ipif->ipif_next) {
1164 1164                          if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1165 1165                              IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1166 1166                                  break;
1167 1167                  }
1168 1168                  if (ipif == NULL) {
1169 1169                          /* We have no link-local address! */
1170 1170                          mutex_exit(&rill->ill_lock);
1171 1171                          goto fail_redirect;
1172 1172                  }
1173 1173                  gw = ipif->ipif_v6lcl_addr;
1174 1174                  mutex_exit(&rill->ill_lock);
1175 1175  
1176 1176                  ire = ire_create_v6(
1177 1177                      dst,                                /* gateway == dst */
1178 1178                      &ipv6_all_ones,                     /* mask */
1179 1179                      &gw,                                /* gateway addr */
1180 1180                      rill->ill_net_type,                 /* IF_[NO]RESOLVER */
1181 1181                      prev_ire->ire_ill,
1182 1182                      ALL_ZONES,
1183 1183                      (RTF_DYNAMIC | RTF_HOST),
1184 1184                      NULL,
1185 1185                      ipst);
1186 1186          }
1187 1187  
1188 1188          if (ire == NULL)
1189 1189                  goto fail_redirect;
1190 1190  
1191 1191          nire = ire_add(ire);
1192 1192          /* Check if it was a duplicate entry */
1193 1193          if (nire != NULL && nire != ire) {
1194 1194                  ASSERT(nire->ire_identical_ref > 1);
1195 1195                  ire_delete(nire);
1196 1196                  ire_refrele(nire);
1197 1197                  nire = NULL;
1198 1198          }
1199 1199          ire = nire;
1200 1200          if (ire != NULL) {
1201 1201                  ire_refrele(ire);               /* Held in ire_add */
1202 1202  
1203 1203                  /* tell routing sockets that we received a redirect */
1204 1204                  ip_rts_change_v6(RTM_REDIRECT,
1205 1205                      &rd->nd_rd_dst,
1206 1206                      &rd->nd_rd_target,
1207 1207                      &ipv6_all_ones, 0, src,
1208 1208                      (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1209 1209                      (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1210 1210  
1211 1211                  /*
1212 1212                   * Delete any existing IRE_HOST type ires for this destination.
1213 1213                   * This together with the added IRE has the effect of
1214 1214                   * modifying an existing redirect.
1215 1215                   */
1216 1216                  redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1217 1217                      prev_ire->ire_ill, ALL_ZONES, NULL,
1218 1218                      (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1219 1219                      NULL);
1220 1220  
1221 1221                  if (redir_ire != NULL) {
1222 1222                          if (redir_ire->ire_flags & RTF_DYNAMIC)
1223 1223                                  ire_delete(redir_ire);
1224 1224                          ire_refrele(redir_ire);
1225 1225                  }
1226 1226          }
1227 1227  
1228 1228          ire_refrele(prev_ire);
1229 1229          prev_ire = NULL;
1230 1230  
1231 1231  fail_redirect:
1232 1232          if (prev_ire != NULL)
1233 1233                  ire_refrele(prev_ire);
1234 1234          freemsg(mp);
1235 1235          if (rill != ira->ira_rill)
1236 1236                  ill_refrele(rill);
1237 1237  }
1238 1238  
1239 1239  /*
1240 1240   * Build and ship an IPv6 ICMP message using the packet data in mp,
1241 1241   * and the ICMP header pointed to by "stuff".  (May be called as
1242 1242   * writer.)
1243 1243   * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1244 1244   * verify that an icmp error packet can be sent.
1245 1245   *
1246 1246   * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1247 1247   * source address (see above function).
1248 1248   */
1249 1249  static void
1250 1250  icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1251 1251      const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1252 1252  {
1253 1253          ip6_t           *ip6h;
1254 1254          in6_addr_t      v6dst;
1255 1255          size_t          len_needed;
1256 1256          size_t          msg_len;
1257 1257          mblk_t          *mp1;
1258 1258          icmp6_t         *icmp6;
1259 1259          in6_addr_t      v6src;
1260 1260          ill_t           *ill = ira->ira_ill;
1261 1261          ip_stack_t      *ipst = ill->ill_ipst;
1262 1262          ip_xmit_attr_t  ixas;
1263 1263  
1264 1264          ip6h = (ip6_t *)mp->b_rptr;
1265 1265  
1266 1266          bzero(&ixas, sizeof (ixas));
1267 1267          ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1268 1268          ixas.ixa_zoneid = ira->ira_zoneid;
1269 1269          ixas.ixa_ifindex = 0;
1270 1270          ixas.ixa_ipst = ipst;
1271 1271          ixas.ixa_cred = kcred;
1272 1272          ixas.ixa_cpid = NOPID;
1273 1273          ixas.ixa_tsl = ira->ira_tsl;    /* Behave as a multi-level responder */
1274 1274          ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1275 1275  
1276 1276          /*
1277 1277           * If the source of the original packet was link-local, then
1278 1278           * make sure we send on the same ill (group) as we received it on.
1279 1279           */
1280 1280          if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1281 1281                  ixas.ixa_flags |= IXAF_SCOPEID_SET;
1282 1282                  if (IS_UNDER_IPMP(ill))
1283 1283                          ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1284 1284                  else
1285 1285                          ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1286 1286          }
1287 1287  
1288 1288          if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1289 1289                  /*
1290 1290                   * Apply IPsec based on how IPsec was applied to
1291 1291                   * the packet that had the error.
1292 1292                   *
1293 1293                   * If it was an outbound packet that caused the ICMP
1294 1294                   * error, then the caller will have setup the IRA
1295 1295                   * appropriately.
1296 1296                   */
1297 1297                  if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1298 1298                          BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1299 1299                          /* Note: mp already consumed and ip_drop_packet done */
1300 1300                          return;
1301 1301                  }
1302 1302          } else {
1303 1303                  /*
1304 1304                   * This is in clear. The icmp message we are building
1305 1305                   * here should go out in clear, independent of our policy.
1306 1306                   */
1307 1307                  ixas.ixa_flags |= IXAF_NO_IPSEC;
1308 1308          }
1309 1309  
1310 1310          /*
1311 1311           * If the caller specified the source we use that.
1312 1312           * Otherwise, if the packet was for one of our unicast addresses, make
1313 1313           * sure we respond with that as the source. Otherwise
1314 1314           * have ip_output_simple pick the source address.
1315 1315           */
1316 1316          if (v6src_ptr != NULL) {
1317 1317                  v6src = *v6src_ptr;
1318 1318          } else {
1319 1319                  ire_t *ire;
1320 1320                  uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1321 1321  
1322 1322                  if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1323 1323                      IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1324 1324                          match_flags |= MATCH_IRE_ILL;
1325 1325  
1326 1326                  ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1327 1327                      (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1328 1328                      match_flags, 0, ipst, NULL);
1329 1329                  if (ire != NULL) {
1330 1330                          v6src = ip6h->ip6_dst;
1331 1331                          ire_refrele(ire);
1332 1332                  } else {
1333 1333                          v6src = ipv6_all_zeros;
1334 1334                          ixas.ixa_flags |= IXAF_SET_SOURCE;
1335 1335                  }
1336 1336          }
1337 1337          v6dst = ip6h->ip6_src;
1338 1338          len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1339 1339          msg_len = msgdsize(mp);
1340 1340          if (msg_len > len_needed) {
1341 1341                  if (!adjmsg(mp, len_needed - msg_len)) {
1342 1342                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1343 1343                          freemsg(mp);
1344 1344                          return;
1345 1345                  }
1346 1346                  msg_len = len_needed;
1347 1347          }
1348 1348          mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1349 1349          if (mp1 == NULL) {
1350 1350                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1351 1351                  freemsg(mp);
1352 1352                  return;
1353 1353          }
1354 1354          mp1->b_cont = mp;
1355 1355          mp = mp1;
1356 1356  
1357 1357          /*
1358 1358           * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1359 1359           * node generates be accepted in peace by all on-host destinations.
1360 1360           * If we do NOT assume that all on-host destinations trust
1361 1361           * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1362 1362           * (Look for IXAF_TRUSTED_ICMP).
1363 1363           */
1364 1364          ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1365 1365  
1366 1366          ip6h = (ip6_t *)mp->b_rptr;
1367 1367          mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1368 1368  
1369 1369          ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1370 1370          ip6h->ip6_nxt = IPPROTO_ICMPV6;
1371 1371          ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1372 1372          ip6h->ip6_dst = v6dst;
1373 1373          ip6h->ip6_src = v6src;
1374 1374          msg_len += IPV6_HDR_LEN + len;
1375 1375          if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1376 1376                  (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1377 1377                  msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1378 1378          }
1379 1379          ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1380 1380          icmp6 = (icmp6_t *)&ip6h[1];
1381 1381          bcopy(stuff, (char *)icmp6, len);
1382 1382          /*
1383 1383           * Prepare for checksum by putting icmp length in the icmp
1384 1384           * checksum field. The checksum is calculated in ip_output_wire_v6.
1385 1385           */
1386 1386          icmp6->icmp6_cksum = ip6h->ip6_plen;
1387 1387          if (icmp6->icmp6_type == ND_REDIRECT) {
1388 1388                  ip6h->ip6_hops = IPV6_MAX_HOPS;
1389 1389          }
1390 1390  
1391 1391          (void) ip_output_simple(mp, &ixas);
1392 1392          ixa_cleanup(&ixas);
1393 1393  }
1394 1394  
1395 1395  /*
1396 1396   * Update the output mib when ICMPv6 packets are sent.
1397 1397   */
1398 1398  void
1399 1399  icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1400 1400  {
1401 1401          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1402 1402  
1403 1403          switch (icmp6->icmp6_type) {
1404 1404          case ICMP6_DST_UNREACH:
1405 1405                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1406 1406                  if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1407 1407                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1408 1408                  break;
1409 1409  
1410 1410          case ICMP6_TIME_EXCEEDED:
1411 1411                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1412 1412                  break;
1413 1413  
1414 1414          case ICMP6_PARAM_PROB:
1415 1415                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1416 1416                  break;
1417 1417  
1418 1418          case ICMP6_PACKET_TOO_BIG:
1419 1419                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1420 1420                  break;
1421 1421  
1422 1422          case ICMP6_ECHO_REQUEST:
1423 1423                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1424 1424                  break;
1425 1425  
1426 1426          case ICMP6_ECHO_REPLY:
1427 1427                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1428 1428                  break;
1429 1429  
1430 1430          case ND_ROUTER_SOLICIT:
1431 1431                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1432 1432                  break;
1433 1433  
1434 1434          case ND_ROUTER_ADVERT:
1435 1435                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1436 1436                  break;
1437 1437  
1438 1438          case ND_NEIGHBOR_SOLICIT:
1439 1439                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1440 1440                  break;
1441 1441  
1442 1442          case ND_NEIGHBOR_ADVERT:
1443 1443                  BUMP_MIB(ill->ill_icmp6_mib,
1444 1444                      ipv6IfIcmpOutNeighborAdvertisements);
1445 1445                  break;
1446 1446  
1447 1447          case ND_REDIRECT:
1448 1448                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1449 1449                  break;
1450 1450  
1451 1451          case MLD_LISTENER_QUERY:
1452 1452                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1453 1453                  break;
1454 1454  
1455 1455          case MLD_LISTENER_REPORT:
1456 1456          case MLD_V2_LISTENER_REPORT:
1457 1457                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1458 1458                  break;
1459 1459  
1460 1460          case MLD_LISTENER_REDUCTION:
1461 1461                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1462 1462                  break;
1463 1463          }
1464 1464  }
1465 1465  
1466 1466  /*
1467 1467   * Check if it is ok to send an ICMPv6 error packet in
1468 1468   * response to the IP packet in mp.
1469 1469   * Free the message and return null if no
1470 1470   * ICMP error packet should be sent.
1471 1471   */
1472 1472  static mblk_t *
1473 1473  icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1474 1474  {
1475 1475          ill_t           *ill = ira->ira_ill;
1476 1476          ip_stack_t      *ipst = ill->ill_ipst;
1477 1477          boolean_t       llbcast;
1478 1478          ip6_t           *ip6h;
1479 1479  
1480 1480          if (!mp)
1481 1481                  return (NULL);
1482 1482  
1483 1483          /* We view multicast and broadcast as the same.. */
1484 1484          llbcast = (ira->ira_flags &
1485 1485              (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1486 1486          ip6h = (ip6_t *)mp->b_rptr;
1487 1487  
1488 1488          /* Check if source address uniquely identifies the host */
1489 1489  
1490 1490          if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1491 1491              IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1492 1492              IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1493 1493                  freemsg(mp);
1494 1494                  return (NULL);
1495 1495          }
1496 1496  
1497 1497          if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1498 1498                  size_t  len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1499 1499                  icmp6_t         *icmp6;
1500 1500  
1501 1501                  if (mp->b_wptr - mp->b_rptr < len_needed) {
1502 1502                          if (!pullupmsg(mp, len_needed)) {
1503 1503                                  BUMP_MIB(ill->ill_icmp6_mib,
1504 1504                                      ipv6IfIcmpInErrors);
1505 1505                                  freemsg(mp);
1506 1506                                  return (NULL);
1507 1507                          }
1508 1508                          ip6h = (ip6_t *)mp->b_rptr;
1509 1509                  }
1510 1510                  icmp6 = (icmp6_t *)&ip6h[1];
1511 1511                  /* Explicitly do not generate errors in response to redirects */
1512 1512                  if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1513 1513                      icmp6->icmp6_type == ND_REDIRECT) {
1514 1514                          freemsg(mp);
1515 1515                          return (NULL);
1516 1516                  }
1517 1517          }
1518 1518          /*
1519 1519           * Check that the destination is not multicast and that the packet
1520 1520           * was not sent on link layer broadcast or multicast.  (Exception
1521 1521           * is Packet too big message as per the draft - when mcast_ok is set.)
1522 1522           */
1523 1523          if (!mcast_ok &&
1524 1524              (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1525 1525                  freemsg(mp);
1526 1526                  return (NULL);
1527 1527          }
1528 1528          /*
1529 1529           * If this is a labeled system, then check to see if we're allowed to
1530 1530           * send a response to this particular sender.  If not, then just drop.
1531 1531           */
1532 1532          if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1533 1533                  BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1534 1534                  freemsg(mp);
1535 1535                  return (NULL);
1536 1536          }
1537 1537  
1538 1538          if (icmp_err_rate_limit(ipst)) {
1539 1539                  /*
1540 1540                   * Only send ICMP error packets every so often.
1541 1541                   * This should be done on a per port/source basis,
1542 1542                   * but for now this will suffice.
1543 1543                   */
1544 1544                  freemsg(mp);
1545 1545                  return (NULL);
1546 1546          }
1547 1547          return (mp);
1548 1548  }
1549 1549  
1550 1550  /*
1551 1551   * Called when a packet was sent out the same link that it arrived on.
1552 1552   * Check if it is ok to send a redirect and then send it.
1553 1553   */
1554 1554  void
1555 1555  ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1556 1556      ip_recv_attr_t *ira)
1557 1557  {
1558 1558          ill_t           *ill = ira->ira_ill;
1559 1559          ip_stack_t      *ipst = ill->ill_ipst;
1560 1560          in6_addr_t      *v6targ;
1561 1561          ire_t           *src_ire_v6 = NULL;
1562 1562          mblk_t          *mp1;
1563 1563          ire_t           *nhop_ire = NULL;
1564 1564  
1565 1565          /*
1566 1566           * Don't send a redirect when forwarding a source
1567 1567           * routed packet.
1568 1568           */
1569 1569          if (ip_source_routed_v6(ip6h, mp, ipst))
1570 1570                  return;
1571 1571  
1572 1572          if (ire->ire_type & IRE_ONLINK) {
1573 1573                  /* Target is directly connected */
1574 1574                  v6targ = &ip6h->ip6_dst;
1575 1575          } else {
1576 1576                  /* Determine the most specific IRE used to send the packets */
1577 1577                  nhop_ire = ire_nexthop(ire);
1578 1578                  if (nhop_ire == NULL)
1579 1579                          return;
1580 1580  
1581 1581                  /*
1582 1582                   * We won't send redirects to a router
1583 1583                   * that doesn't have a link local
1584 1584                   * address, but will forward.
1585 1585                   */
1586 1586                  if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1587 1587                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1588 1588                          ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1589 1589                          ire_refrele(nhop_ire);
1590 1590                          return;
1591 1591                  }
1592 1592                  v6targ = &nhop_ire->ire_addr_v6;
1593 1593          }
1594 1594          src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1595 1595              NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1596 1596              MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1597 1597  
1598 1598          if (src_ire_v6 == NULL) {
1599 1599                  if (nhop_ire != NULL)
1600 1600                          ire_refrele(nhop_ire);
1601 1601                  return;
1602 1602          }
1603 1603  
1604 1604          /*
1605 1605           * The source is directly connected.
1606 1606           */
1607 1607          mp1 = copymsg(mp);
1608 1608          if (mp1 != NULL)
1609 1609                  icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1610 1610  
1611 1611          if (nhop_ire != NULL)
1612 1612                  ire_refrele(nhop_ire);
1613 1613          ire_refrele(src_ire_v6);
1614 1614  }
1615 1615  
1616 1616  /*
1617 1617   * Generate an ICMPv6 redirect message.
1618 1618   * Include target link layer address option if it exits.
1619 1619   * Always include redirect header.
1620 1620   */
1621 1621  static void
1622 1622  icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1623 1623      ip_recv_attr_t *ira)
1624 1624  {
1625 1625          nd_redirect_t   *rd;
1626 1626          nd_opt_rd_hdr_t *rdh;
1627 1627          uchar_t         *buf;
1628 1628          ncec_t          *ncec = NULL;
1629 1629          nd_opt_hdr_t    *opt;
1630 1630          int             len;
1631 1631          int             ll_opt_len = 0;
1632 1632          int             max_redir_hdr_data_len;
1633 1633          int             pkt_len;
1634 1634          in6_addr_t      *srcp;
1635 1635          ill_t           *ill;
1636 1636          boolean_t       need_refrele;
1637 1637          ip_stack_t      *ipst = ira->ira_ill->ill_ipst;
1638 1638  
1639 1639          mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1640 1640          if (mp == NULL)
1641 1641                  return;
1642 1642  
1643 1643          if (IS_UNDER_IPMP(ira->ira_ill)) {
1644 1644                  ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1645 1645                  if (ill == NULL) {
1646 1646                          ill = ira->ira_ill;
1647 1647                          BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1648 1648                          ip_drop_output("no IPMP ill for sending redirect",
1649 1649                              mp, ill);
1650 1650                          freemsg(mp);
1651 1651                          return;
1652 1652                  }
1653 1653                  need_refrele = B_TRUE;
1654 1654          } else {
1655 1655                  ill = ira->ira_ill;
1656 1656                  need_refrele = B_FALSE;
1657 1657          }
1658 1658  
1659 1659          ncec = ncec_lookup_illgrp_v6(ill, targetp);
1660 1660          if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1661 1661              ncec->ncec_lladdr != NULL) {
1662 1662                  ll_opt_len = (sizeof (nd_opt_hdr_t) +
1663 1663                      ill->ill_phys_addr_length + 7)/8 * 8;
1664 1664          }
1665 1665          len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1666 1666          ASSERT(len % 4 == 0);
1667 1667          buf = kmem_alloc(len, KM_NOSLEEP);
1668 1668          if (buf == NULL) {
1669 1669                  if (ncec != NULL)
1670 1670                          ncec_refrele(ncec);
1671 1671                  if (need_refrele)
1672 1672                          ill_refrele(ill);
1673 1673                  freemsg(mp);
1674 1674                  return;
1675 1675          }
1676 1676  
1677 1677          rd = (nd_redirect_t *)buf;
1678 1678          rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1679 1679          rd->nd_rd_code = 0;
1680 1680          rd->nd_rd_reserved = 0;
1681 1681          rd->nd_rd_target = *targetp;
1682 1682          rd->nd_rd_dst = *dest;
1683 1683  
1684 1684          opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1685 1685          if (ncec != NULL && ll_opt_len != 0) {
1686 1686                  opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1687 1687                  opt->nd_opt_len = ll_opt_len/8;
1688 1688                  bcopy((char *)ncec->ncec_lladdr, &opt[1],
1689 1689                      ill->ill_phys_addr_length);
1690 1690          }
1691 1691          if (ncec != NULL)
1692 1692                  ncec_refrele(ncec);
1693 1693          rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1694 1694          rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1695 1695          /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1696 1696          max_redir_hdr_data_len =
1697 1697              (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1698 1698          pkt_len = msgdsize(mp);
1699 1699          /* Make sure mp is 8 byte aligned */
1700 1700          if (pkt_len > max_redir_hdr_data_len) {
1701 1701                  rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1702 1702                      sizeof (nd_opt_rd_hdr_t))/8;
1703 1703                  (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1704 1704          } else {
1705 1705                  rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1706 1706                  (void) adjmsg(mp, -(pkt_len % 8));
1707 1707          }
1708 1708          rdh->nd_opt_rh_reserved1 = 0;
1709 1709          rdh->nd_opt_rh_reserved2 = 0;
1710 1710          /* ipif_v6lcl_addr contains the link-local source address */
1711 1711          srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1712 1712  
1713 1713          /* Redirects sent by router, and router is global zone */
1714 1714          ASSERT(ira->ira_zoneid == ALL_ZONES);
1715 1715          ira->ira_zoneid = GLOBAL_ZONEID;
1716 1716          icmp_pkt_v6(mp, buf, len, srcp, ira);
1717 1717          kmem_free(buf, len);
1718 1718          if (need_refrele)
1719 1719                  ill_refrele(ill);
1720 1720  }
1721 1721  
1722 1722  
1723 1723  /* Generate an ICMP time exceeded message.  (May be called as writer.) */
1724 1724  void
1725 1725  icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1726 1726      ip_recv_attr_t *ira)
1727 1727  {
1728 1728          icmp6_t icmp6;
1729 1729  
1730 1730          mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1731 1731          if (mp == NULL)
1732 1732                  return;
1733 1733  
1734 1734          bzero(&icmp6, sizeof (icmp6_t));
1735 1735          icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1736 1736          icmp6.icmp6_code = code;
1737 1737          icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1738 1738  }
1739 1739  
1740 1740  /*
1741 1741   * Generate an ICMP unreachable message.
1742 1742   * When called from ip_output side a minimal ip_recv_attr_t needs to be
1743 1743   * constructed by the caller.
1744 1744   */
1745 1745  void
1746 1746  icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1747 1747      ip_recv_attr_t *ira)
1748 1748  {
1749 1749          icmp6_t icmp6;
1750 1750  
1751 1751          mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1752 1752          if (mp == NULL)
1753 1753                  return;
1754 1754  
1755 1755          bzero(&icmp6, sizeof (icmp6_t));
1756 1756          icmp6.icmp6_type = ICMP6_DST_UNREACH;
1757 1757          icmp6.icmp6_code = code;
1758 1758          icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1759 1759  }
1760 1760  
1761 1761  /*
1762 1762   * Generate an ICMP pkt too big message.
1763 1763   * When called from ip_output side a minimal ip_recv_attr_t needs to be
1764 1764   * constructed by the caller.
1765 1765   */
1766 1766  void
1767 1767  icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1768 1768      ip_recv_attr_t *ira)
1769 1769  {
1770 1770          icmp6_t icmp6;
1771 1771  
1772 1772          mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1773 1773          if (mp == NULL)
1774 1774                  return;
1775 1775  
1776 1776          bzero(&icmp6, sizeof (icmp6_t));
1777 1777          icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1778 1778          icmp6.icmp6_code = 0;
1779 1779          icmp6.icmp6_mtu = htonl(mtu);
1780 1780  
1781 1781          icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1782 1782  }
1783 1783  
1784 1784  /*
1785 1785   * Generate an ICMP parameter problem message. (May be called as writer.)
1786 1786   * 'offset' is the offset from the beginning of the packet in error.
1787 1787   * When called from ip_output side a minimal ip_recv_attr_t needs to be
1788 1788   * constructed by the caller.
1789 1789   */
1790 1790  static void
1791 1791  icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1792 1792      boolean_t mcast_ok, ip_recv_attr_t *ira)
1793 1793  {
1794 1794          icmp6_t icmp6;
1795 1795  
1796 1796          mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1797 1797          if (mp == NULL)
1798 1798                  return;
1799 1799  
1800 1800          bzero((char *)&icmp6, sizeof (icmp6_t));
1801 1801          icmp6.icmp6_type = ICMP6_PARAM_PROB;
1802 1802          icmp6.icmp6_code = code;
1803 1803          icmp6.icmp6_pptr = htonl(offset);
1804 1804          icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1805 1805  }
1806 1806  
1807 1807  void
1808 1808  icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1809 1809      ip_recv_attr_t *ira)
1810 1810  {
1811 1811          ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
1812 1812          uint16_t        hdr_length;
1813 1813          uint8_t         *nexthdrp;
1814 1814          uint32_t        offset;
1815 1815          ill_t           *ill = ira->ira_ill;
1816 1816  
1817 1817          /* Determine the offset of the bad nexthdr value */
1818 1818          if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
1819 1819                  /* Malformed packet */
1820 1820                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1821 1821                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
1822 1822                  freemsg(mp);
1823 1823                  return;
1824 1824          }
1825 1825  
1826 1826          offset = nexthdrp - mp->b_rptr;
1827 1827          icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1828 1828              mcast_ok, ira);
1829 1829  }
1830 1830  
1831 1831  /*
1832 1832   * Verify whether or not the IP address is a valid local address.
1833 1833   * Could be a unicast, including one for a down interface.
1834 1834   * If allow_mcbc then a multicast or broadcast address is also
1835 1835   * acceptable.
1836 1836   *
1837 1837   * In the case of a multicast address, however, the
1838 1838   * upper protocol is expected to reset the src address
1839 1839   * to zero when we return IPVL_MCAST so that
1840 1840   * no packets are emitted with multicast address as
1841 1841   * source address.
1842 1842   * The addresses valid for bind are:
1843 1843   *      (1) - in6addr_any
1844 1844   *      (2) - IP address of an UP interface
1845 1845   *      (3) - IP address of a DOWN interface
1846 1846   *      (4) - a multicast address. In this case
1847 1847   *      the conn will only receive packets destined to
1848 1848   *      the specified multicast address. Note: the
1849 1849   *      application still has to issue an
1850 1850   *      IPV6_JOIN_GROUP socket option.
1851 1851   *
1852 1852   * In all the above cases, the bound address must be valid in the current zone.
1853 1853   * When the address is loopback or multicast, there might be many matching IREs
1854 1854   * so bind has to look up based on the zone.
1855 1855   */
1856 1856  ip_laddr_t
1857 1857  ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1858 1858      ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1859 1859  {
1860 1860          ire_t           *src_ire;
1861 1861          uint_t          match_flags;
1862 1862          ill_t           *ill = NULL;
1863 1863  
1864 1864          ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1865 1865          ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1866 1866  
1867 1867          match_flags = MATCH_IRE_ZONEONLY;
1868 1868          if (scopeid != 0) {
1869 1869                  ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1870 1870                  if (ill == NULL)
1871 1871                          return (IPVL_BAD);
1872 1872                  match_flags |= MATCH_IRE_ILL;
1873 1873          }
1874 1874  
1875 1875          src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1876 1876              ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1877 1877          if (ill != NULL)
1878 1878                  ill_refrele(ill);
1879 1879  
1880 1880          /*
1881 1881           * If an address other than in6addr_any is requested,
1882 1882           * we verify that it is a valid address for bind
1883 1883           * Note: Following code is in if-else-if form for
1884 1884           * readability compared to a condition check.
1885 1885           */
1886 1886          if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1887 1887                  /*
1888 1888                   * (2) Bind to address of local UP interface
1889 1889                   */
1890 1890                  ire_refrele(src_ire);
1891 1891                  return (IPVL_UNICAST_UP);
1892 1892          } else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1893 1893                  /* (4) bind to multicast address. */
1894 1894                  if (src_ire != NULL)
1895 1895                          ire_refrele(src_ire);
1896 1896  
1897 1897                  /*
1898 1898                   * Note: caller should take IPV6_MULTICAST_IF
1899 1899                   * into account when selecting a real source address.
1900 1900                   */
1901 1901                  if (allow_mcbc)
1902 1902                          return (IPVL_MCAST);
1903 1903                  else
1904 1904                          return (IPVL_BAD);
1905 1905          } else {
1906 1906                  ipif_t *ipif;
1907 1907  
1908 1908                  /*
1909 1909                   * (3) Bind to address of local DOWN interface?
1910 1910                   * (ipif_lookup_addr() looks up all interfaces
1911 1911                   * but we do not get here for UP interfaces
1912 1912                   * - case (2) above)
1913 1913                   */
1914 1914                  if (src_ire != NULL)
1915 1915                          ire_refrele(src_ire);
1916 1916  
1917 1917                  ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1918 1918                  if (ipif == NULL)
1919 1919                          return (IPVL_BAD);
1920 1920  
1921 1921                  /* Not a useful source? */
1922 1922                  if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1923 1923                          ipif_refrele(ipif);
1924 1924                          return (IPVL_BAD);
1925 1925                  }
1926 1926                  ipif_refrele(ipif);
1927 1927                  return (IPVL_UNICAST_DOWN);
1928 1928          }
1929 1929  }
1930 1930  
1931 1931  /*
1932 1932   * Verify that both the source and destination addresses are valid.  If
1933 1933   * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1934 1934   * i.e. have no route to it.  Protocols like TCP want to verify destination
1935 1935   * reachability, while tunnels do not.
1936 1936   *
1937 1937   * Determine the route, the interface, and (optionally) the source address
1938 1938   * to use to reach a given destination.
1939 1939   * Note that we allow connect to broadcast and multicast addresses when
1940 1940   * IPDF_ALLOW_MCBC is set.
1941 1941   * first_hop and dst_addr are normally the same, but if source routing
1942 1942   * they will differ; in that case the first_hop is what we'll use for the
1943 1943   * routing lookup but the dce and label checks will be done on dst_addr,
1944 1944   *
1945 1945   * If uinfo is set, then we fill in the best available information
1946 1946   * we have for the destination. This is based on (in priority order) any
1947 1947   * metrics and path MTU stored in a dce_t, route metrics, and finally the
1948 1948   * ill_mtu/ill_mc_mtu.
1949 1949   *
1950 1950   * Tsol note: If we have a source route then dst_addr != firsthop. But we
1951 1951   * always do the label check on dst_addr.
1952 1952   *
1953 1953   * Assumes that the caller has set ixa_scopeid for link-local communication.
1954 1954   */
1955 1955  int
1956 1956  ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1957 1957      const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1958 1958      uint32_t flags, uint_t mac_mode)
1959 1959  {
1960 1960          ire_t           *ire;
1961 1961          int             error = 0;
1962 1962          in6_addr_t      setsrc;                         /* RTF_SETSRC */
1963 1963          zoneid_t        zoneid = ixa->ixa_zoneid;       /* Honors SO_ALLZONES */
1964 1964          ip_stack_t      *ipst = ixa->ixa_ipst;
1965 1965          dce_t           *dce;
1966 1966          uint_t          pmtu;
1967 1967          uint_t          ifindex;
1968 1968          uint_t          generation;
1969 1969          nce_t           *nce;
1970 1970          ill_t           *ill = NULL;
1971 1971          boolean_t       multirt = B_FALSE;
1972 1972  
1973 1973          ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1974 1974  
1975 1975          ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1976 1976  
1977 1977          /*
1978 1978           * We never send to zero; the ULPs map it to the loopback address.
1979 1979           * We can't allow it since we use zero to mean uninitialized in some
1980 1980           * places.
1981 1981           */
1982 1982          ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1983 1983  
1984 1984          if (is_system_labeled()) {
1985 1985                  ts_label_t *tsl = NULL;
1986 1986  
1987 1987                  error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
1988 1988                      mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
1989 1989                  if (error != 0)
1990 1990                          return (error);
1991 1991                  if (tsl != NULL) {
1992 1992                          /* Update the label */
1993 1993                          ip_xmit_attr_replace_tsl(ixa, tsl);
1994 1994                  }
1995 1995          }
1996 1996  
1997 1997          setsrc = ipv6_all_zeros;
1998 1998          /*
1999 1999           * Select a route; For IPMP interfaces, we would only select
2000 2000           * a "hidden" route (i.e., going through a specific under_ill)
2001 2001           * if ixa_ifindex has been specified.
2002 2002           */
2003 2003          ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
2004 2004              &setsrc, &error, &multirt);
2005 2005          ASSERT(ire != NULL);    /* IRE_NOROUTE if none found */
2006 2006          if (error != 0)
2007 2007                  goto bad_addr;
2008 2008  
2009 2009          /*
2010 2010           * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
2011 2011           * If IPDF_VERIFY_DST is set, the destination must be reachable.
2012 2012           * Otherwise the destination needn't be reachable.
2013 2013           *
2014 2014           * If we match on a reject or black hole, then we've got a
2015 2015           * local failure.  May as well fail out the connect() attempt,
2016 2016           * since it's never going to succeed.
2017 2017           */
2018 2018          if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2019 2019                  /*
2020 2020                   * If we're verifying destination reachability, we always want
2021 2021                   * to complain here.
2022 2022                   *
2023 2023                   * If we're not verifying destination reachability but the
2024 2024                   * destination has a route, we still want to fail on the
2025 2025                   * temporary address and broadcast address tests.
2026 2026                   *
2027 2027                   * In both cases we let the code continue so some reasonable
2028 2028                   * information is returned to the caller. That enables the
2029 2029                   * caller to use (and even cache) the IRE. conn_ip_output will
2030 2030                   * use the generation mismatch path to check for the unreachable
2031 2031                   * case thereby avoiding any specific check in the main path.
2032 2032                   */
2033 2033                  ASSERT(generation == IRE_GENERATION_VERIFY);
2034 2034                  if (flags & IPDF_VERIFY_DST) {
2035 2035                          /*
2036 2036                           * Set errno but continue to set up ixa_ire to be
2037 2037                           * the RTF_REJECT|RTF_BLACKHOLE IRE.
2038 2038                           * That allows callers to use ip_output to get an
2039 2039                           * ICMP error back.
2040 2040                           */
2041 2041                          if (!(ire->ire_type & IRE_HOST))
2042 2042                                  error = ENETUNREACH;
2043 2043                          else
2044 2044                                  error = EHOSTUNREACH;
2045 2045                  }
2046 2046          }
2047 2047  
2048 2048          if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2049 2049              !(flags & IPDF_ALLOW_MCBC)) {
2050 2050                  ire_refrele(ire);
2051 2051                  ire = ire_reject(ipst, B_FALSE);
2052 2052                  generation = IRE_GENERATION_VERIFY;
2053 2053                  error = ENETUNREACH;
2054 2054          }
2055 2055  
2056 2056          /* Cache things */
2057 2057          if (ixa->ixa_ire != NULL)
2058 2058                  ire_refrele_notr(ixa->ixa_ire);
2059 2059  #ifdef DEBUG
2060 2060          ire_refhold_notr(ire);
2061 2061          ire_refrele(ire);
2062 2062  #endif
2063 2063          ixa->ixa_ire = ire;
2064 2064          ixa->ixa_ire_generation = generation;
2065 2065  
2066 2066          /*
2067 2067           * Ensure that ixa_dce is always set any time that ixa_ire is set,
2068 2068           * since some callers will send a packet to conn_ip_output() even if
2069 2069           * there's an error.
2070 2070           */
2071 2071          ifindex = 0;
2072 2072          if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2073 2073                  /* If we are creating a DCE we'd better have an ifindex. NOTE(review): ill is not assigned before this point, so it is always NULL here -- confirm intent. */
2074 2074                  if (ill != NULL)
2075 2075                          ifindex = ill->ill_phyint->phyint_ifindex;
2076 2076                  else
2077 2077                          flags &= ~IPDF_UNIQUE_DCE;
2078 2078          }
2079 2079  
2080 2080          if (flags & IPDF_UNIQUE_DCE) {
2081 2081                  /* Fallback to the default dce if allocation fails */
2082 2082                  dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2083 2083                  if (dce != NULL) {
2084 2084                          generation = dce->dce_generation;
2085 2085                  } else {
2086 2086                          dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2087 2087                              &generation);
2088 2088                  }
2089 2089          } else {
2090 2090                  dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2091 2091          }
2092 2092          ASSERT(dce != NULL);
2093 2093          if (ixa->ixa_dce != NULL)
2094 2094                  dce_refrele_notr(ixa->ixa_dce);
2095 2095  #ifdef DEBUG
2096 2096          dce_refhold_notr(dce);
2097 2097          dce_refrele(dce);
2098 2098  #endif
2099 2099          ixa->ixa_dce = dce;
2100 2100          ixa->ixa_dce_generation = generation;
2101 2101  
2102 2102  
2103 2103          /*
2104 2104           * For multicast with multirt we have a flag passed back from
2105 2105           * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2106 2106           * possible multicast address.
2107 2107           * We also need a flag for multicast since we can't check
2108 2108           * whether RTF_MULTIRT is set in ixa_ire for multicast.
2109 2109           */
2110 2110          if (multirt) {
2111 2111                  ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2112 2112                  ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2113 2113          } else {
2114 2114                  ixa->ixa_postfragfn = ire->ire_postfragfn;
2115 2115                  ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2116 2116          }
2117 2117          if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2118 2118                  /* Get an nce to cache. */
2119 2119                  nce = ire_to_nce(ire, NULL, firsthop);
2120 2120                  if (nce == NULL) {
2121 2121                          /* Allocation failure? */
2122 2122                          ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2123 2123                  } else {
2124 2124                          if (ixa->ixa_nce != NULL)
2125 2125                                  nce_refrele(ixa->ixa_nce);
2126 2126                          ixa->ixa_nce = nce;
2127 2127                  }
2128 2128          }
2129 2129  
2130 2130          /*
2131 2131           * If the source address is a loopback address, the
2132 2132           * destination had best be local or multicast.
2133 2133           * If we are sending to an IRE_LOCAL using a loopback source then
2134 2134           * it had better be the same zoneid.
2135 2135           */
2136 2136          if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2137 2137                  if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
2138 2138                          ire = NULL;     /* Stored in ixa_ire */
2139 2139                          error = EADDRNOTAVAIL;
2140 2140                          goto bad_addr;
2141 2141                  }
2142 2142                  if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2143 2143                          ire = NULL;     /* Stored in ixa_ire */
2144 2144                          error = EADDRNOTAVAIL;
2145 2145                          goto bad_addr;
2146 2146                  }
2147 2147          }
2148 2148  
2149 2149          /*
2150 2150           * Does the caller want us to pick a source address?
2151 2151           */
2152 2152          if (flags & IPDF_SELECT_SRC) {
2153 2153                  in6_addr_t      src_addr;
2154 2154  
2155 2155                  /*
2156 2156                   * We use ire_nexthop_ill to avoid the under ipmp
2157 2157                   * interface for source address selection. Note that for ipmp
2158 2158                   * probe packets, ixa_ifindex would have been specified, and
2159 2159                   * the ip_select_route() invocation would have picked an ire
2160 2160                   * with ire_ill pointing at an under interface.
2161 2161                   */
2162 2162                  ill = ire_nexthop_ill(ire);
2163 2163  
2164 2164                  /* If unreachable we have no ill but need some source */
2165 2165                  if (ill == NULL) {
2166 2166                          src_addr = ipv6_loopback;
2167 2167                          /* Make sure we look for a better source address */
2168 2168                          generation = SRC_GENERATION_VERIFY;
2169 2169                  } else {
2170 2170                          error = ip_select_source_v6(ill, &setsrc, dst_addr,
2171 2171                              zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2172 2172                              &src_addr, &generation, NULL);
2173 2173                          if (error != 0) {
2174 2174                                  ire = NULL;     /* Stored in ixa_ire */
2175 2175                                  goto bad_addr;
2176 2176                          }
2177 2177                  }
2178 2178  
2179 2179                  /*
2180 2180                   * We allow the source address to be down.
2181 2181                   * However, we check that we don't use the loopback address
2182 2182                   * as a source when sending out on the wire.
2183 2183                   */
2184 2184                  if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2185 2185                      !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2186 2186                      !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2187 2187                          ire = NULL;     /* Stored in ixa_ire */
2188 2188                          error = EADDRNOTAVAIL;
2189 2189                          goto bad_addr;
2190 2190                  }
2191 2191  
2192 2192                  *src_addrp = src_addr;
2193 2193                  ixa->ixa_src_generation = generation;
2194 2194          }
2195 2195  
2196 2196          /*
2197 2197           * Make sure we don't leave an unreachable ixa_nce in place
2198 2198           * since ip_select_route is used when we unplumb i.e., remove
2199 2199           * references on ixa_ire, ixa_nce, and ixa_dce.
2200 2200           */
2201 2201          nce = ixa->ixa_nce;
2202 2202          if (nce != NULL && nce->nce_is_condemned) {
2203 2203                  nce_refrele(nce);
2204 2204                  ixa->ixa_nce = NULL;
2205 2205                  ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2206 2206          }
2207 2207  
2208 2208          /*
2209 2209           * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2210 2210           * multicast. But pmtu discovery is only enabled for connected
2211 2211           * sockets in general.
2212 2212           */
2213 2213  
2214 2214          /*
2215 2215           * Set initial value for fragmentation limit.  Either conn_ip_output
2216 2216           * or the ULP might update it when there are routing changes.
2217 2217           * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2218 2218           */
2219 2219          pmtu = ip_get_pmtu(ixa);
2220 2220          ixa->ixa_fragsize = pmtu;
2221 2221          /* Make sure ixa_fragsize and ixa_pmtu remain identical */
2222 2222          if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2223 2223                  ixa->ixa_pmtu = pmtu;
2224 2224  
2225 2225          /*
2226 2226           * Extract information useful for some transports.
2227 2227           * First we look for DCE metrics. Then we take what we have in
2228 2228           * the metrics in the route, where the offlink is used if we have
2229 2229           * one.
2230 2230           */
2231 2231          if (uinfo != NULL) {
2232 2232                  bzero(uinfo, sizeof (*uinfo));
2233 2233  
2234 2234                  if (dce->dce_flags & DCEF_UINFO)
2235 2235                          *uinfo = dce->dce_uinfo;
2236 2236  
2237 2237                  rts_merge_metrics(uinfo, &ire->ire_metrics);
2238 2238  
2239 2239                  /* Allow ire_metrics to decrease the path MTU from above */
2240 2240                  if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2241 2241                          uinfo->iulp_mtu = pmtu;
2242 2242  
2243 2243                  uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2244 2244                  uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2245 2245                  uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2246 2246          }
2247 2247  
2248 2248          if (ill != NULL)
2249 2249                  ill_refrele(ill);
2250 2250  
2251 2251          return (error);
2252 2252  
2253 2253  bad_addr:
2254 2254          if (ire != NULL)
2255 2255                  ire_refrele(ire);
2256 2256  
2257 2257          if (ill != NULL)
2258 2258                  ill_refrele(ill);
2259 2259  
2260 2260          /*
2261 2261           * Make sure we don't leave an unreachable ixa_nce in place
2262 2262           * since ip_select_route is used when we unplumb i.e., remove
2263 2263           * references on ixa_ire, ixa_nce, and ixa_dce.
2264 2264           */
2265 2265          nce = ixa->ixa_nce;
2266 2266          if (nce != NULL && nce->nce_is_condemned) {
2267 2267                  nce_refrele(nce);
2268 2268                  ixa->ixa_nce = NULL;
2269 2269                  ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2270 2270          }
2271 2271  
2272 2272          return (error);
2273 2273  }
2274 2274  
2275 2275  /*
2276 2276   * Handle protocols with which IP is less intimate.  There
2277 2277   * can be more than one stream bound to a particular
2278 2278   * protocol.  When this is the case, normally each one gets a copy
2279 2279   * of any incoming packets.
2280 2280   *
2281 2281   * Zones notes:
2282 2282   * Packets will be distributed to conns in all zones. This is really only
2283 2283   * useful for ICMPv6 as only applications in the global zone can create raw
2284 2284   * sockets for other protocols.
2285 2285   */
2286 2286  void
2287 2287  ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2288 2288  {
2289 2289          mblk_t          *mp1;
2290 2290          in6_addr_t      laddr = ip6h->ip6_dst;
2291 2291          conn_t          *connp, *first_connp, *next_connp;
2292 2292          connf_t         *connfp;
2293 2293          ill_t           *ill = ira->ira_ill;
2294 2294          ip_stack_t      *ipst = ill->ill_ipst;
2295 2295  
2296 2296          connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2297 2297          mutex_enter(&connfp->connf_lock);
2298 2298          connp = connfp->connf_head;     /* redundant; the for loop re-initializes connp */
2299 2299          for (connp = connfp->connf_head; connp != NULL;
2300 2300              connp = connp->conn_next) {
2301 2301                  /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2302 2302                  if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2303 2303                      (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2304 2304                      tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2305 2305                          break;
2306 2306          }
2307 2307  
2308 2308          if (connp == NULL) {
2309 2309                  /*
2310 2310                   * No conn matched this packet's protocol, so
2311 2311                   * hand it to ip_fanout_send_icmp_v6 for an ICMPv6
2312 2312                   * parameter problem (next header) error.
2313 2313                   */
2314 2314                  mutex_exit(&connfp->connf_lock);
2315 2315                  ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2316 2316                      ICMP6_PARAMPROB_NEXTHEADER, ira);
2317 2317                  return;
2318 2318          }
2319 2319  
2320 2320          ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2321 2321  
2322 2322          CONN_INC_REF(connp);
2323 2323          first_connp = connp;
2324 2324  
2325 2325          /*
2326 2326           * XXX: Fix the multiple protocol listeners case. We should not
2327 2327           * be walking the conn->conn_next list here.
2328 2328           */
2329 2329          connp = connp->conn_next;
2330 2330          for (;;) {
2331 2331                  while (connp != NULL) {
2332 2332                          /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2333 2333                          if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2334 2334                              (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2335 2335                              tsol_receive_local(mp, &laddr, IPV6_VERSION,
2336 2336                              ira, connp)))
2337 2337                                  break;
2338 2338                          connp = connp->conn_next;
2339 2339                  }
2340 2340  
2341 2341                  if (connp == NULL) {
2342 2342                          /* No more interested clients */
2343 2343                          connp = first_connp;
2344 2344                          break;
2345 2345                  }
2346 2346                  if (((mp1 = dupmsg(mp)) == NULL) &&
2347 2347                      ((mp1 = copymsg(mp)) == NULL)) {
2348 2348                          /* Memory allocation failed */
2349 2349                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2350 2350                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
2351 2351                          connp = first_connp;
2352 2352                          break;
2353 2353                  }
2354 2354  
2355 2355                  CONN_INC_REF(connp);
2356 2356                  mutex_exit(&connfp->connf_lock);
2357 2357  
2358 2358                  ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2359 2359                      ira);
2360 2360  
2361 2361                  mutex_enter(&connfp->connf_lock);
2362 2362                  /* Follow the next pointer before releasing the conn. */
2363 2363                  next_connp = connp->conn_next;
2364 2364                  CONN_DEC_REF(connp);
2365 2365                  connp = next_connp;
2366 2366          }
2367 2367  
2368 2368          /* Last one.  Send it upstream. */
2369 2369          mutex_exit(&connfp->connf_lock);
2370 2370  
2371 2371          ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2372 2372  
2373 2373          CONN_DEC_REF(connp);
2374 2374  }
2375 2375  
2376 2376  /*
2377 2377   * Called when it is conceptually a ULP that would send the packet
2378 2378   * e.g., port unreachable and nexthdr unknown. Check that the packet
2379 2379   * would have passed the IPsec global policy before sending the error.
2380 2380   *
2381 2381   * Send an ICMP error after patching up the packet appropriately.
2382 2382   * Uses ip_drop_input and bumps the appropriate MIB.
2383 2383   * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2384 2384   */
2385 2385  void
2386 2386  ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2387 2387      ip_recv_attr_t *ira)
2388 2388  {
2389 2389          ip6_t           *ip6h;
2390 2390          boolean_t       secure;
2391 2391          ill_t           *ill = ira->ira_ill;
2392 2392          ip_stack_t      *ipst = ill->ill_ipst;
2393 2393          netstack_t      *ns = ipst->ips_netstack;
2394 2394          ipsec_stack_t   *ipss = ns->netstack_ipsec;
2395 2395  
2396 2396          secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2397 2397  
2398 2398          /*
2399 2399           * We are generating an icmp error for some inbound packet.
2400 2400           * Called from all ip_fanout_(udp, tcp, proto) functions.
2401 2401           * Before we generate an error, check with global policy
2402 2402           * to see whether this is allowed to enter the system. As
2403 2403           * there is no "conn", we are checking with global policy.
2404 2404           */
2405 2405          ip6h = (ip6_t *)mp->b_rptr;
2406 2406          if (secure || ipss->ipsec_inbound_v6_policy_present) {
2407 2407                  mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2408 2408                  if (mp == NULL)
2409 2409                          return;
2410 2410          }
2411 2411  
2412 2412          /* We never send errors for protocols that we do implement */
2413 2413          if (ira->ira_protocol == IPPROTO_ICMPV6) {
2414 2414                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2415 2415                  ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2416 2416                  freemsg(mp);
2417 2417                  return;
2418 2418          }
2419 2419  
2420 2420          switch (icmp_type) {
2421 2421          case ICMP6_DST_UNREACH:
2422 2422                  ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2423 2423  
2424 2424                  BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2425 2425                  ip_drop_input("ipIfStatsNoPorts", mp, ill);
2426 2426  
2427 2427                  icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2428 2428                  break;
2429 2429          case ICMP6_PARAM_PROB:
2430 2430                  ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2431 2431  
2432 2432                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2433 2433                  ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2434 2434  
2435 2435                  /* Let the system determine the offset for this one */
2436 2436                  icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2437 2437                  break;
2438 2438          default:
2439 2439  #ifdef DEBUG
2440 2440                  panic("ip_fanout_send_icmp_v6: wrong type");
2441 2441                  /*NOTREACHED*/
2442 2442  #else
2443 2443                  freemsg(mp);
2444 2444                  break;
2445 2445  #endif
2446 2446          }
2447 2447  }
2448 2448  
2449 2449  /*
2450 2450   * Fanout for UDP packets that are multicast or ICMP errors.
2451 2451   * (Unicast fanout is handled in ip_input_v6.)
2452 2452   *
2453 2453   * If SO_REUSEADDR is set all multicast packets
2454 2454   * will be delivered to all conns bound to the same port.
2455 2455   *
2456 2456   * Fanout for UDP packets.
2457 2457   * The caller passes the local and foreign ports in lport and fport.
2458 2458   * ira_flags must have IRAF_MULTIBROADCAST or IRAF_ICMP_ERROR set (asserted).
2459 2459   *
2460 2460   * If the first matching conn does not have conn_reuseaddr set, only that
2461 2461   * conn receives the packet.
2462 2462   *
2463 2463   * Zones notes:
2464 2464   * Earlier in ip_input on a system with multiple shared-IP zones we
2465 2465   * duplicate the multicast and broadcast packets and send them up
2466 2466   * with each explicit zoneid that exists on that ill.
2467 2467   * This means that here we can match the zoneid with SO_ALLZONES being special.
2468 2468   */
2469 2469  void
2470 2470  ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2471 2471      ip_recv_attr_t *ira)
2472 2472  {
2473 2473          in6_addr_t      laddr;
2474 2474          conn_t          *connp;
2475 2475          connf_t         *connfp;
2476 2476          in6_addr_t      faddr;
2477 2477          ill_t           *ill = ira->ira_ill;
2478 2478          ip_stack_t      *ipst = ill->ill_ipst;
2479 2479  
2480 2480          ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2481 2481  
2482 2482          laddr = ip6h->ip6_dst;
2483 2483          faddr = ip6h->ip6_src;
2484 2484  
2485 2485          /* Attempt to find a client stream based on destination port. */
2486 2486          connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2487 2487          mutex_enter(&connfp->connf_lock);
2488 2488          connp = connfp->connf_head;
2489 2489          while (connp != NULL) {
2490 2490                  if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2491 2491                      conn_wantpacket_v6(connp, ira, ip6h) &&
2492 2492                      (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2493 2493                      tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2494 2494                          break;
2495 2495                  connp = connp->conn_next;
2496 2496          }
2497 2497  
2498 2498          if (connp == NULL)
2499 2499                  goto notfound;
2500 2500  
2501 2501          CONN_INC_REF(connp);
2502 2502  
2503 2503          if (connp->conn_reuseaddr) {
2504 2504                  conn_t          *first_connp = connp;
2505 2505                  conn_t          *next_connp;
2506 2506                  mblk_t          *mp1;
2507 2507  
2508 2508                  connp = connp->conn_next;
2509 2509                  for (;;) {
2510 2510                          while (connp != NULL) {
2511 2511                                  if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2512 2512                                      fport, faddr) &&
2513 2513                                      conn_wantpacket_v6(connp, ira, ip6h) &&
2514 2514                                      (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2515 2515                                      tsol_receive_local(mp, &laddr, IPV6_VERSION,
2516 2516                                      ira, connp)))
2517 2517                                          break;
2518 2518                                  connp = connp->conn_next;
2519 2519                          }
2520 2520                          if (connp == NULL) {
2521 2521                                  /* No more interested clients */
2522 2522                                  connp = first_connp;
2523 2523                                  break;
2524 2524                          }
2525 2525                          if (((mp1 = dupmsg(mp)) == NULL) &&
2526 2526                              ((mp1 = copymsg(mp)) == NULL)) {
2527 2527                                  /* Memory allocation failed */
2528 2528                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2529 2529                                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
2530 2530                                  connp = first_connp;
2531 2531                                  break;
2532 2532                          }
2533 2533  
2534 2534                          CONN_INC_REF(connp);
2535 2535                          mutex_exit(&connfp->connf_lock);
2536 2536  
2537 2537                          IP6_STAT(ipst, ip6_udp_fanmb);
2538 2538                          ip_fanout_udp_conn(connp, mp1, NULL,
2539 2539                              (ip6_t *)mp1->b_rptr, ira);
2540 2540  
2541 2541                          mutex_enter(&connfp->connf_lock);
2542 2542                          /* Follow the next pointer before releasing the conn. */
2543 2543                          next_connp = connp->conn_next;
2544 2544                          IP6_STAT(ipst, ip6_udp_fanmb);  /* NOTE(review): already bumped before the send above; double count? */
2545 2545                          CONN_DEC_REF(connp);
2546 2546                          connp = next_connp;
2547 2547                  }
2548 2548          }
2549 2549  
2550 2550          /* Last one.  Send it upstream. */
2551 2551          mutex_exit(&connfp->connf_lock);
2552 2552  
2553 2553          IP6_STAT(ipst, ip6_udp_fanmb);
2554 2554          ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2555 2555          CONN_DEC_REF(connp);
2556 2556          return;
2557 2557  
2558 2558  notfound:
2559 2559          mutex_exit(&connfp->connf_lock);
2560 2560          /*
2561 2561           * No one bound to this port.  Is
2562 2562           * there a client that wants all
2563 2563           * unclaimed datagrams?
2564 2564           */
2565 2565          if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2566 2566                  ASSERT(ira->ira_protocol == IPPROTO_UDP);
2567 2567                  ip_fanout_proto_v6(mp, ip6h, ira);
2568 2568          } else {
2569 2569                  ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2570 2570                      ICMP6_DST_UNREACH_NOPORT, ira);
2571 2571          }
2572 2572  }
2573 2573  
2574 2574  /*
2575 2575   * int ip_find_hdr_v6()
2576 2576   *
2577 2577   * This routine is used by the upper layer protocols, iptun, and IPsec:
2578 2578   * - Set extension header pointers to appropriate locations
2579 2579   * - Determine IPv6 header length and return it
2580 2580   * - Return a pointer to the last nexthdr value
2581 2581   *
2582 2582   * The caller must initialize ipp_fields.
2583 2583   * The upper layer protocols normally set label_separate which makes the
2584 2584   * routine put the TX label in ipp_label_v6. If this is not set then
2585 2585   * the hop-by-hop options including the label are placed in ipp_hopopts.
2586 2586   *
2587 2587   * NOTE: If multiple extension headers of the same type are present,
2588 2588   * ip_find_hdr_v6() will set the respective extension header pointers
2589 2589   * to the first one that it encounters in the IPv6 header.  It also
2590 2590   * skips fragment headers.  This routine deals with malformed packets
2591 2591   * of various sorts in which case the returned length is up to the
2592 2592   * malformed part.
2593 2593   */
2594 2594  int
2595 2595  ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2596 2596      uint8_t *nexthdrp)
2597 2597  {
2598 2598          uint_t  length, ehdrlen;
2599 2599          uint8_t nexthdr;
2600 2600          uint8_t *whereptr, *endptr;
2601 2601          ip6_dest_t *tmpdstopts;
2602 2602          ip6_rthdr_t *tmprthdr;
2603 2603          ip6_hbh_t *tmphopopts;
2604 2604          ip6_frag_t *tmpfraghdr;
2605 2605  
2606 2606          ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2607 2607          ipp->ipp_hoplimit = ip6h->ip6_hops;
2608 2608          ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2609 2609          ipp->ipp_addr = ip6h->ip6_dst;
2610 2610  
2611 2611          length = IPV6_HDR_LEN;
2612 2612          whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2613 2613          endptr = mp->b_wptr;
2614 2614  
2615 2615          nexthdr = ip6h->ip6_nxt;
2616 2616          while (whereptr < endptr) {
2617 2617                  /* Is there enough left for len + nexthdr? */
2618 2618                  if (whereptr + MIN_EHDR_LEN > endptr)
2619 2619                          goto done;
2620 2620  
2621 2621                  switch (nexthdr) {
2622 2622                  case IPPROTO_HOPOPTS: {
2623 2623                          /* We check for any CIPSO */
2624 2624                          uchar_t *secopt;
2625 2625                          boolean_t hbh_needed;
2626 2626                          uchar_t *after_secopt;
2627 2627  
2628 2628                          tmphopopts = (ip6_hbh_t *)whereptr;
2629 2629                          ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2630 2630                          if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
2631 2631                                  goto done;
2632 2632                          nexthdr = tmphopopts->ip6h_nxt;
2633 2633  
2634 2634                          if (!label_separate) {
2635 2635                                  secopt = NULL;
2636 2636                                  after_secopt = whereptr;
2637 2637                          } else {
2638 2638                                  /*
2639 2639                                   * We have dropped packets with bad options in
2640 2640                                   * ip6_input. No need to check return value
2641 2641                                   * here.
2642 2642                                   */
2643 2643                                  (void) tsol_find_secopt_v6(whereptr, ehdrlen,
2644 2644                                      &secopt, &after_secopt, &hbh_needed);
2645 2645                          }
2646 2646                          if (secopt != NULL && after_secopt - whereptr > 0) {
2647 2647                                  ipp->ipp_fields |= IPPF_LABEL_V6;
2648 2648                                  ipp->ipp_label_v6 = secopt;
2649 2649                                  ipp->ipp_label_len_v6 = after_secopt - whereptr;
2650 2650                          } else {
2651 2651                                  ipp->ipp_label_len_v6 = 0;
2652 2652                                  after_secopt = whereptr;
2653 2653                                  hbh_needed = B_TRUE;
2654 2654                          }
2655 2655                          /* return only 1st hbh */
2656 2656                          if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2657 2657                                  ipp->ipp_fields |= IPPF_HOPOPTS;
2658 2658                                  ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2659 2659                                  ipp->ipp_hopoptslen = ehdrlen -
2660 2660                                      ipp->ipp_label_len_v6;
2661 2661                          }
2662 2662                          break;
2663 2663                  }
2664 2664                  case IPPROTO_DSTOPTS:
2665 2665                          tmpdstopts = (ip6_dest_t *)whereptr;
2666 2666                          ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2667 2667                          if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
2668 2668                                  goto done;
2669 2669                          nexthdr = tmpdstopts->ip6d_nxt;
2670 2670                          /*
2671 2671                           * ipp_dstopts is set to the destination header after a
2672 2672                           * routing header.
2673 2673                           * Assume it is a post-rthdr destination header
2674 2674                           * and adjust when we find an rthdr.
2675 2675                           */
2676 2676                          if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2677 2677                                  ipp->ipp_fields |= IPPF_DSTOPTS;
2678 2678                                  ipp->ipp_dstopts = tmpdstopts;
2679 2679                                  ipp->ipp_dstoptslen = ehdrlen;
2680 2680                          }
2681 2681                          break;
2682 2682                  case IPPROTO_ROUTING:
2683 2683                          tmprthdr = (ip6_rthdr_t *)whereptr;
2684 2684                          ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2685 2685                          if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
2686 2686                                  goto done;
2687 2687                          nexthdr = tmprthdr->ip6r_nxt;
2688 2688                          /* return only 1st rthdr */
2689 2689                          if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2690 2690                                  ipp->ipp_fields |= IPPF_RTHDR;
2691 2691                                  ipp->ipp_rthdr = tmprthdr;
2692 2692                                  ipp->ipp_rthdrlen = ehdrlen;
2693 2693                          }
2694 2694                          /*
2695 2695                           * Make any destination header we've seen be a
2696 2696                           * pre-rthdr destination header.
2697 2697                           */
2698 2698                          if (ipp->ipp_fields & IPPF_DSTOPTS) {
2699 2699                                  ipp->ipp_fields &= ~IPPF_DSTOPTS;
2700 2700                                  ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2701 2701                                  ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2702 2702                                  ipp->ipp_dstopts = NULL;
2703 2703                                  ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2704 2704                                  ipp->ipp_dstoptslen = 0;
2705 2705                          }
2706 2706                          break;
2707 2707                  case IPPROTO_FRAGMENT:
2708 2708                          tmpfraghdr = (ip6_frag_t *)whereptr;
2709 2709                          ehdrlen = sizeof (ip6_frag_t);
2710 2710                          if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2711 2711                                  goto done;
2712 2712                          nexthdr = tmpfraghdr->ip6f_nxt;
2713 2713                          if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2714 2714                                  ipp->ipp_fields |= IPPF_FRAGHDR;
2715 2715                                  ipp->ipp_fraghdr = tmpfraghdr;
2716 2716                                  ipp->ipp_fraghdrlen = ehdrlen;
2717 2717                          }
2718 2718                          break;
2719 2719                  case IPPROTO_NONE:
2720 2720                  default:
2721 2721                          goto done;
2722 2722                  }
2723 2723                  length += ehdrlen;
2724 2724                  whereptr += ehdrlen;
2725 2725          }
2726 2726  done:
2727 2727          if (nexthdrp != NULL)
2728 2728                  *nexthdrp = nexthdr;
2729 2729          return (length);
2730 2730  }
2731 2731  
2732 2732  /*
2733 2733   * Try to determine where and what are the IPv6 header length and
2734 2734   * pointer to nexthdr value for the upper layer protocol (or an
2735 2735   * unknown next hdr).
2736 2736   *
2737 2737   * Parameters returns a pointer to the nexthdr value;
2738 2738   * Must handle malformed packets of various sorts.
2739 2739   * Function returns failure for malformed cases.
2740 2740   */
2741 2741  boolean_t
2742 2742  ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2743 2743      uint8_t **nexthdrpp)
2744 2744  {
2745 2745          uint16_t length;
2746 2746          uint_t  ehdrlen;
2747 2747          uint8_t *nexthdrp;
2748 2748          uint8_t *whereptr;
2749 2749          uint8_t *endptr;
2750 2750          ip6_dest_t *desthdr;
2751 2751          ip6_rthdr_t *rthdr;
2752 2752          ip6_frag_t *fraghdr;
2753 2753  
2754 2754          ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2755 2755          length = IPV6_HDR_LEN;
2756 2756          whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2757 2757          endptr = mp->b_wptr;
2758 2758  
2759 2759          nexthdrp = &ip6h->ip6_nxt;
2760 2760          while (whereptr < endptr) {
2761 2761                  /* Is there enough left for len + nexthdr? */
2762 2762                  if (whereptr + MIN_EHDR_LEN > endptr)
2763 2763                          break;
2764 2764  
2765 2765                  switch (*nexthdrp) {
2766 2766                  case IPPROTO_HOPOPTS:
2767 2767                  case IPPROTO_DSTOPTS:
2768 2768                          /* Assumes the headers are identical for hbh and dst */
2769 2769                          desthdr = (ip6_dest_t *)whereptr;
2770 2770                          ehdrlen = 8 * (desthdr->ip6d_len + 1);
2771 2771                          if ((uchar_t *)desthdr +  ehdrlen > endptr)
2772 2772                                  return (B_FALSE);
2773 2773                          nexthdrp = &desthdr->ip6d_nxt;
2774 2774                          break;
2775 2775                  case IPPROTO_ROUTING:
2776 2776                          rthdr = (ip6_rthdr_t *)whereptr;
2777 2777                          ehdrlen =  8 * (rthdr->ip6r_len + 1);
2778 2778                          if ((uchar_t *)rthdr +  ehdrlen > endptr)
2779 2779                                  return (B_FALSE);
2780 2780                          nexthdrp = &rthdr->ip6r_nxt;
2781 2781                          break;
2782 2782                  case IPPROTO_FRAGMENT:
2783 2783                          fraghdr = (ip6_frag_t *)whereptr;
2784 2784                          ehdrlen = sizeof (ip6_frag_t);
2785 2785                          if ((uchar_t *)&fraghdr[1] > endptr)
2786 2786                                  return (B_FALSE);
2787 2787                          nexthdrp = &fraghdr->ip6f_nxt;
2788 2788                          break;
2789 2789                  case IPPROTO_NONE:
2790 2790                          /* No next header means we're finished */
2791 2791                  default:
2792 2792                          *hdr_length_ptr = length;
2793 2793                          *nexthdrpp = nexthdrp;
2794 2794                          return (B_TRUE);
2795 2795                  }
2796 2796                  length += ehdrlen;
2797 2797                  whereptr += ehdrlen;
2798 2798                  *hdr_length_ptr = length;
2799 2799                  *nexthdrpp = nexthdrp;
2800 2800          }
2801 2801          switch (*nexthdrp) {
2802 2802          case IPPROTO_HOPOPTS:
2803 2803          case IPPROTO_DSTOPTS:
2804 2804          case IPPROTO_ROUTING:
2805 2805          case IPPROTO_FRAGMENT:
2806 2806                  /*
2807 2807                   * If any know extension headers are still to be processed,
2808 2808                   * the packet's malformed (or at least all the IP header(s) are
2809 2809                   * not in the same mblk - and that should never happen.
2810 2810                   */
2811 2811                  return (B_FALSE);
2812 2812  
2813 2813          default:
2814 2814                  /*
2815 2815                   * If we get here, we know that all of the IP headers were in
2816 2816                   * the same mblk, even if the ULP header is in the next mblk.
2817 2817                   */
2818 2818                  *hdr_length_ptr = length;
2819 2819                  *nexthdrpp = nexthdrp;
2820 2820                  return (B_TRUE);
2821 2821          }
2822 2822  }
2823 2823  
2824 2824  /*
2825 2825   * Return the length of the IPv6 related headers (including extension headers)
2826 2826   * Returns a length even if the packet is malformed.
2827 2827   */
2828 2828  int
2829 2829  ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2830 2830  {
2831 2831          uint16_t hdr_len;
2832 2832          uint8_t *nexthdrp;
2833 2833  
2834 2834          (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2835 2835          return (hdr_len);
2836 2836  }
2837 2837  
2838 2838  /*
2839 2839   * Parse and process any hop-by-hop or destination options.
2840 2840   *
2841 2841   * Assumes that q is an ill read queue so that ICMP errors for link-local
2842 2842   * destinations are sent out the correct interface.
2843 2843   *
2844 2844   * Returns -1 if there was an error and mp has been consumed.
2845 2845   * Returns 0 if no special action is needed.
2846 2846   * Returns 1 if the packet contained a router alert option for this node
2847 2847   * which is verified to be "interesting/known" for our implementation.
2848 2848   *
2849 2849   * XXX Note: In future as more hbh or dest options are defined,
2850 2850   * it may be better to have different routines for hbh and dest
2851 2851   * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2852 2852   * may have same value in different namespaces. Or is it same namespace ??
2853 2853   * Current code checks for each opt_type (other than pads) if it is in
2854 2854   * the expected  nexthdr (hbh or dest)
2855 2855   */
2856 2856  int
2857 2857  ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2858 2858      uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2859 2859  {
2860 2860          uint8_t opt_type;
2861 2861          uint_t optused;
2862 2862          int ret = 0;
2863 2863          const char *errtype;
2864 2864          ill_t           *ill = ira->ira_ill;
2865 2865          ip_stack_t      *ipst = ill->ill_ipst;
2866 2866  
2867 2867          while (optlen != 0) {
2868 2868                  opt_type = *optptr;
2869 2869                  if (opt_type == IP6OPT_PAD1) {
2870 2870                          optused = 1;
2871 2871                  } else {
2872 2872                          if (optlen < 2)
2873 2873                                  goto bad_opt;
2874 2874                          errtype = "malformed";
2875 2875                          if (opt_type == ip6opt_ls) {
2876 2876                                  optused = 2 + optptr[1];
2877 2877                                  if (optused > optlen)
2878 2878                                          goto bad_opt;
2879 2879                          } else switch (opt_type) {
2880 2880                          case IP6OPT_PADN:
2881 2881                                  /*
2882 2882                                   * Note:We don't verify that (N-2) pad octets
2883 2883                                   * are zero as required by spec. Adhere to
2884 2884                                   * "be liberal in what you accept..." part of
2885 2885                                   * implementation philosophy (RFC791,RFC1122)
2886 2886                                   */
2887 2887                                  optused = 2 + optptr[1];
2888 2888                                  if (optused > optlen)
2889 2889                                          goto bad_opt;
2890 2890                                  break;
2891 2891  
2892 2892                          case IP6OPT_JUMBO:
2893 2893                                  if (hdr_type != IPPROTO_HOPOPTS)
2894 2894                                          goto opt_error;
2895 2895                                  goto opt_error; /* XXX Not implemented! */
2896 2896  
2897 2897                          case IP6OPT_ROUTER_ALERT: {
2898 2898                                  struct ip6_opt_router *or;
2899 2899  
2900 2900                                  if (hdr_type != IPPROTO_HOPOPTS)
2901 2901                                          goto opt_error;
2902 2902                                  optused = 2 + optptr[1];
2903 2903                                  if (optused > optlen)
2904 2904                                          goto bad_opt;
2905 2905                                  or = (struct ip6_opt_router *)optptr;
2906 2906                                  /* Check total length and alignment */
2907 2907                                  if (optused != sizeof (*or) ||
2908 2908                                      ((uintptr_t)or->ip6or_value & 0x1) != 0)
2909 2909                                          goto opt_error;
2910 2910                                  /* Check value */
2911 2911                                  switch (*((uint16_t *)or->ip6or_value)) {
2912 2912                                  case IP6_ALERT_MLD:
2913 2913                                  case IP6_ALERT_RSVP:
2914 2914                                          ret = 1;
2915 2915                                  }
2916 2916                                  break;
2917 2917                          }
2918 2918                          case IP6OPT_HOME_ADDRESS: {
2919 2919                                  /*
2920 2920                                   * Minimal support for the home address option
2921 2921                                   * (which is required by all IPv6 nodes).
2922 2922                                   * Implement by just swapping the home address
2923 2923                                   * and source address.
2924 2924                                   * XXX Note: this has IPsec implications since
2925 2925                                   * AH needs to take this into account.
2926 2926                                   * Also, when IPsec is used we need to ensure
2927 2927                                   * that this is only processed once
2928 2928                                   * in the received packet (to avoid swapping
2929 2929                                   * back and forth).
2930 2930                                   * NOTE:This option processing is considered
2931 2931                                   * to be unsafe and prone to a denial of
2932 2932                                   * service attack.
2933 2933                                   * The current processing is not safe even with
2934 2934                                   * IPsec secured IP packets. Since the home
2935 2935                                   * address option processing requirement still
2936 2936                                   * is in the IETF draft and in the process of
2937 2937                                   * being redefined for its usage, it has been
2938 2938                                   * decided to turn off the option by default.
2939 2939                                   * If this section of code needs to be executed,
2940 2940                                   * ndd variable ip6_ignore_home_address_opt
2941 2941                                   * should be set to 0 at the user's own risk.
2942 2942                                   */
2943 2943                                  struct ip6_opt_home_address *oh;
2944 2944                                  in6_addr_t tmp;
2945 2945  
2946 2946                                  if (ipst->ips_ipv6_ignore_home_address_opt)
2947 2947                                          goto opt_error;
2948 2948  
2949 2949                                  if (hdr_type != IPPROTO_DSTOPTS)
2950 2950                                          goto opt_error;
2951 2951                                  optused = 2 + optptr[1];
2952 2952                                  if (optused > optlen)
2953 2953                                          goto bad_opt;
2954 2954  
2955 2955                                  /*
2956 2956                                   * We did this dest. opt the first time
2957 2957                                   * around (i.e. before AH processing).
2958 2958                                   * If we've done AH... stop now.
2959 2959                                   */
2960 2960                                  if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2961 2961                                      ira->ira_ipsec_ah_sa != NULL)
2962 2962                                          break;
2963 2963  
2964 2964                                  oh = (struct ip6_opt_home_address *)optptr;
2965 2965                                  /* Check total length and alignment */
2966 2966                                  if (optused < sizeof (*oh) ||
2967 2967                                      ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2968 2968                                          goto opt_error;
2969 2969                                  /* Swap ip6_src and the home address */
2970 2970                                  tmp = ip6h->ip6_src;
2971 2971                                  /* XXX Note: only 8 byte alignment option */
2972 2972                                  ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2973 2973                                  *(in6_addr_t *)oh->ip6oh_addr = tmp;
2974 2974                                  break;
2975 2975                          }
2976 2976  
2977 2977                          case IP6OPT_TUNNEL_LIMIT:
2978 2978                                  if (hdr_type != IPPROTO_DSTOPTS) {
2979 2979                                          goto opt_error;
2980 2980                                  }
2981 2981                                  optused = 2 + optptr[1];
2982 2982                                  if (optused > optlen) {
2983 2983                                          goto bad_opt;
2984 2984                                  }
2985 2985                                  if (optused != 3) {
2986 2986                                          goto opt_error;
2987 2987                                  }
2988 2988                                  break;
2989 2989  
2990 2990                          default:
2991 2991                                  errtype = "unknown";
2992 2992                                  /* FALLTHROUGH */
2993 2993                          opt_error:
2994 2994                                  /* Determine which zone should send error */
2995 2995                                  switch (IP6OPT_TYPE(opt_type)) {
2996 2996                                  case IP6OPT_TYPE_SKIP:
2997 2997                                          optused = 2 + optptr[1];
2998 2998                                          if (optused > optlen)
2999 2999                                                  goto bad_opt;
3000 3000                                          ip1dbg(("ip_process_options_v6: %s "
3001 3001                                              "opt 0x%x skipped\n",
3002 3002                                              errtype, opt_type));
3003 3003                                          break;
3004 3004                                  case IP6OPT_TYPE_DISCARD:
3005 3005                                          ip1dbg(("ip_process_options_v6: %s "
3006 3006                                              "opt 0x%x; packet dropped\n",
3007 3007                                              errtype, opt_type));
3008 3008                                          BUMP_MIB(ill->ill_ip_mib,
3009 3009                                              ipIfStatsInHdrErrors);
3010 3010                                          ip_drop_input("ipIfStatsInHdrErrors",
3011 3011                                              mp, ill);
3012 3012                                          freemsg(mp);
3013 3013                                          return (-1);
3014 3014                                  case IP6OPT_TYPE_ICMP:
3015 3015                                          BUMP_MIB(ill->ill_ip_mib,
3016 3016                                              ipIfStatsInHdrErrors);
3017 3017                                          ip_drop_input("ipIfStatsInHdrErrors",
3018 3018                                              mp, ill);
3019 3019                                          icmp_param_problem_v6(mp,
3020 3020                                              ICMP6_PARAMPROB_OPTION,
3021 3021                                              (uint32_t)(optptr -
3022 3022                                              (uint8_t *)ip6h),
3023 3023                                              B_FALSE, ira);
3024 3024                                          return (-1);
3025 3025                                  case IP6OPT_TYPE_FORCEICMP:
3026 3026                                          BUMP_MIB(ill->ill_ip_mib,
3027 3027                                              ipIfStatsInHdrErrors);
3028 3028                                          ip_drop_input("ipIfStatsInHdrErrors",
3029 3029                                              mp, ill);
3030 3030                                          icmp_param_problem_v6(mp,
3031 3031                                              ICMP6_PARAMPROB_OPTION,
3032 3032                                              (uint32_t)(optptr -
3033 3033                                              (uint8_t *)ip6h),
3034 3034                                              B_TRUE, ira);
3035 3035                                          return (-1);
3036 3036                                  default:
3037 3037                                          ASSERT(0);
3038 3038                                  }
3039 3039                          }
3040 3040                  }
3041 3041                  optlen -= optused;
3042 3042                  optptr += optused;
3043 3043          }
3044 3044          return (ret);
3045 3045  
3046 3046  bad_opt:
3047 3047          /* Determine which zone should send error */
3048 3048          ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3049 3049          icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
3050 3050              (uint32_t)(optptr - (uint8_t *)ip6h),
3051 3051              B_FALSE, ira);
3052 3052          return (-1);
3053 3053  }
3054 3054  
3055 3055  /*
3056 3056   * Process a routing header that is not yet empty.
3057 3057   * Because of RFC 5095, we now reject all route headers.
3058 3058   */
3059 3059  void
3060 3060  ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
3061 3061      ip_recv_attr_t *ira)
3062 3062  {
3063 3063          ill_t           *ill = ira->ira_ill;
3064 3064          ip_stack_t      *ipst = ill->ill_ipst;
3065 3065  
3066 3066          ASSERT(rth->ip6r_segleft != 0);
3067 3067  
3068 3068          if (!ipst->ips_ipv6_forward_src_routed) {
3069 3069                  /* XXX Check for source routed out same interface? */
3070 3070                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
3071 3071                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
3072 3072                  ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
3073 3073                  freemsg(mp);
3074 3074                  return;
3075 3075          }
3076 3076  
3077 3077          ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3078 3078          icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3079 3079              (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3080 3080              B_FALSE, ira);
3081 3081  }
3082 3082  
3083 3083  /*
3084 3084   * Read side put procedure for IPv6 module.
3085 3085   */
3086 3086  void
3087 3087  ip_rput_v6(queue_t *q, mblk_t *mp)
3088 3088  {
3089 3089          ill_t           *ill;
3090 3090  
3091 3091          ill = (ill_t *)q->q_ptr;
3092 3092          if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3093 3093                  union DL_primitives *dl;
3094 3094  
3095 3095                  dl = (union DL_primitives *)mp->b_rptr;
3096 3096                  /*
3097 3097                   * Things are opening or closing - only accept DLPI
3098 3098                   * ack messages. If the stream is closing and ip_wsrv
3099 3099                   * has completed, ip_close is out of the qwait, but has
3100 3100                   * not yet completed qprocsoff. Don't proceed any further
3101 3101                   * because the ill has been cleaned up and things hanging
3102 3102                   * off the ill have been freed.
3103 3103                   */
3104 3104                  if ((mp->b_datap->db_type != M_PCPROTO) ||
3105 3105                      (dl->dl_primitive == DL_UNITDATA_IND)) {
3106 3106                          inet_freemsg(mp);
3107 3107                          return;
3108 3108                  }
3109 3109          }
3110 3110          if (DB_TYPE(mp) == M_DATA) {
3111 3111                  struct mac_header_info_s mhi;
3112 3112  
3113 3113                  ip_mdata_to_mhi(ill, mp, &mhi);
3114 3114                  ip_input_v6(ill, NULL, mp, &mhi);
3115 3115          } else {
3116 3116                  ip_rput_notdata(ill, mp);
3117 3117          }
3118 3118  }
3119 3119  
3120 3120  /*
3121 3121   * Walk through the IPv6 packet in mp and see if there's an AH header
3122 3122   * in it.  See if the AH header needs to get done before other headers in
3123 3123   * the packet.  (Worker function for ipsec_early_ah_v6().)
3124 3124   */
3125 3125  #define IPSEC_HDR_DONT_PROCESS  0
3126 3126  #define IPSEC_HDR_PROCESS       1
3127 3127  #define IPSEC_MEMORY_ERROR      2 /* or malformed packet */
3128 3128  static int
3129 3129  ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3130 3130  {
3131 3131          uint_t  length;
3132 3132          uint_t  ehdrlen;
3133 3133          uint8_t *whereptr;
3134 3134          uint8_t *endptr;
3135 3135          uint8_t *nexthdrp;
3136 3136          ip6_dest_t *desthdr;
3137 3137          ip6_rthdr_t *rthdr;
3138 3138          ip6_t   *ip6h;
3139 3139  
3140 3140          /*
3141 3141           * For now just pullup everything.  In general, the less pullups,
3142 3142           * the better, but there's so much squirrelling through anyway,
3143 3143           * it's just easier this way.
3144 3144           */
3145 3145          if (!pullupmsg(mp, -1)) {
3146 3146                  return (IPSEC_MEMORY_ERROR);
3147 3147          }
3148 3148  
3149 3149          ip6h = (ip6_t *)mp->b_rptr;
3150 3150          length = IPV6_HDR_LEN;
3151 3151          whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3152 3152          endptr = mp->b_wptr;
3153 3153  
3154 3154          /*
3155 3155           * We can't just use the argument nexthdr in the place
3156 3156           * of nexthdrp becaue we don't dereference nexthdrp
3157 3157           * till we confirm whether it is a valid address.
3158 3158           */
3159 3159          nexthdrp = &ip6h->ip6_nxt;
3160 3160          while (whereptr < endptr) {
3161 3161                  /* Is there enough left for len + nexthdr? */
3162 3162                  if (whereptr + MIN_EHDR_LEN > endptr)
3163 3163                          return (IPSEC_MEMORY_ERROR);
3164 3164  
3165 3165                  switch (*nexthdrp) {
3166 3166                  case IPPROTO_HOPOPTS:
3167 3167                  case IPPROTO_DSTOPTS:
3168 3168                          /* Assumes the headers are identical for hbh and dst */
3169 3169                          desthdr = (ip6_dest_t *)whereptr;
3170 3170                          ehdrlen = 8 * (desthdr->ip6d_len + 1);
3171 3171                          if ((uchar_t *)desthdr +  ehdrlen > endptr)
3172 3172                                  return (IPSEC_MEMORY_ERROR);
3173 3173                          /*
3174 3174                           * Return DONT_PROCESS because the destination
3175 3175                           * options header may be for each hop in a
3176 3176                           * routing-header, and we only want AH if we're
3177 3177                           * finished with routing headers.
3178 3178                           */
3179 3179                          if (*nexthdrp == IPPROTO_DSTOPTS)
3180 3180                                  return (IPSEC_HDR_DONT_PROCESS);
3181 3181                          nexthdrp = &desthdr->ip6d_nxt;
3182 3182                          break;
3183 3183                  case IPPROTO_ROUTING:
3184 3184                          rthdr = (ip6_rthdr_t *)whereptr;
3185 3185  
3186 3186                          /*
3187 3187                           * If there's more hops left on the routing header,
3188 3188                           * return now with DON'T PROCESS.
3189 3189                           */
3190 3190                          if (rthdr->ip6r_segleft > 0)
3191 3191                                  return (IPSEC_HDR_DONT_PROCESS);
3192 3192  
3193 3193                          ehdrlen =  8 * (rthdr->ip6r_len + 1);
3194 3194                          if ((uchar_t *)rthdr +  ehdrlen > endptr)
3195 3195                                  return (IPSEC_MEMORY_ERROR);
3196 3196                          nexthdrp = &rthdr->ip6r_nxt;
3197 3197                          break;
3198 3198                  case IPPROTO_FRAGMENT:
3199 3199                          /* Wait for reassembly */
3200 3200                          return (IPSEC_HDR_DONT_PROCESS);
3201 3201                  case IPPROTO_AH:
3202 3202                          *nexthdr = IPPROTO_AH;
3203 3203                          return (IPSEC_HDR_PROCESS);
3204 3204                  case IPPROTO_NONE:
3205 3205                          /* No next header means we're finished */
3206 3206                  default:
3207 3207                          return (IPSEC_HDR_DONT_PROCESS);
3208 3208                  }
3209 3209                  length += ehdrlen;
3210 3210                  whereptr += ehdrlen;
3211 3211          }
3212 3212          /*
3213 3213           * Malformed/truncated packet.
3214 3214           */
3215 3215          return (IPSEC_MEMORY_ERROR);
3216 3216  }
3217 3217  
3218 3218  /*
3219 3219   * Path for AH if options are present.
3220 3220   * Returns NULL if the mblk was consumed.
3221 3221   *
3222 3222   * Sometimes AH needs to be done before other IPv6 headers for security
3223 3223   * reasons.  This function (and its ipsec_needs_processing_v6() above)
3224 3224   * indicates if that is so, and fans out to the appropriate IPsec protocol
3225 3225   * for the datagram passed in.
            *
            * Outcomes, keyed by the result of ipsec_needs_processing_v6():
            *  - IPSEC_MEMORY_ERROR: ipIfStatsInDiscards is bumped, mp is freed
            *    and NULL is returned.
            *  - IPSEC_HDR_DONT_PROCESS: mp is handed back to the caller.
            *  - anything else (AH was found): mp goes through the AH SA lookup
            *    and the SA's input function; if a message comes back it is
            *    passed to ip_input_post_ipsec() and NULL is returned.
3226 3226   */
3227 3227  mblk_t *
3228 3228  ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3229 3229  {
3230 3230          uint8_t nexthdr;
3231 3231          ah_t *ah;
3232 3232          ill_t           *ill = ira->ira_ill;
3233 3233          ip_stack_t      *ipst = ill->ill_ipst;
3234 3234          ipsec_stack_t   *ipss = ipst->ips_netstack->netstack_ipsec;
3235 3235  
                   /*
                    * There is deliberately no default case: any other result falls
                    * out of the switch and is treated as "AH present" below.
                    */
3236 3236          switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3237 3237          case IPSEC_MEMORY_ERROR:
3238 3238                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3239 3239                  ip_drop_input("ipIfStatsInDiscards", mp, ill);
3240 3240                  freemsg(mp);
3241 3241                  return (NULL);
3242 3242          case IPSEC_HDR_DONT_PROCESS:
3243 3243                  return (mp);
3244 3244          }
3245 3245  
3246 3246          /* Default means send it to AH! */
3247 3247          ASSERT(nexthdr == IPPROTO_AH);
3248 3248  
                   /*
                    * NOTE(review): mp is not freed here before returning NULL, so
                    * ip_proto_not_sup() presumably consumes it -- confirm.
                    */
3249 3249          if (!ipsec_loaded(ipss)) {
3250 3250                  ip_proto_not_sup(mp, ira);
3251 3251                  return (NULL);
3252 3252          }
3253 3253  
                   /* Look up the AH SA; a NULL result ends processing here. */
3254 3254          mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3255 3255          if (mp == NULL)
3256 3256                  return (NULL);
3257 3257          ASSERT(ah != NULL);
3258 3258          ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3259 3259          ASSERT(ira->ira_ipsec_ah_sa != NULL);
3260 3260          ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3261 3261          mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3262 3262  
3263 3263          if (mp == NULL) {
3264 3264                  /*
3265 3265                   * Either it failed or is pending. In the former case
3266 3266                   * ipIfStatsInDiscards was increased.
3267 3267                   */
3268 3268                  return (NULL);
3269 3269          }
3270 3270  
3271 3271          /* we're done with IPsec processing, send it up */
3272 3272          ip_input_post_ipsec(mp, ira);
3273 3273          return (NULL);
3274 3274  }
3275 3275  
3276 3276  /*
3277 3277   * Reassemble fragment.
3278 3278   * When it returns a completed message the first mblk will only contain
3279 3279   * the headers prior to the fragment header, with the nexthdr value updated
3280 3280   * to be the header after the fragment header.
            *
            * Arguments:
            *   mp      - message holding the fragment; ip6h and fraghdr are
            *             expected to point into its first mblk.
            *   ip6h    - IPv6 header of the fragment.
            *   fraghdr - fragment extension header of the fragment.
            *   remlen  - added to the fragment offset to compute where this
            *             fragment ends (presumably the number of bytes that
            *             follow the fragment header -- confirm with callers).
            *   ira     - receive attributes; ira_pktlen, ira_ip_hdr_length and
            *             ira_protocol are rewritten when a packet is returned.
            *
            * Returns the reassembled (or single-fragment) packet, or NULL when
            * the fragment was queued for later, dropped, or answered with an
            * ICMPv6 parameter problem.  On every NULL return mp has been freed,
            * linked into the reassembly queue, or handed to another function.
3281 3281   */
3282 3282  mblk_t *
3283 3283  ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3284 3284      ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3285 3285  {
3286 3286          uint32_t        ident = ntohl(fraghdr->ip6f_ident);
3287 3287          uint16_t        offset;
3288 3288          boolean_t       more_frags;
3289 3289          uint8_t         nexthdr = fraghdr->ip6f_nxt;
3290 3290          in6_addr_t      *v6dst_ptr;
3291 3291          in6_addr_t      *v6src_ptr;
3292 3292          uint_t          end;
3293 3293          uint_t          hdr_length;
3294 3294          size_t          count;
3295 3295          ipf_t           *ipf;
3296 3296          ipf_t           **ipfp;
3297 3297          ipfb_t          *ipfb;
3298 3298          mblk_t          *mp1;
3299 3299          uint8_t         ecn_info = 0;
3300 3300          size_t          msg_len;
3301 3301          mblk_t          *tail_mp;
3302 3302          mblk_t          *t_mp;
3303 3303          boolean_t       pruned = B_FALSE;
3304 3304          uint32_t        sum_val;
3305 3305          uint16_t        sum_flags;
3306 3306          ill_t           *ill = ira->ira_ill;
3307 3307          ip_stack_t      *ipst = ill->ill_ipst;
3308 3308          uint_t          prev_nexthdr_offset;
3309 3309          uint8_t         prev_nexthdr;
3310 3310          uint8_t         *ptr;
3311 3311          uint32_t        packet_size;
3312 3312  
3313 3313          /*
3314 3314           * We utilize hardware computed checksum info only for UDP since
3315 3315           * IP fragmentation is a normal occurrence for the protocol.  In
3316 3316           * addition, checksum offload support for IP fragments carrying
3317 3317           * UDP payload is commonly implemented across network adapters.
3318 3318           */
3319 3319          ASSERT(ira->ira_rill != NULL);
3320 3320          if (nexthdr == IPPROTO_UDP && dohwcksum &&
3321 3321              ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3322 3322              (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
                           /* Note: this mp1 shadows the function-scope mp1. */
3323 3323                  mblk_t *mp1 = mp->b_cont;
3324 3324                  int32_t len;
3325 3325  
3326 3326                  /* Record checksum information from the packet */
3327 3327                  sum_val = (uint32_t)DB_CKSUM16(mp);
3328 3328                  sum_flags = DB_CKSUMFLAGS(mp);
3329 3329  
3330 3330                  /* fragmented payload offset from beginning of mblk */
3331 3331                  offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3332 3332  
3333 3333                  if ((sum_flags & HCK_PARTIALCKSUM) &&
3334 3334                      (mp1 == NULL || mp1->b_cont == NULL) &&
3335 3335                      offset >= DB_CKSUMSTART(mp) &&
3336 3336                      ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3337 3337                          uint32_t adj;
3338 3338                          /*
3339 3339                           * Partial checksum has been calculated by hardware
3340 3340                           * and attached to the packet; in addition, any
3341 3341                           * prepended extraneous data is even byte aligned.
3342 3342                           * If any such data exists, we adjust the checksum;
3343 3343                           * this would also handle any postpended data.
3344 3344                           */
3345 3345                          IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3346 3346                              mp, mp1, len, adj);
3347 3347  
3348 3348                          /* One's complement subtract extraneous checksum */
3349 3349                          if (adj >= sum_val)
3350 3350                                  sum_val = ~(adj - sum_val) & 0xFFFF;
3351 3351                          else
3352 3352                                  sum_val -= adj;
3353 3353                  }
3354 3354          } else {
3355 3355                  sum_val = 0;
3356 3356                  sum_flags = 0;
3357 3357          }
3358 3358  
3359 3359          /* Clear hardware checksumming flag */
3360 3360          DB_CKSUMFLAGS(mp) = 0;
3361 3361  
3362 3362          /*
3363 3363           * Determine the offset (from the beginning of the IP header)
3364 3364           * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3365 3365           * this when removing the fragment header from the packet.
3366 3366           * This packet consists of the IPv6 header, a potential
3367 3367           * hop-by-hop options header, a potential pre-routing-header
3368 3368           * destination options header, and a potential routing header.
                    *
                    * NOTE(review): the extension header lengths read below are not
                    * checked against the mblk bounds in this function; presumably
                    * the caller has already validated them -- confirm.
3369 3369           */
3370 3370          prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3371 3371          prev_nexthdr = ip6h->ip6_nxt;
3372 3372          ptr = (uint8_t *)&ip6h[1];
3373 3373  
3374 3374          if (prev_nexthdr == IPPROTO_HOPOPTS) {
3375 3375                  ip6_hbh_t       *hbh_hdr;
3376 3376                  uint_t          hdr_len;
3377 3377  
3378 3378                  hbh_hdr = (ip6_hbh_t *)ptr;
3379 3379                  hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3380 3380                  prev_nexthdr = hbh_hdr->ip6h_nxt;
3381 3381                  prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3382 3382                      - (uint8_t *)ip6h;
3383 3383                  ptr += hdr_len;
3384 3384          }
3385 3385          if (prev_nexthdr == IPPROTO_DSTOPTS) {
3386 3386                  ip6_dest_t      *dest_hdr;
3387 3387                  uint_t          hdr_len;
3388 3388  
3389 3389                  dest_hdr = (ip6_dest_t *)ptr;
3390 3390                  hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3391 3391                  prev_nexthdr = dest_hdr->ip6d_nxt;
3392 3392                  prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3393 3393                      - (uint8_t *)ip6h;
3394 3394                  ptr += hdr_len;
3395 3395          }
3396 3396          if (prev_nexthdr == IPPROTO_ROUTING) {
3397 3397                  ip6_rthdr_t     *rthdr;
3398 3398                  uint_t          hdr_len;
3399 3399  
3400 3400                  rthdr = (ip6_rthdr_t *)ptr;
3401 3401                  prev_nexthdr = rthdr->ip6r_nxt;
3402 3402                  prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3403 3403                      - (uint8_t *)ip6h;
3404 3404                  hdr_len = 8 * (rthdr->ip6r_len + 1);
3405 3405                  ptr += hdr_len;
3406 3406          }
3407 3407          if (prev_nexthdr != IPPROTO_FRAGMENT) {
3408 3408                  /* Can't handle other headers before the fragment header */
3409 3409                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3410 3410                  ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3411 3411                  freemsg(mp);
3412 3412                  return (NULL);
3413 3413          }
3414 3414  
3415 3415          /*
3416 3416           * Note: Fragment offset in header is in 8-octet units.
3417 3417           * Clearing least significant 3 bits not only extracts
3418 3418           * it but also gets it in units of octets.
3419 3419           */
3420 3420          offset = ntohs(fraghdr->ip6f_offlg) & ~7;
3421 3421          more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3422 3422  
3423 3423          /*
3424 3424           * Is the more frags flag on and the payload length not a multiple
3425 3425           * of eight?
3426 3426           */
3427 3427          if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3428 3428                  ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3429 3429                  icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3430 3430                      (uint32_t)((char *)&ip6h->ip6_plen -
3431 3431                      (char *)ip6h), B_FALSE, ira);
3432 3432                  return (NULL);
3433 3433          }
3434 3434  
3435 3435          v6src_ptr = &ip6h->ip6_src;
3436 3436          v6dst_ptr = &ip6h->ip6_dst;
3437 3437          end = remlen;
3438 3438  
                   /* Bytes from the IPv6 header through the end of the fragment header */
3439 3439          hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3440 3440          end += offset;
3441 3441  
3442 3442          /*
3443 3443           * Would fragment cause reassembled packet to have a payload length
3444 3444           * greater than IP_MAXPACKET - the max payload size?
3445 3445           */
3446 3446          if (end > IP_MAXPACKET) {
3447 3447                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3448 3448                  ip_drop_input("Reassembled packet too large", mp, ill);
3449 3449                  icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3450 3450                      (uint32_t)((char *)&fraghdr->ip6f_offlg -
3451 3451                      (char *)ip6h), B_FALSE, ira);
3452 3452                  return (NULL);
3453 3453          }
3454 3454  
3455 3455          /*
3456 3456           * This packet just has one fragment. Reassembly not
3457 3457           * needed.
3458 3458           */
3459 3459          if (!more_frags && offset == 0) {
3460 3460                  goto reass_done;
3461 3461          }
3462 3462  
3463 3463          /*
3464 3464           * Drop the fragment as early as possible, if
3465 3465           * we don't have resource(s) to re-assemble.
                    *
                    * NOTE(review): unlike the other drop paths in this function,
                    * no MIB counter is bumped and ip_drop_input() is not called.
3466 3466           */
3467 3467          if (ipst->ips_ip_reass_queue_bytes == 0) {
3468 3468                  freemsg(mp);
3469 3469                  return (NULL);
3470 3470          }
3471 3471  
3472 3472          /* Record the ECN field info. */
3473 3473          ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3474 3474          /*
3475 3475           * If this is not the first fragment, dump the unfragmentable
3476 3476           * portion of the packet.
3477 3477           */
3478 3478          if (offset)
3479 3479                  mp->b_rptr = (uchar_t *)&fraghdr[1];
3480 3480  
3481 3481          /*
3482 3482           * Fragmentation reassembly.  Each ILL has a hash table for
3483 3483           * queueing packets undergoing reassembly for all IPIFs
3484 3484           * associated with the ILL.  The hash is based on the packet
3485 3485           * IP ident field.  The ILL frag hash table was allocated
3486 3486           * as a timer block at the time the ILL was created.  Whenever
3487 3487           * there is anything on the reassembly queue, the timer will
3488 3488           * be running.
3489 3489           */
3490 3490          /* Handle vnic loopback of fragments */
                   /*
                    * msg_len is the byte count charged to the reassembly queue for
                    * this fragment; mblks whose data block has more than two
                    * references contribute nothing to it.
                    */
3491 3491          if (mp->b_datap->db_ref > 2)
3492 3492                  msg_len = 0;
3493 3493          else
3494 3494                  msg_len = MBLKSIZE(mp);
3495 3495  
3496 3496          tail_mp = mp;
3497 3497          while (tail_mp->b_cont != NULL) {
3498 3498                  tail_mp = tail_mp->b_cont;
3499 3499                  if (tail_mp->b_datap->db_ref <= 2)
3500 3500                          msg_len += MBLKSIZE(tail_mp);
3501 3501          }
3502 3502          /*
3503 3503           * If the reassembly list for this ILL will get too big
3504 3504           * prune it.
3505 3505           */
3506 3506  
3507 3507          if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3508 3508              ipst->ips_ip_reass_queue_bytes) {
3509 3509                  DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3510 3510                      uint_t, ill->ill_frag_count,
3511 3511                      uint_t, ipst->ips_ip_reass_queue_bytes);
3512 3512                  ill_frag_prune(ill,
3513 3513                      (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3514 3514                      (ipst->ips_ip_reass_queue_bytes - msg_len));
3515 3515                  pruned = B_TRUE;
3516 3516          }
3517 3517  
                   /* Everything from here to the unhook below runs under ipfb_lock. */
3518 3518          ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3519 3519          mutex_enter(&ipfb->ipfb_lock);
3520 3520  
3521 3521          ipfp = &ipfb->ipfb_ipf;
3522 3522          /* Try to find an existing fragment queue for this packet. */
3523 3523          for (;;) {
3524 3524                  ipf = ipfp[0];
3525 3525                  if (ipf) {
3526 3526                          /*
3527 3527                           * It has to match on ident, source address, and
3528 3528                           * dest address.
3529 3529                           */
3530 3530                          if (ipf->ipf_ident == ident &&
3531 3531                              IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3532 3532                              IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3533 3533  
3534 3534                                  /*
3535 3535                                   * If we have received too many
3536 3536                                   * duplicate fragments for this packet
3537 3537                                   * free it.
3538 3538                                   */
3539 3539                                  if (ipf->ipf_num_dups > ip_max_frag_dups) {
3540 3540                                          ill_frag_free_pkts(ill, ipfb, ipf, 1);
3541 3541                                          freemsg(mp);
3542 3542                                          mutex_exit(&ipfb->ipfb_lock);
3543 3543                                          return (NULL);
3544 3544                                  }
3545 3545  
3546 3546                                  break;
3547 3547                          }
3548 3548                          ipfp = &ipf->ipf_hash_next;
3549 3549                          continue;
3550 3550                  }
3551 3551  
3552 3552  
3553 3553                  /*
3554 3554                   * If we pruned the list, do we want to store this new
3555 3555                   * fragment?. We apply an optimization here based on the
3556 3556                   * fact that most fragments will be received in order.
3557 3557                   * So if the offset of this incoming fragment is zero,
3558 3558                   * it is the first fragment of a new packet. We will
3559 3559                   * keep it.  Otherwise drop the fragment, as we have
3560 3560                   * probably pruned the packet already (since the
3561 3561                   * packet cannot be found).
3562 3562                   */
3563 3563  
3564 3564                  if (pruned && offset != 0) {
3565 3565                          mutex_exit(&ipfb->ipfb_lock);
3566 3566                          freemsg(mp);
3567 3567                          return (NULL);
3568 3568                  }
3569 3569  
3570 3570                  /* New guy.  Allocate a frag message. */
3571 3571                  mp1 = allocb(sizeof (*ipf), BPRI_MED);
3572 3572                  if (!mp1) {
3573 3573                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3574 3574                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
3575 3575                          freemsg(mp);
                                   /*
                                    * Shared exit: drop the bucket lock and return NULL.
                                    * Reached by falling through from the allocation
                                    * failure above and by goto from the paths below
                                    * that have nothing to return yet.
                                    */
3576 3576          partial_reass_done:
3577 3577                          mutex_exit(&ipfb->ipfb_lock);
3578 3578                          return (NULL);
3579 3579                  }
3580 3580  
3581 3581                  if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst))  {
3582 3582                          /*
3583 3583                           * Too many fragmented packets in this hash bucket.
3584 3584                           * Free the oldest.
3585 3585                           */
3586 3586                          ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
3587 3587                  }
3588 3588  
3589 3589                  mp1->b_cont = mp;
3590 3590  
3591 3591                  /* Initialize the fragment header. */
3592 3592                  ipf = (ipf_t *)mp1->b_rptr;
3593 3593                  ipf->ipf_mp = mp1;
3594 3594                  ipf->ipf_ptphn = ipfp;
3595 3595                  ipfp[0] = ipf;
3596 3596                  ipf->ipf_hash_next = NULL;
3597 3597                  ipf->ipf_ident = ident;
3598 3598                  ipf->ipf_v6src = *v6src_ptr;
3599 3599                  ipf->ipf_v6dst = *v6dst_ptr;
3600 3600                  /* Record reassembly start time. */
3601 3601                  ipf->ipf_timestamp = gethrestime_sec();
3602 3602                  /* Record ipf generation and account for frag header */
3603 3603                  ipf->ipf_gen = ill->ill_ipf_gen++;
3604 3604                  ipf->ipf_count = MBLKSIZE(mp1);
3605 3605                  ipf->ipf_protocol = nexthdr;
3606 3606                  ipf->ipf_nf_hdr_len = 0;
3607 3607                  ipf->ipf_prev_nexthdr_offset = 0;
3608 3608                  ipf->ipf_last_frag_seen = B_FALSE;
3609 3609                  ipf->ipf_ecn = ecn_info;
3610 3610                  ipf->ipf_num_dups = 0;
3611 3611                  ipfb->ipfb_frag_pkts++;
3612 3612                  ipf->ipf_checksum = 0;
3613 3613                  ipf->ipf_checksum_flags = 0;
3614 3614  
3615 3615                  /* Store checksum value in fragment header */
3616 3616                  if (sum_flags != 0) {
3617 3617                          sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3618 3618                          sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3619 3619                          ipf->ipf_checksum = sum_val;
3620 3620                          ipf->ipf_checksum_flags = sum_flags;
3621 3621                  }
3622 3622  
3623 3623                  /*
3624 3624                   * We handle reassembly two ways.  In the easy case,
3625 3625                   * where all the fragments show up in order, we do
3626 3626                   * minimal bookkeeping, and just clip new pieces on
3627 3627                   * the end.  If we ever see a hole, then we go off
3628 3628                   * to ip_reassemble which has to mark the pieces and
3629 3629                   * keep track of the number of holes, etc.  Obviously,
3630 3630                   * the point of having both mechanisms is so we can
3631 3631                   * handle the easy case as efficiently as possible.
3632 3632                   */
3633 3633                  if (offset == 0) {
3634 3634                          /* Easy case, in-order reassembly so far. */
3635 3635                          /* Update the byte count */
3636 3636                          ipf->ipf_count += msg_len;
3637 3637                          ipf->ipf_tail_mp = tail_mp;
3638 3638                          /*
3639 3639                           * Keep track of next expected offset in
3640 3640                           * ipf_end.
3641 3641                           */
3642 3642                          ipf->ipf_end = end;
3643 3643                          ipf->ipf_nf_hdr_len = hdr_length;
3644 3644                          ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
3645 3645                  } else {
3646 3646                          /* Hard case, hole at the beginning. */
3647 3647                          ipf->ipf_tail_mp = NULL;
3648 3648                          /*
3649 3649                           * ipf_end == 0 means that we have given up
3650 3650                           * on easy reassembly.
3651 3651                           */
3652 3652                          ipf->ipf_end = 0;
3653 3653  
3654 3654                          /* Forget checksum offload from now on */
3655 3655                          ipf->ipf_checksum_flags = 0;
3656 3656  
3657 3657                          /*
3658 3658                           * ipf_hole_cnt is set by ip_reassemble.
3659 3659                           * ipf_count is updated by ip_reassemble.
3660 3660                           * No need to check for return value here
3661 3661                           * as we don't expect reassembly to complete or
3662 3662                           * fail for the first fragment itself.
3663 3663                           */
3664 3664                          (void) ip_reassemble(mp, ipf, offset, more_frags, ill,
3665 3665                              msg_len);
3666 3666                  }
3667 3667                  /* Update per ipfb and ill byte counts */
3668 3668                  ipfb->ipfb_count += ipf->ipf_count;
3669 3669                  ASSERT(ipfb->ipfb_count > 0);   /* Wraparound */
3670 3670                  atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
3671 3671                  /* If the frag timer wasn't already going, start it. */
3672 3672                  mutex_enter(&ill->ill_lock);
3673 3673                  ill_frag_timer_start(ill);
3674 3674                  mutex_exit(&ill->ill_lock);
3675 3675                  goto partial_reass_done;
3676 3676          }
3677 3677  
3678 3678          /*
3679 3679           * If the packet's flag has changed (it could be coming up
3680 3680           * from an interface different than the previous, therefore
3681 3681           * possibly different checksum capability), then forget about
3682 3682           * any stored checksum states.  Otherwise add the value to
3683 3683           * the existing one stored in the fragment header.
3684 3684           */
3685 3685          if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
3686 3686                  sum_val += ipf->ipf_checksum;
3687 3687                  sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3688 3688                  sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3689 3689                  ipf->ipf_checksum = sum_val;
3690 3690          } else if (ipf->ipf_checksum_flags != 0) {
3691 3691                  /* Forget checksum offload from now on */
3692 3692                  ipf->ipf_checksum_flags = 0;
3693 3693          }
3694 3694  
3695 3695          /*
3696 3696           * We have a new piece of a datagram which is already being
3697 3697           * reassembled.  Update the ECN info if all IP fragments
3698 3698           * are ECN capable.  If there is one which is not, clear
3699 3699           * all the info.  If there is at least one which has CE
3700 3700           * code point, IP needs to report that up to transport.
3701 3701           */
3702 3702          if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
3703 3703                  if (ecn_info == IPH_ECN_CE)
3704 3704                          ipf->ipf_ecn = IPH_ECN_CE;
3705 3705          } else {
3706 3706                  ipf->ipf_ecn = IPH_ECN_NECT;
3707 3707          }
3708 3708  
3709 3709          if (offset && ipf->ipf_end == offset) {
3710 3710                  /* The new fragment fits at the end */
3711 3711                  ipf->ipf_tail_mp->b_cont = mp;
3712 3712                  /* Update the byte count */
3713 3713                  ipf->ipf_count += msg_len;
3714 3714                  /* Update per ipfb and ill byte counts */
3715 3715                  ipfb->ipfb_count += msg_len;
3716 3716                  ASSERT(ipfb->ipfb_count > 0);   /* Wraparound */
3717 3717                  atomic_add_32(&ill->ill_frag_count, msg_len);
3718 3718                  if (more_frags) {
3719 3719                          /* More to come. */
3720 3720                          ipf->ipf_end = end;
3721 3721                          ipf->ipf_tail_mp = tail_mp;
3722 3722                          goto partial_reass_done;
3723 3723                  }
                           /* Last fragment appended in order: fall through to completion. */
3724 3724          } else {
3725 3725                  /*
3726 3726                   * Go do the hard cases.
3727 3727                   * Call ip_reassemble().
3728 3728                   */
3729 3729                  int ret;
3730 3730  
                           /*
                            * A first fragment arriving after later ones: record the
                            * unfragmentable header info if it has not been set yet.
                            */
3731 3731                  if (offset == 0) {
3732 3732                          if (ipf->ipf_prev_nexthdr_offset == 0) {
3733 3733                                  ipf->ipf_nf_hdr_len = hdr_length;
3734 3734                                  ipf->ipf_prev_nexthdr_offset =
3735 3735                                      prev_nexthdr_offset;
3736 3736                          }
3737 3737                  }
3738 3738                  /* Save current byte count */
3739 3739                  count = ipf->ipf_count;
3740 3740                  ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
3741 3741  
3742 3742                  /* Count of bytes added and subtracted (freeb()ed) */
3743 3743                  count = ipf->ipf_count - count;
3744 3744                  if (count) {
3745 3745                          /* Update per ipfb and ill byte counts */
3746 3746                          ipfb->ipfb_count += count;
3747 3747                          ASSERT(ipfb->ipfb_count > 0);   /* Wraparound */
3748 3748                          atomic_add_32(&ill->ill_frag_count, count);
3749 3749                  }
3750 3750                  if (ret == IP_REASS_PARTIAL) {
3751 3751                          goto partial_reass_done;
3752 3752                  } else if (ret == IP_REASS_FAILED) {
3753 3753                          /* Reassembly failed. Free up all resources */
3754 3754                          ill_frag_free_pkts(ill, ipfb, ipf, 1);
3755 3755                          for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
3756 3756                                  IP_REASS_SET_START(t_mp, 0);
3757 3757                                  IP_REASS_SET_END(t_mp, 0);
3758 3758                          }
3759 3759                          freemsg(mp);
3760 3760                          goto partial_reass_done;
3761 3761                  }
3762 3762  
3763 3763                  /* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3764 3764          }
3765 3765          /*
3766 3766           * We have completed reassembly.  Unhook the frag header from
3767 3767           * the reassembly list.
3768 3768           *
3769 3769           * Grab the unfragmentable header length and next header value
3770 3770           * out of the first fragment
3771 3771           */
3772 3772          ASSERT(ipf->ipf_nf_hdr_len != 0);
3773 3773          hdr_length = ipf->ipf_nf_hdr_len;
3774 3774  
3775 3775          /*
3776 3776           * Before we free the frag header, record the ECN info
3777 3777           * to report back to the transport.
3778 3778           */
3779 3779          ecn_info = ipf->ipf_ecn;
3780 3780  
3781 3781          /*
3782 3782           * Store the nextheader field in the header preceding the fragment
3783 3783           * header
3784 3784           */
3785 3785          nexthdr = ipf->ipf_protocol;
3786 3786          prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
3787 3787          ipfp = ipf->ipf_ptphn;
3788 3788  
3789 3789          /* We need to supply these to caller */
3790 3790          if ((sum_flags = ipf->ipf_checksum_flags) != 0)
3791 3791                  sum_val = ipf->ipf_checksum;
3792 3792          else
3793 3793                  sum_val = 0;
3794 3794  
                   /* Unlink this ipf from the bucket's list and undo its byte accounting. */
3795 3795          mp1 = ipf->ipf_mp;
3796 3796          count = ipf->ipf_count;
3797 3797          ipf = ipf->ipf_hash_next;
3798 3798          if (ipf)
3799 3799                  ipf->ipf_ptphn = ipfp;
3800 3800          ipfp[0] = ipf;
3801 3801          atomic_add_32(&ill->ill_frag_count, -count);
3802 3802          ASSERT(ipfb->ipfb_count >= count);
3803 3803          ipfb->ipfb_count -= count;
3804 3804          ipfb->ipfb_frag_pkts--;
3805 3805          mutex_exit(&ipfb->ipfb_lock);
3806 3806          /* Ditch the frag header. */
3807 3807          mp = mp1->b_cont;
3808 3808          freeb(mp1);
3809 3809  
3810 3810          /*
3811 3811           * Make sure the packet is good by doing some sanity
3812 3812           * check. If bad we can silently drop the packet.
                    *
                    * The single-fragment shortcut jumps straight here, with
                    * hdr_length, nexthdr, prev_nexthdr_offset, sum_val and
                    * sum_flags all computed from that one packet and no lock held.
3813 3813           */
3814 3814  reass_done:
3815 3815          if (hdr_length < sizeof (ip6_frag_t)) {
3816 3816                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3817 3817                  ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3818 3818                  ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3819 3819                  freemsg(mp);
3820 3820                  return (NULL);
3821 3821          }
3822 3822  
3823 3823          /*
3824 3824           * Remove the fragment header from the initial header by
3825 3825           * splitting the mblk into the non-fragmentable header and
3826 3826           * everything after the fragment extension header.  This has the
3827 3827           * side effect of putting all the headers that need destination
3828 3828           * processing into the b_cont block-- on return this fact is
3829 3829           * used in order to avoid having to look at the extensions
3830 3830           * already processed.
3831 3831           *
3832 3832           * Note that this code assumes that the unfragmentable portion
3833 3833           * of the header is in the first mblk and increments
3834 3834           * the read pointer past it.  If this assumption is broken
3835 3835           * this code fails badly.
3836 3836           */
3837 3837          if (mp->b_rptr + hdr_length != mp->b_wptr) {
3838 3838                  mblk_t *nmp;
3839 3839  
3840 3840                  if (!(nmp = dupb(mp))) {
3841 3841                          ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3842 3842                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3843 3843                          ip_drop_input("ipIfStatsInDiscards", mp, ill);
3844 3844                          freemsg(mp);
3845 3845                          return (NULL);
3846 3846                  }
                           /*
                            * nmp is spliced in right after mp, inherits the rest of
                            * the chain, and starts just past the fragment header.
                            */
3847 3847                  nmp->b_cont = mp->b_cont;
3848 3848                  mp->b_cont = nmp;
3849 3849                  nmp->b_rptr += hdr_length;
3850 3850          }
                   /* Trim the first mblk so that it ends just before the fragment header. */
3851 3851          mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
3852 3852  
3853 3853          ip6h = (ip6_t *)mp->b_rptr;
3854 3854          ((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
3855 3855  
3856 3856          /* Restore original IP length in header. */
3857 3857          packet_size = msgdsize(mp);
3858 3858          ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
3859 3859          /* Record the ECN info. */
                   /*
                    * NOTE(review): on the single-fragment path (goto reass_done)
                    * ecn_info still holds its initial 0 because the header's ECN
                    * field is only read after that goto, so the two ECN bits of
                    * ip6_vcf are rewritten to zero here -- confirm this is intended.
                    */
3860 3860          ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
3861 3861          ip6h->ip6_vcf |= htonl(ecn_info << 20);
3862 3862  
3863 3863          /* Update the receive attributes */
3864 3864          ira->ira_pktlen = packet_size;
3865 3865          ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
3866 3866          ira->ira_protocol = nexthdr;
3867 3867  
3868 3868          /* Reassembly is successful; set checksum information in packet */
3869 3869          DB_CKSUM16(mp) = (uint16_t)sum_val;
3870 3870          DB_CKSUMFLAGS(mp) = sum_flags;
3871 3871          DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
3872 3872  
3873 3873          return (mp);
3874 3874  }
3875 3875  
3876 3876  /*
3877 3877   * Given an mblk and a ptr, find the destination address in an IPv6 routing
3878 3878   * header.
            *
            * whereptr points at the routing header.  mp may be NULL; when it is not,
            * it is used only to check that the header ends at or before b_wptr.
            * oldrv is returned unchanged when the routing type is neither 0 nor 2,
            * when the header fails the sanity checks below, or when segments-left
            * is zero.  Otherwise the last address listed in the header is returned.
3879 3879   */
3880 3880  static in6_addr_t
3881 3881  pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3882 3882  {
3883 3883          ip6_rthdr0_t *rt0;
3884 3884          int segleft, numaddr;
3885 3885          in6_addr_t *ap, rv = oldrv;
3886 3886  
3887 3887          rt0 = (ip6_rthdr0_t *)whereptr;
3888 3888          if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3889 3889                  DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3890 3890                      uint8_t *, whereptr);
3891 3891                  return (rv);
3892 3892          }
3893 3893          segleft = rt0->ip6r0_segleft;
                   /* ip6r0_len counts 8-octet units; an address takes two of them. */
3894 3894          numaddr = rt0->ip6r0_len / 2;
3895 3895  
3896 3896          if ((rt0->ip6r0_len & 0x1) ||
3897 3897              (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3898 3898              (segleft > rt0->ip6r0_len / 2)) {
3899 3899                  /*
3900 3900                   * Corrupt packet.  Either the routing header length is odd
3901 3901                   * (can't happen) or mismatched compared to the packet, or the
3902 3902                   * number of addresses is.  Return what we can.  This will
3903 3903                   * only be a problem on forwarded packets that get squeezed
3904 3904                   * through an outbound tunnel enforcing IPsec Tunnel Mode.
3905 3905                   */
3906 3906                  DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3907 3907                      whereptr);
3908 3908                  return (rv);
3909 3909          }
3910 3910  
                   /* Addresses start right after the fixed header; take the last one. */
3911 3911          if (segleft != 0) {
3912 3912                  ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3913 3913                  rv = ap[numaddr - 1];
3914 3914          }
3915 3915  
3916 3916          return (rv);
3917 3917  }
3918 3918  
3919 3919  /*
3920 3920   * Walk through the options to see if there is a routing header.
3921 3921   * If present get the destination which is the last address of
3922 3922   * the option.
3923 3923   * mp needs to be provided in cases when the extension headers might span
3924 3924   * b_cont; mp is never modified by this function.
            *
            * If is_fragment is non-NULL it is set to B_TRUE when the header that
            * ends the walk is a fragment header, and to B_FALSE otherwise.  When
            * no routing header is found the function returns ip6h->ip6_dst.
3925 3925   */
3926 3926  in6_addr_t
3927 3927  ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3928 3928  {
3929 3929          const mblk_t *current_mp = mp;
3930 3930          uint8_t nexthdr;
3931 3931          uint8_t *whereptr;
3932 3932          int ehdrlen;
3933 3933          in6_addr_t rv;
3934 3934  
                   /* Start at the base header; ehdrlen is the length of the current header. */
3935 3935          whereptr = (uint8_t *)ip6h;
3936 3936          ehdrlen = sizeof (ip6_t);
3937 3937  
3938 3938          /* We assume at least the IPv6 base header is within one mblk. */
3939 3939          ASSERT(mp == NULL ||
3940 3940              (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3941 3941  
3942 3942          rv = ip6h->ip6_dst;
3943 3943          nexthdr = ip6h->ip6_nxt;
3944 3944          if (is_fragment != NULL)
3945 3945                  *is_fragment = B_FALSE;
3946 3946  
3947 3947          /*
3948 3948           * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3949 3949           * no extension headers will be split across mblks.
3950 3950           */
3951 3951  
3952 3952          while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3953 3953              nexthdr == IPPROTO_ROUTING) {
3954 3954                  if (nexthdr == IPPROTO_ROUTING)
3955 3955                          rv = pluck_out_dst(current_mp, whereptr, rv);
3956 3956  
3957 3957                  /*
3958 3958                   * All IPv6 extension headers have the next-header in byte
3959 3959                   * 0, and the (length - 8) in 8-byte-words.
3960 3960                   */
                           /*
                            * If the current header ends at or past b_wptr, carry the
                            * remaining length over into the following mblk(s).
                            */
3961 3961                  while (current_mp != NULL &&
3962 3962                      whereptr + ehdrlen >= current_mp->b_wptr) {
3963 3963                          ehdrlen -= (current_mp->b_wptr - whereptr);
3964 3964                          current_mp = current_mp->b_cont;
3965 3965                          if (current_mp == NULL) {
3966 3966                                  /* Bad packet.  Return what we can. */
3967 3967                                  DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3968 3968                                      mp, mblk_t *, current_mp, ip6_t *, ip6h);
3969 3969                                  goto done;
3970 3970                          }
3971 3971                          whereptr = current_mp->b_rptr;
3972 3972                  }
3973 3973                  whereptr += ehdrlen;
3974 3974  
                           /* whereptr now points at the next header; read its type and length. */
3975 3975                  nexthdr = *whereptr;
3976 3976                  ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3977 3977                  ehdrlen = (*(whereptr + 1) + 1) * 8;
3978 3978          }
3979 3979  
3980 3980  done:
3981 3981          if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3982 3982                  *is_fragment = B_TRUE;
3983 3983          return (rv);
3984 3984  }
3985 3985  
3986 3986  /*
3987 3987   * ip_source_routed_v6:
3988 3988   * This function is called by redirect code (called from ip_input_v6) to
3989 3989   * know whether this packet is source routed through this node i.e
3990 3990   * whether this node (router) is part of the journey. This
3991 3991   * function is called under two cases :
3992 3992   *
3993 3993   * case 1 : Routing header was processed by this node and
3994 3994   *          ip_process_rthdr replaced ip6_dst with the next hop
3995 3995   *          and we are forwarding the packet to the next hop.
3996 3996   *
3997 3997   * case 2 : Routing header was not processed by this node and we
3998 3998   *          are just forwarding the packet.
3999 3999   *
4000 4000   * For case (1) we don't want to send redirects. For case(2) we
4001 4001   * want to send redirects.
            *
            * Returns B_TRUE only when the routing header entry that this node
            * would have consumed holds an address of type IRE_LOCAL.  Returns
            * B_FALSE when there is no routing header, when the headers are not
            * within mp's first mblk, or when the segments-left value does not
            * select a valid entry of the address list.
4002 4002   */
4003 4003  static boolean_t
4004 4004  ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
4005 4005  {
4006 4006          uint8_t         nexthdr;
4007 4007          in6_addr_t      *addrptr;
4008 4008          ip6_rthdr0_t    *rthdr;
4009 4009          uint8_t         numaddr;
4010 4010          ip6_hbh_t       *hbhhdr;
4011 4011          uint_t          ehdrlen;
4012 4012          uint8_t         *byteptr;
4013 4013  
4014 4014          ip2dbg(("ip_source_routed_v6\n"));
4015 4015          nexthdr = ip6h->ip6_nxt;
4016 4016          ehdrlen = IPV6_HDR_LEN;
4017 4017  
4018 4018          /* if a routing hdr is preceded by HOPOPT or DSTOPT */
4019 4019          while (nexthdr == IPPROTO_HOPOPTS ||
4020 4020              nexthdr == IPPROTO_DSTOPTS) {
4021 4021                  byteptr = (uint8_t *)ip6h + ehdrlen;
4022 4022                  /*
4023 4023                   * Check if we have already processed
4024 4024                   * packets or we are just a forwarding
4025 4025                   * router which only pulled up msgs up
4026 4026                   * to IPV6HDR and  one HBH ext header
4027 4027                   */
4028 4028                  if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4029 4029                          ip2dbg(("ip_source_routed_v6: Extension"
4030 4030                              " headers not processed\n"));
4031 4031                          return (B_FALSE);
4032 4032                  }
4033 4033                  hbhhdr = (ip6_hbh_t *)byteptr;
4034 4034                  nexthdr = hbhhdr->ip6h_nxt;
4035 4035                  ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
4036 4036          }
4037 4037          switch (nexthdr) {
4038 4038          case IPPROTO_ROUTING:
4039 4039                  byteptr = (uint8_t *)ip6h + ehdrlen;
4040 4040                  /*
4041 4041                   * If for some reason, we haven't pulled up
4042 4042                   * the routing hdr data mblk, then we must
4043 4043                   * not have processed it at all. So for sure
4044 4044                   * we are not part of the source routed journey.
4045 4045                   */
4046 4046                  if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4047 4047                          ip2dbg(("ip_source_routed_v6: Routing"
4048 4048                              " header not processed\n"));
4049 4049                          return (B_FALSE);
4050 4050                  }
4051 4051                  rthdr = (ip6_rthdr0_t *)byteptr;
4052 4052                  /*
4053 4053                   * Either we are an intermediate router or the
4054 4054                   * last hop before destination and we have
4055 4055                   * already processed the routing header.
                            * In that case we must be the entry at index
                            * numaddr - (segleft + 1) of the address list.
                            * That entry only exists when segleft < numaddr.
                            * Both values come straight from the packet, so
                            * for any other combination the index would be
                            * negative and point before the address list;
                            * treat such a packet as not source routed
                            * through this node instead of reading there.
4062 4062                   */
                           numaddr = rthdr->ip6r0_len / 2;
                           if (rthdr->ip6r0_segleft < numaddr) {
4066 4066                          addrptr = (in6_addr_t *)((char *)rthdr +
4067 4067                              sizeof (*rthdr));
4068 4068                          addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
                                   /* Only read the entry if it lies within this mblk. */
                                   if ((uint8_t *)(addrptr + 1) <= mp->b_wptr) {
4070 4070                                  if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
4071 4071                                          return (B_TRUE);
4072 4072                                  ip1dbg(("ip_source_routed_v6: Not local\n"));
4073 4073                          }
4074 4074                  }
4075 4075          /* FALLTHRU */
4076 4076          default:
4077 4077                  ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4078 4078                  return (B_FALSE);
4079 4079          }
4080 4080  }
4081 4081  
4082 4082  /*
4083 4083   * IPv6 fragmentation.  Essentially the same as IPv4 fragmentation.
4084 4084   * We have not optimized this in terms of number of mblks
4085 4085   * allocated. For instance, for each fragment sent we always allocate a
4086 4086   * mblk to hold the IPv6 header and fragment header.
4087 4087   *
4088 4088   * Assumes that all the extension headers are contained in the first mblk
4089 4089   * and that the fragment header has already been added by calling
4090 4090   * ip_fraghdr_add_v6.
            *
            * max_frag bounds the size of each fragment handed to postfragfn,
            * unfragmentable headers included.  mp is freed on the failure paths
            * here and is otherwise carved up by ip_carve_mp().
            *
            * Returns EINVAL when max_frag is zero or too small to carry the
            * unfragmentable headers plus one 8-octet unit of data, or when no
            * fragment header is found; ENOBUFS on allocation failure; otherwise
            * the status of the most recent postfragfn call.
4091 4091   */
4092 4092  int
4093 4093  ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4094 4094      uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4095 4095      pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4096 4096  {
4097 4097          ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
4098 4098          ip6_t           *fip6h;
4099 4099          mblk_t          *hmp;
4100 4100          mblk_t          *hmp0;
4101 4101          mblk_t          *dmp;
4102 4102          ip6_frag_t      *fraghdr;
4103 4103          size_t          unfragmentable_len;
4104 4104          size_t          mlen;
4105 4105          size_t          max_chunk;
4106 4106          uint16_t        off_flags;
4107 4107          uint16_t        offset = 0;
4108 4108          ill_t           *ill = nce->nce_ill;
4109 4109          uint8_t         nexthdr;
4110 4110          uint8_t         *ptr;
4111 4111          ip_stack_t      *ipst = ill->ill_ipst;
4112 4112          uint_t          priority = mp->b_band;
4113 4113          int             error = 0;
4114 4114  
4115 4115          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4116 4116          if (max_frag == 0) {
4117 4117                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4118 4118                  ip_drop_output("FragFails: zero max_frag", mp, ill);
4119 4119                  freemsg(mp);
4120 4120                  return (EINVAL);
4121 4121          }
4122 4122  
4123 4123          /*
4124 4124           * Caller should have added fraghdr_t to pkt_len, and also
4125 4125           * updated ip6_plen.
4126 4126           */
4127 4127          ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4128 4128          ASSERT(msgdsize(mp) == pkt_len);
4129 4129  
4130 4130          /*
4131 4131           * Determine the length of the unfragmentable portion of this
4132 4132           * datagram.  This consists of the IPv6 header, a potential
4133 4133           * hop-by-hop options header, a potential pre-routing-header
4134 4134           * destination options header, and a potential routing header.
4135 4135           */
4136 4136          nexthdr = ip6h->ip6_nxt;
4137 4137          ptr = (uint8_t *)&ip6h[1];
4138 4138  
4139 4139          if (nexthdr == IPPROTO_HOPOPTS) {
4140 4140                  ip6_hbh_t       *hbh_hdr;
4141 4141                  uint_t          hdr_len;
4142 4142  
4143 4143                  hbh_hdr = (ip6_hbh_t *)ptr;
4144 4144                  hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4145 4145                  nexthdr = hbh_hdr->ip6h_nxt;
4146 4146                  ptr += hdr_len;
4147 4147          }
4148 4148          if (nexthdr == IPPROTO_DSTOPTS) {
4149 4149                  ip6_dest_t      *dest_hdr;
4150 4150                  uint_t          hdr_len;
4151 4151  
4152 4152                  dest_hdr = (ip6_dest_t *)ptr;
4153 4153                  if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4154 4154                          hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4155 4155                          nexthdr = dest_hdr->ip6d_nxt;
4156 4156                          ptr += hdr_len;
4157 4157                  }
4158 4158          }
4159 4159          if (nexthdr == IPPROTO_ROUTING) {
4160 4160                  ip6_rthdr_t     *rthdr;
4161 4161                  uint_t          hdr_len;
4162 4162  
4163 4163                  rthdr = (ip6_rthdr_t *)ptr;
4164 4164                  nexthdr = rthdr->ip6r_nxt;
4165 4165                  hdr_len = 8 * (rthdr->ip6r_len + 1);
4166 4166                  ptr += hdr_len;
4167 4167          }
4168 4168          if (nexthdr != IPPROTO_FRAGMENT) {
4169 4169                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4170 4170                  ip_drop_output("FragFails: bad nexthdr", mp, ill);
4171 4171                  freemsg(mp);
4172 4172                  return (EINVAL);
4173 4173          }
4174 4174          unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4175 4175          unfragmentable_len += sizeof (ip6_frag_t);
4176 4176  
                   /*
                    * Every non-final fragment must carry at least one 8-octet unit
                    * of fragmentable data.  If max_frag is smaller than that, the
                    * unsigned subtraction below would either wrap around (yielding
                    * a huge max_chunk) or leave max_chunk at zero, in which case
                    * the fragmentation loop would never consume pkt_len.
                    */
                   if (max_frag < unfragmentable_len + 8) {
                           BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
                           ip_drop_output("FragFails: max_frag too small", mp, ill);
                           freemsg(mp);
                           return (EINVAL);
                   }

                   /* Largest multiple of 8 data octets that fits in one fragment. */
4177 4177          max_chunk = (max_frag - unfragmentable_len) & ~7;
4178 4178  
4179 4179          /*
4180 4180           * Allocate an mblk with enough room for the link-layer
4181 4181           * header and the unfragmentable part of the datagram, which includes
4182 4182           * the fragment header.  This (or a copy) will be used as the
4183 4183           * first mblk for each fragment we send.
4184 4184           */
4185 4185          hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4186 4186          if (hmp == NULL) {
4187 4187                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4188 4188                  ip_drop_output("FragFails: no hmp", mp, ill);
4189 4189                  freemsg(mp);
4190 4190                  return (ENOBUFS);
4191 4191          }
4192 4192          hmp->b_rptr += ipst->ips_ip_wroff_extra;
4193 4193          hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4194 4194  
4195 4195          fip6h = (ip6_t *)hmp->b_rptr;
4196 4196          bcopy(ip6h, fip6h, unfragmentable_len);
4197 4197  
4198 4198          /*
4199 4199           * pkt_len is set to the total length of the fragmentable data in this
4200 4200           * datagram.  For each fragment sent, we will decrement pkt_len
4201 4201           * by the amount of fragmentable data sent in that fragment
4202 4202           * until pkt_len reaches zero.
4203 4203           */
4204 4204          pkt_len -= unfragmentable_len;
4205 4205  
4206 4206          /*
4207 4207           * Move read ptr past unfragmentable portion, we don't want this part
4208 4208           * of the data in our fragments.
4209 4209           */
4210 4210          mp->b_rptr += unfragmentable_len;
4211 4211          if (mp->b_rptr == mp->b_wptr) {
4212 4212                  mblk_t *mp1 = mp->b_cont;
4213 4213                  freeb(mp);
4214 4214                  mp = mp1;
4215 4215          }
4216 4216  
4217 4217          while (pkt_len != 0) {
4218 4218                  mlen = MIN(pkt_len, max_chunk);
4219 4219                  pkt_len -= mlen;
4220 4220                  if (pkt_len != 0) {
4221 4221                          /* Not last */
4222 4222                          hmp0 = copyb(hmp);
4223 4223                          if (hmp0 == NULL) {
4224 4224                                  BUMP_MIB(ill->ill_ip_mib,
4225 4225                                      ipIfStatsOutFragFails);
4226 4226                                  ip_drop_output("FragFails: copyb failed",
4227 4227                                      mp, ill);
4228 4228                                  freeb(hmp);
4229 4229                                  freemsg(mp);
4230 4230                                  ip1dbg(("ip_fragment_v6: copyb failed\n"));
4231 4231                                  return (ENOBUFS);
4232 4232                          }
4233 4233                          off_flags = IP6F_MORE_FRAG;
4234 4234                  } else {
4235 4235                          /* Last fragment */
4236 4236                          hmp0 = hmp;
4237 4237                          hmp = NULL;
4238 4238                          off_flags = 0;
4239 4239                  }
4240 4240                  fip6h = (ip6_t *)(hmp0->b_rptr);
4241 4241                  fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4242 4242                      sizeof (ip6_frag_t));
4243 4243  
4244 4244                  fip6h->ip6_plen = htons((uint16_t)(mlen +
4245 4245                      unfragmentable_len - IPV6_HDR_LEN));
4246 4246                  /*
4247 4247                   * Note: Optimization alert.
4248 4248                   * In IPv6 (and IPv4) protocol header, Fragment Offset
4249 4249                   * ("offset") is 13 bits wide and in 8-octet units.
4250 4250                   * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4251 4251                   * it occupies the most significant 13 bits.
4252 4252                   * (least significant 13 bits in IPv4).
4253 4253                   * We do not do any shifts here. Not shifting is same effect
4254 4254                   * as taking offset value in octet units, dividing by 8 and
4255 4255                   * then shifting 3 bits left to line it up in place in proper
4256 4256                   * place protocol header.
4257 4257                   */
4258 4258                  fraghdr->ip6f_offlg = htons(offset) | off_flags;
4259 4259  
4260 4260                  if (!(dmp = ip_carve_mp(&mp, mlen))) {
4261 4261                          /* mp has already been freed by ip_carve_mp() */
4262 4262                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4263 4263                          ip_drop_output("FragFails: could not carve mp",
4264 4264                              hmp0, ill);
4265 4265                          if (hmp != NULL)
4266 4266                                  freeb(hmp);
4267 4267                          freeb(hmp0);
4268 4268                          ip1dbg(("ip_carve_mp: failed\n"));
4269 4269                          return (ENOBUFS);
4270 4270                  }
4271 4271                  hmp0->b_cont = dmp;
4272 4272                  /* Get the priority marking, if any */
4273 4273                  hmp0->b_band = priority;
4274 4274  
4275 4275                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4276 4276  
4277 4277                  error = postfragfn(hmp0, nce, ixaflags,
4278 4278                      mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4279 4279                      ixa_cookie);
4280 4280                  if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4281 4281                          /* No point in sending the other fragments */
4282 4282                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4283 4283                          ip_drop_output("FragFails: postfragfn failed",
4284 4284                              hmp, ill);
4285 4285                          freeb(hmp);
4286 4286                          freemsg(mp);
4287 4287                          return (error);
4288 4288                  }
4289 4289                  /* No need to redo state machine in loop */
4290 4290                  ixaflags &= ~IXAF_REACH_CONF;
4291 4291  
4292 4292                  offset += mlen;
4293 4293          }
4294 4294          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4295 4295          return (error);
4296 4296  }
4297 4297  
4298 4298  /*
4299 4299   * Add a fragment header to an IPv6 packet.
4300 4300   * Assumes that all the extension headers are contained in the first mblk.
4301 4301   *
4302 4302   * The fragment header is inserted after an hop-by-hop options header
4303 4303   * and after [an optional destinations header followed by] a routing header.
            *
            * On success returns a newly allocated mblk holding a copy of the
            * unfragmentable headers followed by the fragment header (offset and
            * flags zero, identification set from ident), with mp chained behind it
            * via b_cont and mp's b_rptr advanced past the unfragmentable headers.
            * The copied ip6_plen is increased by sizeof (ip6_frag_t).
            * On allocation failure mp is freed and NULL is returned.
4304 4304   */
4305 4305  mblk_t *
4306 4306  ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4307 4307  {
4308 4308          ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
4309 4309          ip6_t           *fip6h;
4310 4310          mblk_t          *hmp;
4311 4311          ip6_frag_t      *fraghdr;
4312 4312          size_t          unfragmentable_len;
4313 4313          uint8_t         nexthdr;
4314 4314          uint_t          prev_nexthdr_offset;
4315 4315          uint8_t         *ptr;
4316 4316          uint_t          priority = mp->b_band;
4317 4317          ip_stack_t      *ipst = ixa->ixa_ipst;
4318 4318  
4319 4319          /*
4320 4320           * Determine the length of the unfragmentable portion of this
4321 4321           * datagram.  This consists of the IPv6 header, a potential
4322 4322           * hop-by-hop options header, a potential pre-routing-header
4323 4323           * destination options header, and a potential routing header.
                    *
                    * prev_nexthdr_offset tracks the offset (from ip6h) of the
                    * next-header byte of the last unfragmentable header, which is
                    * later overwritten with IPPROTO_FRAGMENT in the copy.
4324 4324           */
4325 4325          nexthdr = ip6h->ip6_nxt;
4326 4326          prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4327 4327          ptr = (uint8_t *)&ip6h[1];
4328 4328  
4329 4329          if (nexthdr == IPPROTO_HOPOPTS) {
4330 4330                  ip6_hbh_t       *hbh_hdr;
4331 4331                  uint_t          hdr_len;
4332 4332  
4333 4333                  hbh_hdr = (ip6_hbh_t *)ptr;
4334 4334                  hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4335 4335                  nexthdr = hbh_hdr->ip6h_nxt;
4336 4336                  prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4337 4337                      - (uint8_t *)ip6h;
4338 4338                  ptr += hdr_len;
4339 4339          }
4340 4340          if (nexthdr == IPPROTO_DSTOPTS) {
4341 4341                  ip6_dest_t      *dest_hdr;
4342 4342                  uint_t          hdr_len;
4343 4343  
4344 4344                  dest_hdr = (ip6_dest_t *)ptr;
                           /* Only skipped when a routing header follows it. */
4345 4345                  if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4346 4346                          hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4347 4347                          nexthdr = dest_hdr->ip6d_nxt;
4348 4348                          prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4349 4349                              - (uint8_t *)ip6h;
4350 4350                          ptr += hdr_len;
4351 4351                  }
4352 4352          }
4353 4353          if (nexthdr == IPPROTO_ROUTING) {
4354 4354                  ip6_rthdr_t     *rthdr;
4355 4355                  uint_t          hdr_len;
4356 4356  
4357 4357                  rthdr = (ip6_rthdr_t *)ptr;
4358 4358                  nexthdr = rthdr->ip6r_nxt;
4359 4359                  prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4360 4360                      - (uint8_t *)ip6h;
4361 4361                  hdr_len = 8 * (rthdr->ip6r_len + 1);
4362 4362                  ptr += hdr_len;
4363 4363          }
4364 4364          unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4365 4365  
4366 4366          /*
4367 4367           * Allocate an mblk with enough room for the link-layer
4368 4368           * header, the unfragmentable part of the datagram, and the
4369 4369           * fragment header.
4370 4370           */
4371 4371          hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4372 4372              ipst->ips_ip_wroff_extra, mp);
4373 4373          if (hmp == NULL) {
4374 4374                  ill_t *ill = ixa->ixa_nce->nce_ill;
4375 4375  
4376 4376                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4377 4377                  ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4378 4378                  freemsg(mp);
4379 4379                  return (NULL);
4380 4380          }
                   /* Leave ips_ip_wroff_extra bytes of headroom in front of the headers. */
4381 4381          hmp->b_rptr += ipst->ips_ip_wroff_extra;
4382 4382          hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4383 4383  
4384 4384          fip6h = (ip6_t *)hmp->b_rptr;
4385 4385          fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4386 4386  
4387 4387          bcopy(ip6h, fip6h, unfragmentable_len);
4388 4388          fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4389 4389          hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4390 4390  
                   /* The fragment header takes over the displaced next-header value. */
4391 4391          fraghdr->ip6f_nxt = nexthdr;
4392 4392          fraghdr->ip6f_reserved = 0;
4393 4393          fraghdr->ip6f_offlg = 0;
4394 4394          fraghdr->ip6f_ident = htonl(ident);
4395 4395  
4396 4396          /* Get the priority marking, if any */
4397 4397          hmp->b_band = priority;
4398 4398  
4399 4399          /*
4400 4400           * Move read ptr past unfragmentable portion, we don't want this part
4401 4401           * of the data in our fragments.
4402 4402           */
4403 4403          mp->b_rptr += unfragmentable_len;
4404 4404          hmp->b_cont = mp;
4405 4405          return (hmp);
4406 4406  }
4407 4407  
4408 4408  /*
4409 4409   * Determine if the ill and multicast aspects of the packet
4410 4410   * "matches" the conn.
            *
            * Returns B_TRUE when the packet described by ira/ip6h passes the
            * checks below for connp, B_FALSE otherwise.
4411 4411   */
4412 4412  boolean_t
4413 4413  conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4414 4414  {
4415 4415          ill_t           *ill = ira->ira_rill;
4416 4416          zoneid_t        zoneid = ira->ira_zoneid;
4417 4417          uint_t          in_ifindex;
4418 4418          in6_addr_t      *v6dst_ptr = &ip6h->ip6_dst;
4419 4419          in6_addr_t      *v6src_ptr = &ip6h->ip6_src;
4420 4420  
4421 4421          /*
4422 4422           * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4423 4423           * scopeid. This is used to limit
4424 4424           * unicast and multicast reception to conn_incoming_ifindex.
4425 4425           * conn_wantpacket_v6 is called both for unicast and
4426 4426           * multicast packets.
4427 4427           */
4428 4428          in_ifindex = connp->conn_incoming_ifindex;
4429 4429  
4430 4430          /* mpathd can bind to the under IPMP interface, which we allow */
4431 4431          if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4432 4432                  if (!IS_UNDER_IPMP(ill))
4433 4433                          return (B_FALSE);
4434 4434  
4435 4435                  if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4436 4436                          return (B_FALSE);
4437 4437          }
4438 4438  
4439 4439          if (!IPCL_ZONE_MATCH(connp, zoneid))
4440 4440                  return (B_FALSE);
4441 4441  
                   /* Non-multicast packets need no further checks. */
4442 4442          if (!(ira->ira_flags & IRAF_MULTICAST))
4443 4443                  return (B_TRUE);
4444 4444  
                   /*
                    * Multicast: accept unconditionally when conn_multi_router is set
                    * or the protocol is RSVP; otherwise defer to
                    * conn_hasmembers_ill_withsrc_v6().  Note that this call is passed
                    * ira_ill, whereas the ifindex check above used ira_rill.
                    */
4445 4445          if (connp->conn_multi_router)
4446 4446                  return (B_TRUE);
4447 4447  
4448 4448          if (ira->ira_protocol == IPPROTO_RSVP)
4449 4449                  return (B_TRUE);
4450 4450  
4451 4451          return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4452 4452              ira->ira_ill));
4453 4453  }
4454 4454  
4455 4455  /*
4456 4456   * pr_addr_dbg function provides the needed buffer space to call
4457 4457   * inet_ntop() function's 3rd argument. This function should be
4458 4458   * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4459 4459   * stack buffer space in its own stack frame. This function uses
4460 4460   * a buffer from its own stack and prints the information.
4461 4461   * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4462 4462   *
4463 4463   * Note:    This function can call inet_ntop() once.
            *
            * fmt1 is passed to printf() as the format string with the result of
            * inet_ntop() as its only argument, so it should contain a single %s
            * conversion as in the example above.  A NULL fmt1 is reported via
            * ip0dbg() and nothing else is printed.
4464 4464   */
4465 4465  void
4466 4466  pr_addr_dbg(char *fmt1, int af, const void *addr)
4467 4467  {
4468 4468          char    buf[INET6_ADDRSTRLEN];
4469 4469  
4470 4470          if (fmt1 == NULL) {
4471 4471                  ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4472 4472                  return;
4473 4473          }
4474 4474  
4475 4475          /*
4476 4476           * This does not compare debug level and just prints
4477 4477           * out. Thus it is the responsibility of the caller
4478 4478           * to check the appropriate debug-level before calling
4479 4479           * this function.
4480 4480           */
4481 4481          if (ip_debug > 0) {
4482 4482                  printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4483 4483          }
4484 4484  
4485 4485  
4486 4486  }
4487 4487  
4488 4488  
4489 4489  /*
4490 4490   * Return the length in bytes of the IPv6 headers (base header plus
4491 4491   * extension headers) that will be needed based on the
4492 4492   * ip_pkt_t structure passed by the caller.
4493 4493   *
4494 4494   * The returned length does not include the length of the upper level
4495 4495   * protocol (ULP) header.
4496 4496   */
4497 4497  int
4498 4498  ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4499 4499  {
4500 4500          int len;
4501 4501  
4502 4502          len = IPV6_HDR_LEN;
4503 4503  
4504 4504          /*
4505 4505           * If there's a security label here, then we ignore any hop-by-hop
4506 4506           * options the user may try to set.
4507 4507           */
4508 4508          if (ipp->ipp_fields & IPPF_LABEL_V6) {
4509 4509                  uint_t hopoptslen;
4510 4510                  /*
4511 4511                   * Note that ipp_label_len_v6 is just the option - not
4512 4512                   * the hopopts extension header. It also needs to be padded
4513 4513                   * to a multiple of 8 bytes.
4514 4514                   */
4515 4515                  ASSERT(ipp->ipp_label_len_v6 != 0);
4516 4516                  hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4517 4517                  hopoptslen = (hopoptslen + 7)/8 * 8;
4518 4518                  len += hopoptslen;
4519 4519          } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4520 4520                  ASSERT(ipp->ipp_hopoptslen != 0);
4521 4521                  len += ipp->ipp_hopoptslen;
4522 4522          }
4523 4523  
4524 4524          /*
4525 4525           * En-route destination options
4526 4526           * Only do them if there's a routing header as well
4527 4527           */
4528 4528          if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4529 4529              (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4530 4530                  ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4531 4531                  len += ipp->ipp_rthdrdstoptslen;
4532 4532          }
4533 4533          if (ipp->ipp_fields & IPPF_RTHDR) {
4534 4534                  ASSERT(ipp->ipp_rthdrlen != 0);
4535 4535                  len += ipp->ipp_rthdrlen;
4536 4536          }
4537 4537          if (ipp->ipp_fields & IPPF_DSTOPTS) {
4538 4538                  ASSERT(ipp->ipp_dstoptslen != 0);
4539 4539                  len += ipp->ipp_dstoptslen;
4540 4540          }
4541 4541          return (len);
4542 4542  }
4543 4543  
4544 4544  /*
4545 4545   * All-purpose routine to build a header chain of an IPv6 header
4546 4546   * followed by any required extension headers and a proto header.
4547 4547   *
4548 4548   * The caller has to set the source and destination address as well as
4549 4549   * ip6_plen. The caller has to massage any routing header and compensate
4550 4550   * for the ULP pseudo-header checksum due to the source route.
4551 4551   *
4552 4552   * The extension headers will all be fully filled in.
4553 4553   */
4554 4554  void
4555 4555  ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4556 4556      uint8_t protocol, uint32_t flowinfo)
4557 4557  {
4558 4558          uint8_t *nxthdr_ptr;
4559 4559          uint8_t *cp;
4560 4560          ip6_t   *ip6h = (ip6_t *)buf;
4561 4561  
4562 4562          /* Initialize IPv6 header */
4563 4563          ip6h->ip6_vcf =
4564 4564              (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4565 4565              (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4566 4566  
4567 4567          if (ipp->ipp_fields & IPPF_TCLASS) {
4568 4568                  /* Overrides the class part of flowinfo */
4569 4569                  ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4570 4570                      ipp->ipp_tclass);
4571 4571          }
4572 4572  
4573 4573          if (ipp->ipp_fields & IPPF_HOPLIMIT)
4574 4574                  ip6h->ip6_hops = ipp->ipp_hoplimit;
4575 4575          else
4576 4576                  ip6h->ip6_hops = ipp->ipp_unicast_hops;
4577 4577  
4578 4578          if ((ipp->ipp_fields & IPPF_ADDR) &&
4579 4579              !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4580 4580                  ip6h->ip6_src = ipp->ipp_addr;
4581 4581  
4582 4582          nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4583 4583          cp = (uint8_t *)&ip6h[1];
4584 4584          /*
4585 4585           * Here's where we have to start stringing together
4586 4586           * any extension headers in the right order:
4587 4587           * Hop-by-hop, destination, routing, and final destination opts.
4588 4588           */
4589 4589          /*
4590 4590           * If there's a security label here, then we ignore any hop-by-hop
4591 4591           * options the user may try to set.
4592 4592           */
4593 4593          if (ipp->ipp_fields & IPPF_LABEL_V6) {
4594 4594                  /*
4595 4595                   * Hop-by-hop options with the label.
4596 4596                   * Note that ipp_label_v6 is just the option - not
4597 4597                   * the hopopts extension header. It also needs to be padded
4598 4598                   * to a multiple of 8 bytes.
4599 4599                   */
4600 4600                  ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4601 4601                  uint_t hopoptslen;
4602 4602                  uint_t padlen;
4603 4603  
4604 4604                  padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4605 4605                  hopoptslen = (padlen + 7)/8 * 8;
4606 4606                  padlen = hopoptslen - padlen;
4607 4607  
4608 4608                  *nxthdr_ptr = IPPROTO_HOPOPTS;
4609 4609                  nxthdr_ptr = &hbh->ip6h_nxt;
4610 4610                  hbh->ip6h_len = hopoptslen/8 - 1;
4611 4611                  cp += sizeof (ip6_hbh_t);
4612 4612                  bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4613 4613                  cp += ipp->ipp_label_len_v6;
4614 4614  
4615 4615                  ASSERT(padlen <= 7);
4616 4616                  switch (padlen) {
4617 4617                  case 0:
4618 4618                          break;
4619 4619                  case 1:
4620 4620                          cp[0] = IP6OPT_PAD1;
4621 4621                          break;
4622 4622                  default:
4623 4623                          cp[0] = IP6OPT_PADN;
4624 4624                          cp[1] = padlen - 2;
4625 4625                          bzero(&cp[2], padlen - 2);
4626 4626                          break;
4627 4627                  }
4628 4628                  cp += padlen;
4629 4629          } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4630 4630                  /* Hop-by-hop options */
4631 4631                  ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4632 4632  
4633 4633                  *nxthdr_ptr = IPPROTO_HOPOPTS;
4634 4634                  nxthdr_ptr = &hbh->ip6h_nxt;
4635 4635  
4636 4636                  bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4637 4637                  cp += ipp->ipp_hopoptslen;
4638 4638          }
4639 4639          /*
4640 4640           * En-route destination options
4641 4641           * Only do them if there's a routing header as well
4642 4642           */
4643 4643          if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4644 4644              (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4645 4645                  ip6_dest_t *dst = (ip6_dest_t *)cp;
4646 4646  
4647 4647                  *nxthdr_ptr = IPPROTO_DSTOPTS;
4648 4648                  nxthdr_ptr = &dst->ip6d_nxt;
4649 4649  
4650 4650                  bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4651 4651                  cp += ipp->ipp_rthdrdstoptslen;
4652 4652          }
4653 4653          /*
4654 4654           * Routing header next
4655 4655           */
4656 4656          if (ipp->ipp_fields & IPPF_RTHDR) {
4657 4657                  ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4658 4658  
4659 4659                  *nxthdr_ptr = IPPROTO_ROUTING;
4660 4660                  nxthdr_ptr = &rt->ip6r_nxt;
4661 4661  
4662 4662                  bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4663 4663                  cp += ipp->ipp_rthdrlen;
4664 4664          }
4665 4665          /*
4666 4666           * Do ultimate destination options
4667 4667           */
4668 4668          if (ipp->ipp_fields & IPPF_DSTOPTS) {
4669 4669                  ip6_dest_t *dest = (ip6_dest_t *)cp;
4670 4670  
4671 4671                  *nxthdr_ptr = IPPROTO_DSTOPTS;
4672 4672                  nxthdr_ptr = &dest->ip6d_nxt;
4673 4673  
4674 4674                  bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4675 4675                  cp += ipp->ipp_dstoptslen;
4676 4676          }
4677 4677          /*
4678 4678           * Now set the last header pointer to the proto passed in
4679 4679           */
4680 4680          *nxthdr_ptr = protocol;
4681 4681          ASSERT((int)(cp - buf) == buf_len);
4682 4682  }
4683 4683  
4684 4684  /*
4685 4685   * Return a pointer to the routing header extension header
4686 4686   * in the IPv6 header(s) chain passed in.
4687 4687   * If none found, return NULL
4688 4688   * Assumes that all extension headers are in same mblk as the v6 header
4689 4689   */
4690 4690  ip6_rthdr_t *
4691 4691  ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4692 4692  {
4693 4693          ip6_dest_t      *desthdr;
4694 4694          ip6_frag_t      *fraghdr;
4695 4695          uint_t          hdrlen;
4696 4696          uint8_t         nexthdr;
4697 4697          uint8_t         *ptr = (uint8_t *)&ip6h[1];
4698 4698  
4699 4699          if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4700 4700                  return ((ip6_rthdr_t *)ptr);
4701 4701  
4702 4702          /*
4703 4703           * The routing header will precede all extension headers
4704 4704           * other than the hop-by-hop and destination options
4705 4705           * extension headers, so if we see anything other than those,
4706 4706           * we're done and didn't find it.
4707 4707           * We could see a destination options header alone but no
4708 4708           * routing header, in which case we'll return NULL as soon as
4709 4709           * we see anything after that.
4710 4710           * Hop-by-hop and destination option headers are identical,
4711 4711           * so we can use either one we want as a template.
4712 4712           */
4713 4713          nexthdr = ip6h->ip6_nxt;
4714 4714          while (ptr < endptr) {
4715 4715                  /* Is there enough left for len + nexthdr? */
4716 4716                  if (ptr + MIN_EHDR_LEN > endptr)
4717 4717                          return (NULL);
4718 4718  
4719 4719                  switch (nexthdr) {
4720 4720                  case IPPROTO_HOPOPTS:
4721 4721                  case IPPROTO_DSTOPTS:
4722 4722                          /* Assumes the headers are identical for hbh and dst */
4723 4723                          desthdr = (ip6_dest_t *)ptr;
4724 4724                          hdrlen = 8 * (desthdr->ip6d_len + 1);
4725 4725                          nexthdr = desthdr->ip6d_nxt;
4726 4726                          break;
4727 4727  
4728 4728                  case IPPROTO_ROUTING:
4729 4729                          return ((ip6_rthdr_t *)ptr);
4730 4730  
4731 4731                  case IPPROTO_FRAGMENT:
4732 4732                          fraghdr = (ip6_frag_t *)ptr;
4733 4733                          hdrlen = sizeof (ip6_frag_t);
4734 4734                          nexthdr = fraghdr->ip6f_nxt;
4735 4735                          break;
4736 4736  
4737 4737                  default:
4738 4738                          return (NULL);
4739 4739                  }
4740 4740                  ptr += hdrlen;
4741 4741          }
4742 4742          return (NULL);
4743 4743  }
4744 4744  
4745 4745  /*
4746 4746   * Called for source-routed packets originating on this node.
4747 4747   * Manipulates the original routing header by moving every entry up
4748 4748   * one slot, placing the first entry in the v6 header's v6_dst field,
4749 4749   * and placing the ultimate destination in the routing header's last
4750 4750   * slot.
4751 4751   *
4752 4752   * Returns the checksum diference between the ultimate destination
4753 4753   * (last hop in the routing header when the packet is sent) and
4754 4754   * the first hop (ip6_dst when the packet is sent)
4755 4755   */
4756 4756  /* ARGSUSED2 */
4757 4757  uint32_t
4758 4758  ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4759 4759  {
4760 4760          uint_t          numaddr;
4761 4761          uint_t          i;
4762 4762          in6_addr_t      *addrptr;
4763 4763          in6_addr_t      tmp;
4764 4764          ip6_rthdr0_t    *rthdr = (ip6_rthdr0_t *)rth;
4765 4765          uint32_t        cksm;
4766 4766          uint32_t        addrsum = 0;
4767 4767          uint16_t        *ptr;
4768 4768  
4769 4769          /*
4770 4770           * Perform any processing needed for source routing.
4771 4771           * We know that all extension headers will be in the same mblk
4772 4772           * as the IPv6 header.
4773 4773           */
4774 4774  
4775 4775          /*
4776 4776           * If no segments left in header, or the header length field is zero,
4777 4777           * don't move hop addresses around;
4778 4778           * Checksum difference is zero.
4779 4779           */
4780 4780          if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4781 4781                  return (0);
4782 4782  
4783 4783          ptr = (uint16_t *)&ip6h->ip6_dst;
4784 4784          cksm = 0;
4785 4785          for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4786 4786                  cksm += ptr[i];
4787 4787          }
4788 4788          cksm = (cksm & 0xFFFF) + (cksm >> 16);
4789 4789  
4790 4790          /*
4791 4791           * Here's where the fun begins - we have to
4792 4792           * move all addresses up one spot, take the
4793 4793           * first hop and make it our first ip6_dst,
4794 4794           * and place the ultimate destination in the
4795 4795           * newly-opened last slot.
4796 4796           */
4797 4797          addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4798 4798          numaddr = rthdr->ip6r0_len / 2;
4799 4799          tmp = *addrptr;
4800 4800          for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4801 4801                  *addrptr = addrptr[1];
4802 4802          }
4803 4803          *addrptr = ip6h->ip6_dst;
4804 4804          ip6h->ip6_dst = tmp;
4805 4805  
4806 4806          /*
4807 4807           * From the checksummed ultimate destination subtract the checksummed
4808 4808           * current ip6_dst (the first hop address). Return that number.
4809 4809           * (In the v4 case, the second part of this is done in each routine
4810 4810           *  that calls ip_massage_options(). We do it all in this one place
4811 4811           *  for v6).
4812 4812           */
4813 4813          ptr = (uint16_t *)&ip6h->ip6_dst;
4814 4814          for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4815 4815                  addrsum += ptr[i];
4816 4816          }
4817 4817          cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4818 4818          if ((int)cksm < 0)
4819 4819                  cksm--;
4820 4820          cksm = (cksm & 0xFFFF) + (cksm >> 16);
4821 4821  
4822 4822          return (cksm);
4823 4823  }
4824 4824  
4825 4825  void
4826 4826  *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4827 4827  {
4828 4828          kstat_t *ksp;
4829 4829  
4830 4830          ip6_stat_t template = {
4831 4831                  { "ip6_udp_fannorm",    KSTAT_DATA_UINT64 },
4832 4832                  { "ip6_udp_fanmb",      KSTAT_DATA_UINT64 },
4833 4833                  { "ip6_recv_pullup",            KSTAT_DATA_UINT64 },
4834 4834                  { "ip6_db_ref",                 KSTAT_DATA_UINT64 },
4835 4835                  { "ip6_notaligned",             KSTAT_DATA_UINT64 },
4836 4836                  { "ip6_multimblk",              KSTAT_DATA_UINT64 },
4837 4837                  { "ipsec_proto_ahesp",          KSTAT_DATA_UINT64 },
4838 4838                  { "ip6_out_sw_cksum",                   KSTAT_DATA_UINT64 },
4839 4839                  { "ip6_out_sw_cksum_bytes",             KSTAT_DATA_UINT64 },
4840 4840                  { "ip6_in_sw_cksum",                    KSTAT_DATA_UINT64 },
4841 4841                  { "ip6_tcp_in_full_hw_cksum_err",       KSTAT_DATA_UINT64 },
4842 4842                  { "ip6_tcp_in_part_hw_cksum_err",       KSTAT_DATA_UINT64 },
4843 4843                  { "ip6_tcp_in_sw_cksum_err",            KSTAT_DATA_UINT64 },
4844 4844                  { "ip6_udp_in_full_hw_cksum_err",       KSTAT_DATA_UINT64 },
4845 4845                  { "ip6_udp_in_part_hw_cksum_err",       KSTAT_DATA_UINT64 },
4846 4846                  { "ip6_udp_in_sw_cksum_err",            KSTAT_DATA_UINT64 },
4847 4847          };
4848 4848          ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4849 4849              KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4850 4850              KSTAT_FLAG_VIRTUAL, stackid);
4851 4851  
4852 4852          if (ksp == NULL)
4853 4853                  return (NULL);
4854 4854  
4855 4855          bcopy(&template, ip6_statisticsp, sizeof (template));
4856 4856          ksp->ks_data = (void *)ip6_statisticsp;
4857 4857          ksp->ks_private = (void *)(uintptr_t)stackid;
4858 4858  
4859 4859          kstat_install(ksp);
4860 4860          return (ksp);
4861 4861  }
4862 4862  
4863 4863  void
4864 4864  ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4865 4865  {
4866 4866          if (ksp != NULL) {
4867 4867                  ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4868 4868                  kstat_delete_netstack(ksp, stackid);
4869 4869          }
4870 4870  }
4871 4871  
4872 4872  /*
4873 4873   * The following two functions set and get the value for the
4874 4874   * IPV6_SRC_PREFERENCES socket option.
4875 4875   */
4876 4876  int
4877 4877  ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4878 4878  {
4879 4879          /*
4880 4880           * We only support preferences that are covered by
4881 4881           * IPV6_PREFER_SRC_MASK.
4882 4882           */
4883 4883          if (prefs & ~IPV6_PREFER_SRC_MASK)
4884 4884                  return (EINVAL);
4885 4885  
4886 4886          /*
4887 4887           * Look for conflicting preferences or default preferences.  If
4888 4888           * both bits of a related pair are clear, the application wants the
4889 4889           * system's default value for that pair.  Both bits in a pair can't
4890 4890           * be set.
4891 4891           */
4892 4892          if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4893 4893                  prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4894 4894          } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4895 4895              IPV6_PREFER_SRC_MIPMASK) {
4896 4896                  return (EINVAL);
4897 4897          }
4898 4898          if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4899 4899                  prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4900 4900          } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4901 4901              IPV6_PREFER_SRC_TMPMASK) {
4902 4902                  return (EINVAL);
4903 4903          }
4904 4904          if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4905 4905                  prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4906 4906          } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4907 4907              IPV6_PREFER_SRC_CGAMASK) {
4908 4908                  return (EINVAL);
4909 4909          }
4910 4910  
4911 4911          ixa->ixa_src_preferences = prefs;
4912 4912          return (0);
4913 4913  }
4914 4914  
4915 4915  size_t
4916 4916  ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4917 4917  {
4918 4918          *val = ixa->ixa_src_preferences;
4919 4919          return (sizeof (ixa->ixa_src_preferences));
4920 4920  }
4921 4921  
4922 4922  /*
4923 4923   * Get the size of the IP options (including the IP headers size)
4924 4924   * without including the AH header's size. If till_ah is B_FALSE,
4925 4925   * and if AH header is present, dest options beyond AH header will
4926 4926   * also be included in the returned size.
4927 4927   */
4928 4928  int
4929 4929  ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4930 4930  {
4931 4931          ip6_t *ip6h;
4932 4932          uint8_t nexthdr;
4933 4933          uint8_t *whereptr;
4934 4934          ip6_hbh_t *hbhhdr;
4935 4935          ip6_dest_t *dsthdr;
4936 4936          ip6_rthdr_t *rthdr;
4937 4937          int ehdrlen;
4938 4938          int size;
4939 4939          ah_t *ah;
4940 4940  
4941 4941          ip6h = (ip6_t *)mp->b_rptr;
4942 4942          size = IPV6_HDR_LEN;
4943 4943          nexthdr = ip6h->ip6_nxt;
4944 4944          whereptr = (uint8_t *)&ip6h[1];
4945 4945          for (;;) {
4946 4946                  /* Assume IP has already stripped it */
4947 4947                  ASSERT(nexthdr != IPPROTO_FRAGMENT);
4948 4948                  switch (nexthdr) {
4949 4949                  case IPPROTO_HOPOPTS:
4950 4950                          hbhhdr = (ip6_hbh_t *)whereptr;
4951 4951                          nexthdr = hbhhdr->ip6h_nxt;
4952 4952                          ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4953 4953                          break;
4954 4954                  case IPPROTO_DSTOPTS:
4955 4955                          dsthdr = (ip6_dest_t *)whereptr;
4956 4956                          nexthdr = dsthdr->ip6d_nxt;
4957 4957                          ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4958 4958                          break;
4959 4959                  case IPPROTO_ROUTING:
4960 4960                          rthdr = (ip6_rthdr_t *)whereptr;
4961 4961                          nexthdr = rthdr->ip6r_nxt;
4962 4962                          ehdrlen = 8 * (rthdr->ip6r_len + 1);
4963 4963                          break;
4964 4964                  default :
4965 4965                          if (till_ah) {
4966 4966                                  ASSERT(nexthdr == IPPROTO_AH);
4967 4967                                  return (size);
4968 4968                          }
4969 4969                          /*
4970 4970                           * If we don't have a AH header to traverse,
4971 4971                           * return now. This happens normally for
4972 4972                           * outbound datagrams where we have not inserted
4973 4973                           * the AH header.
4974 4974                           */
4975 4975                          if (nexthdr != IPPROTO_AH) {
4976 4976                                  return (size);
4977 4977                          }
4978 4978  
4979 4979                          /*
4980 4980                           * We don't include the AH header's size
4981 4981                           * to be symmetrical with other cases where
4982 4982                           * we either don't have a AH header (outbound)
4983 4983                           * or peek into the AH header yet (inbound and
4984 4984                           * not pulled up yet).
4985 4985                           */
4986 4986                          ah = (ah_t *)whereptr;
4987 4987                          nexthdr = ah->ah_nexthdr;
4988 4988                          ehdrlen = (ah->ah_length << 2) + 8;
4989 4989  
4990 4990                          if (nexthdr == IPPROTO_DSTOPTS) {
4991 4991                                  if (whereptr + ehdrlen >= mp->b_wptr) {
4992 4992                                          /*
4993 4993                                           * The destination options header
4994 4994                                           * is not part of the first mblk.
4995 4995                                           */
4996 4996                                          whereptr = mp->b_cont->b_rptr;
4997 4997                                  } else {
4998 4998                                          whereptr += ehdrlen;
4999 4999                                  }
5000 5000  
5001 5001                                  dsthdr = (ip6_dest_t *)whereptr;
5002 5002                                  ehdrlen = 8 * (dsthdr->ip6d_len + 1);
5003 5003                                  size += ehdrlen;
5004 5004                          }
5005 5005                          return (size);
5006 5006                  }
5007 5007                  whereptr += ehdrlen;
5008 5008                  size += ehdrlen;
5009 5009          }
5010 5010  }
5011 5011  
5012 5012  /*
5013 5013   * Utility routine that checks if `v6srcp' is a valid address on underlying
5014 5014   * interface `ill'.  If `ipifp' is non-NULL, it's set to a held ipif
5015 5015   * associated with `v6srcp' on success.  NOTE: if this is not called from
5016 5016   * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
5017 5017   * group during or after this lookup.
5018 5018   */
5019 5019  boolean_t
5020 5020  ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
5021 5021  {
5022 5022          ipif_t *ipif;
5023 5023  
5024 5024  
5025 5025          ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
5026 5026          if (ipif != NULL) {
5027 5027                  if (ipifp != NULL)
5028 5028                          *ipifp = ipif;
5029 5029                  else
5030 5030                          ipif_refrele(ipif);
5031 5031                  return (B_TRUE);
5032 5032          }
5033 5033  
5034 5034          if (ip_debug > 2) {
5035 5035                  pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
5036 5036                      "src %s\n", AF_INET6, v6srcp);
5037 5037          }
5038 5038          return (B_FALSE);
5039 5039  }
  
    | 
      ↓ open down ↓ | 
    4294 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX