Print this page
    
7819 IPv6 Packet and MTU bug
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ip/ip6_output.c
          +++ new/usr/src/uts/common/inet/ip/ip6_output.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
       25 + * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
  25   26   */
  26   27  /* Copyright (c) 1990 Mentat Inc. */
  27   28  
  28   29  #include <sys/types.h>
  29   30  #include <sys/stream.h>
  30   31  #include <sys/strsubr.h>
  31   32  #include <sys/dlpi.h>
  32   33  #include <sys/strsun.h>
  33   34  #include <sys/zone.h>
  34   35  #include <sys/ddi.h>
  35   36  #include <sys/sunddi.h>
  36   37  #include <sys/cmn_err.h>
  37   38  #include <sys/debug.h>
  38   39  #include <sys/atomic.h>
  39   40  
  40   41  #include <sys/systm.h>
  41   42  #include <sys/param.h>
  42   43  #include <sys/kmem.h>
  43   44  #include <sys/sdt.h>
  44   45  #include <sys/socket.h>
  45   46  #include <sys/mac.h>
  46   47  #include <net/if.h>
  47   48  #include <net/if_arp.h>
  48   49  #include <net/route.h>
  49   50  #include <sys/sockio.h>
  50   51  #include <netinet/in.h>
  51   52  #include <net/if_dl.h>
  52   53  
  53   54  #include <inet/common.h>
  54   55  #include <inet/mi.h>
  55   56  #include <inet/mib2.h>
  56   57  #include <inet/nd.h>
  57   58  #include <inet/arp.h>
  58   59  #include <inet/snmpcom.h>
  59   60  #include <inet/kstatcom.h>
  60   61  
  61   62  #include <netinet/igmp_var.h>
  62   63  #include <netinet/ip6.h>
  63   64  #include <netinet/icmp6.h>
  64   65  #include <netinet/sctp.h>
  65   66  
  66   67  #include <inet/ip.h>
  67   68  #include <inet/ip_impl.h>
  68   69  #include <inet/ip6.h>
  69   70  #include <inet/ip6_asp.h>
  70   71  #include <inet/tcp.h>
  71   72  #include <inet/ip_multi.h>
  72   73  #include <inet/ip_if.h>
  73   74  #include <inet/ip_ire.h>
  74   75  #include <inet/ip_ftable.h>
  75   76  #include <inet/ip_rts.h>
  76   77  #include <inet/optcom.h>
  77   78  #include <inet/ip_ndp.h>
  78   79  #include <inet/ip_listutils.h>
  79   80  #include <netinet/igmp.h>
  80   81  #include <netinet/ip_mroute.h>
  81   82  #include <inet/ipp_common.h>
  82   83  
  83   84  #include <net/pfkeyv2.h>
  84   85  #include <inet/sadb.h>
  85   86  #include <inet/ipsec_impl.h>
  86   87  #include <inet/ipdrop.h>
  87   88  #include <inet/ip_netinfo.h>
  88   89  
  89   90  #include <sys/pattr.h>
  90   91  #include <inet/ipclassifier.h>
  91   92  #include <inet/sctp_ip.h>
  92   93  #include <inet/sctp/sctp_impl.h>
  93   94  #include <inet/udp_impl.h>
  94   95  #include <sys/sunddi.h>
  95   96  
  96   97  #include <sys/tsol/label.h>
  97   98  #include <sys/tsol/tnet.h>
  98   99  
  99  100  #ifdef  DEBUG
 100  101  extern boolean_t skip_sctp_cksum;
 101  102  #endif
 102  103  
           /*
            * Send a single IPv6 packet (mp) using the transmit attributes in ixa.
            *
            * The function parses the header to fill in ixa_pktlen, ixa_ip_hdr_length
            * and ixa_protocol, selects a route for the first hop (ip6_dst) with
            * ip_select_route_v6(), obtains an nce for it, derives ixa_fragsize from
            * the dce looked up for the final destination, optionally selects
            * (IXAF_SET_SOURCE) or verifies (IXAF_VERIFY_SOURCE) the source address,
            * attaches global IPsec policy when neither IXAF_NO_IPSEC nor
            * IXAF_IPSEC_SECURE is set, and finally calls ire->ire_sendfn.
            *
            * ixa->ixa_nce is asserted to be NULL on entry and is reset to NULL at
            * the done: label. On the failure paths below mp is freed before
            * returning.
            *
            * Returns 0 or an errno value: EINVAL for a malformed header, ENOBUFS
            * when no usable nce could be obtained, EADDRNOTAVAIL when source
            * verification fails, EHOSTUNREACH on IPsec policy failure, or whatever
            * ip_select_route_v6(), ip_select_source_v6() or the ire_sendfn reports.
            */
 103  104  int
 104  105  ip_output_simple_v6(mblk_t *mp, ip_xmit_attr_t *ixa)
 105  106  {
 106  107          ip6_t           *ip6h;
 107  108          in6_addr_t      firsthop; /* In IP header */
 108  109          in6_addr_t      dst;    /* End of source route, or ip6_dst if none */
 109  110          ire_t           *ire;
 110  111          in6_addr_t      setsrc;
 111  112          int             error;
 112  113          ill_t           *ill = NULL;
 113  114          dce_t           *dce = NULL;
 114  115          nce_t           *nce;
 115  116          iaflags_t       ixaflags = ixa->ixa_flags;
 116  117          ip_stack_t      *ipst = ixa->ixa_ipst;
 117  118          uint8_t         *nexthdrp;
 118  119          boolean_t       repeat = B_FALSE;
 119  120          boolean_t       multirt = B_FALSE;
 120  121          uint_t          ifindex;
 121  122          int64_t         now;
 122  123  
 123  124          ip6h = (ip6_t *)mp->b_rptr;
 124  125          ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
 125  126  
 126  127          ASSERT(ixa->ixa_nce == NULL);
 127  128  
 128  129          ixa->ixa_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 129  130          ASSERT(ixa->ixa_pktlen == msgdsize(mp));
 130  131          if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ixa->ixa_ip_hdr_length,
 131  132              &nexthdrp)) {
 132  133                  /* Malformed packet */
 133  134                  BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
 134  135                  BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
 135  136                  ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
 136  137                  freemsg(mp);
 137  138                  return (EINVAL);
 138  139          }
 139  140          ixa->ixa_protocol = *nexthdrp;
 140  141  
 141  142          /*
 142  143           * Assumes that source routed packets have already been massaged by
 143  144           * the ULP (ip_massage_options_v6) and as a result ip6_dst is the next
 144  145           * hop in the source route. The final destination is used for IPsec
 145  146           * policy and DCE lookup.
 146  147           */
 147  148          firsthop = ip6h->ip6_dst;
 148  149          dst = ip_get_dst_v6(ip6h, mp, NULL);
 149  150  
 150  151  repeat_ire:
 151  152          error = 0;
 152  153          setsrc = ipv6_all_zeros;
 153  154          ire = ip_select_route_v6(&firsthop, ip6h->ip6_src, ixa, NULL, &setsrc,
 154  155              &error, &multirt);
 155  156          ASSERT(ire != NULL);    /* IRE_NOROUTE if none found */
 156  157          if (error != 0) {
 157  158                  BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
 158  159                  BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
 159  160                  ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
 160  161                  freemsg(mp);
 161  162                  goto done;
 162  163          }
 163  164  
 164  165          if (ire->ire_flags & (RTF_BLACKHOLE|RTF_REJECT)) {
 165  166                  /* ire_ill might be NULL hence need to skip some code */
 166  167                  if (ixaflags & IXAF_SET_SOURCE)
 167  168                          ip6h->ip6_src = ipv6_loopback;
 168  169                  ixa->ixa_fragsize = IP_MAXPACKET;
 169  170                  ire->ire_ob_pkt_count++;
 170  171                  BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
 171  172                  /* No dce yet; use default one */
 172  173                  error = (ire->ire_sendfn)(ire, mp, ip6h, ixa,
 173  174                      &ipst->ips_dce_default->dce_ident);
 174  175                  goto done;
 175  176          }
 176  177  
 177  178          /* Note that ip6_dst is only used for IRE_MULTICAST */
 178  179          nce = ire_to_nce(ire, INADDR_ANY, &ip6h->ip6_dst);
 179  180          if (nce == NULL) {
 180  181                  /* Allocation failure? */
 181  182                  ip_drop_output("ire_to_nce", mp, ill);
 182  183                  freemsg(mp);
 183  184                  error = ENOBUFS;
 184  185                  goto done;
 185  186          }
 186  187          if (nce->nce_is_condemned) {
 187  188                  nce_t *nce1;
 188  189  
 189  190                  nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_TRUE);
 190  191                  nce_refrele(nce);
 191  192                  if (nce1 == NULL) {
 192  193                          if (!repeat) {
 193  194                                  /* Try finding a better IRE */
 194  195                                  repeat = B_TRUE;
 195  196                                  ire_refrele(ire);
 196  197                                  goto repeat_ire;
 197  198                          }
 198  199                          /* Tried twice - drop packet */
 199  200                          BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
 200  201                          ip_drop_output("No nce", mp, ill);
 201  202                          freemsg(mp);
 202  203                          error = ENOBUFS;
 203  204                          goto done;
 204  205                  }
 205  206                  nce = nce1;
 206  207          }
 207  208          /*
 208  209           * For multicast with multirt we have a flag passed back from
 209  210           * ire_lookup_multi_ill_v6 since we don't have an IRE for each
 210  211           * possible multicast address.
 211  212           * We also need a flag for multicast since we can't check
 212  213           * whether RTF_MULTIRT is set in ixa_ire for multicast.
 213  214           */
 214  215          if (multirt) {
 215  216                  ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
 216  217                  ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
 217  218          } else {
 218  219                  ixa->ixa_postfragfn = ire->ire_postfragfn;
 219  220                  ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
 220  221          }
 221  222          ASSERT(ixa->ixa_nce == NULL);
 222  223          ixa->ixa_nce = nce;
 223  224  
 224  225          /*
 225  226           * Check for a dce_t with a path mtu.
 226  227           */
 227  228          ifindex = 0;
 228  229          if (IN6_IS_ADDR_LINKSCOPE(&dst))
 229  230                  ifindex = nce->nce_common->ncec_ill->ill_phyint->phyint_ifindex;
 230  231  
 231  232          dce = dce_lookup_v6(&dst, ifindex, ipst, NULL);
 232  233          ASSERT(dce != NULL);
 233  234  
 234  235          if (!(ixaflags & IXAF_PMTU_DISCOVERY)) {
 235  236                  ixa->ixa_fragsize = IPV6_MIN_MTU;
 236  237          } else if (dce->dce_flags & DCEF_PMTU) {
 237  238                  /*
  
    | 
      ↓ open down ↓ | 
    203 lines elided | 
    
      ↑ open up ↑ | 
  
 238  239                   * To avoid a periodic timer to increase the path MTU we
 239  240                   * look at dce_last_change_time each time we send a packet.
 240  241                   */
 241  242                  now = ddi_get_lbolt64();
 242  243                  if (TICK_TO_SEC(now) - dce->dce_last_change_time >
 243  244                      ipst->ips_ip_pathmtu_interval) {
 244  245                          /*
 245  246                           * Older than 20 minutes. Drop the path MTU information.
 246  247                           */
 247  248                          mutex_enter(&dce->dce_lock);
 248      -                        dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU);
      249 +                        dce->dce_flags &= ~DCEF_PMTU;
 249  250                          dce->dce_last_change_time = TICK_TO_SEC(now);
 250  251                          mutex_exit(&dce->dce_lock);
 251  252                          dce_increment_generation(dce);
 252  253                          ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire);
 253  254                  } else {
 254  255                          uint_t fragsize;
 255  256  
                                   /* Use the smaller of the base MTU and the path MTU */
 256  257                          fragsize = ip_get_base_mtu(nce->nce_ill, ire);
 257  258                          if (fragsize > dce->dce_pmtu)
 258  259                                  fragsize = dce->dce_pmtu;
 259  260                          ixa->ixa_fragsize = fragsize;
 260  261                  }
 261  262          } else {
 262  263                  ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire);
 263  264          }
 264  265  
 265  266          /*
 266  267           * We use ire_nexthop_ill (and not ncec_ill) to avoid the under ipmp
 267  268           * interface for source address selection.
 268  269           */
 269  270          ill = ire_nexthop_ill(ire);
 270  271  
 271  272          if (ixaflags & IXAF_SET_SOURCE) {
 272  273                  in6_addr_t      src;
 273  274  
 274  275                  /*
 275  276                   * We use the final destination to get
 276  277                   * correct selection for source routed packets
 277  278                   */
 278  279  
 279  280                  /* If unreachable we have no ill but need some source */
 280  281                  if (ill == NULL) {
 281  282                          src = ipv6_loopback;
 282  283                          error = 0;
 283  284                  } else {
 284  285                          error = ip_select_source_v6(ill, &setsrc, &dst,
 285  286                              ixa->ixa_zoneid, ipst, B_FALSE,
 286  287                              ixa->ixa_src_preferences, &src, NULL, NULL);
 287  288                  }
                           /* error can only be non-zero here when ill != NULL */
 288  289                  if (error != 0) {
 289  290                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
 290  291                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
 291  292                          ip_drop_output("ipIfStatsOutDiscards - no source",
 292  293                              mp, ill);
 293  294                          freemsg(mp);
 294  295                          goto done;
 295  296                  }
 296  297                  ip6h->ip6_src = src;
 297  298          } else if (ixaflags & IXAF_VERIFY_SOURCE) {
 298  299                  /* Check if the IP source is assigned to the host. */
 299  300                  if (!ip_verify_src(mp, ixa, NULL)) {
 300  301                          /* Don't send a packet with a source that isn't ours */
 301  302                          BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
 302  303                          BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
 303  304                          ip_drop_output("ipIfStatsOutDiscards - invalid source",
 304  305                              mp, ill);
 305  306                          freemsg(mp);
 306  307                          error = EADDRNOTAVAIL;
 307  308                          goto done;
 308  309                  }
 309  310          }
 310  311  
 311  312          /*
 312  313           * Check against global IPsec policy to set the AH/ESP attributes.
 313  314           * IPsec will set IXAF_IPSEC_* and ixa_ipsec_* as appropriate.
 314  315           */
 315  316          if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) {
 316  317                  ASSERT(ixa->ixa_ipsec_policy == NULL);
 317  318                  mp = ip_output_attach_policy(mp, NULL, ip6h, NULL, ixa);
 318  319                  if (mp == NULL) {
 319  320                          /* MIB and ip_drop_packet already done */
                                   /*
                                    * NOTE(review): this return bypasses the done: label,
                                    * so the ire, dce and ill references and ixa_nce taken
                                    * above are not released on this path. Confirm whether
                                    * "error = EHOSTUNREACH; goto done;" was intended.
                                    */
 320  321                          return (EHOSTUNREACH);  /* IPsec policy failure */
 321  322                  }
 322  323          }
 323  324  
 324  325          if (ill != NULL) {
 325  326                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
 326  327          } else {
 327  328                  BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
 328  329          }
 329  330  
 330  331          /*
 331  332           * We update the statistics on the most specific IRE i.e., the first
 332  333           * one we found.
 333  334           * We don't have an IRE when we fragment, hence ire_ob_pkt_count
 334  335           * can only count the use prior to fragmentation. However the MIB
 335  336           * counters on the ill will be incremented in post fragmentation.
 336  337           */
 337  338          ire->ire_ob_pkt_count++;
 338  339  
 339  340          /*
 340  341           * Based on ire_type and ire_flags call one of:
 341  342           *      ire_send_local_v6 - for IRE_LOCAL and IRE_LOOPBACK
 342  343           *      ire_send_multirt_v6 - if RTF_MULTIRT
 343  344           *      ire_send_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
 344  345           *      ire_send_multicast_v6 - for IRE_MULTICAST
 345  346           *      ire_send_wire_v6 - for the rest.
 346  347           */
 347  348          error = (ire->ire_sendfn)(ire, mp, ip6h, ixa, &dce->dce_ident);
 348  349  done:
                   /* Common exit: release every reference taken above */
 349  350          ire_refrele(ire);
 350  351          if (dce != NULL)
 351  352                  dce_refrele(dce);
 352  353          if (ill != NULL)
 353  354                  ill_refrele(ill);
 354  355          if (ixa->ixa_nce != NULL)
 355  356                  nce_refrele(ixa->ixa_nce);
 356  357          ixa->ixa_nce = NULL;
 357  358          return (error);
 358  359  }
 359  360  
 360  361  /*
 361  362   * ire_sendfn() functions.
 362  363   * These functions use the following xmit_attr:
 363  364   *  - ixa_fragsize - read to determine whether or not to fragment
 364  365   *  - IXAF_IPSEC_SECURE - to determine whether or not to invoke IPsec
 365  366   *  - ixa_ipsec_*  are used inside IPsec
 366  367   *  - IXAF_LOOPBACK_COPY - for multicast
 367  368   */
 368  369  
 369  370  
 370  371  /*
 371  372   * ire_sendfn for IRE_LOCAL and IRE_LOOPBACK
 372  373   *
 373  374   * The checks for restrict_interzone_loopback are done in ire_route_recursive.
           *
           * The packet is passed through the loopback-out and loopback-in firewall
           * hooks (when something is registered for them), converted from transmit
           * attributes to receive attributes with ipsec_out_to_in(), and delivered
           * through ip_fanout_v6().
           *
           * Returns the error set by FW_HOOKS when a hook leaves mp NULL, and 0
           * otherwise, including when the packet is dropped because
           * tsol_get_pkt_label() fails. identp is not used.
 374  375   */
 375  376  /* ARGSUSED4 */
 376  377  int
 377  378  ire_send_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
 378  379      ip_xmit_attr_t *ixa, uint32_t *identp)
 379  380  {
 380  381          ip6_t           *ip6h = (ip6_t *)iph_arg;
 381  382          ip_stack_t      *ipst = ixa->ixa_ipst;
 382  383          ill_t           *ill = ire->ire_ill;
 383  384          ip_recv_attr_t  iras;   /* NOTE: No bzero for performance */
 384  385          uint_t          pktlen = ixa->ixa_pktlen;
 385  386  
 386  387          /*
 387  388           * No fragmentation, no nce, and no application of IPsec.
 388  389           *
 389  390           *
 390  391           * Note different order between IP provider and FW_HOOKS than in
 391  392           * send_wire case.
 392  393           */
 393  394  
 394  395          /*
 395  396           * DTrace this as ip:::send.  A packet blocked by FW_HOOKS will fire the
 396  397           * send probe, but not the receive probe.
 397  398           */
 398  399          DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
 399  400              ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
 400  401              int, 1);
 401  402  
 402  403          DTRACE_PROBE4(ip6__loopback__out__start,
 403  404              ill_t *, NULL, ill_t *, ill,
 404  405              ip6_t *, ip6h, mblk_t *, mp);
 405  406  
 406  407          if (HOOKS6_INTERESTED_LOOPBACK_OUT(ipst)) {
 407  408                  int     error;
 408  409  
 409  410                  FW_HOOKS(ipst->ips_ip6_loopback_out_event,
 410  411                      ipst->ips_ipv6firewall_loopback_out,
 411  412                      NULL, ill, ip6h, mp, mp, 0, ipst, error);
 412  413  
 413  414                  DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, mp);
 414  415                  if (mp == NULL)
 415  416                          return (error);
 416  417  
 417  418                  /*
 418  419                   * Even if the destination was changed by the filter we use the
 419  420                   * forwarding decision that was made based on the address
 420  421                   * in ip_output/ip_set_destination.
 421  422                   */
 422  423                  /* Length could be different */
 423  424                  ip6h = (ip6_t *)mp->b_rptr;
 424  425                  pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 425  426          }
 426  427  
 427  428          /*
 428  429           * If a callback is enabled then we need to know the
 429  430           * source and destination zoneids for the packet. We already
 430  431           * have those handy.
 431  432           */
 432  433          if (ipst->ips_ip6_observe.he_interested) {
 433  434                  zoneid_t szone, dzone;
 434  435                  zoneid_t stackzoneid;
 435  436  
 436  437                  stackzoneid = netstackid_to_zoneid(
 437  438                      ipst->ips_netstack->netstack_stackid);
 438  439  
 439  440                  if (stackzoneid == GLOBAL_ZONEID) {
 440  441                          /* Shared-IP zone */
 441  442                          dzone = ire->ire_zoneid;
 442  443                          szone = ixa->ixa_zoneid;
 443  444                  } else {
 444  445                          szone = dzone = stackzoneid;
 445  446                  }
 446  447                  ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst);
 447  448          }
 448  449  
 449  450          /* Handle lo0 stats */
 450  451          ipst->ips_loopback_packets++;
 451  452  
 452  453          /*
 453  454           * Update output mib stats. Note that we can't move into the icmp
 454  455           * sender (icmp_output etc) since they don't know the ill and the
 455  456           * stats are per ill.
 456  457           */
 457  458          if (ixa->ixa_protocol == IPPROTO_ICMPV6) {
 458  459                  icmp6_t         *icmp6;
 459  460  
 460  461                  icmp6 = (icmp6_t *)((uchar_t *)ip6h + ixa->ixa_ip_hdr_length);
 461  462                  icmp_update_out_mib_v6(ill, icmp6);
 462  463          }
 463  464  
 464  465          DTRACE_PROBE4(ip6__loopback__in__start,
 465  466              ill_t *, ill, ill_t *, NULL,
 466  467              ip6_t *, ip6h, mblk_t *, mp);
 467  468  
 468  469          if (HOOKS6_INTERESTED_LOOPBACK_IN(ipst)) {
 469  470                  int     error;
 470  471  
 471  472                  FW_HOOKS(ipst->ips_ip6_loopback_in_event,
 472  473                      ipst->ips_ipv6firewall_loopback_in,
 473  474                      ill, NULL, ip6h, mp, mp, 0, ipst, error);
 474  475  
 475  476                  DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, mp);
 476  477                  if (mp == NULL)
 477  478                          return (error);
 478  479  
 479  480                  /*
 480  481                   * Even if the destination was changed by the filter we use the
 481  482                   * forwarding decision that was made based on the address
 482  483                   * in ip_output/ip_set_destination.
 483  484                   */
 484  485                  /* Length could be different */
 485  486                  ip6h = (ip6_t *)mp->b_rptr;
 486  487                  pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
 487  488          }
 488  489  
 489  490          DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
 490  491              ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
 491  492              int, 1);
 492  493  
 493  494          /* Map ixa to ira including IPsec policies */
 494  495          ipsec_out_to_in(ixa, ill, &iras);
                   /* Use the length re-read after the hooks, not ixa_pktlen */
 495  496          iras.ira_pktlen = pktlen;
 496  497  
 497  498          ire->ire_ib_pkt_count++;
 498  499          BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
 499  500          UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, pktlen);
 500  501  
 501  502          /* Destined to ire_zoneid - use that for fanout */
 502  503          iras.ira_zoneid = ire->ire_zoneid;
 503  504  
 504  505          if (is_system_labeled()) {
 505  506                  iras.ira_flags |= IRAF_SYSTEM_LABELED;
 506  507  
 507  508                  /*
 508  509                   * This updates ira_cred, ira_tsl and ira_free_flags based
 509  510                   * on the label. We don't expect this to ever fail for
 510  511                   * loopback packets, so we silently drop the packet should it
 511  512                   * fail.
 512  513                   */
 513  514                  if (!tsol_get_pkt_label(mp, IPV6_VERSION, &iras)) {
 514  515                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 515  516                          ip_drop_input("tsol_get_pkt_label", mp, ill);
 516  517                          freemsg(mp);
 517  518                          return (0);
 518  519                  }
 519  520                  ASSERT(iras.ira_tsl != NULL);
 520  521  
 521  522                  /* tsol_get_pkt_label sometimes does pullupmsg */
 522  523                  ip6h = (ip6_t *)mp->b_rptr;
 523  524          }
 524  525  
 525  526          ip_fanout_v6(mp, ip6h, &iras);
 526  527  
 527  528          /* We moved any IPsec refs from ixa to iras */
 528  529          ira_cleanup(&iras, B_FALSE);
 529  530          return (0);
 530  531  }
 531  532  
           /*
            * Adjust the hop limit and transmit flags of a packet that is being sent
            * with multirt.
            *
            * For an IRE_MULTICAST ire the hop limit is forced down to 1 and
            * IXAF_NO_TTL_CHANGE is always set. For any other ire the hop limit is
            * capped at ips_ip_multirt_ttl when that value is non-zero, and
            * IXAF_NO_TTL_CHANGE is set only if the cap was applied. In all cases
            * IXAF_IPV6_ADD_FRAGHDR is set on ixa.
            */
 532  533  static void
 533  534  multirt_check_v6(ire_t *ire, ip6_t *ip6h, ip_xmit_attr_t *ixa)
 534  535  {
 535  536          ip_stack_t *ipst = ixa->ixa_ipst;
 536  537  
 537  538          /* Limit the TTL on multirt packets. Do this even if IPV6_HOPLIMIT */
 538  539          if (ire->ire_type & IRE_MULTICAST) {
 539  540                  if (ip6h->ip6_hops > 1) {
 540  541                          ip2dbg(("ire_send_multirt_v6: forcing multicast "
 541  542                              "multirt TTL to 1 (was %d)\n", ip6h->ip6_hops));
 542  543                          ip6h->ip6_hops = 1;
 543  544                  }
 544  545                  ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
 545  546          } else if ((ipst->ips_ip_multirt_ttl > 0) &&
 546  547              (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl)) {
 547  548                  ip6h->ip6_hops = ipst->ips_ip_multirt_ttl;
 548  549                  /*
 549  550                   * Need to ensure we don't increase the ttl should we go through
 550  551                   * ire_send_multicast.
 551  552                   */
 552  553                  ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
 553  554          }
 554  555  
 555  556          /* For IPv6 this also needs to insert a fragment header */
 556  557          ixa->ixa_flags |= IXAF_IPV6_ADD_FRAGHDR;
 557  558  }
 558  559  
 559  560  /*
 560  561   * ire_sendfn for IRE_MULTICAST
 561  562   *
 562  563   * Note that we do path MTU discovery by default for IPv6 multicast. But
 563  564   * since unconnected UDP and RAW sockets don't set IXAF_PMTU_DISCOVERY
 564  565   * only connected sockets get this by default.
           *
           * Decides whether a loopback copy is needed (IXAF_LOOPBACK_COPY together
           * with IXAF_NO_HW_CKSUM), sets the multicast hop limit unless
           * IXAF_NO_TTL_CHANGE was set on entry, and returns the result of
           * ire_send_wire_v6().
 565  566   */
 566  567  int
 567  568  ire_send_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
 568  569      ip_xmit_attr_t *ixa, uint32_t *identp)
 569  570  {
 570  571          ip6_t           *ip6h = (ip6_t *)iph_arg;
 571  572          ip_stack_t      *ipst = ixa->ixa_ipst;
 572  573          ill_t           *ill = ire->ire_ill;
                   /* Snapshot of the flags on entry; the tests below use this copy */
 573  574          iaflags_t       ixaflags = ixa->ixa_flags;
 574  575  
 575  576          /*
 576  577           * The IRE_MULTICAST is the same whether or not multirt is in use.
 577  578           * Hence we need special-case code.
 578  579           */
 579  580          if (ixaflags & IXAF_MULTIRT_MULTICAST)
 580  581                  multirt_check_v6(ire, ip6h, ixa);
 581  582  
 582  583          /*
 583  584           * Check if anything in ip_input_v6 wants a copy of the transmitted
 584  585           * packet (after IPsec and fragmentation)
 585  586           *
 586  587           * 1. Multicast routers always need a copy unless SO_DONTROUTE is set
 587  588           *    RSVP and the rsvp daemon is an example of a
 588  589           *    protocol and user level process that
 589  590           *    handles its own routing. Hence, it uses the
 590  591           *    SO_DONTROUTE option to accomplish this.
 591  592           * 2. If the sender has set IP_MULTICAST_LOOP, then we just
 592  593           *    check whether there are any receivers for the group on the ill
 593  594           *    (ignoring the zoneid).
 594  595           * 3. If IP_MULTICAST_LOOP is not set, then we check if there are
 595  596           *    any members in other shared-IP zones.
 596  597           *    If such members exist, then we indicate that the sending zone
 597  598           *    shouldn't get a loopback copy to preserve the IP_MULTICAST_LOOP
 598  599           *    behavior.
 599  600           *
 600  601           * When we loopback we skip hardware checksum to make sure loopback
 601  602           * copy is checksummed.
 602  603           *
 603  604           * Note that ire_ill is the upper in the case of IPMP.
 604  605           */
 605  606          ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM);
 606  607          if (ipst->ips_ip_g_mrouter && ill->ill_mrouter_cnt > 0 &&
 607  608              !(ixaflags & IXAF_DONTROUTE)) {
 608  609                  ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
 609  610          } else if (ixaflags & IXAF_MULTICAST_LOOP) {
 610  611                  /*
 611  612                   * If this zone or any other zone has members then loopback
 612  613                   * a copy.
 613  614                   */
 614  615                  if (ill_hasmembers_v6(ill, &ip6h->ip6_dst))
 615  616                          ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
 616  617          } else if (ipst->ips_netstack->netstack_numzones > 1) {
 617  618                  /*
 618  619                   * This zone should not have a copy. But there are some other
 619  620                   * zones which might have members.
 620  621                   */
 621  622                  if (ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst,
 622  623                      ixa->ixa_zoneid)) {
 623  624                          ixa->ixa_flags |= IXAF_NO_LOOP_ZONEID_SET;
 624  625                          ixa->ixa_no_loop_zoneid = ixa->ixa_zoneid;
 625  626                          ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
 626  627                  }
 627  628          }
 628  629  
 629  630          /*
 630  631           * Unless IPV6_HOPLIMIT or ire_send_multirt_v6 already set a ttl,
 631  632           * force the ttl to the IP_MULTICAST_TTL value
 632  633           */
 633  634          if (!(ixaflags & IXAF_NO_TTL_CHANGE)) {
 634  635                  ip6h->ip6_hops = ixa->ixa_multicast_ttl;
 635  636          }
 636  637  
 637  638          return (ire_send_wire_v6(ire, mp, ip6h, ixa, identp));
 638  639  }
 639  640  
 640  641  /*
 641  642   * ire_sendfn for IREs with RTF_MULTIRT
           *
           * Applies the multirt hop-limit and flag adjustments via
           * multirt_check_v6(), then hands the packet to ire_send_multicast_v6()
           * for an IRE_MULTICAST ire or to ire_send_wire_v6() otherwise, returning
           * that function's result.
 642  643   */
 643  644  int
 644  645  ire_send_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
 645  646      ip_xmit_attr_t *ixa, uint32_t *identp)
 646  647  {
 647  648          ip6_t           *ip6h = (ip6_t *)iph_arg;
 648  649  
 649  650          multirt_check_v6(ire, ip6h, ixa);
 650  651  
 651  652          if (ire->ire_type & IRE_MULTICAST)
 652  653                  return (ire_send_multicast_v6(ire, mp, ip6h, ixa, identp));
 653  654          else
 654  655                  return (ire_send_wire_v6(ire, mp, ip6h, ixa, identp));
 655  656  }
 656  657  
 657  658  /*
 658  659   * ire_sendfn for IREs with RTF_REJECT/RTF_BLACKHOLE, including IRE_NOROUTE
 659  660   */
 660  661  /* ARGSUSED4 */
 661  662  int
 662  663  ire_send_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
 663  664      ip_xmit_attr_t *ixa, uint32_t *identp)
 664  665  {
 665  666          ip6_t           *ip6h = (ip6_t *)iph_arg;
 666  667          ip_stack_t      *ipst = ixa->ixa_ipst;
 667  668          ill_t           *ill;
 668  669          ip_recv_attr_t  iras;
 669  670          boolean_t       dummy;
 670  671  
 671  672          BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes);
 672  673  
 673  674          if (ire->ire_type & IRE_NOROUTE) {
 674  675                  /* A lack of a route as opposed to RTF_REJECT|BLACKHOLE */
 675  676                  ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0,
 676  677                      RTA_DST, ipst);
 677  678          }
 678  679  
 679  680          if (ire->ire_flags & RTF_BLACKHOLE) {
 680  681                  ip_drop_output("ipIfStatsOutNoRoutes RTF_BLACKHOLE", mp, NULL);
 681  682                  freemsg(mp);
 682  683                  /* No error even for local senders - silent blackhole */
 683  684                  return (0);
 684  685          }
 685  686          ip_drop_output("ipIfStatsOutNoRoutes RTF_REJECT", mp, NULL);
 686  687  
 687  688          /*
 688  689           * We need an ill_t for the ip_recv_attr_t even though this packet
 689  690           * was never received and icmp_unreachable doesn't currently use
 690  691           * ira_ill.
 691  692           */
 692  693          ill = ill_lookup_on_name("lo0", B_FALSE,
 693  694              !(ixa->ixa_flags & IRAF_IS_IPV4), &dummy, ipst);
 694  695          if (ill == NULL) {
 695  696                  freemsg(mp);
 696  697                  return (EHOSTUNREACH);
 697  698          }
 698  699  
 699  700          bzero(&iras, sizeof (iras));
 700  701          /* Map ixa to ira including IPsec policies */
 701  702          ipsec_out_to_in(ixa, ill, &iras);
 702  703  
 703  704          icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE, &iras);
 704  705          /* We moved any IPsec refs from ixa to iras */
 705  706          ira_cleanup(&iras, B_FALSE);
 706  707  
 707  708          ill_refrele(ill);
 708  709          return (EHOSTUNREACH);
 709  710  }
 710  711  
 711  712  /*
 712  713   * Calculate a checksum ignoring any hardware capabilities
 713  714   *
 714  715   * Returns B_FALSE if the packet was too short for the checksum. Caller
 715  716   * should free and do stats.
 716  717   */
 717  718  static boolean_t
 718  719  ip_output_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_xmit_attr_t *ixa)
 719  720  {
 720  721          ip_stack_t      *ipst = ixa->ixa_ipst;
 721  722          uint_t          pktlen = ixa->ixa_pktlen;
 722  723          uint16_t        *cksump;
 723  724          uint32_t        cksum;
 724  725          uint8_t         protocol = ixa->ixa_protocol;
 725  726          uint16_t        ip_hdr_length = ixa->ixa_ip_hdr_length;
 726  727  
 727  728  #define iphs    ((uint16_t *)ip6h)
 728  729  
 729  730          /* Just in case it contained garbage */
 730  731          DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS;
 731  732  
 732  733          /*
 733  734           * Calculate ULP checksum
 734  735           */
 735  736          if (protocol == IPPROTO_TCP) {
 736  737                  cksump = IPH_TCPH_CHECKSUMP(ip6h, ip_hdr_length);
 737  738                  cksum = IP_TCP_CSUM_COMP;
 738  739          } else if (protocol == IPPROTO_UDP) {
 739  740                  cksump = IPH_UDPH_CHECKSUMP(ip6h, ip_hdr_length);
 740  741                  cksum = IP_UDP_CSUM_COMP;
 741  742          } else if (protocol == IPPROTO_SCTP) {
 742  743                  sctp_hdr_t      *sctph;
 743  744  
 744  745                  ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph)));
 745  746                  sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length);
 746  747                  /*
 747  748                   * Zero out the checksum field to ensure proper
 748  749                   * checksum calculation.
 749  750                   */
 750  751                  sctph->sh_chksum = 0;
 751  752  #ifdef  DEBUG
 752  753                  if (!skip_sctp_cksum)
 753  754  #endif
 754  755                          sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length);
 755  756                  return (B_TRUE);
 756  757          } else if (ixa->ixa_flags & IXAF_SET_RAW_CKSUM) {
 757  758                  /*
 758  759                   * icmp has placed length and routing
 759  760                   * header adjustment in the checksum field.
 760  761                   */
 761  762                  cksump = (uint16_t *)(((uint8_t *)ip6h) + ip_hdr_length +
 762  763                      ixa->ixa_raw_cksum_offset);
 763  764                  cksum = htons(protocol);
 764  765          } else if (protocol == IPPROTO_ICMPV6) {
 765  766                  cksump = IPH_ICMPV6_CHECKSUMP(ip6h, ip_hdr_length);
 766  767                  cksum = IP_ICMPV6_CSUM_COMP;    /* Pseudo-header cksum */
 767  768          } else {
 768  769                  return (B_TRUE);
 769  770          }
 770  771  
 771  772          /* ULP puts the checksum field is in the first mblk */
 772  773          ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr);
 773  774  
 774  775          /*
 775  776           * We accumulate the pseudo header checksum in cksum.
 776  777           * This is pretty hairy code, so watch close.  One
 777  778           * thing to keep in mind is that UDP and TCP have
 778  779           * stored their respective datagram lengths in their
 779  780           * checksum fields.  This lines things up real nice.
 780  781           */
 781  782          cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
 782  783              iphs[8] + iphs[9] + iphs[10] + iphs[11] +
 783  784              iphs[12] + iphs[13] + iphs[14] + iphs[15] +
 784  785              iphs[16] + iphs[17] + iphs[18] + iphs[19];
 785  786          cksum = IP_CSUM(mp, ip_hdr_length, cksum);
 786  787  
 787  788          /*
 788  789           * For UDP/IPv6 a zero UDP checksum is not allowed.
 789  790           * Change to 0xffff
 790  791           */
 791  792          if (protocol == IPPROTO_UDP && cksum == 0)
 792  793                  *cksump = ~cksum;
 793  794          else
 794  795                  *cksump = cksum;
 795  796  
 796  797          IP6_STAT(ipst, ip6_out_sw_cksum);
 797  798          IP6_STAT_UPDATE(ipst, ip6_out_sw_cksum_bytes, pktlen);
 798  799  
 799  800          /* No IP header checksum for IPv6 */
 800  801  
 801  802          return (B_TRUE);
 802  803  #undef  iphs
 803  804  }
 804  805  
 805  806  /* There are drivers that can't do partial checksum for ICMPv6 */
 806  807  int nxge_cksum_workaround = 1;
 807  808  
 808  809  /*
 809  810   * Calculate the ULP checksum - try to use hardware.
 810  811   * In the case of MULTIRT or multicast the
 811  812   * IXAF_NO_HW_CKSUM is set in which case we use software.
 812  813   *
 813  814   * Returns B_FALSE if the packet was too short for the checksum. Caller
 814  815   * should free and do stats.
 815  816   */
 816  817  static boolean_t
 817  818  ip_output_cksum_v6(iaflags_t ixaflags, mblk_t *mp, ip6_t *ip6h,
 818  819      ip_xmit_attr_t *ixa, ill_t *ill)
 819  820  {
 820  821          uint_t          pktlen = ixa->ixa_pktlen;
 821  822          uint16_t        *cksump;
 822  823          uint16_t        hck_flags;
 823  824          uint32_t        cksum;
 824  825          uint8_t         protocol = ixa->ixa_protocol;
 825  826          uint16_t        ip_hdr_length = ixa->ixa_ip_hdr_length;
 826  827  
 827  828  #define iphs    ((uint16_t *)ip6h)
 828  829  
 829  830          if ((ixaflags & IXAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
 830  831              !dohwcksum) {
 831  832                  return (ip_output_sw_cksum_v6(mp, ip6h, ixa));
 832  833          }
 833  834  
 834  835          /*
 835  836           * Calculate ULP checksum. Note that we don't use cksump and cksum
 836  837           * if the ill has FULL support.
 837  838           */
 838  839          if (protocol == IPPROTO_TCP) {
 839  840                  cksump = IPH_TCPH_CHECKSUMP(ip6h, ip_hdr_length);
 840  841                  cksum = IP_TCP_CSUM_COMP;       /* Pseudo-header cksum */
 841  842          } else if (protocol == IPPROTO_UDP) {
 842  843                  cksump = IPH_UDPH_CHECKSUMP(ip6h, ip_hdr_length);
 843  844                  cksum = IP_UDP_CSUM_COMP;       /* Pseudo-header cksum */
 844  845          } else if (protocol == IPPROTO_SCTP) {
 845  846                  sctp_hdr_t      *sctph;
 846  847  
 847  848                  ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph)));
 848  849                  sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length);
 849  850                  /*
 850  851                   * Zero out the checksum field to ensure proper
 851  852                   * checksum calculation.
 852  853                   */
 853  854                  sctph->sh_chksum = 0;
 854  855  #ifdef  DEBUG
 855  856                  if (!skip_sctp_cksum)
 856  857  #endif
 857  858                          sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length);
 858  859                  goto ip_hdr_cksum;
 859  860          } else if (ixa->ixa_flags & IXAF_SET_RAW_CKSUM) {
 860  861                  /*
 861  862                   * icmp has placed length and routing
 862  863                   * header adjustment in the checksum field.
 863  864                   */
 864  865                  cksump = (uint16_t *)(((uint8_t *)ip6h) + ip_hdr_length +
 865  866                      ixa->ixa_raw_cksum_offset);
 866  867                  cksum = htons(protocol);
 867  868          } else if (protocol == IPPROTO_ICMPV6) {
 868  869                  cksump = IPH_ICMPV6_CHECKSUMP(ip6h, ip_hdr_length);
 869  870                  cksum = IP_ICMPV6_CSUM_COMP;    /* Pseudo-header cksum */
 870  871          } else {
 871  872          ip_hdr_cksum:
 872  873                  /* No IP header checksum for IPv6 */
 873  874                  return (B_TRUE);
 874  875          }
 875  876  
 876  877          /* ULP puts the checksum field is in the first mblk */
 877  878          ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr);
 878  879  
 879  880          /*
 880  881           * Underlying interface supports hardware checksum offload for
 881  882           * the payload; leave the payload checksum for the hardware to
 882  883           * calculate.  N.B: We only need to set up checksum info on the
 883  884           * first mblk.
 884  885           */
 885  886          hck_flags = ill->ill_hcksum_capab->ill_hcksum_txflags;
 886  887  
 887  888          DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS;
 888  889          if (hck_flags & HCKSUM_INET_FULL_V6) {
 889  890                  /*
 890  891                   * Hardware calculates pseudo-header, header and the
 891  892                   * payload checksums, so clear the checksum field in
 892  893                   * the protocol header.
 893  894                   */
 894  895                  *cksump = 0;
 895  896                  DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;
 896  897                  return (B_TRUE);
 897  898          }
 898  899          if (((hck_flags) & HCKSUM_INET_PARTIAL) &&
 899  900              (protocol != IPPROTO_ICMPV6 || !nxge_cksum_workaround)) {
 900  901                  /*
 901  902                   * Partial checksum offload has been enabled.  Fill
 902  903                   * the checksum field in the protocol header with the
 903  904                   * pseudo-header checksum value.
 904  905                   *
 905  906                   * We accumulate the pseudo header checksum in cksum.
 906  907                   * This is pretty hairy code, so watch close.  One
 907  908                   * thing to keep in mind is that UDP and TCP have
 908  909                   * stored their respective datagram lengths in their
 909  910                   * checksum fields.  This lines things up real nice.
 910  911                   */
 911  912                  cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
 912  913                      iphs[8] + iphs[9] + iphs[10] + iphs[11] +
 913  914                      iphs[12] + iphs[13] + iphs[14] + iphs[15] +
 914  915                      iphs[16] + iphs[17] + iphs[18] + iphs[19];
 915  916                  cksum += *(cksump);
 916  917                  cksum = (cksum & 0xFFFF) + (cksum >> 16);
 917  918                  *(cksump) = (cksum & 0xFFFF) + (cksum >> 16);
 918  919  
 919  920                  /*
 920  921                   * Offsets are relative to beginning of IP header.
 921  922                   */
 922  923                  DB_CKSUMSTART(mp) = ip_hdr_length;
 923  924                  DB_CKSUMSTUFF(mp) = (uint8_t *)cksump - (uint8_t *)ip6h;
 924  925                  DB_CKSUMEND(mp) = pktlen;
 925  926                  DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM;
 926  927                  return (B_TRUE);
 927  928          }
 928  929          /* Hardware capabilities include neither full nor partial IPv6 */
 929  930          return (ip_output_sw_cksum_v6(mp, ip6h, ixa));
 930  931  #undef  iphs
 931  932  }
 932  933  
 933  934  /*
 934  935   * ire_sendfn for offlink and onlink destinations.
 935  936   * Also called from the multicast, and multirt send functions.
 936  937   *
 937  938   * Assumes that the caller has a hold on the ire.
 938  939   *
 939  940   * This function doesn't care if the IRE just became condemned since that
 940  941   * can happen at any time.
 941  942   */
 942  943  /* ARGSUSED */
 943  944  int
 944  945  ire_send_wire_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
 945  946      ip_xmit_attr_t *ixa, uint32_t *identp)
 946  947  {
 947  948          ip_stack_t      *ipst = ixa->ixa_ipst;
 948  949          ip6_t           *ip6h = (ip6_t *)iph_arg;
 949  950          iaflags_t       ixaflags = ixa->ixa_flags;
 950  951          ill_t           *ill;
 951  952          uint32_t        pktlen = ixa->ixa_pktlen;
 952  953  
 953  954          ASSERT(ixa->ixa_nce != NULL);
 954  955          ill = ixa->ixa_nce->nce_ill;
 955  956  
 956  957          /*
 957  958           * Update output mib stats. Note that we can't move into the icmp
 958  959           * sender (icmp_output etc) since they don't know the ill and the
 959  960           * stats are per ill.
 960  961           *
 961  962           * With IPMP we record the stats on the upper ill.
 962  963           */
 963  964          if (ixa->ixa_protocol == IPPROTO_ICMPV6) {
 964  965                  icmp6_t         *icmp6;
 965  966  
 966  967                  icmp6 = (icmp6_t *)((uchar_t *)ip6h + ixa->ixa_ip_hdr_length);
 967  968                  icmp_update_out_mib_v6(ixa->ixa_nce->nce_common->ncec_ill,
 968  969                      icmp6);
 969  970          }
 970  971  
 971  972          if (ixaflags & IXAF_DONTROUTE)
 972  973                  ip6h->ip6_hops = 1;
 973  974  
 974  975          /*
 975  976           * This might set b_band, thus the IPsec and fragmentation
 976  977           * code in IP ensures that b_band is updated in the first mblk.
 977  978           */
 978  979          if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) {
 979  980                  /* ip_process translates an IS_UNDER_IPMP */
 980  981                  mp = ip_process(IPP_LOCAL_OUT, mp, ill, ill);
 981  982                  if (mp == NULL) {
 982  983                          /* ip_drop_packet and MIB done */
 983  984                          return (0);     /* Might just be delayed */
 984  985                  }
 985  986          }
 986  987  
 987  988          /*
 988  989           * To handle IPsec/iptun's labeling needs we need to tag packets
 989  990           * while we still have ixa_tsl
 990  991           */
 991  992          if (is_system_labeled() && ixa->ixa_tsl != NULL &&
 992  993              (ill->ill_mactype == DL_6TO4 || ill->ill_mactype == DL_IPV4 ||
 993  994              ill->ill_mactype == DL_IPV6)) {
 994  995                  cred_t *newcr;
 995  996  
 996  997                  newcr = copycred_from_tslabel(ixa->ixa_cred, ixa->ixa_tsl,
 997  998                      KM_NOSLEEP);
 998  999                  if (newcr == NULL) {
 999 1000                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1000 1001                          ip_drop_output("ipIfStatsOutDiscards - newcr",
1001 1002                              mp, ill);
1002 1003                          freemsg(mp);
1003 1004                          return (ENOBUFS);
1004 1005                  }
1005 1006                  mblk_setcred(mp, newcr, NOPID);
1006 1007                  crfree(newcr);  /* mblk_setcred did its own crhold */
1007 1008          }
1008 1009  
1009 1010          /*
1010 1011           * IXAF_IPV6_ADD_FRAGHDR is set for CGTP so that we will add a
1011 1012           * fragment header without fragmenting. CGTP on the receiver will
1012 1013           * filter duplicates on the ident field.
1013 1014           */
1014 1015          if (pktlen > ixa->ixa_fragsize ||
1015 1016              (ixaflags & (IXAF_IPSEC_SECURE|IXAF_IPV6_ADD_FRAGHDR))) {
1016 1017                  uint32_t ident;
1017 1018  
1018 1019                  if (ixaflags & IXAF_IPSEC_SECURE)
1019 1020                          pktlen += ipsec_out_extra_length(ixa);
1020 1021  
1021 1022                  if (pktlen > IP_MAXPACKET)
1022 1023                          return (EMSGSIZE);
1023 1024  
1024 1025                  if (ixaflags & IXAF_SET_ULP_CKSUM) {
1025 1026                          /*
1026 1027                           * Compute ULP checksum using software
1027 1028                           */
1028 1029                          if (!ip_output_sw_cksum_v6(mp, ip6h, ixa)) {
1029 1030                                  BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1030 1031                                  ip_drop_output("ipIfStatsOutDiscards", mp, ill);
1031 1032                                  freemsg(mp);
1032 1033                                  return (EINVAL);
1033 1034                          }
1034 1035                          /* Avoid checksum again below if we only add fraghdr */
1035 1036                          ixaflags &= ~IXAF_SET_ULP_CKSUM;
1036 1037                  }
1037 1038  
1038 1039                  /*
1039 1040                   * If we need a fragment header, pick the ident and insert
1040 1041                   * the header before IPsec to we have a place to store
1041 1042                   * the ident value.
1042 1043                   */
1043 1044                  if ((ixaflags & IXAF_IPV6_ADD_FRAGHDR) ||
1044 1045                      pktlen > ixa->ixa_fragsize) {
1045 1046                          /*
1046 1047                           * If this packet would generate a icmp_frag_needed
1047 1048                           * message, we need to handle it before we do the IPsec
1048 1049                           * processing. Otherwise, we need to strip the IPsec
1049 1050                           * headers before we send up the message to the ULPs
1050 1051                           * which becomes messy and difficult.
1051 1052                           */
1052 1053                          if ((pktlen > ixa->ixa_fragsize) &&
1053 1054                              (ixaflags & IXAF_DONTFRAG)) {
1054 1055                                  /* Generate ICMP and return error */
1055 1056                                  ip_recv_attr_t  iras;
1056 1057  
1057 1058                                  DTRACE_PROBE4(ip6__fragsize__fail,
1058 1059                                      uint_t, pktlen, uint_t, ixa->ixa_fragsize,
1059 1060                                      uint_t, ixa->ixa_pktlen,
1060 1061                                      uint_t, ixa->ixa_pmtu);
1061 1062  
1062 1063                                  bzero(&iras, sizeof (iras));
1063 1064                                  /* Map ixa to ira including IPsec policies */
1064 1065                                  ipsec_out_to_in(ixa, ill, &iras);
1065 1066  
1066 1067                                  ip_drop_output("ICMP6_PKT_TOO_BIG", mp, ill);
1067 1068                                  icmp_pkt2big_v6(mp, ixa->ixa_fragsize, B_TRUE,
1068 1069                                      &iras);
1069 1070                                  /* We moved any IPsec refs from ixa to iras */
1070 1071                                  ira_cleanup(&iras, B_FALSE);
1071 1072                                  return (EMSGSIZE);
1072 1073                          }
1073 1074                          DTRACE_PROBE4(ip6__fragsize__ok, uint_t, pktlen,
1074 1075                              uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen,
1075 1076                              uint_t, ixa->ixa_pmtu);
1076 1077                          /*
1077 1078                           * Assign an ident value for this packet. There could
1078 1079                           * be other threads targeting the same destination, so
1079 1080                           * we have to arrange for a atomic increment.
1080 1081                           * Normally ixa_extra_ident is 0, but in the case of
1081 1082                           * LSO it will be the number of TCP segments  that the
1082 1083                           * driver/hardware will extraly construct.
1083 1084                           *
1084 1085                           * Note that cl_inet_ipident has only been used for
1085 1086                           * IPv4. We don't use it here.
1086 1087                           */
1087 1088                          ident = atomic_add_32_nv(identp, ixa->ixa_extra_ident +
1088 1089                              1);
1089 1090                          ixa->ixa_ident = ident; /* In case we do IPsec */
1090 1091                  }
1091 1092                  if (ixaflags & IXAF_IPSEC_SECURE) {
1092 1093                          /*
1093 1094                           * Pass in sufficient information so that
1094 1095                           * IPsec can determine whether to fragment, and
1095 1096                           * which function to call after fragmentation.
1096 1097                           */
1097 1098                          return (ipsec_out_process(mp, ixa));
1098 1099                  }
1099 1100  
1100 1101                  mp = ip_fraghdr_add_v6(mp, ident, ixa);
1101 1102                  if (mp == NULL) {
1102 1103                          /* MIB and ip_drop_output already done */
1103 1104                          return (ENOMEM);
1104 1105                  }
1105 1106                  ASSERT(pktlen == ixa->ixa_pktlen);
1106 1107                  pktlen += sizeof (ip6_frag_t);
1107 1108  
1108 1109                  if (pktlen > ixa->ixa_fragsize) {
1109 1110                          return (ip_fragment_v6(mp, ixa->ixa_nce, ixaflags,
1110 1111                              pktlen, ixa->ixa_fragsize,
1111 1112                              ixa->ixa_xmit_hint, ixa->ixa_zoneid,
1112 1113                              ixa->ixa_no_loop_zoneid, ixa->ixa_postfragfn,
1113 1114                              &ixa->ixa_cookie));
1114 1115                  }
1115 1116          }
1116 1117          if (ixaflags & IXAF_SET_ULP_CKSUM) {
1117 1118                  /* Compute ULP checksum and IP header checksum */
1118 1119                  /* An IS_UNDER_IPMP ill is ok here */
1119 1120                  if (!ip_output_cksum_v6(ixaflags, mp, ip6h, ixa, ill)) {
1120 1121                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1121 1122                          ip_drop_output("ipIfStatsOutDiscards", mp, ill);
1122 1123                          freemsg(mp);
1123 1124                          return (EINVAL);
1124 1125                  }
1125 1126          }
1126 1127          return ((ixa->ixa_postfragfn)(mp, ixa->ixa_nce, ixaflags,
1127 1128              pktlen, ixa->ixa_xmit_hint, ixa->ixa_zoneid,
1128 1129              ixa->ixa_no_loop_zoneid, &ixa->ixa_cookie));
1129 1130  }
1130 1131  
1131 1132  /*
1132 1133   * Post fragmentation function for RTF_MULTIRT routes.
1133 1134   * Since IRE_MULTICASTs might have RTF_MULTIRT, this function
1134 1135   * checks IXAF_LOOPBACK_COPY.
1135 1136   *
1136 1137   * If no packet is sent due to failures then we return an errno, but if at
1137 1138   * least one succeeded we return zero.
1138 1139   */
1139 1140  int
1140 1141  ip_postfrag_multirt_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags,
1141 1142      uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
1142 1143      uintptr_t *ixacookie)
1143 1144  {
1144 1145          irb_t           *irb;
1145 1146          ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
1146 1147          ire_t           *ire;
1147 1148          ire_t           *ire1;
1148 1149          mblk_t          *mp1;
1149 1150          nce_t           *nce1;
1150 1151          ill_t           *ill = nce->nce_ill;
1151 1152          ill_t           *ill1;
1152 1153          ip_stack_t      *ipst = ill->ill_ipst;
1153 1154          int             error = 0;
1154 1155          int             num_sent = 0;
1155 1156          int             err;
1156 1157          uint_t          ire_type;
1157 1158          in6_addr_t      nexthop;
1158 1159  
1159 1160          ASSERT(!(ixaflags & IXAF_IS_IPV4));
1160 1161  
1161 1162          /* Check for IXAF_LOOPBACK_COPY */
1162 1163          if (ixaflags & IXAF_LOOPBACK_COPY) {
1163 1164                  mblk_t *mp1;
1164 1165  
1165 1166                  mp1 = copymsg(mp);
1166 1167                  if (mp1 == NULL) {
1167 1168                          /* Failed to deliver the loopback copy. */
1168 1169                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1169 1170                          ip_drop_output("ipIfStatsOutDiscards", mp, ill);
1170 1171                          error = ENOBUFS;
1171 1172                  } else {
1172 1173                          ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len,
1173 1174                              nolzid);
1174 1175                  }
1175 1176          }
1176 1177  
1177 1178          /*
1178 1179           * Loop over RTF_MULTIRT for ip6_dst in the same bucket. Send
1179 1180           * a copy to each one.
1180 1181           * Use the nce (nexthop) and ip6_dst to find the ire.
1181 1182           *
1182 1183           * MULTIRT is not designed to work with shared-IP zones thus we don't
1183 1184           * need to pass a zoneid or a label to the IRE lookup.
1184 1185           */
1185 1186          if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, &ip6h->ip6_dst)) {
1186 1187                  /* Broadcast and multicast case */
1187 1188                  ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, NULL,
1188 1189                      ALL_ZONES, NULL, MATCH_IRE_DSTONLY, 0, ipst, NULL);
1189 1190          } else {
1190 1191                  /* Unicast case */
1191 1192                  ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, &nce->nce_addr,
1192 1193                      0, NULL, ALL_ZONES, NULL, MATCH_IRE_GW, 0, ipst, NULL);
1193 1194          }
1194 1195  
1195 1196          if (ire == NULL ||
1196 1197              (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1197 1198              !(ire->ire_flags & RTF_MULTIRT)) {
1198 1199                  /* Drop */
1199 1200                  ip_drop_output("ip_postfrag_multirt didn't find route",
1200 1201                      mp, nce->nce_ill);
1201 1202                  if (ire != NULL)
1202 1203                          ire_refrele(ire);
1203 1204                  return (ENETUNREACH);
1204 1205          }
1205 1206  
1206 1207          irb = ire->ire_bucket;
1207 1208          irb_refhold(irb);
1208 1209          for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
1209 1210                  if (IRE_IS_CONDEMNED(ire1) ||
1210 1211                      !(ire1->ire_flags & RTF_MULTIRT))
1211 1212                          continue;
1212 1213  
1213 1214                  /* Note: When IPv6 uses radix tree we don't need this check */
1214 1215                  if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
1215 1216                          continue;
1216 1217  
1217 1218                  /* Do the ire argument one after the loop */
1218 1219                  if (ire1 == ire)
1219 1220                          continue;
1220 1221  
1221 1222                  ill1 = ire_nexthop_ill(ire1);
1222 1223                  if (ill1 == NULL) {
1223 1224                          /*
1224 1225                           * This ire might not have been picked by
1225 1226                           * ire_route_recursive, in which case ire_dep might
1226 1227                           * not have been setup yet.
1227 1228                           * We kick ire_route_recursive to try to resolve
1228 1229                           * starting at ire1.
1229 1230                           */
1230 1231                          ire_t *ire2;
1231 1232                          uint_t match_flags = MATCH_IRE_DSTONLY;
1232 1233  
1233 1234                          if (ire1->ire_ill != NULL)
1234 1235                                  match_flags |= MATCH_IRE_ILL;
1235 1236                          ire2 = ire_route_recursive_impl_v6(ire1,
1236 1237                              &ire1->ire_addr_v6, ire1->ire_type, ire1->ire_ill,
1237 1238                              ire1->ire_zoneid, NULL, match_flags,
1238 1239                              IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL);
1239 1240                          if (ire2 != NULL)
1240 1241                                  ire_refrele(ire2);
1241 1242                          ill1 = ire_nexthop_ill(ire1);
1242 1243                  }
1243 1244                  if (ill1 == NULL) {
1244 1245                          BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1245 1246                          ip_drop_output("ipIfStatsOutDiscards - no ill",
1246 1247                              mp, ill);
1247 1248                          error = ENETUNREACH;
1248 1249                          continue;
1249 1250                  }
1250 1251                  /* Pick the addr and type to use for ndp_nce_init */
1251 1252                  if (nce->nce_common->ncec_flags & NCE_F_MCAST) {
1252 1253                          ire_type = IRE_MULTICAST;
1253 1254                          nexthop = ip6h->ip6_dst;
1254 1255                  } else {
1255 1256                          ire_type = ire1->ire_type;      /* Doesn't matter */
1256 1257                          nexthop = ire1->ire_gateway_addr_v6;
1257 1258                  }
1258 1259  
1259 1260                  /* If IPMP meta or under, then we just drop */
1260 1261                  if (ill1->ill_grp != NULL) {
1261 1262                          BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
1262 1263                          ip_drop_output("ipIfStatsOutDiscards - IPMP",
1263 1264                              mp, ill1);
1264 1265                          ill_refrele(ill1);
1265 1266                          error = ENETUNREACH;
1266 1267                          continue;
1267 1268                  }
1268 1269  
1269 1270                  nce1 = ndp_nce_init(ill1, &nexthop, ire_type);
1270 1271                  if (nce1 == NULL) {
1271 1272                          BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
1272 1273                          ip_drop_output("ipIfStatsOutDiscards - no nce",
1273 1274                              mp, ill1);
1274 1275                          ill_refrele(ill1);
1275 1276                          error = ENOBUFS;
1276 1277                          continue;
1277 1278                  }
1278 1279                  mp1 = copymsg(mp);
1279 1280                  if (mp1 == NULL) {
1280 1281                          BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
1281 1282                          ip_drop_output("ipIfStatsOutDiscards", mp, ill1);
1282 1283                          nce_refrele(nce1);
1283 1284                          ill_refrele(ill1);
1284 1285                          error = ENOBUFS;
1285 1286                          continue;
1286 1287                  }
1287 1288                  /* Preserve HW checksum for this copy */
1288 1289                  DB_CKSUMSTART(mp1) = DB_CKSUMSTART(mp);
1289 1290                  DB_CKSUMSTUFF(mp1) = DB_CKSUMSTUFF(mp);
1290 1291                  DB_CKSUMEND(mp1) = DB_CKSUMEND(mp);
1291 1292                  DB_CKSUMFLAGS(mp1) = DB_CKSUMFLAGS(mp);
1292 1293                  DB_LSOMSS(mp1) = DB_LSOMSS(mp);
1293 1294  
1294 1295                  ire1->ire_ob_pkt_count++;
1295 1296                  err = ip_xmit(mp1, nce1, ixaflags, pkt_len, xmit_hint, szone,
1296 1297                      0, ixacookie);
1297 1298                  if (err == 0)
1298 1299                          num_sent++;
1299 1300                  else
1300 1301                          error = err;
1301 1302                  nce_refrele(nce1);
1302 1303                  ill_refrele(ill1);
1303 1304          }
1304 1305          irb_refrele(irb);
1305 1306          ire_refrele(ire);
1306 1307          /* Finally, the main one */
1307 1308          err = ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0,
1308 1309              ixacookie);
1309 1310          if (err == 0)
1310 1311                  num_sent++;
1311 1312          else
1312 1313                  error = err;
1313 1314          if (num_sent > 0)
1314 1315                  return (0);
1315 1316          else
1316 1317                  return (error);
1317 1318  }
  
    | 
      ↓ open down ↓ | 
    1059 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX