Print this page
    
OS-4699 lxbrand netty complains about SO_LINGER (really IP_TOS)
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4018 lxbrand support TCP SO_REUSEPORT
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ip/conn_opt.c
          +++ new/usr/src/uts/common/inet/ip/conn_opt.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright 2016 Joyent, Inc.
  24   25   */
  25   26  /* Copyright (c) 1990 Mentat Inc. */
  26   27  
  27   28  #include <sys/types.h>
  28   29  #include <sys/stream.h>
  29   30  #include <sys/strsun.h>
  30   31  #define _SUN_TPI_VERSION 2
  31   32  #include <sys/tihdr.h>
  32   33  #include <sys/xti_inet.h>
  33   34  #include <sys/ucred.h>
  34   35  #include <sys/zone.h>
  35   36  #include <sys/ddi.h>
  36   37  #include <sys/sunddi.h>
  37   38  #include <sys/cmn_err.h>
  38   39  #include <sys/debug.h>
  39   40  #include <sys/atomic.h>
  40   41  #include <sys/policy.h>
  41   42  
  42   43  #include <sys/systm.h>
  43   44  #include <sys/param.h>
  44   45  #include <sys/kmem.h>
  45   46  #include <sys/sdt.h>
  46   47  #include <sys/socket.h>
  47   48  #include <sys/ethernet.h>
  48   49  #include <sys/mac.h>
  49   50  #include <net/if.h>
  50   51  #include <net/if_types.h>
  51   52  #include <net/if_arp.h>
  52   53  #include <net/route.h>
  53   54  #include <sys/sockio.h>
  54   55  #include <netinet/in.h>
  55   56  #include <net/if_dl.h>
  56   57  
  57   58  #include <inet/common.h>
  58   59  #include <inet/mi.h>
  59   60  #include <inet/mib2.h>
  60   61  #include <inet/nd.h>
  61   62  #include <inet/arp.h>
  62   63  #include <inet/snmpcom.h>
  63   64  #include <inet/kstatcom.h>
  64   65  
  65   66  #include <netinet/igmp_var.h>
  66   67  #include <netinet/ip6.h>
  67   68  #include <netinet/icmp6.h>
  68   69  #include <netinet/sctp.h>
  69   70  
  70   71  #include <inet/ip.h>
  71   72  #include <inet/ip_impl.h>
  72   73  #include <inet/ip6.h>
  73   74  #include <inet/ip6_asp.h>
  74   75  #include <inet/tcp.h>
  75   76  #include <inet/ip_multi.h>
  76   77  #include <inet/ip_if.h>
  77   78  #include <inet/ip_ire.h>
  78   79  #include <inet/ip_ftable.h>
  79   80  #include <inet/ip_rts.h>
  80   81  #include <inet/optcom.h>
  81   82  #include <inet/ip_ndp.h>
  82   83  #include <inet/ip_listutils.h>
  83   84  #include <netinet/igmp.h>
  84   85  #include <netinet/ip_mroute.h>
  85   86  #include <netinet/udp.h>
  86   87  #include <inet/ipp_common.h>
  87   88  
  88   89  #include <net/pfkeyv2.h>
  89   90  #include <inet/sadb.h>
  90   91  #include <inet/ipsec_impl.h>
  91   92  #include <inet/ipdrop.h>
  92   93  #include <inet/ip_netinfo.h>
  93   94  
  94   95  #include <inet/ipclassifier.h>
  95   96  #include <inet/sctp_ip.h>
  96   97  #include <inet/sctp/sctp_impl.h>
  97   98  #include <inet/udp_impl.h>
  98   99  #include <sys/sunddi.h>
  99  100  
 100  101  #include <sys/tsol/label.h>
 101  102  #include <sys/tsol/tnet.h>
 102  103  
 103  104  /*
 104  105   * Return how much size is needed for the different ancillary data items
            *
            * For each receive option enabled in recv_ancillary (and, where tested
            * below, actually applicable to this packet per ira/ipp/connp), this adds
            * one struct T_opthdr plus the size of that option's payload, and bumps
            * the matching conn_in_* IP_STAT counter.
            *
            * Side effect: when crb_recvslla is set and IRAF_L2SRC_SET is not yet set
            * in ira_flags, ip_setl2src() is called on mp/ira (if the receiving ill
            * can be looked up).
            *
            * The per-item sizes here are expected to match what
            * conn_recvancillary_add() lays down, which ASSERTs at its end that the
            * ancil_size it was handed has been consumed exactly.
 105  106   */
 106  107  uint_t
 107  108  conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
 108  109      ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
 109  110  {
 110  111          uint_t          ancil_size;
 111  112          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
 112  113  
 113  114          /*
 114  115           * If IP_RECVDSTADDR is set we include the destination IP
 115  116           * address as an option. With IP_RECVOPTS we include all
 116  117           * the IP options.
 117  118           */
 118  119          ancil_size = 0;
 119  120          if (recv_ancillary.crb_recvdstaddr &&
 120  121              (ira->ira_flags & IRAF_IS_IPV4)) {
 121  122                  ancil_size += sizeof (struct T_opthdr) +
 122  123                      sizeof (struct in_addr);
 123  124                  IP_STAT(ipst, conn_in_recvdstaddr);
 124  125          }
 125  126  
 126  127          /*
 127  128           * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 128  129           * are different
 129  130           */
 130  131          if (recv_ancillary.crb_ip_recvpktinfo &&
 131  132              connp->conn_family == AF_INET) {
 132  133                  ancil_size += sizeof (struct T_opthdr) +
 133  134                      sizeof (struct in_pktinfo);
 134  135                  IP_STAT(ipst, conn_in_recvpktinfo);
 135  136          }
 136  137  
 137  138          if ((recv_ancillary.crb_recvopts) &&
 138  139              (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 139  140                  ancil_size += sizeof (struct T_opthdr) +
 140  141                      ipp->ipp_ipv4_options_len;
 141  142                  IP_STAT(ipst, conn_in_recvopts);
 142  143          }
 143  144  
 144  145          if (recv_ancillary.crb_recvslla) {
                           /*
                            * NOTE(review): this declaration shadows the function-scope
                            * ipst above and has the same initializer; it looks
                            * redundant.
                            */
 145  146                  ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 146  147                  ill_t *ill;
 147  148  
 148  149                  /* Make sure ira_l2src is setup if not already */
 149  150                  if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
 150  151                          ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
 151  152                              ipst);
 152  153                          if (ill != NULL) {
 153  154                                  ip_setl2src(mp, ira, ill);
 154  155                                  ill_refrele(ill);
 155  156                          }
 156  157                  }
                           /*
                            * A full sockaddr_dl is reserved whether or not the ill
                            * lookup above succeeded.
                            */
 157  158                  ancil_size += sizeof (struct T_opthdr) +
 158  159                      sizeof (struct sockaddr_dl);
 159  160                  IP_STAT(ipst, conn_in_recvslla);
 160  161          }
 161  162  
 162  163          if (recv_ancillary.crb_recvif) {
 163  164                  ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
 164  165                  IP_STAT(ipst, conn_in_recvif);
 165  166          }
 166  167  
 167  168          /*
 168  169           * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 169  170           * are different
 170  171           */
 171  172          if (recv_ancillary.crb_ip_recvpktinfo &&
 172  173              connp->conn_family == AF_INET6) {
 173  174                  ancil_size += sizeof (struct T_opthdr) +
 174  175                      sizeof (struct in6_pktinfo);
 175  176                  IP_STAT(ipst, conn_in_recvpktinfo);
 176  177          }
 177  178  
                   /*
                    * NOTE(review): hoplimit and tclass are sized with sizeof (int) here
                    * while conn_recvancillary_add() stores them as uint_t; presumably
                    * the two types have the same size - confirm.
                    */
 178  179          if (recv_ancillary.crb_ipv6_recvhoplimit) {
 179  180                  ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 180  181                  IP_STAT(ipst, conn_in_recvhoplimit);
 181  182          }
 182  183  
 183  184          if (recv_ancillary.crb_ipv6_recvtclass) {
 184  185                  ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 185  186                  IP_STAT(ipst, conn_in_recvtclass);
 186  187          }
 187  188  
 188  189          if (recv_ancillary.crb_ipv6_recvhopopts &&
 189  190              (ipp->ipp_fields & IPPF_HOPOPTS)) {
 190  191                  ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 191  192                  IP_STAT(ipst, conn_in_recvhopopts);
 192  193          }
 193  194          /*
 194  195           * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 195  196           * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 196  197           * options that appear before a routing header).
 197  198           * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 198  199           */
 199  200          if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 200  201                  if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 201  202                      (recv_ancillary.crb_ipv6_recvdstopts &&
 202  203                      recv_ancillary.crb_ipv6_recvrthdr)) {
 203  204                          ancil_size += sizeof (struct T_opthdr) +
 204  205                              ipp->ipp_rthdrdstoptslen;
 205  206                          IP_STAT(ipst, conn_in_recvrthdrdstopts);
 206  207                  }
 207  208          }
 208  209          if ((recv_ancillary.crb_ipv6_recvrthdr) &&
 209  210              (ipp->ipp_fields & IPPF_RTHDR)) {
 210  211                  ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 211  212                  IP_STAT(ipst, conn_in_recvrthdr);
 212  213          }
 213  214          if ((recv_ancillary.crb_ipv6_recvdstopts ||
 214  215              recv_ancillary.crb_old_ipv6_recvdstopts) &&
 215  216              (ipp->ipp_fields & IPPF_DSTOPTS)) {
 216  217                  ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 217  218                  IP_STAT(ipst, conn_in_recvdstopts);
 218  219          }
                   /* Credentials are only counted when the packet carries a cred. */
 219  220          if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 220  221                  ancil_size += sizeof (struct T_opthdr) +
 221  222                      ucredminsize(ira->ira_cred);
 222  223                  IP_STAT(ipst, conn_in_recvucred);
 223  224          }
 224  225  
 225  226          /*
 226  227           * If SO_TIMESTAMP is set allocate the appropriate sized
 227  228           * buffer. Since gethrestime() expects a pointer aligned
 228  229           * argument, we allocate space necessary for extra
 229  230           * alignment (even though it might not be used).
 230  231           */
 231  232          if (recv_ancillary.crb_timestamp) {
 232  233                  ancil_size += sizeof (struct T_opthdr) +
 233  234                      sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 234  235                  IP_STAT(ipst, conn_in_timestamp);
 235  236          }
 236  237  
 237  238          /*
 238  239           * If IP_RECVTTL is set allocate the appropriate sized buffer
 239  240           */
 240  241          if (recv_ancillary.crb_recvttl &&
 241  242              (ira->ira_flags & IRAF_IS_IPV4)) {
 242  243                  ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
 243  244                  IP_STAT(ipst, conn_in_recvttl);
 244  245          }
 245  246  
 246  247          return (ancil_size);
 247  248  }
 248  249  
 249  250  /*
 250  251   * Lay down the ancillary data items at "ancil_buf".
 251  252   * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
 252  253   * large buffer - ancil_size.
            *
            * Each item is written as a struct T_opthdr (level, name, len, status 0)
            * followed by its payload; toh->len covers the header plus the payload
            * (for the timestamp item it also covers alignment slack). After each
            * item ancil_buf is advanced past it and ancil_size is reduced by
            * toh->len. The final ASSERT checks that ancil_size has reached zero, so
            * the conditions and lengths used here need to stay in step with
            * conn_recvancillary_size().
 253  254   */
 254  255  void
 255  256  conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
 256  257      ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
 257  258  {
 258  259          /*
 259  260           * Copy in destination address before options to avoid
 260  261           * any padding issues.
 261  262           */
 262  263          if (recv_ancillary.crb_recvdstaddr &&
 263  264              (ira->ira_flags & IRAF_IS_IPV4)) {
 264  265                  struct T_opthdr *toh;
 265  266                  ipaddr_t *dstptr;
 266  267  
 267  268                  toh = (struct T_opthdr *)ancil_buf;
 268  269                  toh->level = IPPROTO_IP;
 269  270                  toh->name = IP_RECVDSTADDR;
 270  271                  toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
 271  272                  toh->status = 0;
 272  273                  ancil_buf += sizeof (struct T_opthdr);
 273  274                  dstptr = (ipaddr_t *)ancil_buf;
 274  275                  *dstptr = ipp->ipp_addr_v4;
 275  276                  ancil_buf += sizeof (ipaddr_t);
 276  277                  ancil_size -= toh->len;
 277  278          }
 278  279  
 279  280          /*
 280  281           * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 281  282           * are different
 282  283           */
 283  284          if (recv_ancillary.crb_ip_recvpktinfo &&
 284  285              connp->conn_family == AF_INET) {
 285  286                  ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 286  287                  struct T_opthdr *toh;
 287  288                  struct in_pktinfo *pktinfop;
 288  289                  ill_t *ill;
 289  290                  ipif_t *ipif;
 290  291  
 291  292                  toh = (struct T_opthdr *)ancil_buf;
 292  293                  toh->level = IPPROTO_IP;
 293  294                  toh->name = IP_PKTINFO;
 294  295                  toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
 295  296                  toh->status = 0;
 296  297                  ancil_buf += sizeof (struct T_opthdr);
 297  298                  pktinfop = (struct in_pktinfo *)ancil_buf;
 298  299  
 299  300                  pktinfop->ipi_ifindex = ira->ira_ruifindex;
                           /* Default; replaced below only if both lookups succeed */
 300  301                  pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
 301  302  
 302  303                  /* Find a good address to report */
 303  304                  ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
 304  305                  if (ill != NULL) {
 305  306                          ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
 306  307                          if (ipif != NULL) {
 307  308                                  pktinfop->ipi_spec_dst.s_addr =
 308  309                                      ipif->ipif_lcl_addr;
 309  310                                  ipif_refrele(ipif);
 310  311                          }
 311  312                          ill_refrele(ill);
 312  313                  }
 313  314                  pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
 314  315                  ancil_buf += sizeof (struct in_pktinfo);
 315  316                  ancil_size -= toh->len;
 316  317          }
 317  318  
 318  319          if ((recv_ancillary.crb_recvopts) &&
 319  320              (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 320  321                  struct T_opthdr *toh;
 321  322  
 322  323                  toh = (struct T_opthdr *)ancil_buf;
 323  324                  toh->level = IPPROTO_IP;
 324  325                  toh->name = IP_RECVOPTS;
 325  326                  toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
 326  327                  toh->status = 0;
 327  328                  ancil_buf += sizeof (struct T_opthdr);
 328  329                  bcopy(ipp->ipp_ipv4_options, ancil_buf,
 329  330                      ipp->ipp_ipv4_options_len);
 330  331                  ancil_buf += ipp->ipp_ipv4_options_len;
 331  332                  ancil_size -= toh->len;
 332  333          }
 333  334  
 334  335          if (recv_ancillary.crb_recvslla) {
 335  336                  ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 336  337                  struct T_opthdr *toh;
 337  338                  struct sockaddr_dl *dstptr;
 338  339                  ill_t *ill;
 339  340                  int alen = 0;
 340  341  
                           /* The ill reference taken here is dropped at the end */
 341  342                  ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
 342  343                  if (ill != NULL)
 343  344                          alen = ill->ill_phys_addr_length;
 344  345  
 345  346                  /*
 346  347                   * For loopback multicast and broadcast the packet arrives
 347  348                   * with ira_ruifindex being the physical interface, but
 348  349                   * ira_l2src is all zero since ip_postfrag_loopback doesn't
 349  350                   * know our l2src. We don't report the address in that case.
 350  351                   */
 351  352                  if (ira->ira_flags & IRAF_LOOPBACK)
 352  353                          alen = 0;
 353  354  
 354  355                  toh = (struct T_opthdr *)ancil_buf;
 355  356                  toh->level = IPPROTO_IP;
 356  357                  toh->name = IP_RECVSLLA;
 357  358                  toh->len = sizeof (struct T_opthdr) +
 358  359                      sizeof (struct sockaddr_dl);
 359  360                  toh->status = 0;
 360  361                  ancil_buf += sizeof (struct T_opthdr);
 361  362                  dstptr = (struct sockaddr_dl *)ancil_buf;
 362  363                  dstptr->sdl_family = AF_LINK;
 363  364                  dstptr->sdl_index = ira->ira_ruifindex;
 364  365                  if (ill != NULL)
 365  366                          dstptr->sdl_type = ill->ill_type;
 366  367                  else
 367  368                          dstptr->sdl_type = 0;
 368  369                  dstptr->sdl_nlen = 0;
 369  370                  dstptr->sdl_alen = alen;
 370  371                  dstptr->sdl_slen = 0;
                           /*
                            * NOTE(review): alen comes straight from
                            * ill_phys_addr_length with no bound check visible here;
                            * confirm it cannot exceed ira_l2src or sdl_data. Only the
                            * first alen bytes of sdl_data are written by this block.
                            */
 371  372                  bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
 372  373                  ancil_buf += sizeof (struct sockaddr_dl);
 373  374                  ancil_size -= toh->len;
 374  375                  if (ill != NULL)
 375  376                          ill_refrele(ill);
 376  377          }
 377  378  
 378  379          if (recv_ancillary.crb_recvif) {
 379  380                  struct T_opthdr *toh;
 380  381                  uint_t          *dstptr;
 381  382  
 382  383                  toh = (struct T_opthdr *)ancil_buf;
 383  384                  toh->level = IPPROTO_IP;
 384  385                  toh->name = IP_RECVIF;
 385  386                  toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 386  387                  toh->status = 0;
 387  388                  ancil_buf += sizeof (struct T_opthdr);
 388  389                  dstptr = (uint_t *)ancil_buf;
 389  390                  *dstptr = ira->ira_ruifindex;
 390  391                  ancil_buf += sizeof (uint_t);
 391  392                  ancil_size -= toh->len;
 392  393          }
 393  394  
 394  395          /*
 395  396           * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 396  397           * are different
 397  398           */
 398  399          if (recv_ancillary.crb_ip_recvpktinfo &&
 399  400              connp->conn_family == AF_INET6) {
 400  401                  struct T_opthdr *toh;
 401  402                  struct in6_pktinfo *pkti;
 402  403  
 403  404                  toh = (struct T_opthdr *)ancil_buf;
 404  405                  toh->level = IPPROTO_IPV6;
 405  406                  toh->name = IPV6_PKTINFO;
 406  407                  toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
 407  408                  toh->status = 0;
 408  409                  ancil_buf += sizeof (struct T_opthdr);
 409  410                  pkti = (struct in6_pktinfo *)ancil_buf;
                           /* IPv4 packet on an AF_INET6 conn: report a v4-mapped address */
 410  411                  if (ira->ira_flags & IRAF_IS_IPV4) {
 411  412                          IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
 412  413                              &pkti->ipi6_addr);
 413  414                  } else {
 414  415                          pkti->ipi6_addr = ipp->ipp_addr;
 415  416                  }
 416  417                  pkti->ipi6_ifindex = ira->ira_ruifindex;
 417  418  
 418  419                  ancil_buf += sizeof (*pkti);
 419  420                  ancil_size -= toh->len;
 420  421          }
 421  422          if (recv_ancillary.crb_ipv6_recvhoplimit) {
 422  423                  struct T_opthdr *toh;
 423  424  
 424  425                  toh = (struct T_opthdr *)ancil_buf;
 425  426                  toh->level = IPPROTO_IPV6;
 426  427                  toh->name = IPV6_HOPLIMIT;
 427  428                  toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 428  429                  toh->status = 0;
 429  430                  ancil_buf += sizeof (struct T_opthdr);
 430  431                  *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
 431  432                  ancil_buf += sizeof (uint_t);
 432  433                  ancil_size -= toh->len;
 433  434          }
 434  435          if (recv_ancillary.crb_ipv6_recvtclass) {
 435  436                  struct T_opthdr *toh;
 436  437  
 437  438                  toh = (struct T_opthdr *)ancil_buf;
 438  439                  toh->level = IPPROTO_IPV6;
 439  440                  toh->name = IPV6_TCLASS;
 440  441                  toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 441  442                  toh->status = 0;
 442  443                  ancil_buf += sizeof (struct T_opthdr);
 443  444  
                           /* For IPv4 packets the type-of-service value is reported */
 444  445                  if (ira->ira_flags & IRAF_IS_IPV4)
 445  446                          *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
 446  447                  else
 447  448                          *(uint_t *)ancil_buf = ipp->ipp_tclass;
 448  449                  ancil_buf += sizeof (uint_t);
 449  450                  ancil_size -= toh->len;
 450  451          }
 451  452          if (recv_ancillary.crb_ipv6_recvhopopts &&
 452  453              (ipp->ipp_fields & IPPF_HOPOPTS)) {
 453  454                  struct T_opthdr *toh;
 454  455  
 455  456                  toh = (struct T_opthdr *)ancil_buf;
 456  457                  toh->level = IPPROTO_IPV6;
 457  458                  toh->name = IPV6_HOPOPTS;
 458  459                  toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 459  460                  toh->status = 0;
 460  461                  ancil_buf += sizeof (struct T_opthdr);
 461  462                  bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
 462  463                  ancil_buf += ipp->ipp_hopoptslen;
 463  464                  ancil_size -= toh->len;
 464  465          }
 465  466          /*
 466  467           * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 467  468           * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 468  469           * options that appear before a routing header).
 469  470           * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
                    * The item is emitted under the IPV6_DSTOPTS name, the same name used
                    * for ipp_dstopts further down.
 470  471           */
 471  472          if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 472  473                  if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 473  474                      (recv_ancillary.crb_ipv6_recvdstopts &&
 474  475                      recv_ancillary.crb_ipv6_recvrthdr)) {
 475  476                          struct T_opthdr *toh;
 476  477  
 477  478                          toh = (struct T_opthdr *)ancil_buf;
 478  479                          toh->level = IPPROTO_IPV6;
 479  480                          toh->name = IPV6_DSTOPTS;
 480  481                          toh->len = sizeof (struct T_opthdr) +
 481  482                              ipp->ipp_rthdrdstoptslen;
 482  483                          toh->status = 0;
 483  484                          ancil_buf += sizeof (struct T_opthdr);
 484  485                          bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
 485  486                              ipp->ipp_rthdrdstoptslen);
 486  487                          ancil_buf += ipp->ipp_rthdrdstoptslen;
 487  488                          ancil_size -= toh->len;
 488  489                  }
 489  490          }
 490  491          if (recv_ancillary.crb_ipv6_recvrthdr &&
 491  492              (ipp->ipp_fields & IPPF_RTHDR)) {
 492  493                  struct T_opthdr *toh;
 493  494  
 494  495                  toh = (struct T_opthdr *)ancil_buf;
 495  496                  toh->level = IPPROTO_IPV6;
 496  497                  toh->name = IPV6_RTHDR;
 497  498                  toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 498  499                  toh->status = 0;
 499  500                  ancil_buf += sizeof (struct T_opthdr);
 500  501                  bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
 501  502                  ancil_buf += ipp->ipp_rthdrlen;
 502  503                  ancil_size -= toh->len;
 503  504          }
 504  505          if ((recv_ancillary.crb_ipv6_recvdstopts ||
 505  506              recv_ancillary.crb_old_ipv6_recvdstopts) &&
 506  507              (ipp->ipp_fields & IPPF_DSTOPTS)) {
 507  508                  struct T_opthdr *toh;
 508  509  
 509  510                  toh = (struct T_opthdr *)ancil_buf;
 510  511                  toh->level = IPPROTO_IPV6;
 511  512                  toh->name = IPV6_DSTOPTS;
 512  513                  toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 513  514                  toh->status = 0;
 514  515                  ancil_buf += sizeof (struct T_opthdr);
 515  516                  bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
 516  517                  ancil_buf += ipp->ipp_dstoptslen;
 517  518                  ancil_size -= toh->len;
 518  519          }
 519  520  
 520  521          if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 521  522                  struct T_opthdr *toh;
 522  523                  cred_t          *rcr = connp->conn_cred;
 523  524  
 524  525                  toh = (struct T_opthdr *)ancil_buf;
 525  526                  toh->level = SOL_SOCKET;
 526  527                  toh->name = SCM_UCRED;
 527  528                  toh->len = sizeof (struct T_opthdr) +
 528  529                      ucredminsize(ira->ira_cred);
 529  530                  toh->status = 0;
                           /*
                            * &toh[1] is the first byte after the header; the return
                            * value of cred2ucred() is deliberately discarded.
                            */
 530  531                  (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
 531  532                  ancil_buf += toh->len;
 532  533                  ancil_size -= toh->len;
 533  534          }
 534  535          if (recv_ancillary.crb_timestamp) {
 535  536                  struct  T_opthdr *toh;
 536  537  
 537  538                  toh = (struct T_opthdr *)ancil_buf;
 538  539                  toh->level = SOL_SOCKET;
 539  540                  toh->name = SCM_TIMESTAMP;
                           /* len includes _POINTER_ALIGNMENT bytes of alignment slack */
 540  541                  toh->len = sizeof (struct T_opthdr) +
 541  542                      sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 542  543                  toh->status = 0;
 543  544                  ancil_buf += sizeof (struct T_opthdr);
 544  545                  /* Align for gethrestime() */
 545  546                  ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
 546  547                      sizeof (intptr_t));
 547  548                  gethrestime((timestruc_t *)ancil_buf);
                           /*
                            * Step relative to toh so the whole len is consumed no matter
                            * how much the rounding above moved ancil_buf.
                            */
 548  549                  ancil_buf = (uchar_t *)toh + toh->len;
 549  550                  ancil_size -= toh->len;
 550  551          }
 551  552  
 552  553          /*
 553  554           * CAUTION:
 554  555           * Due to alignment issues
 555  556           * Processing of IP_RECVTTL option
 556  557           * should always be the last. Adding
 557  558           * any option processing after this will
 558  559           * cause alignment panic.
                    * (The payload is a single uint8_t, so ancil_buf is advanced by
                    * one byte past the header.)
 559  560           */
 560  561          if (recv_ancillary.crb_recvttl &&
 561  562              (ira->ira_flags & IRAF_IS_IPV4)) {
 562  563                  struct  T_opthdr *toh;
 563  564                  uint8_t *dstptr;
 564  565  
 565  566                  toh = (struct T_opthdr *)ancil_buf;
 566  567                  toh->level = IPPROTO_IP;
 567  568                  toh->name = IP_RECVTTL;
 568  569                  toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
 569  570                  toh->status = 0;
 570  571                  ancil_buf += sizeof (struct T_opthdr);
 571  572                  dstptr = (uint8_t *)ancil_buf;
 572  573                  *dstptr = ipp->ipp_hoplimit;
 573  574                  ancil_buf += sizeof (uint8_t);
 574  575                  ancil_size -= toh->len;
 575  576          }
 576  577  
 577  578          /* Consumed all of allocated space */
 578  579          ASSERT(ancil_size == 0);
 579  580  
 580  581  }
 581  582  
 582  583  /*
 583  584   * This routine retrieves the current status of socket options.
 584  585   * It returns the size of the option retrieved, or -1.
 585  586   */
 586  587  int
 587  588  conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
 588  589      uchar_t *ptr)
 589  590  {
 590  591          int             *i1 = (int *)ptr;
 591  592          conn_t          *connp = coa->coa_connp;
 592  593          ip_xmit_attr_t  *ixa = coa->coa_ixa;
 593  594          ip_pkt_t        *ipp = coa->coa_ipp;
 594  595          ip_stack_t      *ipst = ixa->ixa_ipst;
 595  596          uint_t          len;
 596  597  
 597  598          ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
 598  599  
 599  600          switch (level) {
 600  601          case SOL_SOCKET:
 601  602                  switch (name) {
 602  603                  case SO_DEBUG:
 603  604                          *i1 = connp->conn_debug ? SO_DEBUG : 0;
 604  605                          break;  /* goto sizeof (int) option return */
 605  606                  case SO_KEEPALIVE:
 606  607                          *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
 607  608                          break;
 608  609                  case SO_LINGER: {
 609  610                          struct linger *lgr = (struct linger *)ptr;
 610  611  
 611  612                          lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
  
    | 
      ↓ open down ↓ | 
    578 lines elided | 
    
      ↑ open up ↑ | 
  
 612  613                          lgr->l_linger = connp->conn_lingertime;
 613  614                          }
 614  615                          return (sizeof (struct linger));
 615  616  
 616  617                  case SO_OOBINLINE:
 617  618                          *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
 618  619                          break;
 619  620                  case SO_REUSEADDR:
 620  621                          *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
 621  622                          break;  /* goto sizeof (int) option return */
      623 +                case SO_REUSEPORT:
      624 +                        *i1 = connp->conn_reuseport;
      625 +                        break;  /* goto sizeof (int) option return */
 622  626                  case SO_TYPE:
 623  627                          *i1 = connp->conn_so_type;
 624  628                          break;  /* goto sizeof (int) option return */
 625  629                  case SO_DONTROUTE:
 626  630                          *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
 627  631                              SO_DONTROUTE : 0;
 628  632                          break;  /* goto sizeof (int) option return */
 629  633                  case SO_USELOOPBACK:
 630  634                          *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
 631  635                          break;  /* goto sizeof (int) option return */
 632  636                  case SO_BROADCAST:
 633  637                          *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
 634  638                          break;  /* goto sizeof (int) option return */
 635  639  
 636  640                  case SO_SNDBUF:
 637  641                          *i1 = connp->conn_sndbuf;
 638  642                          break;  /* goto sizeof (int) option return */
 639  643                  case SO_RCVBUF:
 640  644                          *i1 = connp->conn_rcvbuf;
 641  645                          break;  /* goto sizeof (int) option return */
 642  646                  case SO_RCVTIMEO:
 643  647                  case SO_SNDTIMEO:
 644  648                          /*
 645  649                           * Pass these two options through for third-party
 646  650                           * protocol usage. Here we just report zero.
 647  651                           */
 648  652                          *i1 = 0;
 649  653                          break;
 650  654                  case SO_DGRAM_ERRIND:
 651  655                          *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
 652  656                          break;  /* goto sizeof (int) option return */
 653  657                  case SO_RECVUCRED:
 654  658                          *i1 = connp->conn_recv_ancillary.crb_recvucred;
 655  659                          break;  /* goto sizeof (int) option return */
 656  660                  case SO_TIMESTAMP:
 657  661                          *i1 = connp->conn_recv_ancillary.crb_timestamp;
 658  662                          break;  /* goto sizeof (int) option return */
 659  663                  case SO_VRRP:
 660  664                          *i1 = connp->conn_isvrrp;
 661  665                          break;  /* goto sizeof (int) option return */
 662  666                  case SO_ANON_MLP:
 663  667                          *i1 = connp->conn_anon_mlp;
 664  668                          break;  /* goto sizeof (int) option return */
 665  669                  case SO_MAC_EXEMPT:
 666  670                          *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
 667  671                          break;  /* goto sizeof (int) option return */
 668  672                  case SO_MAC_IMPLICIT:
 669  673                          *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
 670  674                          break;  /* goto sizeof (int) option return */
 671  675                  case SO_ALLZONES:
 672  676                          *i1 = connp->conn_allzones;
 673  677                          break;  /* goto sizeof (int) option return */
 674  678                  case SO_EXCLBIND:
 675  679                          *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
 676  680                          break;
 677  681                  case SO_PROTOTYPE:
 678  682                          *i1 = connp->conn_proto;
 679  683                          break;
 680  684  
 681  685                  case SO_DOMAIN:
 682  686                          *i1 = connp->conn_family;
 683  687                          break;
 684  688                  default:
 685  689                          return (-1);
 686  690                  }
 687  691                  break;
 688  692          case IPPROTO_IP:
 689  693                  if (connp->conn_family != AF_INET)
 690  694                          return (-1);
 691  695                  switch (name) {
 692  696                  case IP_OPTIONS:
 693  697                  case T_IP_OPTIONS:
 694  698                          if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
 695  699                                  return (0);
 696  700  
 697  701                          len = ipp->ipp_ipv4_options_len;
 698  702                          if (len > 0) {
 699  703                                  bcopy(ipp->ipp_ipv4_options, ptr, len);
 700  704                          }
 701  705                          return (len);
 702  706  
 703  707                  case IP_PKTINFO: {
 704  708                          /*
 705  709                           * This also handles IP_RECVPKTINFO.
 706  710                           * IP_PKTINFO and IP_RECVPKTINFO have same value.
 707  711                           * Differentiation is based on the size of the
 708  712                           * argument passed in.
 709  713                           */
 710  714                          struct in_pktinfo *pktinfo;
 711  715  
 712  716  #ifdef notdef
 713  717                          /* optcom doesn't provide a length with "get" */
 714  718                          if (inlen == sizeof (int)) {
 715  719                                  /* This is IP_RECVPKTINFO option. */
 716  720                                  *i1 = connp->conn_recv_ancillary.
 717  721                                      crb_ip_recvpktinfo;
 718  722                                  return (sizeof (int));
 719  723                          }
 720  724  #endif
 721  725                          /* XXX assumes that caller has room for max size! */
 722  726  
 723  727                          pktinfo = (struct in_pktinfo *)ptr;
 724  728                          pktinfo->ipi_ifindex = ixa->ixa_ifindex;
 725  729                          if (ipp->ipp_fields & IPPF_ADDR)
 726  730                                  pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
 727  731                          else
 728  732                                  pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
 729  733                          return (sizeof (struct in_pktinfo));
 730  734                  }
 731  735                  case IP_DONTFRAG:
 732  736                          *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 733  737                          return (sizeof (int));
 734  738                  case IP_TOS:
 735  739                  case T_IP_TOS:
 736  740                          *i1 = (int)ipp->ipp_type_of_service;
 737  741                          break;  /* goto sizeof (int) option return */
 738  742                  case IP_TTL:
 739  743                          *i1 = (int)ipp->ipp_unicast_hops;
 740  744                          break;  /* goto sizeof (int) option return */
 741  745                  case IP_DHCPINIT_IF:
 742  746                          return (-1);
 743  747                  case IP_NEXTHOP:
 744  748                          if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
 745  749                                  *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
 746  750                                  return (sizeof (ipaddr_t));
 747  751                          } else {
 748  752                                  return (0);
 749  753                          }
 750  754  
 751  755                  case IP_MULTICAST_IF:
 752  756                          /* 0 address if not set */
 753  757                          *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
 754  758                          return (sizeof (ipaddr_t));
 755  759                  case IP_MULTICAST_TTL:
 756  760                          *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
 757  761                          return (sizeof (uchar_t));
 758  762                  case IP_MULTICAST_LOOP:
 759  763                          *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 760  764                          return (sizeof (uint8_t));
 761  765                  case IP_RECVOPTS:
 762  766                          *i1 = connp->conn_recv_ancillary.crb_recvopts;
 763  767                          break;  /* goto sizeof (int) option return */
 764  768                  case IP_RECVDSTADDR:
 765  769                          *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 766  770                          break;  /* goto sizeof (int) option return */
 767  771                  case IP_RECVIF:
 768  772                          *i1 = connp->conn_recv_ancillary.crb_recvif;
 769  773                          break;  /* goto sizeof (int) option return */
 770  774                  case IP_RECVSLLA:
 771  775                          *i1 = connp->conn_recv_ancillary.crb_recvslla;
 772  776                          break;  /* goto sizeof (int) option return */
 773  777                  case IP_RECVTTL:
 774  778                          *i1 = connp->conn_recv_ancillary.crb_recvttl;
 775  779                          break;  /* goto sizeof (int) option return */
 776  780                  case IP_ADD_MEMBERSHIP:
 777  781                  case IP_DROP_MEMBERSHIP:
 778  782                  case MCAST_JOIN_GROUP:
 779  783                  case MCAST_LEAVE_GROUP:
 780  784                  case IP_BLOCK_SOURCE:
 781  785                  case IP_UNBLOCK_SOURCE:
 782  786                  case IP_ADD_SOURCE_MEMBERSHIP:
 783  787                  case IP_DROP_SOURCE_MEMBERSHIP:
 784  788                  case MCAST_BLOCK_SOURCE:
 785  789                  case MCAST_UNBLOCK_SOURCE:
 786  790                  case MCAST_JOIN_SOURCE_GROUP:
 787  791                  case MCAST_LEAVE_SOURCE_GROUP:
 788  792                  case MRT_INIT:
 789  793                  case MRT_DONE:
 790  794                  case MRT_ADD_VIF:
 791  795                  case MRT_DEL_VIF:
 792  796                  case MRT_ADD_MFC:
 793  797                  case MRT_DEL_MFC:
 794  798                          /* cannot "get" the value for these */
 795  799                          return (-1);
 796  800                  case MRT_VERSION:
 797  801                  case MRT_ASSERT:
 798  802                          (void) ip_mrouter_get(name, connp, ptr);
 799  803                          return (sizeof (int));
 800  804                  case IP_SEC_OPT:
 801  805                          return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 802  806                              IPSEC_AF_V4));
 803  807                  case IP_BOUND_IF:
 804  808                          /* Zero if not set */
 805  809                          *i1 = connp->conn_bound_if;
 806  810                          break;  /* goto sizeof (int) option return */
 807  811                  case IP_UNSPEC_SRC:
 808  812                          *i1 = connp->conn_unspec_src;
 809  813                          break;  /* goto sizeof (int) option return */
 810  814                  case IP_BROADCAST_TTL:
 811  815                          if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
 812  816                                  *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
 813  817                          else
 814  818                                  *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
 815  819                          return (sizeof (uchar_t));
 816  820                  default:
 817  821                          return (-1);
 818  822                  }
 819  823                  break;
 820  824          case IPPROTO_IPV6:
 821  825                  if (connp->conn_family != AF_INET6)
 822  826                          return (-1);
 823  827                  switch (name) {
 824  828                  case IPV6_UNICAST_HOPS:
 825  829                          *i1 = (int)ipp->ipp_unicast_hops;
 826  830                          break;  /* goto sizeof (int) option return */
 827  831                  case IPV6_MULTICAST_IF:
 828  832                          /* 0 index if not set */
 829  833                          *i1 = ixa->ixa_multicast_ifindex;
 830  834                          break;  /* goto sizeof (int) option return */
 831  835                  case IPV6_MULTICAST_HOPS:
 832  836                          *i1 = ixa->ixa_multicast_ttl;
 833  837                          break;  /* goto sizeof (int) option return */
 834  838                  case IPV6_MULTICAST_LOOP:
 835  839                          *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 836  840                          break;  /* goto sizeof (int) option return */
 837  841                  case IPV6_JOIN_GROUP:
 838  842                  case IPV6_LEAVE_GROUP:
 839  843                  case MCAST_JOIN_GROUP:
 840  844                  case MCAST_LEAVE_GROUP:
 841  845                  case MCAST_BLOCK_SOURCE:
 842  846                  case MCAST_UNBLOCK_SOURCE:
 843  847                  case MCAST_JOIN_SOURCE_GROUP:
 844  848                  case MCAST_LEAVE_SOURCE_GROUP:
 845  849                          /* cannot "get" the value for these */
 846  850                          return (-1);
 847  851                  case IPV6_BOUND_IF:
 848  852                          /* Zero if not set */
 849  853                          *i1 = connp->conn_bound_if;
 850  854                          break;  /* goto sizeof (int) option return */
 851  855                  case IPV6_UNSPEC_SRC:
 852  856                          *i1 = connp->conn_unspec_src;
 853  857                          break;  /* goto sizeof (int) option return */
 854  858                  case IPV6_RECVPKTINFO:
 855  859                          *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
 856  860                          break;  /* goto sizeof (int) option return */
 857  861                  case IPV6_RECVTCLASS:
 858  862                          *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
 859  863                          break;  /* goto sizeof (int) option return */
 860  864                  case IPV6_RECVPATHMTU:
 861  865                          *i1 = connp->conn_ipv6_recvpathmtu;
 862  866                          break;  /* goto sizeof (int) option return */
 863  867                  case IPV6_RECVHOPLIMIT:
 864  868                          *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
 865  869                          break;  /* goto sizeof (int) option return */
 866  870                  case IPV6_RECVHOPOPTS:
 867  871                          *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
 868  872                          break;  /* goto sizeof (int) option return */
 869  873                  case IPV6_RECVDSTOPTS:
 870  874                          *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
 871  875                          break;  /* goto sizeof (int) option return */
 872  876                  case _OLD_IPV6_RECVDSTOPTS:
 873  877                          *i1 =
 874  878                              connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
 875  879                          break;  /* goto sizeof (int) option return */
 876  880                  case IPV6_RECVRTHDRDSTOPTS:
 877  881                          *i1 = connp->conn_recv_ancillary.
 878  882                              crb_ipv6_recvrthdrdstopts;
 879  883                          break;  /* goto sizeof (int) option return */
 880  884                  case IPV6_RECVRTHDR:
 881  885                          *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
 882  886                          break;  /* goto sizeof (int) option return */
 883  887                  case IPV6_PKTINFO: {
 884  888                          /* XXX assumes that caller has room for max size! */
 885  889                          struct in6_pktinfo *pkti;
 886  890  
 887  891                          pkti = (struct in6_pktinfo *)ptr;
 888  892                          pkti->ipi6_ifindex = ixa->ixa_ifindex;
 889  893                          if (ipp->ipp_fields & IPPF_ADDR)
 890  894                                  pkti->ipi6_addr = ipp->ipp_addr;
 891  895                          else
 892  896                                  pkti->ipi6_addr = ipv6_all_zeros;
 893  897                          return (sizeof (struct in6_pktinfo));
 894  898                  }
 895  899                  case IPV6_TCLASS:
 896  900                          *i1 = ipp->ipp_tclass;
 897  901                          break;  /* goto sizeof (int) option return */
 898  902                  case IPV6_NEXTHOP: {
 899  903                          sin6_t *sin6 = (sin6_t *)ptr;
 900  904  
 901  905                          if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
 902  906                                  return (0);
 903  907  
 904  908                          *sin6 = sin6_null;
 905  909                          sin6->sin6_family = AF_INET6;
 906  910                          sin6->sin6_addr = ixa->ixa_nexthop_v6;
 907  911  
 908  912                          return (sizeof (sin6_t));
 909  913                  }
 910  914                  case IPV6_HOPOPTS:
 911  915                          if (!(ipp->ipp_fields & IPPF_HOPOPTS))
 912  916                                  return (0);
 913  917                          bcopy(ipp->ipp_hopopts, ptr,
 914  918                              ipp->ipp_hopoptslen);
 915  919                          return (ipp->ipp_hopoptslen);
 916  920                  case IPV6_RTHDRDSTOPTS:
 917  921                          if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
 918  922                                  return (0);
 919  923                          bcopy(ipp->ipp_rthdrdstopts, ptr,
 920  924                              ipp->ipp_rthdrdstoptslen);
 921  925                          return (ipp->ipp_rthdrdstoptslen);
 922  926                  case IPV6_RTHDR:
 923  927                          if (!(ipp->ipp_fields & IPPF_RTHDR))
 924  928                                  return (0);
 925  929                          bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
 926  930                          return (ipp->ipp_rthdrlen);
 927  931                  case IPV6_DSTOPTS:
 928  932                          if (!(ipp->ipp_fields & IPPF_DSTOPTS))
 929  933                                  return (0);
 930  934                          bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
 931  935                          return (ipp->ipp_dstoptslen);
 932  936                  case IPV6_PATHMTU:
 933  937                          return (ip_fill_mtuinfo(connp, ixa,
 934  938                              (struct ip6_mtuinfo *)ptr));
 935  939                  case IPV6_SEC_OPT:
 936  940                          return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 937  941                              IPSEC_AF_V6));
 938  942                  case IPV6_SRC_PREFERENCES:
 939  943                          return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
 940  944                  case IPV6_DONTFRAG:
 941  945                          *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 942  946                          return (sizeof (int));
 943  947                  case IPV6_USE_MIN_MTU:
 944  948                          if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
 945  949                                  *i1 = ixa->ixa_use_min_mtu;
 946  950                          else
 947  951                                  *i1 = IPV6_USE_MIN_MTU_MULTICAST;
 948  952                          break;
 949  953                  case IPV6_V6ONLY:
 950  954                          *i1 = connp->conn_ipv6_v6only;
 951  955                          return (sizeof (int));
 952  956                  default:
 953  957                          return (-1);
 954  958                  }
 955  959                  break;
 956  960          case IPPROTO_UDP:
 957  961                  switch (name) {
 958  962                  case UDP_ANONPRIVBIND:
 959  963                          *i1 = connp->conn_anon_priv_bind;
 960  964                          break;
 961  965                  case UDP_EXCLBIND:
 962  966                          *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
 963  967                          break;
 964  968                  default:
 965  969                          return (-1);
 966  970                  }
 967  971                  break;
 968  972          case IPPROTO_TCP:
 969  973                  switch (name) {
 970  974                  case TCP_RECVDSTADDR:
 971  975                          *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 972  976                          break;
 973  977                  case TCP_ANONPRIVBIND:
 974  978                          *i1 = connp->conn_anon_priv_bind;
 975  979                          break;
 976  980                  case TCP_EXCLBIND:
 977  981                          *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
 978  982                          break;
 979  983                  default:
 980  984                          return (-1);
 981  985                  }
 982  986                  break;
 983  987          default:
 984  988                  return (-1);
 985  989          }
 986  990          return (sizeof (int));
 987  991  }
 988  992  
           /*
            * Forward declarations of the per-level option setters that
            * conn_opt_set() dispatches to. All five take the same argument list
            * and return int.
            */
 989  993  static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
 990  994      uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 991  995  static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
 992  996      uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 993  997  static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
 994  998      uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 995  999  static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
 996 1000      uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 997 1001  static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
 998 1002      uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 999 1003  
1000 1004  /*
1001 1005   * This routine sets the most common socket options including some
1002 1006   * that are transport/ULP specific.
1003 1007   * It returns errno or zero.
1004 1008   *
1005 1009   * For fixed length options, no sanity check of the passed in
1006 1010   * length is done. It is assumed that the *_optcom_req()
1007 1011   * routines do the right thing.
            *
            * The work is delegated to a per-level helper selected by `level'
            * (SOL_SOCKET, IPPROTO_IP, IPPROTO_IPV6, IPPROTO_UDP or IPPROTO_TCP)
            * and the helper's return value is passed back unchanged. Any other
            * level returns zero without calling a helper.
            *
            * The calling thread must not hold conn_lock (asserted below).
1008 1012   */
1009 1013  int
1010 1014  conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1011 1015      uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1012 1016  {
1013 1017          ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1014 1018  
1015 1019          /* We have different functions for different levels */
1016 1020          switch (level) {
1017 1021          case SOL_SOCKET:
1018 1022                  return (conn_opt_set_socket(coa, name, inlen, invalp,
1019 1023                      checkonly, cr));
1020 1024          case IPPROTO_IP:
1021 1025                  return (conn_opt_set_ip(coa, name, inlen, invalp,
1022 1026                      checkonly, cr));
1023 1027          case IPPROTO_IPV6:
1024 1028                  return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1025 1029                      checkonly, cr));
1026 1030          case IPPROTO_UDP:
1027 1031                  return (conn_opt_set_udp(coa, name, inlen, invalp,
1028 1032                      checkonly, cr));
1029 1033          case IPPROTO_TCP:
1030 1034                  return (conn_opt_set_tcp(coa, name, inlen, invalp,
1031 1035                      checkonly, cr));
1032 1036          default:
                           /* Unrecognized level: report success, set nothing */
1033 1037                  return (0);
1034 1038          }
1035 1039  }
1036 1040  
1037 1041  /*
1038 1042   * Handle SOL_SOCKET
1039 1043   * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1040 1044   * it implement their own checks and setting of conn_proto.
            *
            * The function runs in two steps. The first switch performs the
            * per-option checks (the secpolicy_*() calls, and refusing
            * SO_ALLZONES and SO_MAC_EXEMPT when IPCL_IS_BOUND() is true) and
            * returns EACCES or EINVAL on failure. If `checkonly' is set the
            * function then returns zero without changing anything. Otherwise
            * the second switch stores the new value while holding conn_lock,
            * and ORs a COA_* flag into coa->coa_changed for the options that
            * have one.
            *
            * Neither switch has a default case, so an option name with no case
            * here (SO_REUSEPORT, for example) changes nothing and still returns
            * zero. `inlen' is not examined.
1041 1045   */
1042 1046  /* ARGSUSED1 */
1043 1047  static int
1044 1048  conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1045 1049      uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1046 1050  {
1047 1051          conn_t          *connp = coa->coa_connp;
1048 1052          ip_xmit_attr_t  *ixa = coa->coa_ixa;
1049 1053          int             *i1 = (int *)invalp;
                   /* Any non-zero int means "on"; *i1 is read for every option */
1050 1054          boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1051 1055  
                   /* Step 1: checks that can fail; nothing is modified here */
1052 1056          switch (name) {
1053 1057          case SO_ALLZONES:
1054 1058                  if (IPCL_IS_BOUND(connp))
1055 1059                          return (EINVAL);
1056 1060                  break;
1057 1061          case SO_VRRP:
1058 1062                  if (secpolicy_ip_config(cr, checkonly) != 0)
1059 1063                          return (EACCES);
1060 1064                  break;
1061 1065          case SO_MAC_EXEMPT:
1062 1066                  if (secpolicy_net_mac_aware(cr) != 0)
1063 1067                          return (EACCES);
1064 1068                  if (IPCL_IS_BOUND(connp))
1065 1069                          return (EINVAL);
1066 1070                  break;
1067 1071          case SO_MAC_IMPLICIT:
1068 1072                  if (secpolicy_net_mac_implicit(cr) != 0)
1069 1073                          return (EACCES);
1070 1074                  break;
1071 1075          }
1072 1076          if (checkonly)
1073 1077                  return (0);
1074 1078  
                   /* Step 2: apply the value; conn_lock is held until the end */
1075 1079          mutex_enter(&connp->conn_lock);
1076 1080          /* Here we set the actual option value */
1077 1081          switch (name) {
1078 1082          case SO_DEBUG:
1079 1083                  connp->conn_debug = onoff;
1080 1084                  break;
1081 1085          case SO_KEEPALIVE:
1082 1086                  connp->conn_keepalive = onoff;
1083 1087                  break;
1084 1088          case SO_LINGER: {
                           /*
                            * The argument is a struct linger rather than an int.
                            * Turning linger off also zeroes the stored linger time.
                            */
1085 1089                  struct linger *lgr = (struct linger *)invalp;
1086 1090  
1087 1091                  if (lgr->l_onoff) {
1088 1092                          connp->conn_linger = 1;
1089 1093                          connp->conn_lingertime = lgr->l_linger;
1090 1094                  } else {
1091 1095                          connp->conn_linger = 0;
1092 1096                          connp->conn_lingertime = 0;
1093 1097                  }
1094 1098                  break;
1095 1099          }
1096 1100          case SO_OOBINLINE:
1097 1101                  connp->conn_oobinline = onoff;
1098 1102                  coa->coa_changed |= COA_OOBINLINE_CHANGED;
1099 1103                  break;
1100 1104          case SO_REUSEADDR:
1101 1105                  connp->conn_reuseaddr = onoff;
1102 1106                  break;
1103 1107          case SO_DONTROUTE:
                           /* Kept as a flag in the transmit attributes, not in connp */
1104 1108                  if (onoff)
1105 1109                          ixa->ixa_flags |= IXAF_DONTROUTE;
1106 1110                  else
1107 1111                          ixa->ixa_flags &= ~IXAF_DONTROUTE;
1108 1112                  coa->coa_changed |= COA_ROUTE_CHANGED;
1109 1113                  break;
1110 1114          case SO_USELOOPBACK:
1111 1115                  connp->conn_useloopback = onoff;
1112 1116                  break;
1113 1117          case SO_BROADCAST:
1114 1118                  connp->conn_broadcast = onoff;
1115 1119                  break;
1116 1120          case SO_SNDBUF:
1117 1121                  /* ULP has range checked the value */
1118 1122                  connp->conn_sndbuf = *i1;
1119 1123                  coa->coa_changed |= COA_SNDBUF_CHANGED;
1120 1124                  break;
1121 1125          case SO_RCVBUF:
1122 1126                  /* ULP has range checked the value */
1123 1127                  connp->conn_rcvbuf = *i1;
1124 1128                  coa->coa_changed |= COA_RCVBUF_CHANGED;
1125 1129                  break;
1126 1130          case SO_RCVTIMEO:
1127 1131          case SO_SNDTIMEO:
1128 1132                  /*
1129 1133                   * Pass these two options through for third-party
1130 1134                   * protocol usage. Nothing is stored here.
1131 1135                   */
1132 1136                  break;
1133 1137          case SO_DGRAM_ERRIND:
1134 1138                  connp->conn_dgram_errind = onoff;
1135 1139                  break;
1136 1140          case SO_RECVUCRED:
1137 1141                  connp->conn_recv_ancillary.crb_recvucred = onoff;
1138 1142                  break;
1139 1143          case SO_ALLZONES:
                           /*
                            * ixa_zoneid follows the flag: ALL_ZONES when on,
                            * otherwise the conn's own conn_zoneid.
                            */
1140 1144                  connp->conn_allzones = onoff;
1141 1145                  coa->coa_changed |= COA_ROUTE_CHANGED;
1142 1146                  if (onoff)
1143 1147                          ixa->ixa_zoneid = ALL_ZONES;
1144 1148                  else
1145 1149                          ixa->ixa_zoneid = connp->conn_zoneid;
1146 1150                  break;
1147 1151          case SO_TIMESTAMP:
1148 1152                  connp->conn_recv_ancillary.crb_timestamp = onoff;
1149 1153                  break;
1150 1154          case SO_VRRP:
1151 1155                  connp->conn_isvrrp = onoff;
1152 1156                  break;
1153 1157          case SO_ANON_MLP:
1154 1158                  connp->conn_anon_mlp = onoff;
1155 1159                  break;
                   /*
                    * SO_MAC_EXEMPT and SO_MAC_IMPLICIT share conn_mac_mode. Turning
                    * either one off resets it to CONN_MAC_DEFAULT, whichever mode
                    * was in effect before.
                    */
1156 1160          case SO_MAC_EXEMPT:
1157 1161                  connp->conn_mac_mode = onoff ?
1158 1162                      CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1159 1163                  break;
1160 1164          case SO_MAC_IMPLICIT:
1161 1165                  connp->conn_mac_mode = onoff ?
1162 1166                      CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1163 1167                  break;
1164 1168          case SO_EXCLBIND:
1165 1169                  connp->conn_exclbind = onoff;
1166 1170                  break;
1167 1171          }
1168 1172          mutex_exit(&connp->conn_lock);
1169 1173          return (0);
1170 1174  }
1171 1175  
1172 1176  /* Handle IPPROTO_IP */
1173 1177  static int
1174 1178  conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1175 1179      uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1176 1180  {
1177 1181          conn_t          *connp = coa->coa_connp;
1178 1182          ip_xmit_attr_t  *ixa = coa->coa_ixa;
  
    | 
      ↓ open down ↓ | 
    547 lines elided | 
    
      ↑ open up ↑ | 
  
1179 1183          ip_pkt_t        *ipp = coa->coa_ipp;
1180 1184          int             *i1 = (int *)invalp;
1181 1185          boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1182 1186          ipaddr_t        addr = (ipaddr_t)*i1;
1183 1187          uint_t          ifindex;
1184 1188          zoneid_t        zoneid = IPCL_ZONEID(connp);
1185 1189          ipif_t          *ipif;
1186 1190          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1187 1191          int             error;
1188 1192  
1189      -        if (connp->conn_family != AF_INET)
     1193 +        if (connp->conn_family == AF_INET6 &&
     1194 +            connp->conn_ipversion == IPV4_VERSION) {
     1195 +                /*
     1196 +                 * Allow certain IPv4 options to be set on an AF_INET6 socket
     1197 +                 * if the connection is still IPv4.
     1198 +                 */
     1199 +                switch (name) {
     1200 +                case IP_TOS:
     1201 +                case T_IP_TOS:
     1202 +                case IP_TTL:
     1203 +                case IP_DONTFRAG:
     1204 +                        break;
     1205 +                default:
     1206 +                        return (EINVAL);
     1207 +                }
     1208 +        } else if (connp->conn_family != AF_INET) {
1190 1209                  return (EINVAL);
     1210 +        }
1191 1211  
1192 1212          switch (name) {
1193 1213          case IP_TTL:
1194 1214                  /* Don't allow zero */
1195 1215                  if (*i1 < 1 || *i1 > 255)
1196 1216                          return (EINVAL);
1197 1217                  break;
1198 1218          case IP_MULTICAST_IF:
1199 1219                  if (addr == INADDR_ANY) {
1200 1220                          /* Clear */
1201 1221                          ifindex = 0;
1202 1222                          break;
1203 1223                  }
1204 1224                  ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1205 1225                  if (ipif == NULL)
1206 1226                          return (EHOSTUNREACH);
1207 1227                  /* not supported by the virtual network iface */
1208 1228                  if (IS_VNI(ipif->ipif_ill)) {
1209 1229                          ipif_refrele(ipif);
1210 1230                          return (EINVAL);
1211 1231                  }
1212 1232                  ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1213 1233                  ipif_refrele(ipif);
1214 1234                  break;
1215 1235          case IP_NEXTHOP: {
1216 1236                  ire_t   *ire;
1217 1237  
1218 1238                  if (addr == INADDR_ANY) {
1219 1239                          /* Clear */
1220 1240                          break;
1221 1241                  }
1222 1242                  /* Verify that the next-hop is on-link */
1223 1243                  ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1224 1244                      NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1225 1245                  if (ire == NULL)
1226 1246                          return (EHOSTUNREACH);
1227 1247                  ire_refrele(ire);
1228 1248                  break;
1229 1249          }
1230 1250          case IP_OPTIONS:
1231 1251          case T_IP_OPTIONS: {
1232 1252                  uint_t newlen;
1233 1253  
1234 1254                  if (ipp->ipp_fields & IPPF_LABEL_V4)
1235 1255                          newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1236 1256                  else
1237 1257                          newlen = inlen;
1238 1258                  if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1239 1259                          return (EINVAL);
1240 1260                  }
1241 1261                  break;
1242 1262          }
1243 1263          case IP_PKTINFO: {
1244 1264                  struct in_pktinfo *pktinfo;
1245 1265  
1246 1266                  /* Two different valid lengths */
1247 1267                  if (inlen != sizeof (int) &&
1248 1268                      inlen != sizeof (struct in_pktinfo))
1249 1269                          return (EINVAL);
1250 1270                  if (inlen == sizeof (int))
1251 1271                          break;
1252 1272  
1253 1273                  pktinfo = (struct in_pktinfo *)invalp;
1254 1274                  if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1255 1275                          switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1256 1276                              zoneid, ipst, B_FALSE)) {
1257 1277                          case IPVL_UNICAST_UP:
1258 1278                          case IPVL_UNICAST_DOWN:
1259 1279                                  break;
1260 1280                          default:
1261 1281                                  return (EADDRNOTAVAIL);
1262 1282                          }
1263 1283                  }
1264 1284                  if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1265 1285                      B_FALSE, ipst))
1266 1286                          return (ENXIO);
1267 1287                  break;
1268 1288          }
1269 1289          case IP_BOUND_IF:
1270 1290                  ifindex = *(uint_t *)i1;
1271 1291  
1272 1292                  /* Just check it is ok. */
1273 1293                  if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1274 1294                          return (ENXIO);
1275 1295                  break;
1276 1296          }
1277 1297          if (checkonly)
1278 1298                  return (0);
1279 1299  
1280 1300          /* Here we set the actual option value */
1281 1301          /*
1282 1302           * conn_lock protects the bitfields, and is used to
1283 1303           * set the fields atomically. Not needed for ixa settings since
1284 1304           * the caller has an exclusive copy of the ixa.
1285 1305           * We can not hold conn_lock across the multicast options though.
1286 1306           */
1287 1307          switch (name) {
1288 1308          case IP_OPTIONS:
1289 1309          case T_IP_OPTIONS:
1290 1310                  /* Save options for use by IP. */
1291 1311                  mutex_enter(&connp->conn_lock);
1292 1312                  error = optcom_pkt_set(invalp, inlen,
1293 1313                      (uchar_t **)&ipp->ipp_ipv4_options,
1294 1314                      &ipp->ipp_ipv4_options_len);
1295 1315                  if (error != 0) {
1296 1316                          mutex_exit(&connp->conn_lock);
1297 1317                          return (error);
1298 1318                  }
1299 1319                  if (ipp->ipp_ipv4_options_len == 0) {
1300 1320                          ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1301 1321                  } else {
1302 1322                          ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1303 1323                  }
1304 1324                  mutex_exit(&connp->conn_lock);
1305 1325                  coa->coa_changed |= COA_HEADER_CHANGED;
1306 1326                  coa->coa_changed |= COA_WROFF_CHANGED;
1307 1327                  break;
1308 1328  
1309 1329          case IP_TTL:
1310 1330                  mutex_enter(&connp->conn_lock);
1311 1331                  ipp->ipp_unicast_hops = *i1;
1312 1332                  mutex_exit(&connp->conn_lock);
1313 1333                  coa->coa_changed |= COA_HEADER_CHANGED;
1314 1334                  break;
1315 1335          case IP_TOS:
1316 1336          case T_IP_TOS:
1317 1337                  mutex_enter(&connp->conn_lock);
1318 1338                  if (*i1 == -1) {
1319 1339                          ipp->ipp_type_of_service = 0;
1320 1340                  } else {
1321 1341                          ipp->ipp_type_of_service = *i1;
1322 1342                  }
1323 1343                  mutex_exit(&connp->conn_lock);
1324 1344                  coa->coa_changed |= COA_HEADER_CHANGED;
1325 1345                  break;
1326 1346          case IP_MULTICAST_IF:
1327 1347                  ixa->ixa_multicast_ifindex = ifindex;
1328 1348                  ixa->ixa_multicast_ifaddr = addr;
1329 1349                  coa->coa_changed |= COA_ROUTE_CHANGED;
1330 1350                  break;
1331 1351          case IP_MULTICAST_TTL:
1332 1352                  ixa->ixa_multicast_ttl = *invalp;
1333 1353                  /* Handled automatically by ip_output */
1334 1354                  break;
1335 1355          case IP_MULTICAST_LOOP:
1336 1356                  if (*invalp != 0)
1337 1357                          ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1338 1358                  else
1339 1359                          ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1340 1360                  /* Handled automatically by ip_output */
1341 1361                  break;
1342 1362          case IP_RECVOPTS:
1343 1363                  mutex_enter(&connp->conn_lock);
1344 1364                  connp->conn_recv_ancillary.crb_recvopts = onoff;
1345 1365                  mutex_exit(&connp->conn_lock);
1346 1366                  break;
1347 1367          case IP_RECVDSTADDR:
1348 1368                  mutex_enter(&connp->conn_lock);
1349 1369                  connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1350 1370                  mutex_exit(&connp->conn_lock);
1351 1371                  break;
1352 1372          case IP_RECVIF:
1353 1373                  mutex_enter(&connp->conn_lock);
1354 1374                  connp->conn_recv_ancillary.crb_recvif = onoff;
1355 1375                  mutex_exit(&connp->conn_lock);
1356 1376                  break;
1357 1377          case IP_RECVSLLA:
1358 1378                  mutex_enter(&connp->conn_lock);
1359 1379                  connp->conn_recv_ancillary.crb_recvslla = onoff;
1360 1380                  mutex_exit(&connp->conn_lock);
1361 1381                  break;
1362 1382          case IP_RECVTTL:
1363 1383                  mutex_enter(&connp->conn_lock);
1364 1384                  connp->conn_recv_ancillary.crb_recvttl = onoff;
1365 1385                  mutex_exit(&connp->conn_lock);
1366 1386                  break;
1367 1387          case IP_PKTINFO: {
1368 1388                  /*
1369 1389                   * This also handles IP_RECVPKTINFO.
1370 1390                   * IP_PKTINFO and IP_RECVPKTINFO have same value.
1371 1391                   * Differentiation is based on the size of the
1372 1392                   * argument passed in.
1373 1393                   */
1374 1394                  struct in_pktinfo *pktinfo;
1375 1395  
1376 1396                  if (inlen == sizeof (int)) {
1377 1397                          /* This is IP_RECVPKTINFO option. */
1378 1398                          mutex_enter(&connp->conn_lock);
1379 1399                          connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1380 1400                              onoff;
1381 1401                          mutex_exit(&connp->conn_lock);
1382 1402                          break;
1383 1403                  }
1384 1404  
1385 1405                  /* This is IP_PKTINFO option. */
1386 1406                  mutex_enter(&connp->conn_lock);
1387 1407                  pktinfo = (struct in_pktinfo *)invalp;
1388 1408                  if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1389 1409                          ipp->ipp_fields |= IPPF_ADDR;
1390 1410                          IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1391 1411                              &ipp->ipp_addr);
1392 1412                  } else {
1393 1413                          ipp->ipp_fields &= ~IPPF_ADDR;
1394 1414                          ipp->ipp_addr = ipv6_all_zeros;
1395 1415                  }
1396 1416                  mutex_exit(&connp->conn_lock);
1397 1417                  ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1398 1418                  coa->coa_changed |= COA_ROUTE_CHANGED;
1399 1419                  coa->coa_changed |= COA_HEADER_CHANGED;
1400 1420                  break;
1401 1421          }
1402 1422          case IP_DONTFRAG:
1403 1423                  if (onoff) {
1404 1424                          ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1405 1425                          ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1406 1426                  } else {
1407 1427                          ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1408 1428                          ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1409 1429                  }
1410 1430                  /* Need to redo ip_attr_connect */
1411 1431                  coa->coa_changed |= COA_ROUTE_CHANGED;
1412 1432                  break;
1413 1433          case IP_ADD_MEMBERSHIP:
1414 1434          case IP_DROP_MEMBERSHIP:
1415 1435          case MCAST_JOIN_GROUP:
1416 1436          case MCAST_LEAVE_GROUP:
1417 1437                  return (ip_opt_set_multicast_group(connp, name,
1418 1438                      invalp, B_FALSE, checkonly));
1419 1439  
1420 1440          case IP_BLOCK_SOURCE:
1421 1441          case IP_UNBLOCK_SOURCE:
1422 1442          case IP_ADD_SOURCE_MEMBERSHIP:
1423 1443          case IP_DROP_SOURCE_MEMBERSHIP:
1424 1444          case MCAST_BLOCK_SOURCE:
1425 1445          case MCAST_UNBLOCK_SOURCE:
1426 1446          case MCAST_JOIN_SOURCE_GROUP:
1427 1447          case MCAST_LEAVE_SOURCE_GROUP:
1428 1448                  return (ip_opt_set_multicast_sources(connp, name,
1429 1449                      invalp, B_FALSE, checkonly));
1430 1450  
1431 1451          case IP_SEC_OPT:
1432 1452                  mutex_enter(&connp->conn_lock);
1433 1453                  error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1434 1454                  mutex_exit(&connp->conn_lock);
1435 1455                  if (error != 0) {
1436 1456                          return (error);
1437 1457                  }
1438 1458                  /* This is an IPsec policy change - redo ip_attr_connect */
1439 1459                  coa->coa_changed |= COA_ROUTE_CHANGED;
1440 1460                  break;
1441 1461          case IP_NEXTHOP:
1442 1462                  ixa->ixa_nexthop_v4 = addr;
1443 1463                  if (addr != INADDR_ANY)
1444 1464                          ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1445 1465                  else
1446 1466                          ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1447 1467                  coa->coa_changed |= COA_ROUTE_CHANGED;
1448 1468                  break;
1449 1469  
1450 1470          case IP_BOUND_IF:
1451 1471                  ixa->ixa_ifindex = ifindex;             /* Send */
1452 1472                  mutex_enter(&connp->conn_lock);
1453 1473                  connp->conn_incoming_ifindex = ifindex; /* Receive */
1454 1474                  connp->conn_bound_if = ifindex;         /* getsockopt */
1455 1475                  mutex_exit(&connp->conn_lock);
1456 1476                  coa->coa_changed |= COA_ROUTE_CHANGED;
1457 1477                  break;
1458 1478          case IP_UNSPEC_SRC:
1459 1479                  mutex_enter(&connp->conn_lock);
1460 1480                  connp->conn_unspec_src = onoff;
1461 1481                  if (onoff)
1462 1482                          ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1463 1483                  else
1464 1484                          ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1465 1485  
1466 1486                  mutex_exit(&connp->conn_lock);
1467 1487                  break;
1468 1488          case IP_BROADCAST_TTL:
1469 1489                  ixa->ixa_broadcast_ttl = *invalp;
1470 1490                  ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1471 1491                  /* Handled automatically by ip_output */
1472 1492                  break;
1473 1493          case MRT_INIT:
1474 1494          case MRT_DONE:
1475 1495          case MRT_ADD_VIF:
1476 1496          case MRT_DEL_VIF:
1477 1497          case MRT_ADD_MFC:
1478 1498          case MRT_DEL_MFC:
1479 1499          case MRT_ASSERT:
1480 1500                  if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1481 1501                          return (error);
1482 1502                  }
1483 1503                  error = ip_mrouter_set((int)name, connp, checkonly,
1484 1504                      (uchar_t *)invalp, inlen);
1485 1505                  if (error) {
1486 1506                          return (error);
1487 1507                  }
1488 1508                  return (0);
1489 1509  
1490 1510          }
1491 1511          return (0);
1492 1512  }
1493 1513  
1494 1514  /* Handle IPPROTO_IPV6 */
1495 1515  static int
1496 1516  conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1497 1517      uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1498 1518  {
1499 1519          conn_t          *connp = coa->coa_connp;
1500 1520          ip_xmit_attr_t  *ixa = coa->coa_ixa;
1501 1521          ip_pkt_t        *ipp = coa->coa_ipp;
1502 1522          int             *i1 = (int *)invalp;
1503 1523          boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1504 1524          uint_t          ifindex;
1505 1525          zoneid_t        zoneid = IPCL_ZONEID(connp);
1506 1526          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1507 1527          int             error;
1508 1528  
1509 1529          if (connp->conn_family != AF_INET6)
1510 1530                  return (EINVAL);
1511 1531  
1512 1532          switch (name) {
1513 1533          case IPV6_MULTICAST_IF:
1514 1534                  /*
1515 1535                   * The only possible error is EINVAL.
1516 1536                   * We call this option on both V4 and V6
1517 1537                   * If both fail, then this call returns
1518 1538                   * EINVAL. If at least one of them succeeds we
1519 1539                   * return success.
1520 1540                   */
1521 1541                  ifindex = *(uint_t *)i1;
1522 1542  
1523 1543                  if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1524 1544                      !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1525 1545                          return (EINVAL);
1526 1546                  break;
1527 1547          case IPV6_UNICAST_HOPS:
1528 1548                  /* Don't allow zero. -1 means to use default */
1529 1549                  if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1530 1550                          return (EINVAL);
1531 1551                  break;
1532 1552          case IPV6_MULTICAST_HOPS:
1533 1553                  /* -1 means use default */
1534 1554                  if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1535 1555                          return (EINVAL);
1536 1556                  break;
1537 1557          case IPV6_MULTICAST_LOOP:
1538 1558                  if (*i1 != 0 && *i1 != 1)
1539 1559                          return (EINVAL);
1540 1560                  break;
1541 1561          case IPV6_BOUND_IF:
1542 1562                  ifindex = *(uint_t *)i1;
1543 1563  
1544 1564                  if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1545 1565                          return (ENXIO);
1546 1566                  break;
1547 1567          case IPV6_PKTINFO: {
1548 1568                  struct in6_pktinfo *pkti;
1549 1569                  boolean_t isv6;
1550 1570  
1551 1571                  if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1552 1572                          return (EINVAL);
1553 1573                  if (inlen == 0)
1554 1574                          break;  /* Clear values below */
1555 1575  
1556 1576                  /*
1557 1577                   * Verify the source address and ifindex. Privileged users
1558 1578                   * can use any source address.
1559 1579                   */
1560 1580                  pkti = (struct in6_pktinfo *)invalp;
1561 1581  
1562 1582                  /*
1563 1583                   * For link-local addresses we use the ipi6_ifindex when
1564 1584                   * we verify the local address.
1565 1585                   * If net_rawaccess then any source address can be used.
1566 1586                   */
1567 1587                  if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1568 1588                      secpolicy_net_rawaccess(cr) != 0) {
1569 1589                          uint_t scopeid = 0;
1570 1590                          in6_addr_t *v6src = &pkti->ipi6_addr;
1571 1591                          ipaddr_t v4src;
1572 1592                          ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1573 1593  
1574 1594                          if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1575 1595                                  IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1576 1596                                  if (v4src != INADDR_ANY) {
1577 1597                                          laddr_type = ip_laddr_verify_v4(v4src,
1578 1598                                              zoneid, ipst, B_FALSE);
1579 1599                                  }
1580 1600                          } else {
1581 1601                                  if (IN6_IS_ADDR_LINKSCOPE(v6src))
1582 1602                                          scopeid = pkti->ipi6_ifindex;
1583 1603  
1584 1604                                  laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1585 1605                                      ipst, B_FALSE, scopeid);
1586 1606                          }
1587 1607                          switch (laddr_type) {
1588 1608                          case IPVL_UNICAST_UP:
1589 1609                          case IPVL_UNICAST_DOWN:
1590 1610                                  break;
1591 1611                          default:
1592 1612                                  return (EADDRNOTAVAIL);
1593 1613                          }
1594 1614                          ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1595 1615                  } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1596 1616                          /* Allow any source */
1597 1617                          ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1598 1618                  }
1599 1619                  isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1600 1620                  if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1601 1621                      ipst))
1602 1622                          return (ENXIO);
1603 1623                  break;
1604 1624          }
1605 1625          case IPV6_HOPLIMIT:
1606 1626                  /* It is only allowed as ancillary data */
1607 1627                  if (!coa->coa_ancillary)
1608 1628                          return (EINVAL);
1609 1629  
1610 1630                  if (inlen != 0 && inlen != sizeof (int))
1611 1631                          return (EINVAL);
1612 1632                  if (inlen == sizeof (int)) {
1613 1633                          if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1614 1634                                  return (EINVAL);
1615 1635                  }
1616 1636                  break;
1617 1637          case IPV6_TCLASS:
1618 1638                  if (inlen != 0 && inlen != sizeof (int))
1619 1639                          return (EINVAL);
1620 1640                  if (inlen == sizeof (int)) {
1621 1641                          if (*i1 > 255 || *i1 < -1)
1622 1642                                  return (EINVAL);
1623 1643                  }
1624 1644                  break;
1625 1645          case IPV6_NEXTHOP:
1626 1646                  if (inlen != 0 && inlen != sizeof (sin6_t))
1627 1647                          return (EINVAL);
1628 1648                  if (inlen == sizeof (sin6_t)) {
1629 1649                          sin6_t *sin6 = (sin6_t *)invalp;
1630 1650                          ire_t   *ire;
1631 1651  
1632 1652                          if (sin6->sin6_family != AF_INET6)
1633 1653                                  return (EAFNOSUPPORT);
1634 1654                          if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1635 1655                                  return (EADDRNOTAVAIL);
1636 1656  
1637 1657                          /* Verify that the next-hop is on-link */
1638 1658                          ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1639 1659                              0, 0, IRE_ONLINK, NULL, zoneid,
1640 1660                              NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1641 1661                          if (ire == NULL)
1642 1662                                  return (EHOSTUNREACH);
1643 1663                          ire_refrele(ire);
1644 1664                          break;
1645 1665                  }
1646 1666                  break;
1647 1667          case IPV6_RTHDR:
1648 1668          case IPV6_DSTOPTS:
1649 1669          case IPV6_RTHDRDSTOPTS:
1650 1670          case IPV6_HOPOPTS: {
1651 1671                  /* All have the length field in the same place */
1652 1672                  ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1653 1673                  /*
1654 1674                   * Sanity checks - minimum size, size a multiple of
1655 1675                   * eight bytes, and matching size passed in.
1656 1676                   */
1657 1677                  if (inlen != 0 &&
1658 1678                      inlen != (8 * (hopts->ip6h_len + 1)))
1659 1679                          return (EINVAL);
1660 1680                  break;
1661 1681          }
1662 1682          case IPV6_PATHMTU:
1663 1683                  /* Can't be set */
1664 1684                  return (EINVAL);
1665 1685  
1666 1686          case IPV6_USE_MIN_MTU:
1667 1687                  if (inlen != sizeof (int))
1668 1688                          return (EINVAL);
1669 1689                  if (*i1 < -1 || *i1 > 1)
1670 1690                          return (EINVAL);
1671 1691                  break;
1672 1692          case IPV6_SRC_PREFERENCES:
1673 1693                  if (inlen != sizeof (uint32_t))
1674 1694                          return (EINVAL);
1675 1695                  break;
1676 1696          case IPV6_V6ONLY:
1677 1697                  if (*i1 < 0 || *i1 > 1) {
1678 1698                          return (EINVAL);
1679 1699                  }
1680 1700                  break;
1681 1701          }
1682 1702          if (checkonly)
1683 1703                  return (0);
1684 1704  
1685 1705          /* Here we set the actual option value */
1686 1706          /*
1687 1707           * conn_lock protects the bitfields, and is used to
1688 1708           * set the fields atomically. Not needed for ixa settings since
1689 1709           * the caller has an exclusive copy of the ixa.
1690 1710           * We can not hold conn_lock across the multicast options though.
1691 1711           */
1692 1712          ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1693 1713          switch (name) {
1694 1714          case IPV6_MULTICAST_IF:
1695 1715                  ixa->ixa_multicast_ifindex = ifindex;
1696 1716                  /* Need to redo ip_attr_connect */
1697 1717                  coa->coa_changed |= COA_ROUTE_CHANGED;
1698 1718                  break;
1699 1719          case IPV6_UNICAST_HOPS:
1700 1720                  /* -1 means use default */
1701 1721                  mutex_enter(&connp->conn_lock);
1702 1722                  if (*i1 == -1) {
1703 1723                          ipp->ipp_unicast_hops = connp->conn_default_ttl;
1704 1724                  } else {
1705 1725                          ipp->ipp_unicast_hops = (uint8_t)*i1;
1706 1726                  }
1707 1727                  mutex_exit(&connp->conn_lock);
1708 1728                  coa->coa_changed |= COA_HEADER_CHANGED;
1709 1729                  break;
1710 1730          case IPV6_MULTICAST_HOPS:
1711 1731                  /* -1 means use default */
1712 1732                  if (*i1 == -1) {
1713 1733                          ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1714 1734                  } else {
1715 1735                          ixa->ixa_multicast_ttl = (uint8_t)*i1;
1716 1736                  }
1717 1737                  /* Handled automatically by ip_output */
1718 1738                  break;
1719 1739          case IPV6_MULTICAST_LOOP:
1720 1740                  if (*i1 != 0)
1721 1741                          ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1722 1742                  else
1723 1743                          ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1724 1744                  /* Handled automatically by ip_output */
1725 1745                  break;
1726 1746          case IPV6_JOIN_GROUP:
1727 1747          case IPV6_LEAVE_GROUP:
1728 1748          case MCAST_JOIN_GROUP:
1729 1749          case MCAST_LEAVE_GROUP:
1730 1750                  return (ip_opt_set_multicast_group(connp, name,
1731 1751                      invalp, B_TRUE, checkonly));
1732 1752  
1733 1753          case MCAST_BLOCK_SOURCE:
1734 1754          case MCAST_UNBLOCK_SOURCE:
1735 1755          case MCAST_JOIN_SOURCE_GROUP:
1736 1756          case MCAST_LEAVE_SOURCE_GROUP:
1737 1757                  return (ip_opt_set_multicast_sources(connp, name,
1738 1758                      invalp, B_TRUE, checkonly));
1739 1759  
1740 1760          case IPV6_BOUND_IF:
1741 1761                  ixa->ixa_ifindex = ifindex;             /* Send */
1742 1762                  mutex_enter(&connp->conn_lock);
1743 1763                  connp->conn_incoming_ifindex = ifindex; /* Receive */
1744 1764                  connp->conn_bound_if = ifindex;         /* getsockopt */
1745 1765                  mutex_exit(&connp->conn_lock);
1746 1766                  coa->coa_changed |= COA_ROUTE_CHANGED;
1747 1767                  break;
1748 1768          case IPV6_UNSPEC_SRC:
1749 1769                  mutex_enter(&connp->conn_lock);
1750 1770                  connp->conn_unspec_src = onoff;
1751 1771                  if (onoff)
1752 1772                          ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1753 1773                  else
1754 1774                          ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1755 1775                  mutex_exit(&connp->conn_lock);
1756 1776                  break;
1757 1777          case IPV6_RECVPKTINFO:
1758 1778                  mutex_enter(&connp->conn_lock);
1759 1779                  connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1760 1780                  mutex_exit(&connp->conn_lock);
1761 1781                  break;
1762 1782          case IPV6_RECVTCLASS:
1763 1783                  mutex_enter(&connp->conn_lock);
1764 1784                  connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1765 1785                  mutex_exit(&connp->conn_lock);
1766 1786                  break;
1767 1787          case IPV6_RECVPATHMTU:
1768 1788                  mutex_enter(&connp->conn_lock);
1769 1789                  connp->conn_ipv6_recvpathmtu = onoff;
1770 1790                  mutex_exit(&connp->conn_lock);
1771 1791                  break;
1772 1792          case IPV6_RECVHOPLIMIT:
1773 1793                  mutex_enter(&connp->conn_lock);
1774 1794                  connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1775 1795                      onoff;
1776 1796                  mutex_exit(&connp->conn_lock);
1777 1797                  break;
1778 1798          case IPV6_RECVHOPOPTS:
1779 1799                  mutex_enter(&connp->conn_lock);
1780 1800                  connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1781 1801                  mutex_exit(&connp->conn_lock);
1782 1802                  break;
1783 1803          case IPV6_RECVDSTOPTS:
1784 1804                  mutex_enter(&connp->conn_lock);
1785 1805                  connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1786 1806                  mutex_exit(&connp->conn_lock);
1787 1807                  break;
1788 1808          case _OLD_IPV6_RECVDSTOPTS:
1789 1809                  mutex_enter(&connp->conn_lock);
1790 1810                  connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1791 1811                      onoff;
1792 1812                  mutex_exit(&connp->conn_lock);
1793 1813                  break;
1794 1814          case IPV6_RECVRTHDRDSTOPTS:
1795 1815                  mutex_enter(&connp->conn_lock);
1796 1816                  connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1797 1817                      onoff;
1798 1818                  mutex_exit(&connp->conn_lock);
1799 1819                  break;
1800 1820          case IPV6_RECVRTHDR:
1801 1821                  mutex_enter(&connp->conn_lock);
1802 1822                  connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1803 1823                  mutex_exit(&connp->conn_lock);
1804 1824                  break;
1805 1825          case IPV6_PKTINFO:
1806 1826                  mutex_enter(&connp->conn_lock);
1807 1827                  if (inlen == 0) {
1808 1828                          ipp->ipp_fields &= ~IPPF_ADDR;
1809 1829                          ipp->ipp_addr = ipv6_all_zeros;
1810 1830                          ixa->ixa_ifindex = 0;
1811 1831                  } else {
1812 1832                          struct in6_pktinfo *pkti;
1813 1833  
1814 1834                          pkti = (struct in6_pktinfo *)invalp;
1815 1835                          ipp->ipp_addr = pkti->ipi6_addr;
1816 1836                          if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1817 1837                                  ipp->ipp_fields |= IPPF_ADDR;
1818 1838                          else
1819 1839                                  ipp->ipp_fields &= ~IPPF_ADDR;
1820 1840                          ixa->ixa_ifindex = pkti->ipi6_ifindex;
1821 1841                  }
1822 1842                  mutex_exit(&connp->conn_lock);
1823 1843                  /* Source and ifindex might have changed */
1824 1844                  coa->coa_changed |= COA_HEADER_CHANGED;
1825 1845                  coa->coa_changed |= COA_ROUTE_CHANGED;
1826 1846                  break;
1827 1847          case IPV6_HOPLIMIT:
1828 1848                  mutex_enter(&connp->conn_lock);
1829 1849                  if (inlen == 0 || *i1 == -1) {
1830 1850                          /* Revert to default */
1831 1851                          ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1832 1852                          ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1833 1853                  } else {
1834 1854                          ipp->ipp_hoplimit = *i1;
1835 1855                          ipp->ipp_fields |= IPPF_HOPLIMIT;
1836 1856                          /* Ensure that it sticks for multicast packets */
1837 1857                          ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1838 1858                  }
1839 1859                  mutex_exit(&connp->conn_lock);
1840 1860                  coa->coa_changed |= COA_HEADER_CHANGED;
1841 1861                  break;
1842 1862          case IPV6_TCLASS:
1843 1863                  /*
1844 1864                   * IPV6_TCLASS accepts -1 as use kernel default
1845 1865                   * and [0, 255] as the actual traffic class.
1846 1866                   */
1847 1867                  mutex_enter(&connp->conn_lock);
1848 1868                  if (inlen == 0 || *i1 == -1) {
1849 1869                          ipp->ipp_tclass = 0;
1850 1870                          ipp->ipp_fields &= ~IPPF_TCLASS;
1851 1871                  } else {
1852 1872                          ipp->ipp_tclass = *i1;
1853 1873                          ipp->ipp_fields |= IPPF_TCLASS;
1854 1874                  }
1855 1875                  mutex_exit(&connp->conn_lock);
1856 1876                  coa->coa_changed |= COA_HEADER_CHANGED;
1857 1877                  break;
1858 1878          case IPV6_NEXTHOP:
1859 1879                  if (inlen == 0) {
1860 1880                          ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1861 1881                  } else {
1862 1882                          sin6_t *sin6 = (sin6_t *)invalp;
1863 1883  
1864 1884                          ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1865 1885                          if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1866 1886                                  ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1867 1887                          else
1868 1888                                  ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1869 1889                  }
1870 1890                  coa->coa_changed |= COA_ROUTE_CHANGED;
1871 1891                  break;
1872 1892          case IPV6_HOPOPTS:
1873 1893                  mutex_enter(&connp->conn_lock);
1874 1894                  error = optcom_pkt_set(invalp, inlen,
1875 1895                      (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1876 1896                  if (error != 0) {
1877 1897                          mutex_exit(&connp->conn_lock);
1878 1898                          return (error);
1879 1899                  }
1880 1900                  if (ipp->ipp_hopoptslen == 0) {
1881 1901                          ipp->ipp_fields &= ~IPPF_HOPOPTS;
1882 1902                  } else {
1883 1903                          ipp->ipp_fields |= IPPF_HOPOPTS;
1884 1904                  }
1885 1905                  mutex_exit(&connp->conn_lock);
1886 1906                  coa->coa_changed |= COA_HEADER_CHANGED;
1887 1907                  coa->coa_changed |= COA_WROFF_CHANGED;
1888 1908                  break;
1889 1909          case IPV6_RTHDRDSTOPTS:
1890 1910                  mutex_enter(&connp->conn_lock);
1891 1911                  error = optcom_pkt_set(invalp, inlen,
1892 1912                      (uchar_t **)&ipp->ipp_rthdrdstopts,
1893 1913                      &ipp->ipp_rthdrdstoptslen);
1894 1914                  if (error != 0) {
1895 1915                          mutex_exit(&connp->conn_lock);
1896 1916                          return (error);
1897 1917                  }
1898 1918                  if (ipp->ipp_rthdrdstoptslen == 0) {
1899 1919                          ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1900 1920                  } else {
1901 1921                          ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1902 1922                  }
1903 1923                  mutex_exit(&connp->conn_lock);
1904 1924                  coa->coa_changed |= COA_HEADER_CHANGED;
1905 1925                  coa->coa_changed |= COA_WROFF_CHANGED;
1906 1926                  break;
1907 1927          case IPV6_DSTOPTS:
1908 1928                  mutex_enter(&connp->conn_lock);
1909 1929                  error = optcom_pkt_set(invalp, inlen,
1910 1930                      (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1911 1931                  if (error != 0) {
1912 1932                          mutex_exit(&connp->conn_lock);
1913 1933                          return (error);
1914 1934                  }
1915 1935                  if (ipp->ipp_dstoptslen == 0) {
1916 1936                          ipp->ipp_fields &= ~IPPF_DSTOPTS;
1917 1937                  } else {
1918 1938                          ipp->ipp_fields |= IPPF_DSTOPTS;
1919 1939                  }
1920 1940                  mutex_exit(&connp->conn_lock);
1921 1941                  coa->coa_changed |= COA_HEADER_CHANGED;
1922 1942                  coa->coa_changed |= COA_WROFF_CHANGED;
1923 1943                  break;
1924 1944          case IPV6_RTHDR:
1925 1945                  mutex_enter(&connp->conn_lock);
1926 1946                  error = optcom_pkt_set(invalp, inlen,
1927 1947                      (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1928 1948                  if (error != 0) {
1929 1949                          mutex_exit(&connp->conn_lock);
1930 1950                          return (error);
1931 1951                  }
1932 1952                  if (ipp->ipp_rthdrlen == 0) {
1933 1953                          ipp->ipp_fields &= ~IPPF_RTHDR;
1934 1954                  } else {
1935 1955                          ipp->ipp_fields |= IPPF_RTHDR;
1936 1956                  }
1937 1957                  mutex_exit(&connp->conn_lock);
1938 1958                  coa->coa_changed |= COA_HEADER_CHANGED;
1939 1959                  coa->coa_changed |= COA_WROFF_CHANGED;
1940 1960                  break;
1941 1961  
1942 1962          case IPV6_DONTFRAG:
1943 1963                  if (onoff) {
1944 1964                          ixa->ixa_flags |= IXAF_DONTFRAG;
1945 1965                          ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1946 1966                  } else {
1947 1967                          ixa->ixa_flags &= ~IXAF_DONTFRAG;
1948 1968                          ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1949 1969                  }
1950 1970                  /* Need to redo ip_attr_connect */
1951 1971                  coa->coa_changed |= COA_ROUTE_CHANGED;
1952 1972                  break;
1953 1973  
1954 1974          case IPV6_USE_MIN_MTU:
1955 1975                  ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1956 1976                  ixa->ixa_use_min_mtu = *i1;
1957 1977                  /* Need to redo ip_attr_connect */
1958 1978                  coa->coa_changed |= COA_ROUTE_CHANGED;
1959 1979                  break;
1960 1980  
1961 1981          case IPV6_SEC_OPT:
1962 1982                  mutex_enter(&connp->conn_lock);
1963 1983                  error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1964 1984                  mutex_exit(&connp->conn_lock);
1965 1985                  if (error != 0) {
1966 1986                          return (error);
1967 1987                  }
1968 1988                  /* This is an IPsec policy change - redo ip_attr_connect */
1969 1989                  coa->coa_changed |= COA_ROUTE_CHANGED;
1970 1990                  break;
1971 1991          case IPV6_SRC_PREFERENCES:
1972 1992                  /*
1973 1993                   * This socket option only affects connected
1974 1994                   * sockets that haven't already bound to a specific
1975 1995                   * IPv6 address.  In other words, sockets that
1976 1996                   * don't call bind() with an address other than the
1977 1997                   * unspecified address and that call connect().
1978 1998                   * ip_set_destination_v6() passes these preferences
1979 1999                   * to the ipif_select_source_v6() function.
1980 2000                   */
1981 2001                  mutex_enter(&connp->conn_lock);
1982 2002                  error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1983 2003                  mutex_exit(&connp->conn_lock);
1984 2004                  if (error != 0) {
1985 2005                          return (error);
1986 2006                  }
1987 2007                  break;
1988 2008          case IPV6_V6ONLY:
1989 2009                  mutex_enter(&connp->conn_lock);
1990 2010                  connp->conn_ipv6_v6only = onoff;
1991 2011                  mutex_exit(&connp->conn_lock);
1992 2012                  break;
1993 2013          }
1994 2014          return (0);
1995 2015  }
1996 2016  
1997 2017  /* Handle IPPROTO_UDP */
1998 2018  /* ARGSUSED1 */
1999 2019  static int
2000 2020  conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2001 2021      uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2002 2022  {
2003 2023          conn_t          *connp = coa->coa_connp;
2004 2024          int             *i1 = (int *)invalp;
2005 2025          boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2006 2026          int             error;
2007 2027  
2008 2028          switch (name) {
2009 2029          case UDP_ANONPRIVBIND:
2010 2030                  if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2011 2031                          return (error);
2012 2032                  }
2013 2033                  break;
2014 2034          }
2015 2035          if (checkonly)
2016 2036                  return (0);
2017 2037  
2018 2038          /* Here we set the actual option value */
2019 2039          mutex_enter(&connp->conn_lock);
2020 2040          switch (name) {
2021 2041          case UDP_ANONPRIVBIND:
2022 2042                  connp->conn_anon_priv_bind = onoff;
2023 2043                  break;
2024 2044          case UDP_EXCLBIND:
2025 2045                  connp->conn_exclbind = onoff;
2026 2046                  break;
2027 2047          }
2028 2048          mutex_exit(&connp->conn_lock);
2029 2049          return (0);
2030 2050  }
2031 2051  
2032 2052  /* Handle IPPROTO_TCP */
2033 2053  /* ARGSUSED1 */
2034 2054  static int
2035 2055  conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2036 2056      uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2037 2057  {
2038 2058          conn_t          *connp = coa->coa_connp;
2039 2059          int             *i1 = (int *)invalp;
2040 2060          boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2041 2061          int             error;
2042 2062  
2043 2063          switch (name) {
2044 2064          case TCP_ANONPRIVBIND:
2045 2065                  if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2046 2066                          return (error);
2047 2067                  }
2048 2068                  break;
2049 2069          }
2050 2070          if (checkonly)
2051 2071                  return (0);
2052 2072  
2053 2073          /* Here we set the actual option value */
2054 2074          mutex_enter(&connp->conn_lock);
2055 2075          switch (name) {
2056 2076          case TCP_ANONPRIVBIND:
2057 2077                  connp->conn_anon_priv_bind = onoff;
2058 2078                  break;
2059 2079          case TCP_EXCLBIND:
2060 2080                  connp->conn_exclbind = onoff;
2061 2081                  break;
2062 2082          case TCP_RECVDSTADDR:
2063 2083                  connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2064 2084                  break;
2065 2085          }
2066 2086          mutex_exit(&connp->conn_lock);
2067 2087          return (0);
2068 2088  }
2069 2089  
2070 2090  int
2071 2091  conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2072 2092  {
2073 2093          sin_t           *sin;
2074 2094          sin6_t          *sin6;
2075 2095  
2076 2096          if (connp->conn_family == AF_INET) {
2077 2097                  if (*salenp < sizeof (sin_t))
2078 2098                          return (EINVAL);
2079 2099  
2080 2100                  *salenp = sizeof (sin_t);
2081 2101                  /* Fill zeroes and then initialize non-zero fields */
2082 2102                  sin = (sin_t *)sa;
2083 2103                  *sin = sin_null;
2084 2104                  sin->sin_family = AF_INET;
2085 2105                  if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2086 2106                      !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2087 2107                          sin->sin_addr.s_addr = connp->conn_saddr_v4;
2088 2108                  } else {
2089 2109                          /*
2090 2110                           * INADDR_ANY
2091 2111                           * conn_saddr is not set, we might be bound to
2092 2112                           * broadcast/multicast. Use conn_bound_addr as
2093 2113                           * local address instead (that could
2094 2114                           * also still be INADDR_ANY)
2095 2115                           */
2096 2116                          sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2097 2117                  }
2098 2118                  sin->sin_port = connp->conn_lport;
2099 2119          } else {
2100 2120                  if (*salenp < sizeof (sin6_t))
2101 2121                          return (EINVAL);
2102 2122  
2103 2123                  *salenp = sizeof (sin6_t);
2104 2124                  /* Fill zeroes and then initialize non-zero fields */
2105 2125                  sin6 = (sin6_t *)sa;
2106 2126                  *sin6 = sin6_null;
2107 2127                  sin6->sin6_family = AF_INET6;
2108 2128                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2109 2129                          sin6->sin6_addr = connp->conn_saddr_v6;
2110 2130                  } else {
2111 2131                          /*
2112 2132                           * conn_saddr is not set, we might be bound to
2113 2133                           * broadcast/multicast. Use conn_bound_addr as
2114 2134                           * local address instead (which could
2115 2135                           * also still be unspecified)
2116 2136                           */
2117 2137                          sin6->sin6_addr = connp->conn_bound_addr_v6;
2118 2138                  }
2119 2139                  sin6->sin6_port = connp->conn_lport;
2120 2140                  if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2121 2141                      (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2122 2142                          sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2123 2143          }
2124 2144          return (0);
2125 2145  }
2126 2146  
2127 2147  int
2128 2148  conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2129 2149  {
2130 2150          struct sockaddr_in      *sin;
2131 2151          struct sockaddr_in6     *sin6;
2132 2152  
2133 2153          if (connp->conn_family == AF_INET) {
2134 2154                  if (*salenp < sizeof (sin_t))
2135 2155                          return (EINVAL);
2136 2156  
2137 2157                  *salenp = sizeof (sin_t);
2138 2158                  /* initialize */
2139 2159                  sin = (sin_t *)sa;
2140 2160                  *sin = sin_null;
2141 2161                  sin->sin_family = AF_INET;
2142 2162                  sin->sin_addr.s_addr = connp->conn_faddr_v4;
2143 2163                  sin->sin_port = connp->conn_fport;
2144 2164          } else {
2145 2165                  if (*salenp < sizeof (sin6_t))
2146 2166                          return (EINVAL);
2147 2167  
2148 2168                  *salenp = sizeof (sin6_t);
2149 2169                  /* initialize */
2150 2170                  sin6 = (sin6_t *)sa;
2151 2171                  *sin6 = sin6_null;
2152 2172                  sin6->sin6_family = AF_INET6;
2153 2173                  sin6->sin6_addr = connp->conn_faddr_v6;
2154 2174                  sin6->sin6_port =  connp->conn_fport;
2155 2175                  sin6->sin6_flowinfo = connp->conn_flowinfo;
2156 2176                  if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2157 2177                      (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2158 2178                          sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2159 2179          }
2160 2180          return (0);
2161 2181  }
2162 2182  
2163 2183  static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2164 2184  static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2165 2185  
2166 2186  /*
2167 2187   * Allocate and fill in conn_ht_iphc based on the current information
2168 2188   * in the conn.
2169 2189   * Normally used when we bind() and connect().
2170 2190   * Returns failure if can't allocate memory, or if there is a problem
2171 2191   * with a routing header/option.
2172 2192   *
2173 2193   * We allocate space for the transport header (ulp_hdr_len + extra) and
2174 2194   * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2175 2195   * The extra is there for transports that want some spare room for future
2176 2196   * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2177 2197   * excludes the extra part.
2178 2198   *
2179 2199   * We massage an routing option/header and store the ckecksum difference
2180 2200   * in conn_sum.
2181 2201   *
2182 2202   * Caller needs to update conn_wroff if desired.
2183 2203   */
2184 2204  int
2185 2205  conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2186 2206      const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2187 2207  {
2188 2208          ip_xmit_attr_t  *ixa = connp->conn_ixa;
2189 2209          ip_pkt_t        *ipp = &connp->conn_xmit_ipp;
2190 2210          uint_t          ip_hdr_length;
2191 2211          uchar_t         *hdrs;
2192 2212          uint_t          hdrs_len;
2193 2213  
2194 2214          ASSERT(MUTEX_HELD(&connp->conn_lock));
2195 2215  
2196 2216          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2197 2217                  ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2198 2218                  /* In case of TX label and IP options it can be too much */
2199 2219                  if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2200 2220                          /* Preserves existing TX errno for this */
2201 2221                          return (EHOSTUNREACH);
2202 2222                  }
2203 2223          } else {
2204 2224                  ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2205 2225          }
2206 2226          ixa->ixa_ip_hdr_length = ip_hdr_length;
2207 2227          hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2208 2228          ASSERT(hdrs_len != 0);
2209 2229  
2210 2230          if (hdrs_len != connp->conn_ht_iphc_allocated) {
2211 2231                  /* Allocate new before we free any old */
2212 2232                  hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2213 2233                  if (hdrs == NULL)
2214 2234                          return (ENOMEM);
2215 2235  
2216 2236                  if (connp->conn_ht_iphc != NULL) {
2217 2237                          kmem_free(connp->conn_ht_iphc,
2218 2238                              connp->conn_ht_iphc_allocated);
2219 2239                  }
2220 2240                  connp->conn_ht_iphc = hdrs;
2221 2241                  connp->conn_ht_iphc_allocated = hdrs_len;
2222 2242          } else {
2223 2243                  hdrs = connp->conn_ht_iphc;
2224 2244          }
2225 2245          hdrs_len -= extra;
2226 2246          connp->conn_ht_iphc_len = hdrs_len;
2227 2247  
2228 2248          connp->conn_ht_ulp = hdrs + ip_hdr_length;
2229 2249          connp->conn_ht_ulp_len = ulp_hdr_length;
2230 2250  
2231 2251          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2232 2252                  ipha_t  *ipha = (ipha_t *)hdrs;
2233 2253  
2234 2254                  IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2235 2255                  IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2236 2256                  ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2237 2257                  ipha->ipha_length = htons(hdrs_len);
2238 2258                  if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2239 2259                          ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2240 2260                  else
2241 2261                          ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2242 2262  
2243 2263                  if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2244 2264                          connp->conn_sum = cksum_massage_options_v4(ipha,
2245 2265                              connp->conn_netstack);
2246 2266                  } else {
2247 2267                          connp->conn_sum = 0;
2248 2268                  }
2249 2269          } else {
2250 2270                  ip6_t   *ip6h = (ip6_t *)hdrs;
2251 2271  
2252 2272                  ip6h->ip6_src = *v6src;
2253 2273                  ip6h->ip6_dst = *v6dst;
2254 2274                  ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2255 2275                      flowinfo);
2256 2276                  ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2257 2277  
2258 2278                  if (ipp->ipp_fields & IPPF_RTHDR) {
2259 2279                          connp->conn_sum = cksum_massage_options_v6(ip6h,
2260 2280                              ip_hdr_length, connp->conn_netstack);
2261 2281  
2262 2282                          /*
2263 2283                           * Verify that the first hop isn't a mapped address.
2264 2284                           * Routers along the path need to do this verification
2265 2285                           * for subsequent hops.
2266 2286                           */
2267 2287                          if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2268 2288                                  return (EADDRNOTAVAIL);
2269 2289  
2270 2290                  } else {
2271 2291                          connp->conn_sum = 0;
2272 2292                  }
2273 2293          }
2274 2294          return (0);
2275 2295  }
2276 2296  
2277 2297  /*
2278 2298   * Prepend a header template to data_mp based on the ip_pkt_t
2279 2299   * and the passed in source, destination and protocol.
2280 2300   *
2281 2301   * Returns failure if can't allocate memory, in which case data_mp is freed.
2282 2302   * We allocate space for the transport header (ulp_hdr_len) and
2283 2303   * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2284 2304   *
2285 2305   * We massage an routing option/header and return the ckecksum difference
2286 2306   * in *sump. This is in host byte order.
2287 2307   *
2288 2308   * Caller needs to update conn_wroff if desired.
2289 2309   */
2290 2310  mblk_t *
2291 2311  conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2292 2312      const in6_addr_t *v6src, const in6_addr_t *v6dst,
2293 2313      uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2294 2314      uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2295 2315  {
2296 2316          uint_t          ip_hdr_length;
2297 2317          uchar_t         *hdrs;
2298 2318          uint_t          hdrs_len;
2299 2319          mblk_t          *mp;
2300 2320  
2301 2321          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2302 2322                  ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2303 2323                  ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2304 2324          } else {
2305 2325                  ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2306 2326          }
2307 2327          hdrs_len = ip_hdr_length + ulp_hdr_length;
2308 2328          ASSERT(hdrs_len != 0);
2309 2329  
2310 2330          ixa->ixa_ip_hdr_length = ip_hdr_length;
2311 2331  
2312 2332          /* Can we prepend to data_mp? */
2313 2333          if (data_mp != NULL &&
2314 2334              data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2315 2335              data_mp->b_datap->db_ref == 1) {
2316 2336                  hdrs = data_mp->b_rptr - hdrs_len;
2317 2337                  data_mp->b_rptr = hdrs;
2318 2338                  mp = data_mp;
2319 2339          } else {
2320 2340                  mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2321 2341                  if (mp == NULL) {
2322 2342                          freemsg(data_mp);
2323 2343                          *errorp = ENOMEM;
2324 2344                          return (NULL);
2325 2345                  }
2326 2346                  mp->b_wptr = mp->b_datap->db_lim;
2327 2347                  hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2328 2348                  mp->b_cont = data_mp;
2329 2349          }
2330 2350  
2331 2351          /*
2332 2352           * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2333 2353           * if PKTINFO (aka IPPF_ADDR) was set.
2334 2354           */
2335 2355          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2336 2356                  ipha_t *ipha = (ipha_t *)hdrs;
2337 2357  
2338 2358                  ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2339 2359                  IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2340 2360                  IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2341 2361                  ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2342 2362                  ipha->ipha_length = htons(hdrs_len + data_length);
2343 2363                  if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2344 2364                          ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2345 2365                  else
2346 2366                          ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2347 2367  
2348 2368                  if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2349 2369                          *sump = cksum_massage_options_v4(ipha,
2350 2370                              ixa->ixa_ipst->ips_netstack);
2351 2371                  } else {
2352 2372                          *sump = 0;
2353 2373                  }
2354 2374          } else {
2355 2375                  ip6_t *ip6h = (ip6_t *)hdrs;
2356 2376  
2357 2377                  ip6h->ip6_src = *v6src;
2358 2378                  ip6h->ip6_dst = *v6dst;
2359 2379                  ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2360 2380                  ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2361 2381  
2362 2382                  if (ipp->ipp_fields & IPPF_RTHDR) {
2363 2383                          *sump = cksum_massage_options_v6(ip6h,
2364 2384                              ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2365 2385  
2366 2386                          /*
2367 2387                           * Verify that the first hop isn't a mapped address.
2368 2388                           * Routers along the path need to do this verification
2369 2389                           * for subsequent hops.
2370 2390                           */
2371 2391                          if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2372 2392                                  *errorp = EADDRNOTAVAIL;
2373 2393                                  freemsg(mp);
2374 2394                                  return (NULL);
2375 2395                          }
2376 2396                  } else {
2377 2397                          *sump = 0;
2378 2398                  }
2379 2399          }
2380 2400          return (mp);
2381 2401  }
2382 2402  
2383 2403  /*
2384 2404   * Massage a source route if any putting the first hop
2385 2405   * in ipha_dst. Compute a starting value for the checksum which
2386 2406   * takes into account that the original ipha_dst should be
2387 2407   * included in the checksum but that IP will include the
2388 2408   * first hop from the source route in the tcp checksum.
2389 2409   */
2390 2410  static uint32_t
2391 2411  cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2392 2412  {
2393 2413          in_addr_t       dst;
2394 2414          uint32_t        cksum;
2395 2415  
2396 2416          /* Get last hop then diff against first hop */
2397 2417          cksum = ip_massage_options(ipha, ns);
2398 2418          cksum = (cksum & 0xFFFF) + (cksum >> 16);
2399 2419          dst = ipha->ipha_dst;
2400 2420          cksum -= ((dst >> 16) + (dst & 0xffff));
2401 2421          if ((int)cksum < 0)
2402 2422                  cksum--;
2403 2423          cksum = (cksum & 0xFFFF) + (cksum >> 16);
2404 2424          cksum = (cksum & 0xFFFF) + (cksum >> 16);
2405 2425          ASSERT(cksum < 0x10000);
2406 2426          return (ntohs(cksum));
2407 2427  }
2408 2428  
2409 2429  static uint32_t
2410 2430  cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2411 2431  {
2412 2432          uint8_t         *end;
2413 2433          ip6_rthdr_t     *rth;
2414 2434          uint32_t        cksum;
2415 2435  
2416 2436          end = (uint8_t *)ip6h + ip_hdr_len;
2417 2437          rth = ip_find_rthdr_v6(ip6h, end);
2418 2438          if (rth == NULL)
2419 2439                  return (0);
2420 2440  
2421 2441          cksum = ip_massage_options_v6(ip6h, rth, ns);
2422 2442          cksum = (cksum & 0xFFFF) + (cksum >> 16);
2423 2443          ASSERT(cksum < 0x10000);
2424 2444          return (ntohs(cksum));
2425 2445  }
2426 2446  
2427 2447  /*
2428 2448   * ULPs that change the destination address need to call this for each
2429 2449   * change to discard any state about a previous destination that might
2430 2450   * have been multicast or multirt.
2431 2451   */
2432 2452  void
2433 2453  ip_attr_newdst(ip_xmit_attr_t *ixa)
2434 2454  {
2435 2455          ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2436 2456              IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2437 2457              IXAF_NO_LOOP_ZONEID_SET);
2438 2458  }
2439 2459  
2440 2460  /*
2441 2461   * Determine the nexthop which will be used.
2442 2462   * Normally this is just the destination, but if a IPv4 source route, or
2443 2463   * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2444 2464   * there.
2445 2465   */
2446 2466  void
2447 2467  ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2448 2468      const in6_addr_t *dst, in6_addr_t *nexthop)
2449 2469  {
2450 2470          if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2451 2471                  *nexthop = *dst;
2452 2472                  return;
2453 2473          }
2454 2474          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2455 2475                  ipaddr_t v4dst;
2456 2476                  ipaddr_t v4nexthop;
2457 2477  
2458 2478                  IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2459 2479                  v4nexthop = ip_pkt_source_route_v4(ipp);
2460 2480                  if (v4nexthop == INADDR_ANY)
2461 2481                          v4nexthop = v4dst;
2462 2482  
2463 2483                  IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2464 2484          } else {
2465 2485                  const in6_addr_t *v6nexthop;
2466 2486  
2467 2487                  v6nexthop = ip_pkt_source_route_v6(ipp);
2468 2488                  if (v6nexthop == NULL)
2469 2489                          v6nexthop = dst;
2470 2490  
2471 2491                  *nexthop = *v6nexthop;
2472 2492          }
2473 2493  }
2474 2494  
2475 2495  /*
2476 2496   * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2477 2497   * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2478 2498   * case (connected latching is done in conn_connect).
2479 2499   * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2480 2500   * set, but doesn't otherwise use the conn_t.
2481 2501   *
2482 2502   * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2483 2503   * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2484 2504   *
2485 2505   * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2486 2506   * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2487 2507   *
2488 2508   * Updates laddrp and uinfo if they are non-NULL.
2489 2509   *
2490 2510   * TSOL notes: The callers if ip_attr_connect must check if the destination
2491 2511   * is different than before and in that case redo conn_update_label.
2492 2512   * The callers of conn_connect do not need that since conn_connect
2493 2513   * performs the conn_update_label.
2494 2514   */
2495 2515  int
2496 2516  ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2497 2517      const in6_addr_t *v6src, const in6_addr_t *v6dst,
2498 2518      const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2499 2519      iulp_t *uinfo, uint32_t flags)
2500 2520  {
2501 2521          in6_addr_t              laddr = *v6src;
2502 2522          int                     error;
2503 2523  
2504 2524          ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2505 2525  
2506 2526          if (connp->conn_zone_is_global)
2507 2527                  flags |= IPDF_ZONE_IS_GLOBAL;
2508 2528          else
2509 2529                  flags &= ~IPDF_ZONE_IS_GLOBAL;
2510 2530  
2511 2531          /*
2512 2532           * Lookup the route to determine a source address and the uinfo.
2513 2533           * If the ULP has a source route option then the caller will
2514 2534           * have set v6nexthop to be the first hop.
2515 2535           */
2516 2536          if (ixa->ixa_flags & IXAF_IS_IPV4) {
2517 2537                  ipaddr_t v4dst;
2518 2538                  ipaddr_t v4src, v4nexthop;
2519 2539  
2520 2540                  IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2521 2541                  IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2522 2542                  IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2523 2543  
2524 2544                  if (connp->conn_unspec_src || v4src != INADDR_ANY)
2525 2545                          flags &= ~IPDF_SELECT_SRC;
2526 2546                  else
2527 2547                          flags |= IPDF_SELECT_SRC;
2528 2548  
2529 2549                  error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2530 2550                      uinfo, flags, connp->conn_mac_mode);
2531 2551                  IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2532 2552          } else {
2533 2553                  if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2534 2554                          flags &= ~IPDF_SELECT_SRC;
2535 2555                  else
2536 2556                          flags |= IPDF_SELECT_SRC;
2537 2557  
2538 2558                  error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2539 2559                      uinfo, flags, connp->conn_mac_mode);
2540 2560          }
2541 2561          /* Pass out some address even if we hit a RTF_REJECT etc */
2542 2562          if (laddrp != NULL)
2543 2563                  *laddrp = laddr;
2544 2564  
2545 2565          if (error != 0)
2546 2566                  return (error);
2547 2567  
2548 2568          if (flags & IPDF_IPSEC) {
2549 2569                  /*
2550 2570                   * Set any IPsec policy in ixa. Routine also looks at ULP
2551 2571                   * ports.
2552 2572                   */
2553 2573                  ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2554 2574          }
2555 2575          return (0);
2556 2576  }
2557 2577  
2558 2578  /*
2559 2579   * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2560 2580   * Assumes that conn_faddr and conn_fport are already set. As such it is not
2561 2581   * usable for SCTP, since SCTP has multiple faddrs.
2562 2582   *
2563 2583   * Caller must hold conn_lock to provide atomic constency between the
2564 2584   * conn_t's addresses and the ixa.
2565 2585   * NOTE: this function drops and reaquires conn_lock since it can't be
2566 2586   * held across ip_attr_connect/ip_set_destination.
2567 2587   *
2568 2588   * The caller needs to handle inserting in the receive-side fanout when
2569 2589   * appropriate after conn_connect returns.
2570 2590   */
2571 2591  int
2572 2592  conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2573 2593  {
2574 2594          ip_xmit_attr_t  *ixa = connp->conn_ixa;
2575 2595          in6_addr_t      nexthop;
2576 2596          in6_addr_t      saddr, faddr;
2577 2597          in_port_t       fport;
2578 2598          int             error;
2579 2599  
2580 2600          ASSERT(MUTEX_HELD(&connp->conn_lock));
2581 2601  
2582 2602          if (connp->conn_ipversion == IPV4_VERSION)
2583 2603                  ixa->ixa_flags |= IXAF_IS_IPV4;
2584 2604          else
2585 2605                  ixa->ixa_flags &= ~IXAF_IS_IPV4;
2586 2606  
2587 2607          /* We do IPsec latching below - hence no caching in ip_attr_connect */
2588 2608          flags &= ~IPDF_IPSEC;
2589 2609  
2590 2610          /* In case we had previously done an ip_attr_connect */
2591 2611          ip_attr_newdst(ixa);
2592 2612  
2593 2613          /*
2594 2614           * Determine the nexthop and copy the addresses before dropping
2595 2615           * conn_lock.
2596 2616           */
2597 2617          ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2598 2618              &connp->conn_faddr_v6, &nexthop);
2599 2619          saddr = connp->conn_saddr_v6;
2600 2620          faddr = connp->conn_faddr_v6;
2601 2621          fport = connp->conn_fport;
2602 2622  
2603 2623          mutex_exit(&connp->conn_lock);
2604 2624          error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2605 2625              &saddr, uinfo, flags | IPDF_VERIFY_DST);
2606 2626          mutex_enter(&connp->conn_lock);
2607 2627  
2608 2628          /* Could have changed even if an error */
2609 2629          connp->conn_saddr_v6 = saddr;
2610 2630          if (error != 0)
2611 2631                  return (error);
2612 2632  
2613 2633          /*
2614 2634           * Check whether Trusted Solaris policy allows communication with this
2615 2635           * host, and pretend that the destination is unreachable if not.
2616 2636           * Compute any needed label and place it in ipp_label_v4/v6.
2617 2637           *
2618 2638           * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2619 2639           * the packet.
2620 2640           *
2621 2641           * TSOL Note: Any concurrent threads would pick a different ixa
2622 2642           * (and ipp if they are to change the ipp)  so we
2623 2643           * don't have to worry about concurrent threads.
2624 2644           */
2625 2645          if (is_system_labeled()) {
2626 2646                  if (connp->conn_mlp_type != mlptSingle)
2627 2647                          return (ECONNREFUSED);
2628 2648  
2629 2649                  /*
2630 2650                   * conn_update_label will set ipp_label* which will later
2631 2651                   * be used by conn_build_hdr_template.
2632 2652                   */
2633 2653                  error = conn_update_label(connp, ixa,
2634 2654                      &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2635 2655                  if (error != 0)
2636 2656                          return (error);
2637 2657          }
2638 2658  
2639 2659          /*
2640 2660           * Ensure that we match on the selected local address.
2641 2661           * This overrides conn_laddr in the case we had earlier bound to a
2642 2662           * multicast or broadcast address.
2643 2663           */
2644 2664          connp->conn_laddr_v6 = connp->conn_saddr_v6;
2645 2665  
2646 2666          /*
2647 2667           * Allow setting new policies.
2648 2668           * The addresses/ports are already set, thus the IPsec policy calls
2649 2669           * can handle their passed-in conn's.
2650 2670           */
2651 2671          connp->conn_policy_cached = B_FALSE;
2652 2672  
2653 2673          /*
2654 2674           * Cache IPsec policy in this conn.  If we have per-socket policy,
2655 2675           * we'll cache that.  If we don't, we'll inherit global policy.
2656 2676           *
2657 2677           * This is done before the caller inserts in the receive-side fanout.
2658 2678           * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2659 2679           * for connections where we don't have a policy. This is to prevent
2660 2680           * global policy lookups in the inbound path.
2661 2681           *
2662 2682           * If we insert before we set conn_policy_cached,
2663 2683           * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2664 2684           * because global policy cound be non-empty. We normally call
2665 2685           * ipsec_check_policy() for conn_policy_cached connections only if
2666 2686           * conn_in_enforce_policy is set. But in this case,
2667 2687           * conn_policy_cached can get set anytime since we made the
2668 2688           * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2669 2689           * called, which will make the above assumption false.  Thus, we
2670 2690           * need to insert after we set conn_policy_cached.
2671 2691           */
2672 2692          error = ipsec_conn_cache_policy(connp,
2673 2693              connp->conn_ipversion == IPV4_VERSION);
2674 2694          if (error != 0)
2675 2695                  return (error);
2676 2696  
2677 2697          /*
2678 2698           * We defer to do LSO check until here since now we have better idea
2679 2699           * whether IPsec is present. If the underlying ill is LSO capable,
2680 2700           * copy its capability in so the ULP can decide whether to enable LSO
2681 2701           * on this connection. So far, only TCP/IPv4 is implemented, so won't
2682 2702           * claim LSO for IPv6.
2683 2703           *
2684 2704           * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2685 2705           * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2686 2706           */
2687 2707          ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2688 2708  
2689 2709          ASSERT(ixa->ixa_ire != NULL);
2690 2710          if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2691 2711              !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2692 2712              !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2693 2713              !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2694 2714              (ixa->ixa_nce != NULL) &&
2695 2715              ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2696 2716              ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2697 2717              ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2698 2718                  ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2699 2719                  ixa->ixa_flags |= IXAF_LSO_CAPAB;
2700 2720          }
2701 2721  
2702 2722          /* Check whether ZEROCOPY capability is usable for this connection. */
2703 2723          ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2704 2724  
2705 2725          if ((flags & IPDF_ZCOPY) &&
2706 2726              !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2707 2727              !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2708 2728              !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2709 2729              (ixa->ixa_nce != NULL) &&
2710 2730              ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2711 2731                  ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2712 2732          }
2713 2733          return (0);
2714 2734  }
2715 2735  
2716 2736  /*
2717 2737   * Predicates to check if the addresses match conn_last*
2718 2738   */
2719 2739  
2720 2740  /*
2721 2741   * Compare the conn against an address.
2722 2742   * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2723 2743   */
2724 2744  boolean_t
2725 2745  conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2726 2746  {
2727 2747          ASSERT(connp->conn_family == AF_INET);
2728 2748          return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2729 2749              sin->sin_port == connp->conn_lastdstport);
2730 2750  }
2731 2751  
2732 2752  /*
2733 2753   * Compare, including for mapped addresses
2734 2754   */
2735 2755  boolean_t
2736 2756  conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2737 2757  {
2738 2758          return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2739 2759              sin6->sin6_port == connp->conn_lastdstport &&
2740 2760              sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2741 2761              sin6->sin6_scope_id == connp->conn_lastscopeid);
2742 2762  }
2743 2763  
2744 2764  /*
2745 2765   * Compute a label and place it in the ip_packet_t.
2746 2766   * Handles IPv4 and IPv6.
2747 2767   * The caller should have a correct ixa_tsl and ixa_zoneid and have
2748 2768   * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2749 2769   * has been called.
2750 2770   */
2751 2771  int
2752 2772  conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2753 2773      const in6_addr_t *v6dst, ip_pkt_t *ipp)
2754 2774  {
2755 2775          int             err;
2756 2776          ipaddr_t        v4dst;
2757 2777  
2758 2778          if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2759 2779                  uchar_t         opt_storage[IP_MAX_OPT_LENGTH];
2760 2780  
2761 2781                  IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2762 2782  
2763 2783                  err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2764 2784                      v4dst, opt_storage, ixa->ixa_ipst);
2765 2785                  if (err == 0) {
2766 2786                          /* Length contained in opt_storage[IPOPT_OLEN] */
2767 2787                          err = optcom_pkt_set(opt_storage,
2768 2788                              opt_storage[IPOPT_OLEN],
2769 2789                              (uchar_t **)&ipp->ipp_label_v4,
2770 2790                              &ipp->ipp_label_len_v4);
2771 2791                  }
2772 2792                  if (err != 0) {
2773 2793                          DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2774 2794                              char *, "conn(1) failed to update options(2) "
2775 2795                              "on ixa(3)",
2776 2796                              conn_t *, connp, char *, opt_storage,
2777 2797                              ip_xmit_attr_t *, ixa);
2778 2798                  }
2779 2799                  if (ipp->ipp_label_len_v4 != 0)
2780 2800                          ipp->ipp_fields |= IPPF_LABEL_V4;
2781 2801                  else
2782 2802                          ipp->ipp_fields &= ~IPPF_LABEL_V4;
2783 2803          } else {
2784 2804                  uchar_t         opt_storage[TSOL_MAX_IPV6_OPTION];
2785 2805                  uint_t          optlen;
2786 2806  
2787 2807                  err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2788 2808                      v6dst, opt_storage, ixa->ixa_ipst);
2789 2809                  if (err == 0) {
2790 2810                          /*
2791 2811                           * Note that ipp_label_v6 is just the option - not
2792 2812                           * the hopopts extension header.
2793 2813                           *
2794 2814                           * Length contained in opt_storage[IPOPT_OLEN], but
2795 2815                           * that doesn't include the two byte options header.
2796 2816                           */
2797 2817                          optlen = opt_storage[IPOPT_OLEN];
2798 2818                          if (optlen != 0)
2799 2819                                  optlen += 2;
2800 2820  
2801 2821                          err = optcom_pkt_set(opt_storage, optlen,
2802 2822                              (uchar_t **)&ipp->ipp_label_v6,
2803 2823                              &ipp->ipp_label_len_v6);
2804 2824                  }
2805 2825                  if (err != 0) {
2806 2826                          DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2807 2827                              char *, "conn(1) failed to update options(2) "
2808 2828                              "on ixa(3)",
2809 2829                              conn_t *, connp, char *, opt_storage,
2810 2830                              ip_xmit_attr_t *, ixa);
2811 2831                  }
2812 2832                  if (ipp->ipp_label_len_v6 != 0)
2813 2833                          ipp->ipp_fields |= IPPF_LABEL_V6;
2814 2834                  else
2815 2835                          ipp->ipp_fields &= ~IPPF_LABEL_V6;
2816 2836          }
2817 2837          return (err);
2818 2838  }
2819 2839  
2820 2840  /*
2821 2841   * Inherit all options settings from the parent/listener to the eager.
2822 2842   * Returns zero on success; ENOMEM if memory allocation failed.
2823 2843   *
2824 2844   * We assume that the eager has not had any work done i.e., the conn_ixa
2825 2845   * and conn_xmit_ipp are all zero.
2826 2846   * Furthermore we assume that no other thread can access the eager (because
2827 2847   * it isn't inserted in any fanout list).
2828 2848   */
2829 2849  int
2830 2850  conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2831 2851  {
2832 2852          cred_t  *credp;
2833 2853          int     err;
2834 2854          void    *notify_cookie;
2835 2855          uint32_t xmit_hint;
2836 2856  
2837 2857          econnp->conn_family = lconnp->conn_family;
2838 2858          econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2839 2859          econnp->conn_wq = lconnp->conn_wq;
2840 2860          econnp->conn_rq = lconnp->conn_rq;
2841 2861  
2842 2862          /*
2843 2863           * Make a safe copy of the transmit attributes.
2844 2864           * conn_connect will later be used by the caller to setup the ire etc.
2845 2865           */
2846 2866          ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2847 2867          ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2848 2868          ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2849 2869          ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2850 2870  
2851 2871          /* Preserve ixa_notify_cookie and xmit_hint */
2852 2872          notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2853 2873          xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2854 2874          ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2855 2875          econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2856 2876          econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2857 2877  
2858 2878          econnp->conn_bound_if = lconnp->conn_bound_if;
2859 2879          econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2860 2880  
2861 2881          /* Inherit all RECV options */
2862 2882          econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2863 2883  
2864 2884          err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2865 2885              KM_NOSLEEP);
2866 2886          if (err != 0)
2867 2887                  return (err);
2868 2888  
2869 2889          econnp->conn_zoneid = lconnp->conn_zoneid;
2870 2890          econnp->conn_allzones = lconnp->conn_allzones;
2871 2891  
2872 2892          /* This is odd. Pick a flowlabel for each connection instead? */
2873 2893          econnp->conn_flowinfo = lconnp->conn_flowinfo;
2874 2894  
2875 2895          econnp->conn_default_ttl = lconnp->conn_default_ttl;
2876 2896  
2877 2897          /*
2878 2898           * TSOL: tsol_input_proc() needs the eager's cred before the
2879 2899           * eager is accepted
2880 2900           */
2881 2901          ASSERT(lconnp->conn_cred != NULL);
2882 2902          econnp->conn_cred = credp = lconnp->conn_cred;
2883 2903          crhold(credp);
2884 2904          econnp->conn_cpid = lconnp->conn_cpid;
2885 2905          econnp->conn_open_time = ddi_get_lbolt64();
2886 2906  
2887 2907          /*
2888 2908           * Cache things in the ixa without any refhold.
2889 2909           * Listener might not have set up ixa_cred
2890 2910           */
2891 2911          ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2892 2912          econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2893 2913          econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2894 2914          if (is_system_labeled())
2895 2915                  econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2896 2916  
2897 2917          /*
2898 2918           * If the caller has the process-wide flag set, then default to MAC
2899 2919           * exempt mode.  This allows read-down to unlabeled hosts.
2900 2920           */
2901 2921          if (getpflags(NET_MAC_AWARE, credp) != 0)
2902 2922                  econnp->conn_mac_mode = CONN_MAC_AWARE;
2903 2923  
2904 2924          econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2905 2925  
2906 2926          /*
2907 2927           * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2908 2928           * via soaccept()->soinheritoptions() which essentially applies
2909 2929           * all the listener options to the new connection. The options that we
2910 2930           * need to take care of are:
2911 2931           * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2912 2932           * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2913 2933           * SO_SNDBUF, SO_RCVBUF.
2914 2934           *
2915 2935           * SO_RCVBUF:   conn_rcvbuf is set.
2916 2936           * SO_SNDBUF:   conn_sndbuf is set.
2917 2937           */
2918 2938  
2919 2939          /* Could we define a struct and use a struct copy for this? */
2920 2940          econnp->conn_sndbuf = lconnp->conn_sndbuf;
2921 2941          econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2922 2942          econnp->conn_sndlowat = lconnp->conn_sndlowat;
2923 2943          econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2924 2944          econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2925 2945          econnp->conn_oobinline = lconnp->conn_oobinline;
2926 2946          econnp->conn_debug = lconnp->conn_debug;
2927 2947          econnp->conn_keepalive = lconnp->conn_keepalive;
2928 2948          econnp->conn_linger = lconnp->conn_linger;
2929 2949          econnp->conn_lingertime = lconnp->conn_lingertime;
2930 2950  
2931 2951          /* Set the IP options */
2932 2952          econnp->conn_broadcast = lconnp->conn_broadcast;
2933 2953          econnp->conn_useloopback = lconnp->conn_useloopback;
2934 2954          econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2935 2955          return (0);
2936 2956  }
  
    | 
      ↓ open down ↓ | 
    1736 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX