1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2016 Joyent, Inc.
  25  */
  26 /* Copyright (c) 1990 Mentat Inc. */
  27 
  28 #include <sys/types.h>
  29 #include <sys/stream.h>
  30 #include <sys/strsun.h>
  31 #define _SUN_TPI_VERSION 2
  32 #include <sys/tihdr.h>
  33 #include <sys/xti_inet.h>
  34 #include <sys/ucred.h>
  35 #include <sys/zone.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/cmn_err.h>
  39 #include <sys/debug.h>
  40 #include <sys/atomic.h>
  41 #include <sys/policy.h>
  42 
  43 #include <sys/systm.h>
  44 #include <sys/param.h>
  45 #include <sys/kmem.h>
  46 #include <sys/sdt.h>
  47 #include <sys/socket.h>
  48 #include <sys/ethernet.h>
  49 #include <sys/mac.h>
  50 #include <net/if.h>
  51 #include <net/if_types.h>
  52 #include <net/if_arp.h>
  53 #include <net/route.h>
  54 #include <sys/sockio.h>
  55 #include <netinet/in.h>
  56 #include <net/if_dl.h>
  57 
  58 #include <inet/common.h>
  59 #include <inet/mi.h>
  60 #include <inet/mib2.h>
  61 #include <inet/nd.h>
  62 #include <inet/arp.h>
  63 #include <inet/snmpcom.h>
  64 #include <inet/kstatcom.h>
  65 
  66 #include <netinet/igmp_var.h>
  67 #include <netinet/ip6.h>
  68 #include <netinet/icmp6.h>
  69 #include <netinet/sctp.h>
  70 
  71 #include <inet/ip.h>
  72 #include <inet/ip_impl.h>
  73 #include <inet/ip6.h>
  74 #include <inet/ip6_asp.h>
  75 #include <inet/tcp.h>
  76 #include <inet/ip_multi.h>
  77 #include <inet/ip_if.h>
  78 #include <inet/ip_ire.h>
  79 #include <inet/ip_ftable.h>
  80 #include <inet/ip_rts.h>
  81 #include <inet/optcom.h>
  82 #include <inet/ip_ndp.h>
  83 #include <inet/ip_listutils.h>
  84 #include <netinet/igmp.h>
  85 #include <netinet/ip_mroute.h>
  86 #include <netinet/udp.h>
  87 #include <inet/ipp_common.h>
  88 
  89 #include <net/pfkeyv2.h>
  90 #include <inet/sadb.h>
  91 #include <inet/ipsec_impl.h>
  92 #include <inet/ipdrop.h>
  93 #include <inet/ip_netinfo.h>
  94 
  95 #include <inet/ipclassifier.h>
  96 #include <inet/sctp_ip.h>
  97 #include <inet/sctp/sctp_impl.h>
  98 #include <inet/udp_impl.h>
  99 #include <sys/sunddi.h>
 100 
 101 #include <sys/tsol/label.h>
 102 #include <sys/tsol/tnet.h>
 103 
 104 /*
 105  * Return how much size is needed for the different ancillary data items
 106  */
 107 uint_t
 108 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
 109     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
 110 {
 111         uint_t          ancil_size;
 112         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
 113 
 114         /*
 115          * If IP_RECVDSTADDR is set we include the destination IP
 116          * address as an option. With IP_RECVOPTS we include all
 117          * the IP options.
 118          */
 119         ancil_size = 0;
 120         if (recv_ancillary.crb_recvdstaddr &&
 121             (ira->ira_flags & IRAF_IS_IPV4)) {
 122                 ancil_size += sizeof (struct T_opthdr) +
 123                     sizeof (struct in_addr);
 124                 IP_STAT(ipst, conn_in_recvdstaddr);
 125         }
 126 
 127         /*
 128          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 129          * are different
 130          */
 131         if (recv_ancillary.crb_ip_recvpktinfo &&
 132             connp->conn_family == AF_INET) {
 133                 ancil_size += sizeof (struct T_opthdr) +
 134                     sizeof (struct in_pktinfo);
 135                 IP_STAT(ipst, conn_in_recvpktinfo);
 136         }
 137 
 138         if ((recv_ancillary.crb_recvopts) &&
 139             (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 140                 ancil_size += sizeof (struct T_opthdr) +
 141                     ipp->ipp_ipv4_options_len;
 142                 IP_STAT(ipst, conn_in_recvopts);
 143         }
 144 
 145         if (recv_ancillary.crb_recvslla) {
 146                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 147                 ill_t *ill;
 148 
 149                 /* Make sure ira_l2src is setup if not already */
 150                 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
 151                         ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
 152                             ipst);
 153                         if (ill != NULL) {
 154                                 ip_setl2src(mp, ira, ill);
 155                                 ill_refrele(ill);
 156                         }
 157                 }
 158                 ancil_size += sizeof (struct T_opthdr) +
 159                     sizeof (struct sockaddr_dl);
 160                 IP_STAT(ipst, conn_in_recvslla);
 161         }
 162 
 163         if (recv_ancillary.crb_recvif) {
 164                 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
 165                 IP_STAT(ipst, conn_in_recvif);
 166         }
 167 
 168         /*
 169          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 170          * are different
 171          */
 172         if (recv_ancillary.crb_ip_recvpktinfo &&
 173             connp->conn_family == AF_INET6) {
 174                 ancil_size += sizeof (struct T_opthdr) +
 175                     sizeof (struct in6_pktinfo);
 176                 IP_STAT(ipst, conn_in_recvpktinfo);
 177         }
 178 
 179         if (recv_ancillary.crb_ipv6_recvhoplimit) {
 180                 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 181                 IP_STAT(ipst, conn_in_recvhoplimit);
 182         }
 183 
 184         if (recv_ancillary.crb_ipv6_recvtclass) {
 185                 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 186                 IP_STAT(ipst, conn_in_recvtclass);
 187         }
 188 
 189         if (recv_ancillary.crb_ipv6_recvhopopts &&
 190             (ipp->ipp_fields & IPPF_HOPOPTS)) {
 191                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 192                 IP_STAT(ipst, conn_in_recvhopopts);
 193         }
 194         /*
 195          * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 196          * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 197          * options that appear before a routing header.
 198          * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 199          */
 200         if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 201                 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 202                     (recv_ancillary.crb_ipv6_recvdstopts &&
 203                     recv_ancillary.crb_ipv6_recvrthdr)) {
 204                         ancil_size += sizeof (struct T_opthdr) +
 205                             ipp->ipp_rthdrdstoptslen;
 206                         IP_STAT(ipst, conn_in_recvrthdrdstopts);
 207                 }
 208         }
 209         if ((recv_ancillary.crb_ipv6_recvrthdr) &&
 210             (ipp->ipp_fields & IPPF_RTHDR)) {
 211                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 212                 IP_STAT(ipst, conn_in_recvrthdr);
 213         }
 214         if ((recv_ancillary.crb_ipv6_recvdstopts ||
 215             recv_ancillary.crb_old_ipv6_recvdstopts) &&
 216             (ipp->ipp_fields & IPPF_DSTOPTS)) {
 217                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 218                 IP_STAT(ipst, conn_in_recvdstopts);
 219         }
 220         if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 221                 ancil_size += sizeof (struct T_opthdr) +
 222                     ucredminsize(ira->ira_cred);
 223                 IP_STAT(ipst, conn_in_recvucred);
 224         }
 225 
 226         /*
 227          * If SO_TIMESTAMP is set allocate the appropriate sized
 228          * buffer. Since gethrestime() expects a pointer aligned
 229          * argument, we allocate space necessary for extra
 230          * alignment (even though it might not be used).
 231          */
 232         if (recv_ancillary.crb_timestamp) {
 233                 ancil_size += sizeof (struct T_opthdr) +
 234                     sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 235                 IP_STAT(ipst, conn_in_timestamp);
 236         }
 237 
 238         /*
 239          * If IP_RECVTTL is set allocate the appropriate sized buffer
 240          */
 241         if (recv_ancillary.crb_recvttl &&
 242             (ira->ira_flags & IRAF_IS_IPV4)) {
 243                 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
 244                 IP_STAT(ipst, conn_in_recvttl);
 245         }
 246 
 247         return (ancil_size);
 248 }
 249 
 250 /*
 251  * Lay down the ancillary data items at "ancil_buf".
 252  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
 253  * large buffer - ancil_size.
 254  */
 255 void
 256 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
 257     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
 258 {
 259         /*
 260          * Copy in destination address before options to avoid
 261          * any padding issues.
 262          */
 263         if (recv_ancillary.crb_recvdstaddr &&
 264             (ira->ira_flags & IRAF_IS_IPV4)) {
 265                 struct T_opthdr *toh;
 266                 ipaddr_t *dstptr;
 267 
 268                 toh = (struct T_opthdr *)ancil_buf;
 269                 toh->level = IPPROTO_IP;
 270                 toh->name = IP_RECVDSTADDR;
 271                 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
 272                 toh->status = 0;
 273                 ancil_buf += sizeof (struct T_opthdr);
 274                 dstptr = (ipaddr_t *)ancil_buf;
 275                 *dstptr = ipp->ipp_addr_v4;
 276                 ancil_buf += sizeof (ipaddr_t);
 277                 ancil_size -= toh->len;
 278         }
 279 
 280         /*
 281          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 282          * are different
 283          */
 284         if (recv_ancillary.crb_ip_recvpktinfo &&
 285             connp->conn_family == AF_INET) {
 286                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 287                 struct T_opthdr *toh;
 288                 struct in_pktinfo *pktinfop;
 289                 ill_t *ill;
 290                 ipif_t *ipif;
 291 
 292                 toh = (struct T_opthdr *)ancil_buf;
 293                 toh->level = IPPROTO_IP;
 294                 toh->name = IP_PKTINFO;
 295                 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
 296                 toh->status = 0;
 297                 ancil_buf += sizeof (struct T_opthdr);
 298                 pktinfop = (struct in_pktinfo *)ancil_buf;
 299 
 300                 pktinfop->ipi_ifindex = ira->ira_ruifindex;
 301                 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
 302 
 303                 /* Find a good address to report */
 304                 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
 305                 if (ill != NULL) {
 306                         ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
 307                         if (ipif != NULL) {
 308                                 pktinfop->ipi_spec_dst.s_addr =
 309                                     ipif->ipif_lcl_addr;
 310                                 ipif_refrele(ipif);
 311                         }
 312                         ill_refrele(ill);
 313                 }
 314                 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
 315                 ancil_buf += sizeof (struct in_pktinfo);
 316                 ancil_size -= toh->len;
 317         }
 318 
 319         if ((recv_ancillary.crb_recvopts) &&
 320             (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 321                 struct T_opthdr *toh;
 322 
 323                 toh = (struct T_opthdr *)ancil_buf;
 324                 toh->level = IPPROTO_IP;
 325                 toh->name = IP_RECVOPTS;
 326                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
 327                 toh->status = 0;
 328                 ancil_buf += sizeof (struct T_opthdr);
 329                 bcopy(ipp->ipp_ipv4_options, ancil_buf,
 330                     ipp->ipp_ipv4_options_len);
 331                 ancil_buf += ipp->ipp_ipv4_options_len;
 332                 ancil_size -= toh->len;
 333         }
 334 
 335         if (recv_ancillary.crb_recvslla) {
 336                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 337                 struct T_opthdr *toh;
 338                 struct sockaddr_dl *dstptr;
 339                 ill_t *ill;
 340                 int alen = 0;
 341 
 342                 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
 343                 if (ill != NULL)
 344                         alen = ill->ill_phys_addr_length;
 345 
 346                 /*
 347                  * For loopback multicast and broadcast the packet arrives
 348                  * with ira_ruifdex being the physical interface, but
 349                  * ira_l2src is all zero since ip_postfrag_loopback doesn't
 350                  * know our l2src. We don't report the address in that case.
 351                  */
 352                 if (ira->ira_flags & IRAF_LOOPBACK)
 353                         alen = 0;
 354 
 355                 toh = (struct T_opthdr *)ancil_buf;
 356                 toh->level = IPPROTO_IP;
 357                 toh->name = IP_RECVSLLA;
 358                 toh->len = sizeof (struct T_opthdr) +
 359                     sizeof (struct sockaddr_dl);
 360                 toh->status = 0;
 361                 ancil_buf += sizeof (struct T_opthdr);
 362                 dstptr = (struct sockaddr_dl *)ancil_buf;
 363                 dstptr->sdl_family = AF_LINK;
 364                 dstptr->sdl_index = ira->ira_ruifindex;
 365                 if (ill != NULL)
 366                         dstptr->sdl_type = ill->ill_type;
 367                 else
 368                         dstptr->sdl_type = 0;
 369                 dstptr->sdl_nlen = 0;
 370                 dstptr->sdl_alen = alen;
 371                 dstptr->sdl_slen = 0;
 372                 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
 373                 ancil_buf += sizeof (struct sockaddr_dl);
 374                 ancil_size -= toh->len;
 375                 if (ill != NULL)
 376                         ill_refrele(ill);
 377         }
 378 
 379         if (recv_ancillary.crb_recvif) {
 380                 struct T_opthdr *toh;
 381                 uint_t          *dstptr;
 382 
 383                 toh = (struct T_opthdr *)ancil_buf;
 384                 toh->level = IPPROTO_IP;
 385                 toh->name = IP_RECVIF;
 386                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 387                 toh->status = 0;
 388                 ancil_buf += sizeof (struct T_opthdr);
 389                 dstptr = (uint_t *)ancil_buf;
 390                 *dstptr = ira->ira_ruifindex;
 391                 ancil_buf += sizeof (uint_t);
 392                 ancil_size -= toh->len;
 393         }
 394 
 395         /*
 396          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 397          * are different
 398          */
 399         if (recv_ancillary.crb_ip_recvpktinfo &&
 400             connp->conn_family == AF_INET6) {
 401                 struct T_opthdr *toh;
 402                 struct in6_pktinfo *pkti;
 403 
 404                 toh = (struct T_opthdr *)ancil_buf;
 405                 toh->level = IPPROTO_IPV6;
 406                 toh->name = IPV6_PKTINFO;
 407                 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
 408                 toh->status = 0;
 409                 ancil_buf += sizeof (struct T_opthdr);
 410                 pkti = (struct in6_pktinfo *)ancil_buf;
 411                 if (ira->ira_flags & IRAF_IS_IPV4) {
 412                         IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
 413                             &pkti->ipi6_addr);
 414                 } else {
 415                         pkti->ipi6_addr = ipp->ipp_addr;
 416                 }
 417                 pkti->ipi6_ifindex = ira->ira_ruifindex;
 418 
 419                 ancil_buf += sizeof (*pkti);
 420                 ancil_size -= toh->len;
 421         }
 422         if (recv_ancillary.crb_ipv6_recvhoplimit) {
 423                 struct T_opthdr *toh;
 424 
 425                 toh = (struct T_opthdr *)ancil_buf;
 426                 toh->level = IPPROTO_IPV6;
 427                 toh->name = IPV6_HOPLIMIT;
 428                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 429                 toh->status = 0;
 430                 ancil_buf += sizeof (struct T_opthdr);
 431                 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
 432                 ancil_buf += sizeof (uint_t);
 433                 ancil_size -= toh->len;
 434         }
 435         if (recv_ancillary.crb_ipv6_recvtclass) {
 436                 struct T_opthdr *toh;
 437 
 438                 toh = (struct T_opthdr *)ancil_buf;
 439                 toh->level = IPPROTO_IPV6;
 440                 toh->name = IPV6_TCLASS;
 441                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 442                 toh->status = 0;
 443                 ancil_buf += sizeof (struct T_opthdr);
 444 
 445                 if (ira->ira_flags & IRAF_IS_IPV4)
 446                         *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
 447                 else
 448                         *(uint_t *)ancil_buf = ipp->ipp_tclass;
 449                 ancil_buf += sizeof (uint_t);
 450                 ancil_size -= toh->len;
 451         }
 452         if (recv_ancillary.crb_ipv6_recvhopopts &&
 453             (ipp->ipp_fields & IPPF_HOPOPTS)) {
 454                 struct T_opthdr *toh;
 455 
 456                 toh = (struct T_opthdr *)ancil_buf;
 457                 toh->level = IPPROTO_IPV6;
 458                 toh->name = IPV6_HOPOPTS;
 459                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 460                 toh->status = 0;
 461                 ancil_buf += sizeof (struct T_opthdr);
 462                 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
 463                 ancil_buf += ipp->ipp_hopoptslen;
 464                 ancil_size -= toh->len;
 465         }
 466         /*
 467          * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 468          * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 469          * options that appear before a routing header.
 470          * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 471          */
 472         if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 473                 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 474                     (recv_ancillary.crb_ipv6_recvdstopts &&
 475                     recv_ancillary.crb_ipv6_recvrthdr)) {
 476                         struct T_opthdr *toh;
 477 
 478                         toh = (struct T_opthdr *)ancil_buf;
 479                         toh->level = IPPROTO_IPV6;
 480                         toh->name = IPV6_DSTOPTS;
 481                         toh->len = sizeof (struct T_opthdr) +
 482                             ipp->ipp_rthdrdstoptslen;
 483                         toh->status = 0;
 484                         ancil_buf += sizeof (struct T_opthdr);
 485                         bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
 486                             ipp->ipp_rthdrdstoptslen);
 487                         ancil_buf += ipp->ipp_rthdrdstoptslen;
 488                         ancil_size -= toh->len;
 489                 }
 490         }
 491         if (recv_ancillary.crb_ipv6_recvrthdr &&
 492             (ipp->ipp_fields & IPPF_RTHDR)) {
 493                 struct T_opthdr *toh;
 494 
 495                 toh = (struct T_opthdr *)ancil_buf;
 496                 toh->level = IPPROTO_IPV6;
 497                 toh->name = IPV6_RTHDR;
 498                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 499                 toh->status = 0;
 500                 ancil_buf += sizeof (struct T_opthdr);
 501                 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
 502                 ancil_buf += ipp->ipp_rthdrlen;
 503                 ancil_size -= toh->len;
 504         }
 505         if ((recv_ancillary.crb_ipv6_recvdstopts ||
 506             recv_ancillary.crb_old_ipv6_recvdstopts) &&
 507             (ipp->ipp_fields & IPPF_DSTOPTS)) {
 508                 struct T_opthdr *toh;
 509 
 510                 toh = (struct T_opthdr *)ancil_buf;
 511                 toh->level = IPPROTO_IPV6;
 512                 toh->name = IPV6_DSTOPTS;
 513                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 514                 toh->status = 0;
 515                 ancil_buf += sizeof (struct T_opthdr);
 516                 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
 517                 ancil_buf += ipp->ipp_dstoptslen;
 518                 ancil_size -= toh->len;
 519         }
 520 
 521         if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 522                 struct T_opthdr *toh;
 523                 cred_t          *rcr = connp->conn_cred;
 524 
 525                 toh = (struct T_opthdr *)ancil_buf;
 526                 toh->level = SOL_SOCKET;
 527                 toh->name = SCM_UCRED;
 528                 toh->len = sizeof (struct T_opthdr) +
 529                     ucredminsize(ira->ira_cred);
 530                 toh->status = 0;
 531                 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
 532                 ancil_buf += toh->len;
 533                 ancil_size -= toh->len;
 534         }
 535         if (recv_ancillary.crb_timestamp) {
 536                 struct  T_opthdr *toh;
 537 
 538                 toh = (struct T_opthdr *)ancil_buf;
 539                 toh->level = SOL_SOCKET;
 540                 toh->name = SCM_TIMESTAMP;
 541                 toh->len = sizeof (struct T_opthdr) +
 542                     sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 543                 toh->status = 0;
 544                 ancil_buf += sizeof (struct T_opthdr);
 545                 /* Align for gethrestime() */
 546                 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
 547                     sizeof (intptr_t));
 548                 gethrestime((timestruc_t *)ancil_buf);
 549                 ancil_buf = (uchar_t *)toh + toh->len;
 550                 ancil_size -= toh->len;
 551         }
 552 
 553         /*
 554          * CAUTION:
 555          * Due to aligment issues
 556          * Processing of IP_RECVTTL option
 557          * should always be the last. Adding
 558          * any option processing after this will
 559          * cause alignment panic.
 560          */
 561         if (recv_ancillary.crb_recvttl &&
 562             (ira->ira_flags & IRAF_IS_IPV4)) {
 563                 struct  T_opthdr *toh;
 564                 uint8_t *dstptr;
 565 
 566                 toh = (struct T_opthdr *)ancil_buf;
 567                 toh->level = IPPROTO_IP;
 568                 toh->name = IP_RECVTTL;
 569                 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
 570                 toh->status = 0;
 571                 ancil_buf += sizeof (struct T_opthdr);
 572                 dstptr = (uint8_t *)ancil_buf;
 573                 *dstptr = ipp->ipp_hoplimit;
 574                 ancil_buf += sizeof (uint8_t);
 575                 ancil_size -= toh->len;
 576         }
 577 
 578         /* Consumed all of allocated space */
 579         ASSERT(ancil_size == 0);
 580 
 581 }
 582 
 583 /*
 584  * This routine retrieves the current status of socket options.
 585  * It returns the size of the option retrieved, or -1.
 586  */
 587 int
 588 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
 589     uchar_t *ptr)
 590 {
 591         int             *i1 = (int *)ptr;
 592         conn_t          *connp = coa->coa_connp;
 593         ip_xmit_attr_t  *ixa = coa->coa_ixa;
 594         ip_pkt_t        *ipp = coa->coa_ipp;
 595         ip_stack_t      *ipst = ixa->ixa_ipst;
 596         uint_t          len;
 597 
 598         ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
 599 
 600         switch (level) {
 601         case SOL_SOCKET:
 602                 switch (name) {
 603                 case SO_DEBUG:
 604                         *i1 = connp->conn_debug ? SO_DEBUG : 0;
 605                         break;  /* goto sizeof (int) option return */
 606                 case SO_KEEPALIVE:
 607                         *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
 608                         break;
 609                 case SO_LINGER: {
 610                         struct linger *lgr = (struct linger *)ptr;
 611 
 612                         lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
 613                         lgr->l_linger = connp->conn_lingertime;
 614                         }
 615                         return (sizeof (struct linger));
 616 
 617                 case SO_OOBINLINE:
 618                         *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
 619                         break;
 620                 case SO_REUSEADDR:
 621                         *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
 622                         break;  /* goto sizeof (int) option return */
 623                 case SO_REUSEPORT:
 624                         *i1 = connp->conn_reuseport;
 625                         break;  /* goto sizeof (int) option return */
 626                 case SO_TYPE:
 627                         *i1 = connp->conn_so_type;
 628                         break;  /* goto sizeof (int) option return */
 629                 case SO_DONTROUTE:
 630                         *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
 631                             SO_DONTROUTE : 0;
 632                         break;  /* goto sizeof (int) option return */
 633                 case SO_USELOOPBACK:
 634                         *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
 635                         break;  /* goto sizeof (int) option return */
 636                 case SO_BROADCAST:
 637                         *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
 638                         break;  /* goto sizeof (int) option return */
 639 
 640                 case SO_SNDBUF:
 641                         *i1 = connp->conn_sndbuf;
 642                         break;  /* goto sizeof (int) option return */
 643                 case SO_RCVBUF:
 644                         *i1 = connp->conn_rcvbuf;
 645                         break;  /* goto sizeof (int) option return */
 646                 case SO_RCVTIMEO:
 647                 case SO_SNDTIMEO:
 648                         /*
 649                          * Pass these two options in order for third part
 650                          * protocol usage. Here just return directly.
 651                          */
 652                         *i1 = 0;
 653                         break;
 654                 case SO_DGRAM_ERRIND:
 655                         *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
 656                         break;  /* goto sizeof (int) option return */
 657                 case SO_RECVUCRED:
 658                         *i1 = connp->conn_recv_ancillary.crb_recvucred;
 659                         break;  /* goto sizeof (int) option return */
 660                 case SO_TIMESTAMP:
 661                         *i1 = connp->conn_recv_ancillary.crb_timestamp;
 662                         break;  /* goto sizeof (int) option return */
 663                 case SO_VRRP:
 664                         *i1 = connp->conn_isvrrp;
 665                         break;  /* goto sizeof (int) option return */
 666                 case SO_ANON_MLP:
 667                         *i1 = connp->conn_anon_mlp;
 668                         break;  /* goto sizeof (int) option return */
 669                 case SO_MAC_EXEMPT:
 670                         *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
 671                         break;  /* goto sizeof (int) option return */
 672                 case SO_MAC_IMPLICIT:
 673                         *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
 674                         break;  /* goto sizeof (int) option return */
 675                 case SO_ALLZONES:
 676                         *i1 = connp->conn_allzones;
 677                         break;  /* goto sizeof (int) option return */
 678                 case SO_EXCLBIND:
 679                         *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
 680                         break;
 681                 case SO_PROTOTYPE:
 682                         *i1 = connp->conn_proto;
 683                         break;
 684 
 685                 case SO_DOMAIN:
 686                         *i1 = connp->conn_family;
 687                         break;
 688                 default:
 689                         return (-1);
 690                 }
 691                 break;
 692         case IPPROTO_IP:
 693                 if (connp->conn_family != AF_INET)
 694                         return (-1);
 695                 switch (name) {
 696                 case IP_OPTIONS:
 697                 case T_IP_OPTIONS:
 698                         if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
 699                                 return (0);
 700 
 701                         len = ipp->ipp_ipv4_options_len;
 702                         if (len > 0) {
 703                                 bcopy(ipp->ipp_ipv4_options, ptr, len);
 704                         }
 705                         return (len);
 706 
 707                 case IP_PKTINFO: {
 708                         /*
 709                          * This also handles IP_RECVPKTINFO.
 710                          * IP_PKTINFO and IP_RECVPKTINFO have same value.
 711                          * Differentiation is based on the size of the
 712                          * argument passed in.
 713                          */
 714                         struct in_pktinfo *pktinfo;
 715 
 716 #ifdef notdef
 717                         /* optcom doesn't provide a length with "get" */
 718                         if (inlen == sizeof (int)) {
 719                                 /* This is IP_RECVPKTINFO option. */
 720                                 *i1 = connp->conn_recv_ancillary.
 721                                     crb_ip_recvpktinfo;
 722                                 return (sizeof (int));
 723                         }
 724 #endif
 725                         /* XXX assumes that caller has room for max size! */
 726 
 727                         pktinfo = (struct in_pktinfo *)ptr;
 728                         pktinfo->ipi_ifindex = ixa->ixa_ifindex;
 729                         if (ipp->ipp_fields & IPPF_ADDR)
 730                                 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
 731                         else
 732                                 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
 733                         return (sizeof (struct in_pktinfo));
 734                 }
 735                 case IP_DONTFRAG:
 736                         *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 737                         return (sizeof (int));
 738                 case IP_TOS:
 739                 case T_IP_TOS:
 740                         *i1 = (int)ipp->ipp_type_of_service;
 741                         break;  /* goto sizeof (int) option return */
 742                 case IP_TTL:
 743                         *i1 = (int)ipp->ipp_unicast_hops;
 744                         break;  /* goto sizeof (int) option return */
 745                 case IP_DHCPINIT_IF:
 746                         return (-1);
 747                 case IP_NEXTHOP:
 748                         if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
 749                                 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
 750                                 return (sizeof (ipaddr_t));
 751                         } else {
 752                                 return (0);
 753                         }
 754 
 755                 case IP_MULTICAST_IF:
 756                         /* 0 address if not set */
 757                         *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
 758                         return (sizeof (ipaddr_t));
 759                 case IP_MULTICAST_TTL:
 760                         *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
 761                         return (sizeof (uchar_t));
 762                 case IP_MULTICAST_LOOP:
 763                         *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 764                         return (sizeof (uint8_t));
 765                 case IP_RECVOPTS:
 766                         *i1 = connp->conn_recv_ancillary.crb_recvopts;
 767                         break;  /* goto sizeof (int) option return */
 768                 case IP_RECVDSTADDR:
 769                         *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 770                         break;  /* goto sizeof (int) option return */
 771                 case IP_RECVIF:
 772                         *i1 = connp->conn_recv_ancillary.crb_recvif;
 773                         break;  /* goto sizeof (int) option return */
 774                 case IP_RECVSLLA:
 775                         *i1 = connp->conn_recv_ancillary.crb_recvslla;
 776                         break;  /* goto sizeof (int) option return */
 777                 case IP_RECVTTL:
 778                         *i1 = connp->conn_recv_ancillary.crb_recvttl;
 779                         break;  /* goto sizeof (int) option return */
 780                 case IP_ADD_MEMBERSHIP:
 781                 case IP_DROP_MEMBERSHIP:
 782                 case MCAST_JOIN_GROUP:
 783                 case MCAST_LEAVE_GROUP:
 784                 case IP_BLOCK_SOURCE:
 785                 case IP_UNBLOCK_SOURCE:
 786                 case IP_ADD_SOURCE_MEMBERSHIP:
 787                 case IP_DROP_SOURCE_MEMBERSHIP:
 788                 case MCAST_BLOCK_SOURCE:
 789                 case MCAST_UNBLOCK_SOURCE:
 790                 case MCAST_JOIN_SOURCE_GROUP:
 791                 case MCAST_LEAVE_SOURCE_GROUP:
 792                 case MRT_INIT:
 793                 case MRT_DONE:
 794                 case MRT_ADD_VIF:
 795                 case MRT_DEL_VIF:
 796                 case MRT_ADD_MFC:
 797                 case MRT_DEL_MFC:
 798                         /* cannot "get" the value for these */
 799                         return (-1);
 800                 case MRT_VERSION:
 801                 case MRT_ASSERT:
 802                         (void) ip_mrouter_get(name, connp, ptr);
 803                         return (sizeof (int));
 804                 case IP_SEC_OPT:
 805                         return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 806                             IPSEC_AF_V4));
 807                 case IP_BOUND_IF:
 808                         /* Zero if not set */
 809                         *i1 = connp->conn_bound_if;
 810                         break;  /* goto sizeof (int) option return */
 811                 case IP_UNSPEC_SRC:
 812                         *i1 = connp->conn_unspec_src;
 813                         break;  /* goto sizeof (int) option return */
 814                 case IP_BROADCAST_TTL:
 815                         if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
 816                                 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
 817                         else
 818                                 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
 819                         return (sizeof (uchar_t));
 820                 default:
 821                         return (-1);
 822                 }
 823                 break;
 824         case IPPROTO_IPV6:
 825                 if (connp->conn_family != AF_INET6)
 826                         return (-1);
 827                 switch (name) {
 828                 case IPV6_UNICAST_HOPS:
 829                         *i1 = (int)ipp->ipp_unicast_hops;
 830                         break;  /* goto sizeof (int) option return */
 831                 case IPV6_MULTICAST_IF:
 832                         /* 0 index if not set */
 833                         *i1 = ixa->ixa_multicast_ifindex;
 834                         break;  /* goto sizeof (int) option return */
 835                 case IPV6_MULTICAST_HOPS:
 836                         *i1 = ixa->ixa_multicast_ttl;
 837                         break;  /* goto sizeof (int) option return */
 838                 case IPV6_MULTICAST_LOOP:
 839                         *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 840                         break;  /* goto sizeof (int) option return */
 841                 case IPV6_JOIN_GROUP:
 842                 case IPV6_LEAVE_GROUP:
 843                 case MCAST_JOIN_GROUP:
 844                 case MCAST_LEAVE_GROUP:
 845                 case MCAST_BLOCK_SOURCE:
 846                 case MCAST_UNBLOCK_SOURCE:
 847                 case MCAST_JOIN_SOURCE_GROUP:
 848                 case MCAST_LEAVE_SOURCE_GROUP:
 849                         /* cannot "get" the value for these */
 850                         return (-1);
 851                 case IPV6_BOUND_IF:
 852                         /* Zero if not set */
 853                         *i1 = connp->conn_bound_if;
 854                         break;  /* goto sizeof (int) option return */
 855                 case IPV6_UNSPEC_SRC:
 856                         *i1 = connp->conn_unspec_src;
 857                         break;  /* goto sizeof (int) option return */
 858                 case IPV6_RECVPKTINFO:
 859                         *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
 860                         break;  /* goto sizeof (int) option return */
 861                 case IPV6_RECVTCLASS:
 862                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
 863                         break;  /* goto sizeof (int) option return */
 864                 case IPV6_RECVPATHMTU:
 865                         *i1 = connp->conn_ipv6_recvpathmtu;
 866                         break;  /* goto sizeof (int) option return */
 867                 case IPV6_RECVHOPLIMIT:
 868                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
 869                         break;  /* goto sizeof (int) option return */
 870                 case IPV6_RECVHOPOPTS:
 871                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
 872                         break;  /* goto sizeof (int) option return */
 873                 case IPV6_RECVDSTOPTS:
 874                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
 875                         break;  /* goto sizeof (int) option return */
 876                 case _OLD_IPV6_RECVDSTOPTS:
 877                         *i1 =
 878                             connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
 879                         break;  /* goto sizeof (int) option return */
 880                 case IPV6_RECVRTHDRDSTOPTS:
 881                         *i1 = connp->conn_recv_ancillary.
 882                             crb_ipv6_recvrthdrdstopts;
 883                         break;  /* goto sizeof (int) option return */
 884                 case IPV6_RECVRTHDR:
 885                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
 886                         break;  /* goto sizeof (int) option return */
 887                 case IPV6_PKTINFO: {
 888                         /* XXX assumes that caller has room for max size! */
 889                         struct in6_pktinfo *pkti;
 890 
 891                         pkti = (struct in6_pktinfo *)ptr;
 892                         pkti->ipi6_ifindex = ixa->ixa_ifindex;
 893                         if (ipp->ipp_fields & IPPF_ADDR)
 894                                 pkti->ipi6_addr = ipp->ipp_addr;
 895                         else
 896                                 pkti->ipi6_addr = ipv6_all_zeros;
 897                         return (sizeof (struct in6_pktinfo));
 898                 }
 899                 case IPV6_TCLASS:
 900                         *i1 = ipp->ipp_tclass;
 901                         break;  /* goto sizeof (int) option return */
 902                 case IPV6_NEXTHOP: {
 903                         sin6_t *sin6 = (sin6_t *)ptr;
 904 
 905                         if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
 906                                 return (0);
 907 
 908                         *sin6 = sin6_null;
 909                         sin6->sin6_family = AF_INET6;
 910                         sin6->sin6_addr = ixa->ixa_nexthop_v6;
 911 
 912                         return (sizeof (sin6_t));
 913                 }
 914                 case IPV6_HOPOPTS:
 915                         if (!(ipp->ipp_fields & IPPF_HOPOPTS))
 916                                 return (0);
 917                         bcopy(ipp->ipp_hopopts, ptr,
 918                             ipp->ipp_hopoptslen);
 919                         return (ipp->ipp_hopoptslen);
 920                 case IPV6_RTHDRDSTOPTS:
 921                         if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
 922                                 return (0);
 923                         bcopy(ipp->ipp_rthdrdstopts, ptr,
 924                             ipp->ipp_rthdrdstoptslen);
 925                         return (ipp->ipp_rthdrdstoptslen);
 926                 case IPV6_RTHDR:
 927                         if (!(ipp->ipp_fields & IPPF_RTHDR))
 928                                 return (0);
 929                         bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
 930                         return (ipp->ipp_rthdrlen);
 931                 case IPV6_DSTOPTS:
 932                         if (!(ipp->ipp_fields & IPPF_DSTOPTS))
 933                                 return (0);
 934                         bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
 935                         return (ipp->ipp_dstoptslen);
 936                 case IPV6_PATHMTU:
 937                         return (ip_fill_mtuinfo(connp, ixa,
 938                             (struct ip6_mtuinfo *)ptr));
 939                 case IPV6_SEC_OPT:
 940                         return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 941                             IPSEC_AF_V6));
 942                 case IPV6_SRC_PREFERENCES:
 943                         return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
 944                 case IPV6_DONTFRAG:
 945                         *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 946                         return (sizeof (int));
 947                 case IPV6_USE_MIN_MTU:
 948                         if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
 949                                 *i1 = ixa->ixa_use_min_mtu;
 950                         else
 951                                 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
 952                         break;
 953                 case IPV6_V6ONLY:
 954                         *i1 = connp->conn_ipv6_v6only;
 955                         return (sizeof (int));
 956                 default:
 957                         return (-1);
 958                 }
 959                 break;
 960         case IPPROTO_UDP:
 961                 switch (name) {
 962                 case UDP_ANONPRIVBIND:
 963                         *i1 = connp->conn_anon_priv_bind;
 964                         break;
 965                 case UDP_EXCLBIND:
 966                         *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
 967                         break;
 968                 default:
 969                         return (-1);
 970                 }
 971                 break;
 972         case IPPROTO_TCP:
 973                 switch (name) {
 974                 case TCP_RECVDSTADDR:
 975                         *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 976                         break;
 977                 case TCP_ANONPRIVBIND:
 978                         *i1 = connp->conn_anon_priv_bind;
 979                         break;
 980                 case TCP_EXCLBIND:
 981                         *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
 982                         break;
 983                 default:
 984                         return (-1);
 985                 }
 986                 break;
 987         default:
 988                 return (-1);
 989         }
 990         return (sizeof (int));
 991 }
 992 
/*
 * Forward declarations of the per-level option setters that conn_opt_set()
 * dispatches to, one for each option level it handles. They all take the
 * same arguments as conn_opt_set() minus the level.
 */
static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1003 
1004 /*
1005  * This routine sets the most common socket options including some
1006  * that are transport/ULP specific.
1007  * It returns errno or zero.
1008  *
1009  * For fixed length options, there is no sanity check
1010  * of passed in length is done. It is assumed *_optcom_req()
1011  * routines do the right thing.
1012  */
1013 int
1014 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1015     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1016 {
1017         ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1018 
1019         /* We have different functions for different levels */
1020         switch (level) {
1021         case SOL_SOCKET:
1022                 return (conn_opt_set_socket(coa, name, inlen, invalp,
1023                     checkonly, cr));
1024         case IPPROTO_IP:
1025                 return (conn_opt_set_ip(coa, name, inlen, invalp,
1026                     checkonly, cr));
1027         case IPPROTO_IPV6:
1028                 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1029                     checkonly, cr));
1030         case IPPROTO_UDP:
1031                 return (conn_opt_set_udp(coa, name, inlen, invalp,
1032                     checkonly, cr));
1033         case IPPROTO_TCP:
1034                 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1035                     checkonly, cr));
1036         default:
1037                 return (0);
1038         }
1039 }
1040 
1041 /*
1042  * Handle SOL_SOCKET
1043  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1044  * it implement their own checks and setting of conn_proto.
1045  */
1046 /* ARGSUSED1 */
1047 static int
1048 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1049     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1050 {
1051         conn_t          *connp = coa->coa_connp;
1052         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1053         int             *i1 = (int *)invalp;
1054         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1055 
1056         switch (name) {
1057         case SO_ALLZONES:
1058                 if (IPCL_IS_BOUND(connp))
1059                         return (EINVAL);
1060                 break;
1061         case SO_VRRP:
1062                 if (secpolicy_ip_config(cr, checkonly) != 0)
1063                         return (EACCES);
1064                 break;
1065         case SO_MAC_EXEMPT:
1066                 if (secpolicy_net_mac_aware(cr) != 0)
1067                         return (EACCES);
1068                 if (IPCL_IS_BOUND(connp))
1069                         return (EINVAL);
1070                 break;
1071         case SO_MAC_IMPLICIT:
1072                 if (secpolicy_net_mac_implicit(cr) != 0)
1073                         return (EACCES);
1074                 break;
1075         }
1076         if (checkonly)
1077                 return (0);
1078 
1079         mutex_enter(&connp->conn_lock);
1080         /* Here we set the actual option value */
1081         switch (name) {
1082         case SO_DEBUG:
1083                 connp->conn_debug = onoff;
1084                 break;
1085         case SO_KEEPALIVE:
1086                 connp->conn_keepalive = onoff;
1087                 break;
1088         case SO_LINGER: {
1089                 struct linger *lgr = (struct linger *)invalp;
1090 
1091                 if (lgr->l_onoff) {
1092                         connp->conn_linger = 1;
1093                         connp->conn_lingertime = lgr->l_linger;
1094                 } else {
1095                         connp->conn_linger = 0;
1096                         connp->conn_lingertime = 0;
1097                 }
1098                 break;
1099         }
1100         case SO_OOBINLINE:
1101                 connp->conn_oobinline = onoff;
1102                 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1103                 break;
1104         case SO_REUSEADDR:
1105                 connp->conn_reuseaddr = onoff;
1106                 break;
1107         case SO_DONTROUTE:
1108                 if (onoff)
1109                         ixa->ixa_flags |= IXAF_DONTROUTE;
1110                 else
1111                         ixa->ixa_flags &= ~IXAF_DONTROUTE;
1112                 coa->coa_changed |= COA_ROUTE_CHANGED;
1113                 break;
1114         case SO_USELOOPBACK:
1115                 connp->conn_useloopback = onoff;
1116                 break;
1117         case SO_BROADCAST:
1118                 connp->conn_broadcast = onoff;
1119                 break;
1120         case SO_SNDBUF:
1121                 /* ULP has range checked the value */
1122                 connp->conn_sndbuf = *i1;
1123                 coa->coa_changed |= COA_SNDBUF_CHANGED;
1124                 break;
1125         case SO_RCVBUF:
1126                 /* ULP has range checked the value */
1127                 connp->conn_rcvbuf = *i1;
1128                 coa->coa_changed |= COA_RCVBUF_CHANGED;
1129                 break;
1130         case SO_RCVTIMEO:
1131         case SO_SNDTIMEO:
1132                 /*
1133                  * Pass these two options in order for third part
1134                  * protocol usage.
1135                  */
1136                 break;
1137         case SO_DGRAM_ERRIND:
1138                 connp->conn_dgram_errind = onoff;
1139                 break;
1140         case SO_RECVUCRED:
1141                 connp->conn_recv_ancillary.crb_recvucred = onoff;
1142                 break;
1143         case SO_ALLZONES:
1144                 connp->conn_allzones = onoff;
1145                 coa->coa_changed |= COA_ROUTE_CHANGED;
1146                 if (onoff)
1147                         ixa->ixa_zoneid = ALL_ZONES;
1148                 else
1149                         ixa->ixa_zoneid = connp->conn_zoneid;
1150                 break;
1151         case SO_TIMESTAMP:
1152                 connp->conn_recv_ancillary.crb_timestamp = onoff;
1153                 break;
1154         case SO_VRRP:
1155                 connp->conn_isvrrp = onoff;
1156                 break;
1157         case SO_ANON_MLP:
1158                 connp->conn_anon_mlp = onoff;
1159                 break;
1160         case SO_MAC_EXEMPT:
1161                 connp->conn_mac_mode = onoff ?
1162                     CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1163                 break;
1164         case SO_MAC_IMPLICIT:
1165                 connp->conn_mac_mode = onoff ?
1166                     CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1167                 break;
1168         case SO_EXCLBIND:
1169                 connp->conn_exclbind = onoff;
1170                 break;
1171         }
1172         mutex_exit(&connp->conn_lock);
1173         return (0);
1174 }
1175 
1176 /* Handle IPPROTO_IP */
1177 static int
1178 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1179     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1180 {
1181         conn_t          *connp = coa->coa_connp;
1182         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1183         ip_pkt_t        *ipp = coa->coa_ipp;
1184         int             *i1 = (int *)invalp;
1185         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1186         ipaddr_t        addr = (ipaddr_t)*i1;
1187         uint_t          ifindex;
1188         zoneid_t        zoneid = IPCL_ZONEID(connp);
1189         ipif_t          *ipif;
1190         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1191         int             error;
1192 
1193         if (connp->conn_family == AF_INET6 &&
1194             connp->conn_ipversion == IPV4_VERSION) {
1195                 /*
1196                  * Allow certain IPv4 options to be set on an AF_INET6 socket
1197                  * if the connection is still IPv4.
1198                  */
1199                 switch (name) {
1200                 case IP_TOS:
1201                 case T_IP_TOS:
1202                 case IP_TTL:
1203                 case IP_DONTFRAG:
1204                         break;
1205                 default:
1206                         return (EINVAL);
1207                 }
1208         } else if (connp->conn_family != AF_INET) {
1209                 return (EINVAL);
1210         }
1211 
1212         switch (name) {
1213         case IP_TTL:
1214                 /* Don't allow zero */
1215                 if (*i1 < 1 || *i1 > 255)
1216                         return (EINVAL);
1217                 break;
1218         case IP_MULTICAST_IF:
1219                 if (addr == INADDR_ANY) {
1220                         /* Clear */
1221                         ifindex = 0;
1222                         break;
1223                 }
1224                 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1225                 if (ipif == NULL)
1226                         return (EHOSTUNREACH);
1227                 /* not supported by the virtual network iface */
1228                 if (IS_VNI(ipif->ipif_ill)) {
1229                         ipif_refrele(ipif);
1230                         return (EINVAL);
1231                 }
1232                 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1233                 ipif_refrele(ipif);
1234                 break;
1235         case IP_NEXTHOP: {
1236                 ire_t   *ire;
1237 
1238                 if (addr == INADDR_ANY) {
1239                         /* Clear */
1240                         break;
1241                 }
1242                 /* Verify that the next-hop is on-link */
1243                 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1244                     NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1245                 if (ire == NULL)
1246                         return (EHOSTUNREACH);
1247                 ire_refrele(ire);
1248                 break;
1249         }
1250         case IP_OPTIONS:
1251         case T_IP_OPTIONS: {
1252                 uint_t newlen;
1253 
1254                 if (ipp->ipp_fields & IPPF_LABEL_V4)
1255                         newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1256                 else
1257                         newlen = inlen;
1258                 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1259                         return (EINVAL);
1260                 }
1261                 break;
1262         }
1263         case IP_PKTINFO: {
1264                 struct in_pktinfo *pktinfo;
1265 
1266                 /* Two different valid lengths */
1267                 if (inlen != sizeof (int) &&
1268                     inlen != sizeof (struct in_pktinfo))
1269                         return (EINVAL);
1270                 if (inlen == sizeof (int))
1271                         break;
1272 
1273                 pktinfo = (struct in_pktinfo *)invalp;
1274                 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1275                         switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1276                             zoneid, ipst, B_FALSE)) {
1277                         case IPVL_UNICAST_UP:
1278                         case IPVL_UNICAST_DOWN:
1279                                 break;
1280                         default:
1281                                 return (EADDRNOTAVAIL);
1282                         }
1283                 }
1284                 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1285                     B_FALSE, ipst))
1286                         return (ENXIO);
1287                 break;
1288         }
1289         case IP_BOUND_IF:
1290                 ifindex = *(uint_t *)i1;
1291 
1292                 /* Just check it is ok. */
1293                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1294                         return (ENXIO);
1295                 break;
1296         }
1297         if (checkonly)
1298                 return (0);
1299 
1300         /* Here we set the actual option value */
1301         /*
1302          * conn_lock protects the bitfields, and is used to
1303          * set the fields atomically. Not needed for ixa settings since
1304          * the caller has an exclusive copy of the ixa.
1305          * We can not hold conn_lock across the multicast options though.
1306          */
1307         switch (name) {
1308         case IP_OPTIONS:
1309         case T_IP_OPTIONS:
1310                 /* Save options for use by IP. */
1311                 mutex_enter(&connp->conn_lock);
1312                 error = optcom_pkt_set(invalp, inlen,
1313                     (uchar_t **)&ipp->ipp_ipv4_options,
1314                     &ipp->ipp_ipv4_options_len);
1315                 if (error != 0) {
1316                         mutex_exit(&connp->conn_lock);
1317                         return (error);
1318                 }
1319                 if (ipp->ipp_ipv4_options_len == 0) {
1320                         ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1321                 } else {
1322                         ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1323                 }
1324                 mutex_exit(&connp->conn_lock);
1325                 coa->coa_changed |= COA_HEADER_CHANGED;
1326                 coa->coa_changed |= COA_WROFF_CHANGED;
1327                 break;
1328 
1329         case IP_TTL:
1330                 mutex_enter(&connp->conn_lock);
1331                 ipp->ipp_unicast_hops = *i1;
1332                 mutex_exit(&connp->conn_lock);
1333                 coa->coa_changed |= COA_HEADER_CHANGED;
1334                 break;
1335         case IP_TOS:
1336         case T_IP_TOS:
1337                 mutex_enter(&connp->conn_lock);
1338                 if (*i1 == -1) {
1339                         ipp->ipp_type_of_service = 0;
1340                 } else {
1341                         ipp->ipp_type_of_service = *i1;
1342                 }
1343                 mutex_exit(&connp->conn_lock);
1344                 coa->coa_changed |= COA_HEADER_CHANGED;
1345                 break;
1346         case IP_MULTICAST_IF:
1347                 ixa->ixa_multicast_ifindex = ifindex;
1348                 ixa->ixa_multicast_ifaddr = addr;
1349                 coa->coa_changed |= COA_ROUTE_CHANGED;
1350                 break;
1351         case IP_MULTICAST_TTL:
1352                 ixa->ixa_multicast_ttl = *invalp;
1353                 /* Handled automatically by ip_output */
1354                 break;
1355         case IP_MULTICAST_LOOP:
1356                 if (*invalp != 0)
1357                         ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1358                 else
1359                         ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1360                 /* Handled automatically by ip_output */
1361                 break;
1362         case IP_RECVOPTS:
1363                 mutex_enter(&connp->conn_lock);
1364                 connp->conn_recv_ancillary.crb_recvopts = onoff;
1365                 mutex_exit(&connp->conn_lock);
1366                 break;
1367         case IP_RECVDSTADDR:
1368                 mutex_enter(&connp->conn_lock);
1369                 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1370                 mutex_exit(&connp->conn_lock);
1371                 break;
1372         case IP_RECVIF:
1373                 mutex_enter(&connp->conn_lock);
1374                 connp->conn_recv_ancillary.crb_recvif = onoff;
1375                 mutex_exit(&connp->conn_lock);
1376                 break;
1377         case IP_RECVSLLA:
1378                 mutex_enter(&connp->conn_lock);
1379                 connp->conn_recv_ancillary.crb_recvslla = onoff;
1380                 mutex_exit(&connp->conn_lock);
1381                 break;
1382         case IP_RECVTTL:
1383                 mutex_enter(&connp->conn_lock);
1384                 connp->conn_recv_ancillary.crb_recvttl = onoff;
1385                 mutex_exit(&connp->conn_lock);
1386                 break;
1387         case IP_PKTINFO: {
1388                 /*
1389                  * This also handles IP_RECVPKTINFO.
1390                  * IP_PKTINFO and IP_RECVPKTINFO have same value.
1391                  * Differentiation is based on the size of the
1392                  * argument passed in.
1393                  */
1394                 struct in_pktinfo *pktinfo;
1395 
1396                 if (inlen == sizeof (int)) {
1397                         /* This is IP_RECVPKTINFO option. */
1398                         mutex_enter(&connp->conn_lock);
1399                         connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1400                             onoff;
1401                         mutex_exit(&connp->conn_lock);
1402                         break;
1403                 }
1404 
1405                 /* This is IP_PKTINFO option. */
1406                 mutex_enter(&connp->conn_lock);
1407                 pktinfo = (struct in_pktinfo *)invalp;
1408                 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1409                         ipp->ipp_fields |= IPPF_ADDR;
1410                         IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1411                             &ipp->ipp_addr);
1412                 } else {
1413                         ipp->ipp_fields &= ~IPPF_ADDR;
1414                         ipp->ipp_addr = ipv6_all_zeros;
1415                 }
1416                 mutex_exit(&connp->conn_lock);
1417                 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1418                 coa->coa_changed |= COA_ROUTE_CHANGED;
1419                 coa->coa_changed |= COA_HEADER_CHANGED;
1420                 break;
1421         }
1422         case IP_DONTFRAG:
1423                 if (onoff) {
1424                         ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1425                         ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1426                 } else {
1427                         ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1428                         ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1429                 }
1430                 /* Need to redo ip_attr_connect */
1431                 coa->coa_changed |= COA_ROUTE_CHANGED;
1432                 break;
1433         case IP_ADD_MEMBERSHIP:
1434         case IP_DROP_MEMBERSHIP:
1435         case MCAST_JOIN_GROUP:
1436         case MCAST_LEAVE_GROUP:
1437                 return (ip_opt_set_multicast_group(connp, name,
1438                     invalp, B_FALSE, checkonly));
1439 
1440         case IP_BLOCK_SOURCE:
1441         case IP_UNBLOCK_SOURCE:
1442         case IP_ADD_SOURCE_MEMBERSHIP:
1443         case IP_DROP_SOURCE_MEMBERSHIP:
1444         case MCAST_BLOCK_SOURCE:
1445         case MCAST_UNBLOCK_SOURCE:
1446         case MCAST_JOIN_SOURCE_GROUP:
1447         case MCAST_LEAVE_SOURCE_GROUP:
1448                 return (ip_opt_set_multicast_sources(connp, name,
1449                     invalp, B_FALSE, checkonly));
1450 
1451         case IP_SEC_OPT:
1452                 mutex_enter(&connp->conn_lock);
1453                 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1454                 mutex_exit(&connp->conn_lock);
1455                 if (error != 0) {
1456                         return (error);
1457                 }
1458                 /* This is an IPsec policy change - redo ip_attr_connect */
1459                 coa->coa_changed |= COA_ROUTE_CHANGED;
1460                 break;
1461         case IP_NEXTHOP:
1462                 ixa->ixa_nexthop_v4 = addr;
1463                 if (addr != INADDR_ANY)
1464                         ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1465                 else
1466                         ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1467                 coa->coa_changed |= COA_ROUTE_CHANGED;
1468                 break;
1469 
1470         case IP_BOUND_IF:
1471                 ixa->ixa_ifindex = ifindex;          /* Send */
1472                 mutex_enter(&connp->conn_lock);
1473                 connp->conn_incoming_ifindex = ifindex;      /* Receive */
1474                 connp->conn_bound_if = ifindex;              /* getsockopt */
1475                 mutex_exit(&connp->conn_lock);
1476                 coa->coa_changed |= COA_ROUTE_CHANGED;
1477                 break;
1478         case IP_UNSPEC_SRC:
1479                 mutex_enter(&connp->conn_lock);
1480                 connp->conn_unspec_src = onoff;
1481                 if (onoff)
1482                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1483                 else
1484                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1485 
1486                 mutex_exit(&connp->conn_lock);
1487                 break;
1488         case IP_BROADCAST_TTL:
1489                 ixa->ixa_broadcast_ttl = *invalp;
1490                 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1491                 /* Handled automatically by ip_output */
1492                 break;
1493         case MRT_INIT:
1494         case MRT_DONE:
1495         case MRT_ADD_VIF:
1496         case MRT_DEL_VIF:
1497         case MRT_ADD_MFC:
1498         case MRT_DEL_MFC:
1499         case MRT_ASSERT:
1500                 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1501                         return (error);
1502                 }
1503                 error = ip_mrouter_set((int)name, connp, checkonly,
1504                     (uchar_t *)invalp, inlen);
1505                 if (error) {
1506                         return (error);
1507                 }
1508                 return (0);
1509 
1510         }
1511         return (0);
1512 }
1513 
1514 /* Handle IPPROTO_IPV6 */
1515 static int
1516 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1517     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1518 {
1519         conn_t          *connp = coa->coa_connp;
1520         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1521         ip_pkt_t        *ipp = coa->coa_ipp;
1522         int             *i1 = (int *)invalp;
1523         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1524         uint_t          ifindex;
1525         zoneid_t        zoneid = IPCL_ZONEID(connp);
1526         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1527         int             error;
1528 
1529         if (connp->conn_family != AF_INET6)
1530                 return (EINVAL);
1531 
1532         switch (name) {
1533         case IPV6_MULTICAST_IF:
1534                 /*
1535                  * The only possible error is EINVAL.
1536                  * We call this option on both V4 and V6
1537                  * If both fail, then this call returns
1538                  * EINVAL. If at least one of them succeeds we
1539                  * return success.
1540                  */
1541                 ifindex = *(uint_t *)i1;
1542 
1543                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1544                     !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1545                         return (EINVAL);
1546                 break;
1547         case IPV6_UNICAST_HOPS:
1548                 /* Don't allow zero. -1 means to use default */
1549                 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1550                         return (EINVAL);
1551                 break;
1552         case IPV6_MULTICAST_HOPS:
1553                 /* -1 means use default */
1554                 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1555                         return (EINVAL);
1556                 break;
1557         case IPV6_MULTICAST_LOOP:
1558                 if (*i1 != 0 && *i1 != 1)
1559                         return (EINVAL);
1560                 break;
1561         case IPV6_BOUND_IF:
1562                 ifindex = *(uint_t *)i1;
1563 
1564                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1565                         return (ENXIO);
1566                 break;
1567         case IPV6_PKTINFO: {
1568                 struct in6_pktinfo *pkti;
1569                 boolean_t isv6;
1570 
1571                 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1572                         return (EINVAL);
1573                 if (inlen == 0)
1574                         break;  /* Clear values below */
1575 
1576                 /*
1577                  * Verify the source address and ifindex. Privileged users
1578                  * can use any source address.
1579                  */
1580                 pkti = (struct in6_pktinfo *)invalp;
1581 
1582                 /*
1583                  * For link-local addresses we use the ipi6_ifindex when
1584                  * we verify the local address.
1585                  * If net_rawaccess then any source address can be used.
1586                  */
1587                 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1588                     secpolicy_net_rawaccess(cr) != 0) {
1589                         uint_t scopeid = 0;
1590                         in6_addr_t *v6src = &pkti->ipi6_addr;
1591                         ipaddr_t v4src;
1592                         ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1593 
1594                         if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1595                                 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1596                                 if (v4src != INADDR_ANY) {
1597                                         laddr_type = ip_laddr_verify_v4(v4src,
1598                                             zoneid, ipst, B_FALSE);
1599                                 }
1600                         } else {
1601                                 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1602                                         scopeid = pkti->ipi6_ifindex;
1603 
1604                                 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1605                                     ipst, B_FALSE, scopeid);
1606                         }
1607                         switch (laddr_type) {
1608                         case IPVL_UNICAST_UP:
1609                         case IPVL_UNICAST_DOWN:
1610                                 break;
1611                         default:
1612                                 return (EADDRNOTAVAIL);
1613                         }
1614                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1615                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1616                         /* Allow any source */
1617                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1618                 }
1619                 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1620                 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1621                     ipst))
1622                         return (ENXIO);
1623                 break;
1624         }
1625         case IPV6_HOPLIMIT:
1626                 /* It is only allowed as ancilary data */
1627                 if (!coa->coa_ancillary)
1628                         return (EINVAL);
1629 
1630                 if (inlen != 0 && inlen != sizeof (int))
1631                         return (EINVAL);
1632                 if (inlen == sizeof (int)) {
1633                         if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1634                                 return (EINVAL);
1635                 }
1636                 break;
1637         case IPV6_TCLASS:
1638                 if (inlen != 0 && inlen != sizeof (int))
1639                         return (EINVAL);
1640                 if (inlen == sizeof (int)) {
1641                         if (*i1 > 255 || *i1 < -1)
1642                                 return (EINVAL);
1643                 }
1644                 break;
1645         case IPV6_NEXTHOP:
1646                 if (inlen != 0 && inlen != sizeof (sin6_t))
1647                         return (EINVAL);
1648                 if (inlen == sizeof (sin6_t)) {
1649                         sin6_t *sin6 = (sin6_t *)invalp;
1650                         ire_t   *ire;
1651 
1652                         if (sin6->sin6_family != AF_INET6)
1653                                 return (EAFNOSUPPORT);
1654                         if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1655                                 return (EADDRNOTAVAIL);
1656 
1657                         /* Verify that the next-hop is on-link */
1658                         ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1659                             0, 0, IRE_ONLINK, NULL, zoneid,
1660                             NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1661                         if (ire == NULL)
1662                                 return (EHOSTUNREACH);
1663                         ire_refrele(ire);
1664                         break;
1665                 }
1666                 break;
1667         case IPV6_RTHDR:
1668         case IPV6_DSTOPTS:
1669         case IPV6_RTHDRDSTOPTS:
1670         case IPV6_HOPOPTS: {
1671                 /* All have the length field in the same place */
1672                 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1673                 /*
1674                  * Sanity checks - minimum size, size a multiple of
1675                  * eight bytes, and matching size passed in.
1676                  */
1677                 if (inlen != 0 &&
1678                     inlen != (8 * (hopts->ip6h_len + 1)))
1679                         return (EINVAL);
1680                 break;
1681         }
1682         case IPV6_PATHMTU:
1683                 /* Can't be set */
1684                 return (EINVAL);
1685 
1686         case IPV6_USE_MIN_MTU:
1687                 if (inlen != sizeof (int))
1688                         return (EINVAL);
1689                 if (*i1 < -1 || *i1 > 1)
1690                         return (EINVAL);
1691                 break;
1692         case IPV6_SRC_PREFERENCES:
1693                 if (inlen != sizeof (uint32_t))
1694                         return (EINVAL);
1695                 break;
1696         case IPV6_V6ONLY:
1697                 if (*i1 < 0 || *i1 > 1) {
1698                         return (EINVAL);
1699                 }
1700                 break;
1701         }
1702         if (checkonly)
1703                 return (0);
1704 
1705         /* Here we set the actual option value */
1706         /*
1707          * conn_lock protects the bitfields, and is used to
1708          * set the fields atomically. Not needed for ixa settings since
1709          * the caller has an exclusive copy of the ixa.
1710          * We can not hold conn_lock across the multicast options though.
1711          */
1712         ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1713         switch (name) {
1714         case IPV6_MULTICAST_IF:
1715                 ixa->ixa_multicast_ifindex = ifindex;
1716                 /* Need to redo ip_attr_connect */
1717                 coa->coa_changed |= COA_ROUTE_CHANGED;
1718                 break;
1719         case IPV6_UNICAST_HOPS:
1720                 /* -1 means use default */
1721                 mutex_enter(&connp->conn_lock);
1722                 if (*i1 == -1) {
1723                         ipp->ipp_unicast_hops = connp->conn_default_ttl;
1724                 } else {
1725                         ipp->ipp_unicast_hops = (uint8_t)*i1;
1726                 }
1727                 mutex_exit(&connp->conn_lock);
1728                 coa->coa_changed |= COA_HEADER_CHANGED;
1729                 break;
1730         case IPV6_MULTICAST_HOPS:
1731                 /* -1 means use default */
1732                 if (*i1 == -1) {
1733                         ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1734                 } else {
1735                         ixa->ixa_multicast_ttl = (uint8_t)*i1;
1736                 }
1737                 /* Handled automatically by ip_output */
1738                 break;
1739         case IPV6_MULTICAST_LOOP:
1740                 if (*i1 != 0)
1741                         ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1742                 else
1743                         ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1744                 /* Handled automatically by ip_output */
1745                 break;
1746         case IPV6_JOIN_GROUP:
1747         case IPV6_LEAVE_GROUP:
1748         case MCAST_JOIN_GROUP:
1749         case MCAST_LEAVE_GROUP:
1750                 return (ip_opt_set_multicast_group(connp, name,
1751                     invalp, B_TRUE, checkonly));
1752 
1753         case MCAST_BLOCK_SOURCE:
1754         case MCAST_UNBLOCK_SOURCE:
1755         case MCAST_JOIN_SOURCE_GROUP:
1756         case MCAST_LEAVE_SOURCE_GROUP:
1757                 return (ip_opt_set_multicast_sources(connp, name,
1758                     invalp, B_TRUE, checkonly));
1759 
1760         case IPV6_BOUND_IF:
1761                 ixa->ixa_ifindex = ifindex;          /* Send */
1762                 mutex_enter(&connp->conn_lock);
1763                 connp->conn_incoming_ifindex = ifindex;      /* Receive */
1764                 connp->conn_bound_if = ifindex;              /* getsockopt */
1765                 mutex_exit(&connp->conn_lock);
1766                 coa->coa_changed |= COA_ROUTE_CHANGED;
1767                 break;
1768         case IPV6_UNSPEC_SRC:
1769                 mutex_enter(&connp->conn_lock);
1770                 connp->conn_unspec_src = onoff;
1771                 if (onoff)
1772                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1773                 else
1774                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1775                 mutex_exit(&connp->conn_lock);
1776                 break;
1777         case IPV6_RECVPKTINFO:
1778                 mutex_enter(&connp->conn_lock);
1779                 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1780                 mutex_exit(&connp->conn_lock);
1781                 break;
1782         case IPV6_RECVTCLASS:
1783                 mutex_enter(&connp->conn_lock);
1784                 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1785                 mutex_exit(&connp->conn_lock);
1786                 break;
1787         case IPV6_RECVPATHMTU:
1788                 mutex_enter(&connp->conn_lock);
1789                 connp->conn_ipv6_recvpathmtu = onoff;
1790                 mutex_exit(&connp->conn_lock);
1791                 break;
1792         case IPV6_RECVHOPLIMIT:
1793                 mutex_enter(&connp->conn_lock);
1794                 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1795                     onoff;
1796                 mutex_exit(&connp->conn_lock);
1797                 break;
1798         case IPV6_RECVHOPOPTS:
1799                 mutex_enter(&connp->conn_lock);
1800                 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1801                 mutex_exit(&connp->conn_lock);
1802                 break;
1803         case IPV6_RECVDSTOPTS:
1804                 mutex_enter(&connp->conn_lock);
1805                 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1806                 mutex_exit(&connp->conn_lock);
1807                 break;
1808         case _OLD_IPV6_RECVDSTOPTS:
1809                 mutex_enter(&connp->conn_lock);
1810                 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1811                     onoff;
1812                 mutex_exit(&connp->conn_lock);
1813                 break;
1814         case IPV6_RECVRTHDRDSTOPTS:
1815                 mutex_enter(&connp->conn_lock);
1816                 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1817                     onoff;
1818                 mutex_exit(&connp->conn_lock);
1819                 break;
1820         case IPV6_RECVRTHDR:
1821                 mutex_enter(&connp->conn_lock);
1822                 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1823                 mutex_exit(&connp->conn_lock);
1824                 break;
1825         case IPV6_PKTINFO:
1826                 mutex_enter(&connp->conn_lock);
1827                 if (inlen == 0) {
1828                         ipp->ipp_fields &= ~IPPF_ADDR;
1829                         ipp->ipp_addr = ipv6_all_zeros;
1830                         ixa->ixa_ifindex = 0;
1831                 } else {
1832                         struct in6_pktinfo *pkti;
1833 
1834                         pkti = (struct in6_pktinfo *)invalp;
1835                         ipp->ipp_addr = pkti->ipi6_addr;
1836                         if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1837                                 ipp->ipp_fields |= IPPF_ADDR;
1838                         else
1839                                 ipp->ipp_fields &= ~IPPF_ADDR;
1840                         ixa->ixa_ifindex = pkti->ipi6_ifindex;
1841                 }
1842                 mutex_exit(&connp->conn_lock);
1843                 /* Source and ifindex might have changed */
1844                 coa->coa_changed |= COA_HEADER_CHANGED;
1845                 coa->coa_changed |= COA_ROUTE_CHANGED;
1846                 break;
1847         case IPV6_HOPLIMIT:
1848                 mutex_enter(&connp->conn_lock);
1849                 if (inlen == 0 || *i1 == -1) {
1850                         /* Revert to default */
1851                         ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1852                         ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1853                 } else {
1854                         ipp->ipp_hoplimit = *i1;
1855                         ipp->ipp_fields |= IPPF_HOPLIMIT;
1856                         /* Ensure that it sticks for multicast packets */
1857                         ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1858                 }
1859                 mutex_exit(&connp->conn_lock);
1860                 coa->coa_changed |= COA_HEADER_CHANGED;
1861                 break;
1862         case IPV6_TCLASS:
1863                 /*
1864                  * IPV6_TCLASS accepts -1 as use kernel default
1865                  * and [0, 255] as the actualy traffic class.
1866                  */
1867                 mutex_enter(&connp->conn_lock);
1868                 if (inlen == 0 || *i1 == -1) {
1869                         ipp->ipp_tclass = 0;
1870                         ipp->ipp_fields &= ~IPPF_TCLASS;
1871                 } else {
1872                         ipp->ipp_tclass = *i1;
1873                         ipp->ipp_fields |= IPPF_TCLASS;
1874                 }
1875                 mutex_exit(&connp->conn_lock);
1876                 coa->coa_changed |= COA_HEADER_CHANGED;
1877                 break;
1878         case IPV6_NEXTHOP:
1879                 if (inlen == 0) {
1880                         ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1881                 } else {
1882                         sin6_t *sin6 = (sin6_t *)invalp;
1883 
1884                         ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1885                         if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1886                                 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1887                         else
1888                                 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1889                 }
1890                 coa->coa_changed |= COA_ROUTE_CHANGED;
1891                 break;
1892         case IPV6_HOPOPTS:
1893                 mutex_enter(&connp->conn_lock);
1894                 error = optcom_pkt_set(invalp, inlen,
1895                     (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1896                 if (error != 0) {
1897                         mutex_exit(&connp->conn_lock);
1898                         return (error);
1899                 }
1900                 if (ipp->ipp_hopoptslen == 0) {
1901                         ipp->ipp_fields &= ~IPPF_HOPOPTS;
1902                 } else {
1903                         ipp->ipp_fields |= IPPF_HOPOPTS;
1904                 }
1905                 mutex_exit(&connp->conn_lock);
1906                 coa->coa_changed |= COA_HEADER_CHANGED;
1907                 coa->coa_changed |= COA_WROFF_CHANGED;
1908                 break;
1909         case IPV6_RTHDRDSTOPTS:
1910                 mutex_enter(&connp->conn_lock);
1911                 error = optcom_pkt_set(invalp, inlen,
1912                     (uchar_t **)&ipp->ipp_rthdrdstopts,
1913                     &ipp->ipp_rthdrdstoptslen);
1914                 if (error != 0) {
1915                         mutex_exit(&connp->conn_lock);
1916                         return (error);
1917                 }
1918                 if (ipp->ipp_rthdrdstoptslen == 0) {
1919                         ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1920                 } else {
1921                         ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1922                 }
1923                 mutex_exit(&connp->conn_lock);
1924                 coa->coa_changed |= COA_HEADER_CHANGED;
1925                 coa->coa_changed |= COA_WROFF_CHANGED;
1926                 break;
1927         case IPV6_DSTOPTS:
1928                 mutex_enter(&connp->conn_lock);
1929                 error = optcom_pkt_set(invalp, inlen,
1930                     (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1931                 if (error != 0) {
1932                         mutex_exit(&connp->conn_lock);
1933                         return (error);
1934                 }
1935                 if (ipp->ipp_dstoptslen == 0) {
1936                         ipp->ipp_fields &= ~IPPF_DSTOPTS;
1937                 } else {
1938                         ipp->ipp_fields |= IPPF_DSTOPTS;
1939                 }
1940                 mutex_exit(&connp->conn_lock);
1941                 coa->coa_changed |= COA_HEADER_CHANGED;
1942                 coa->coa_changed |= COA_WROFF_CHANGED;
1943                 break;
1944         case IPV6_RTHDR:
1945                 mutex_enter(&connp->conn_lock);
1946                 error = optcom_pkt_set(invalp, inlen,
1947                     (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1948                 if (error != 0) {
1949                         mutex_exit(&connp->conn_lock);
1950                         return (error);
1951                 }
1952                 if (ipp->ipp_rthdrlen == 0) {
1953                         ipp->ipp_fields &= ~IPPF_RTHDR;
1954                 } else {
1955                         ipp->ipp_fields |= IPPF_RTHDR;
1956                 }
1957                 mutex_exit(&connp->conn_lock);
1958                 coa->coa_changed |= COA_HEADER_CHANGED;
1959                 coa->coa_changed |= COA_WROFF_CHANGED;
1960                 break;
1961 
1962         case IPV6_DONTFRAG:
1963                 if (onoff) {
1964                         ixa->ixa_flags |= IXAF_DONTFRAG;
1965                         ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1966                 } else {
1967                         ixa->ixa_flags &= ~IXAF_DONTFRAG;
1968                         ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1969                 }
1970                 /* Need to redo ip_attr_connect */
1971                 coa->coa_changed |= COA_ROUTE_CHANGED;
1972                 break;
1973 
1974         case IPV6_USE_MIN_MTU:
1975                 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1976                 ixa->ixa_use_min_mtu = *i1;
1977                 /* Need to redo ip_attr_connect */
1978                 coa->coa_changed |= COA_ROUTE_CHANGED;
1979                 break;
1980 
1981         case IPV6_SEC_OPT:
1982                 mutex_enter(&connp->conn_lock);
1983                 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1984                 mutex_exit(&connp->conn_lock);
1985                 if (error != 0) {
1986                         return (error);
1987                 }
1988                 /* This is an IPsec policy change - redo ip_attr_connect */
1989                 coa->coa_changed |= COA_ROUTE_CHANGED;
1990                 break;
1991         case IPV6_SRC_PREFERENCES:
1992                 /*
1993                  * This socket option only affects connected
1994                  * sockets that haven't already bound to a specific
1995                  * IPv6 address.  In other words, sockets that
1996                  * don't call bind() with an address other than the
1997                  * unspecified address and that call connect().
1998                  * ip_set_destination_v6() passes these preferences
1999                  * to the ipif_select_source_v6() function.
2000                  */
2001                 mutex_enter(&connp->conn_lock);
2002                 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
2003                 mutex_exit(&connp->conn_lock);
2004                 if (error != 0) {
2005                         return (error);
2006                 }
2007                 break;
2008         case IPV6_V6ONLY:
2009                 mutex_enter(&connp->conn_lock);
2010                 connp->conn_ipv6_v6only = onoff;
2011                 mutex_exit(&connp->conn_lock);
2012                 break;
2013         }
2014         return (0);
2015 }
2016 
2017 /* Handle IPPROTO_UDP */
2018 /* ARGSUSED1 */
2019 static int
2020 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2021     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2022 {
2023         conn_t          *connp = coa->coa_connp;
2024         int             *i1 = (int *)invalp;
2025         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2026         int             error;
2027 
2028         switch (name) {
2029         case UDP_ANONPRIVBIND:
2030                 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2031                         return (error);
2032                 }
2033                 break;
2034         }
2035         if (checkonly)
2036                 return (0);
2037 
2038         /* Here we set the actual option value */
2039         mutex_enter(&connp->conn_lock);
2040         switch (name) {
2041         case UDP_ANONPRIVBIND:
2042                 connp->conn_anon_priv_bind = onoff;
2043                 break;
2044         case UDP_EXCLBIND:
2045                 connp->conn_exclbind = onoff;
2046                 break;
2047         }
2048         mutex_exit(&connp->conn_lock);
2049         return (0);
2050 }
2051 
2052 /* Handle IPPROTO_TCP */
2053 /* ARGSUSED1 */
2054 static int
2055 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2056     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2057 {
2058         conn_t          *connp = coa->coa_connp;
2059         int             *i1 = (int *)invalp;
2060         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2061         int             error;
2062 
2063         switch (name) {
2064         case TCP_ANONPRIVBIND:
2065                 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2066                         return (error);
2067                 }
2068                 break;
2069         }
2070         if (checkonly)
2071                 return (0);
2072 
2073         /* Here we set the actual option value */
2074         mutex_enter(&connp->conn_lock);
2075         switch (name) {
2076         case TCP_ANONPRIVBIND:
2077                 connp->conn_anon_priv_bind = onoff;
2078                 break;
2079         case TCP_EXCLBIND:
2080                 connp->conn_exclbind = onoff;
2081                 break;
2082         case TCP_RECVDSTADDR:
2083                 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2084                 break;
2085         }
2086         mutex_exit(&connp->conn_lock);
2087         return (0);
2088 }
2089 
2090 int
2091 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2092 {
2093         sin_t           *sin;
2094         sin6_t          *sin6;
2095 
2096         if (connp->conn_family == AF_INET) {
2097                 if (*salenp < sizeof (sin_t))
2098                         return (EINVAL);
2099 
2100                 *salenp = sizeof (sin_t);
2101                 /* Fill zeroes and then initialize non-zero fields */
2102                 sin = (sin_t *)sa;
2103                 *sin = sin_null;
2104                 sin->sin_family = AF_INET;
2105                 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2106                     !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2107                         sin->sin_addr.s_addr = connp->conn_saddr_v4;
2108                 } else {
2109                         /*
2110                          * INADDR_ANY
2111                          * conn_saddr is not set, we might be bound to
2112                          * broadcast/multicast. Use conn_bound_addr as
2113                          * local address instead (that could
2114                          * also still be INADDR_ANY)
2115                          */
2116                         sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2117                 }
2118                 sin->sin_port = connp->conn_lport;
2119         } else {
2120                 if (*salenp < sizeof (sin6_t))
2121                         return (EINVAL);
2122 
2123                 *salenp = sizeof (sin6_t);
2124                 /* Fill zeroes and then initialize non-zero fields */
2125                 sin6 = (sin6_t *)sa;
2126                 *sin6 = sin6_null;
2127                 sin6->sin6_family = AF_INET6;
2128                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2129                         sin6->sin6_addr = connp->conn_saddr_v6;
2130                 } else {
2131                         /*
2132                          * conn_saddr is not set, we might be bound to
2133                          * broadcast/multicast. Use conn_bound_addr as
2134                          * local address instead (which could
2135                          * also still be unspecified)
2136                          */
2137                         sin6->sin6_addr = connp->conn_bound_addr_v6;
2138                 }
2139                 sin6->sin6_port = connp->conn_lport;
2140                 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2141                     (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2142                         sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2143         }
2144         return (0);
2145 }
2146 
2147 int
2148 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2149 {
2150         struct sockaddr_in      *sin;
2151         struct sockaddr_in6     *sin6;
2152 
2153         if (connp->conn_family == AF_INET) {
2154                 if (*salenp < sizeof (sin_t))
2155                         return (EINVAL);
2156 
2157                 *salenp = sizeof (sin_t);
2158                 /* initialize */
2159                 sin = (sin_t *)sa;
2160                 *sin = sin_null;
2161                 sin->sin_family = AF_INET;
2162                 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2163                 sin->sin_port = connp->conn_fport;
2164         } else {
2165                 if (*salenp < sizeof (sin6_t))
2166                         return (EINVAL);
2167 
2168                 *salenp = sizeof (sin6_t);
2169                 /* initialize */
2170                 sin6 = (sin6_t *)sa;
2171                 *sin6 = sin6_null;
2172                 sin6->sin6_family = AF_INET6;
2173                 sin6->sin6_addr = connp->conn_faddr_v6;
2174                 sin6->sin6_port =  connp->conn_fport;
2175                 sin6->sin6_flowinfo = connp->conn_flowinfo;
2176                 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2177                     (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2178                         sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2179         }
2180         return (0);
2181 }
2182 
2183 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2184 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2185 
2186 /*
2187  * Allocate and fill in conn_ht_iphc based on the current information
2188  * in the conn.
2189  * Normally used when we bind() and connect().
2190  * Returns failure if can't allocate memory, or if there is a problem
2191  * with a routing header/option.
2192  *
2193  * We allocate space for the transport header (ulp_hdr_len + extra) and
2194  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2195  * The extra is there for transports that want some spare room for future
2196  * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2197  * excludes the extra part.
2198  *
2199  * We massage an routing option/header and store the ckecksum difference
2200  * in conn_sum.
2201  *
2202  * Caller needs to update conn_wroff if desired.
2203  */
2204 int
2205 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2206     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2207 {
2208         ip_xmit_attr_t  *ixa = connp->conn_ixa;
2209         ip_pkt_t        *ipp = &connp->conn_xmit_ipp;
2210         uint_t          ip_hdr_length;
2211         uchar_t         *hdrs;
2212         uint_t          hdrs_len;
2213 
2214         ASSERT(MUTEX_HELD(&connp->conn_lock));
2215 
2216         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2217                 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2218                 /* In case of TX label and IP options it can be too much */
2219                 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2220                         /* Preserves existing TX errno for this */
2221                         return (EHOSTUNREACH);
2222                 }
2223         } else {
2224                 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2225         }
2226         ixa->ixa_ip_hdr_length = ip_hdr_length;
2227         hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2228         ASSERT(hdrs_len != 0);
2229 
2230         if (hdrs_len != connp->conn_ht_iphc_allocated) {
2231                 /* Allocate new before we free any old */
2232                 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2233                 if (hdrs == NULL)
2234                         return (ENOMEM);
2235 
2236                 if (connp->conn_ht_iphc != NULL) {
2237                         kmem_free(connp->conn_ht_iphc,
2238                             connp->conn_ht_iphc_allocated);
2239                 }
2240                 connp->conn_ht_iphc = hdrs;
2241                 connp->conn_ht_iphc_allocated = hdrs_len;
2242         } else {
2243                 hdrs = connp->conn_ht_iphc;
2244         }
2245         hdrs_len -= extra;
2246         connp->conn_ht_iphc_len = hdrs_len;
2247 
2248         connp->conn_ht_ulp = hdrs + ip_hdr_length;
2249         connp->conn_ht_ulp_len = ulp_hdr_length;
2250 
2251         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2252                 ipha_t  *ipha = (ipha_t *)hdrs;
2253 
2254                 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2255                 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2256                 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2257                 ipha->ipha_length = htons(hdrs_len);
2258                 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2259                         ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2260                 else
2261                         ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2262 
2263                 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2264                         connp->conn_sum = cksum_massage_options_v4(ipha,
2265                             connp->conn_netstack);
2266                 } else {
2267                         connp->conn_sum = 0;
2268                 }
2269         } else {
2270                 ip6_t   *ip6h = (ip6_t *)hdrs;
2271 
2272                 ip6h->ip6_src = *v6src;
2273                 ip6h->ip6_dst = *v6dst;
2274                 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2275                     flowinfo);
2276                 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2277 
2278                 if (ipp->ipp_fields & IPPF_RTHDR) {
2279                         connp->conn_sum = cksum_massage_options_v6(ip6h,
2280                             ip_hdr_length, connp->conn_netstack);
2281 
2282                         /*
2283                          * Verify that the first hop isn't a mapped address.
2284                          * Routers along the path need to do this verification
2285                          * for subsequent hops.
2286                          */
2287                         if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2288                                 return (EADDRNOTAVAIL);
2289 
2290                 } else {
2291                         connp->conn_sum = 0;
2292                 }
2293         }
2294         return (0);
2295 }
2296 
2297 /*
2298  * Prepend a header template to data_mp based on the ip_pkt_t
2299  * and the passed in source, destination and protocol.
2300  *
2301  * Returns failure if can't allocate memory, in which case data_mp is freed.
2302  * We allocate space for the transport header (ulp_hdr_len) and
2303  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2304  *
2305  * We massage an routing option/header and return the ckecksum difference
2306  * in *sump. This is in host byte order.
2307  *
2308  * Caller needs to update conn_wroff if desired.
2309  */
2310 mblk_t *
2311 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2312     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2313     uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2314     uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2315 {
2316         uint_t          ip_hdr_length;
2317         uchar_t         *hdrs;
2318         uint_t          hdrs_len;
2319         mblk_t          *mp;
2320 
2321         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2322                 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2323                 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2324         } else {
2325                 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2326         }
2327         hdrs_len = ip_hdr_length + ulp_hdr_length;
2328         ASSERT(hdrs_len != 0);
2329 
2330         ixa->ixa_ip_hdr_length = ip_hdr_length;
2331 
2332         /* Can we prepend to data_mp? */
2333         if (data_mp != NULL &&
2334             data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2335             data_mp->b_datap->db_ref == 1) {
2336                 hdrs = data_mp->b_rptr - hdrs_len;
2337                 data_mp->b_rptr = hdrs;
2338                 mp = data_mp;
2339         } else {
2340                 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2341                 if (mp == NULL) {
2342                         freemsg(data_mp);
2343                         *errorp = ENOMEM;
2344                         return (NULL);
2345                 }
2346                 mp->b_wptr = mp->b_datap->db_lim;
2347                 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2348                 mp->b_cont = data_mp;
2349         }
2350 
2351         /*
2352          * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2353          * if PKTINFO (aka IPPF_ADDR) was set.
2354          */
2355         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2356                 ipha_t *ipha = (ipha_t *)hdrs;
2357 
2358                 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2359                 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2360                 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2361                 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2362                 ipha->ipha_length = htons(hdrs_len + data_length);
2363                 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2364                         ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2365                 else
2366                         ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2367 
2368                 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2369                         *sump = cksum_massage_options_v4(ipha,
2370                             ixa->ixa_ipst->ips_netstack);
2371                 } else {
2372                         *sump = 0;
2373                 }
2374         } else {
2375                 ip6_t *ip6h = (ip6_t *)hdrs;
2376 
2377                 ip6h->ip6_src = *v6src;
2378                 ip6h->ip6_dst = *v6dst;
2379                 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2380                 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2381 
2382                 if (ipp->ipp_fields & IPPF_RTHDR) {
2383                         *sump = cksum_massage_options_v6(ip6h,
2384                             ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2385 
2386                         /*
2387                          * Verify that the first hop isn't a mapped address.
2388                          * Routers along the path need to do this verification
2389                          * for subsequent hops.
2390                          */
2391                         if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2392                                 *errorp = EADDRNOTAVAIL;
2393                                 freemsg(mp);
2394                                 return (NULL);
2395                         }
2396                 } else {
2397                         *sump = 0;
2398                 }
2399         }
2400         return (mp);
2401 }
2402 
2403 /*
2404  * Massage a source route if any putting the first hop
2405  * in ipha_dst. Compute a starting value for the checksum which
2406  * takes into account that the original ipha_dst should be
2407  * included in the checksum but that IP will include the
2408  * first hop from the source route in the tcp checksum.
2409  */
2410 static uint32_t
2411 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2412 {
2413         in_addr_t       dst;
2414         uint32_t        cksum;
2415 
2416         /* Get last hop then diff against first hop */
2417         cksum = ip_massage_options(ipha, ns);
2418         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2419         dst = ipha->ipha_dst;
2420         cksum -= ((dst >> 16) + (dst & 0xffff));
2421         if ((int)cksum < 0)
2422                 cksum--;
2423         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2424         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2425         ASSERT(cksum < 0x10000);
2426         return (ntohs(cksum));
2427 }
2428 
2429 static uint32_t
2430 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2431 {
2432         uint8_t         *end;
2433         ip6_rthdr_t     *rth;
2434         uint32_t        cksum;
2435 
2436         end = (uint8_t *)ip6h + ip_hdr_len;
2437         rth = ip_find_rthdr_v6(ip6h, end);
2438         if (rth == NULL)
2439                 return (0);
2440 
2441         cksum = ip_massage_options_v6(ip6h, rth, ns);
2442         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2443         ASSERT(cksum < 0x10000);
2444         return (ntohs(cksum));
2445 }
2446 
2447 /*
2448  * ULPs that change the destination address need to call this for each
2449  * change to discard any state about a previous destination that might
2450  * have been multicast or multirt.
2451  */
2452 void
2453 ip_attr_newdst(ip_xmit_attr_t *ixa)
2454 {
2455         ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2456             IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2457             IXAF_NO_LOOP_ZONEID_SET);
2458 }
2459 
2460 /*
2461  * Determine the nexthop which will be used.
2462  * Normally this is just the destination, but if a IPv4 source route, or
2463  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2464  * there.
2465  */
2466 void
2467 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2468     const in6_addr_t *dst, in6_addr_t *nexthop)
2469 {
2470         if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2471                 *nexthop = *dst;
2472                 return;
2473         }
2474         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2475                 ipaddr_t v4dst;
2476                 ipaddr_t v4nexthop;
2477 
2478                 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2479                 v4nexthop = ip_pkt_source_route_v4(ipp);
2480                 if (v4nexthop == INADDR_ANY)
2481                         v4nexthop = v4dst;
2482 
2483                 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2484         } else {
2485                 const in6_addr_t *v6nexthop;
2486 
2487                 v6nexthop = ip_pkt_source_route_v6(ipp);
2488                 if (v6nexthop == NULL)
2489                         v6nexthop = dst;
2490 
2491                 *nexthop = *v6nexthop;
2492         }
2493 }
2494 
2495 /*
2496  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2497  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2498  * case (connected latching is done in conn_connect).
2499  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2500  * set, but doesn't otherwise use the conn_t.
2501  *
2502  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2503  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2504  *
2505  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2506  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2507  *
2508  * Updates laddrp and uinfo if they are non-NULL.
2509  *
2510  * TSOL notes: The callers if ip_attr_connect must check if the destination
2511  * is different than before and in that case redo conn_update_label.
2512  * The callers of conn_connect do not need that since conn_connect
2513  * performs the conn_update_label.
2514  */
2515 int
2516 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2517     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2518     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2519     iulp_t *uinfo, uint32_t flags)
2520 {
2521         in6_addr_t              laddr = *v6src;
2522         int                     error;
2523 
2524         ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2525 
2526         if (connp->conn_zone_is_global)
2527                 flags |= IPDF_ZONE_IS_GLOBAL;
2528         else
2529                 flags &= ~IPDF_ZONE_IS_GLOBAL;
2530 
2531         /*
2532          * Lookup the route to determine a source address and the uinfo.
2533          * If the ULP has a source route option then the caller will
2534          * have set v6nexthop to be the first hop.
2535          */
2536         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2537                 ipaddr_t v4dst;
2538                 ipaddr_t v4src, v4nexthop;
2539 
2540                 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2541                 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2542                 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2543 
2544                 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2545                         flags &= ~IPDF_SELECT_SRC;
2546                 else
2547                         flags |= IPDF_SELECT_SRC;
2548 
2549                 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2550                     uinfo, flags, connp->conn_mac_mode);
2551                 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2552         } else {
2553                 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2554                         flags &= ~IPDF_SELECT_SRC;
2555                 else
2556                         flags |= IPDF_SELECT_SRC;
2557 
2558                 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2559                     uinfo, flags, connp->conn_mac_mode);
2560         }
2561         /* Pass out some address even if we hit a RTF_REJECT etc */
2562         if (laddrp != NULL)
2563                 *laddrp = laddr;
2564 
2565         if (error != 0)
2566                 return (error);
2567 
2568         if (flags & IPDF_IPSEC) {
2569                 /*
2570                  * Set any IPsec policy in ixa. Routine also looks at ULP
2571                  * ports.
2572                  */
2573                 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2574         }
2575         return (0);
2576 }
2577 
2578 /*
2579  * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2580  * Assumes that conn_faddr and conn_fport are already set. As such it is not
2581  * usable for SCTP, since SCTP has multiple faddrs.
2582  *
2583  * Caller must hold conn_lock to provide atomic constency between the
2584  * conn_t's addresses and the ixa.
2585  * NOTE: this function drops and reaquires conn_lock since it can't be
2586  * held across ip_attr_connect/ip_set_destination.
2587  *
2588  * The caller needs to handle inserting in the receive-side fanout when
2589  * appropriate after conn_connect returns.
2590  */
2591 int
2592 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2593 {
2594         ip_xmit_attr_t  *ixa = connp->conn_ixa;
2595         in6_addr_t      nexthop;
2596         in6_addr_t      saddr, faddr;
2597         in_port_t       fport;
2598         int             error;
2599 
2600         ASSERT(MUTEX_HELD(&connp->conn_lock));
2601 
2602         if (connp->conn_ipversion == IPV4_VERSION)
2603                 ixa->ixa_flags |= IXAF_IS_IPV4;
2604         else
2605                 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2606 
2607         /* We do IPsec latching below - hence no caching in ip_attr_connect */
2608         flags &= ~IPDF_IPSEC;
2609 
2610         /* In case we had previously done an ip_attr_connect */
2611         ip_attr_newdst(ixa);
2612 
2613         /*
2614          * Determine the nexthop and copy the addresses before dropping
2615          * conn_lock.
2616          */
2617         ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2618             &connp->conn_faddr_v6, &nexthop);
2619         saddr = connp->conn_saddr_v6;
2620         faddr = connp->conn_faddr_v6;
2621         fport = connp->conn_fport;
2622 
2623         mutex_exit(&connp->conn_lock);
2624         error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2625             &saddr, uinfo, flags | IPDF_VERIFY_DST);
2626         mutex_enter(&connp->conn_lock);
2627 
2628         /* Could have changed even if an error */
2629         connp->conn_saddr_v6 = saddr;
2630         if (error != 0)
2631                 return (error);
2632 
2633         /*
2634          * Check whether Trusted Solaris policy allows communication with this
2635          * host, and pretend that the destination is unreachable if not.
2636          * Compute any needed label and place it in ipp_label_v4/v6.
2637          *
2638          * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2639          * the packet.
2640          *
2641          * TSOL Note: Any concurrent threads would pick a different ixa
2642          * (and ipp if they are to change the ipp)  so we
2643          * don't have to worry about concurrent threads.
2644          */
2645         if (is_system_labeled()) {
2646                 if (connp->conn_mlp_type != mlptSingle)
2647                         return (ECONNREFUSED);
2648 
2649                 /*
2650                  * conn_update_label will set ipp_label* which will later
2651                  * be used by conn_build_hdr_template.
2652                  */
2653                 error = conn_update_label(connp, ixa,
2654                     &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2655                 if (error != 0)
2656                         return (error);
2657         }
2658 
2659         /*
2660          * Ensure that we match on the selected local address.
2661          * This overrides conn_laddr in the case we had earlier bound to a
2662          * multicast or broadcast address.
2663          */
2664         connp->conn_laddr_v6 = connp->conn_saddr_v6;
2665 
2666         /*
2667          * Allow setting new policies.
2668          * The addresses/ports are already set, thus the IPsec policy calls
2669          * can handle their passed-in conn's.
2670          */
2671         connp->conn_policy_cached = B_FALSE;
2672 
2673         /*
2674          * Cache IPsec policy in this conn.  If we have per-socket policy,
2675          * we'll cache that.  If we don't, we'll inherit global policy.
2676          *
2677          * This is done before the caller inserts in the receive-side fanout.
2678          * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2679          * for connections where we don't have a policy. This is to prevent
2680          * global policy lookups in the inbound path.
2681          *
2682          * If we insert before we set conn_policy_cached,
2683          * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2684          * because global policy cound be non-empty. We normally call
2685          * ipsec_check_policy() for conn_policy_cached connections only if
2686          * conn_in_enforce_policy is set. But in this case,
2687          * conn_policy_cached can get set anytime since we made the
2688          * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2689          * called, which will make the above assumption false.  Thus, we
2690          * need to insert after we set conn_policy_cached.
2691          */
2692         error = ipsec_conn_cache_policy(connp,
2693             connp->conn_ipversion == IPV4_VERSION);
2694         if (error != 0)
2695                 return (error);
2696 
2697         /*
2698          * We defer to do LSO check until here since now we have better idea
2699          * whether IPsec is present. If the underlying ill is LSO capable,
2700          * copy its capability in so the ULP can decide whether to enable LSO
2701          * on this connection. So far, only TCP/IPv4 is implemented, so won't
2702          * claim LSO for IPv6.
2703          *
2704          * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2705          * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2706          */
2707         ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2708 
2709         ASSERT(ixa->ixa_ire != NULL);
2710         if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2711             !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2712             !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2713             !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2714             (ixa->ixa_nce != NULL) &&
2715             ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2716             ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2717             ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2718                 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2719                 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2720         }
2721 
2722         /* Check whether ZEROCOPY capability is usable for this connection. */
2723         ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2724 
2725         if ((flags & IPDF_ZCOPY) &&
2726             !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2727             !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2728             !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2729             (ixa->ixa_nce != NULL) &&
2730             ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2731                 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2732         }
2733         return (0);
2734 }
2735 
2736 /*
2737  * Predicates to check if the addresses match conn_last*
2738  */
2739 
2740 /*
2741  * Compare the conn against an address.
2742  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2743  */
2744 boolean_t
2745 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2746 {
2747         ASSERT(connp->conn_family == AF_INET);
2748         return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2749             sin->sin_port == connp->conn_lastdstport);
2750 }
2751 
2752 /*
2753  * Compare, including for mapped addresses
2754  */
2755 boolean_t
2756 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2757 {
2758         return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2759             sin6->sin6_port == connp->conn_lastdstport &&
2760             sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2761             sin6->sin6_scope_id == connp->conn_lastscopeid);
2762 }
2763 
2764 /*
2765  * Compute a label and place it in the ip_packet_t.
2766  * Handles IPv4 and IPv6.
2767  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2768  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2769  * has been called.
2770  */
2771 int
2772 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2773     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2774 {
2775         int             err;
2776         ipaddr_t        v4dst;
2777 
2778         if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2779                 uchar_t         opt_storage[IP_MAX_OPT_LENGTH];
2780 
2781                 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2782 
2783                 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2784                     v4dst, opt_storage, ixa->ixa_ipst);
2785                 if (err == 0) {
2786                         /* Length contained in opt_storage[IPOPT_OLEN] */
2787                         err = optcom_pkt_set(opt_storage,
2788                             opt_storage[IPOPT_OLEN],
2789                             (uchar_t **)&ipp->ipp_label_v4,
2790                             &ipp->ipp_label_len_v4);
2791                 }
2792                 if (err != 0) {
2793                         DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2794                             char *, "conn(1) failed to update options(2) "
2795                             "on ixa(3)",
2796                             conn_t *, connp, char *, opt_storage,
2797                             ip_xmit_attr_t *, ixa);
2798                 }
2799                 if (ipp->ipp_label_len_v4 != 0)
2800                         ipp->ipp_fields |= IPPF_LABEL_V4;
2801                 else
2802                         ipp->ipp_fields &= ~IPPF_LABEL_V4;
2803         } else {
2804                 uchar_t         opt_storage[TSOL_MAX_IPV6_OPTION];
2805                 uint_t          optlen;
2806 
2807                 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2808                     v6dst, opt_storage, ixa->ixa_ipst);
2809                 if (err == 0) {
2810                         /*
2811                          * Note that ipp_label_v6 is just the option - not
2812                          * the hopopts extension header.
2813                          *
2814                          * Length contained in opt_storage[IPOPT_OLEN], but
2815                          * that doesn't include the two byte options header.
2816                          */
2817                         optlen = opt_storage[IPOPT_OLEN];
2818                         if (optlen != 0)
2819                                 optlen += 2;
2820 
2821                         err = optcom_pkt_set(opt_storage, optlen,
2822                             (uchar_t **)&ipp->ipp_label_v6,
2823                             &ipp->ipp_label_len_v6);
2824                 }
2825                 if (err != 0) {
2826                         DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2827                             char *, "conn(1) failed to update options(2) "
2828                             "on ixa(3)",
2829                             conn_t *, connp, char *, opt_storage,
2830                             ip_xmit_attr_t *, ixa);
2831                 }
2832                 if (ipp->ipp_label_len_v6 != 0)
2833                         ipp->ipp_fields |= IPPF_LABEL_V6;
2834                 else
2835                         ipp->ipp_fields &= ~IPPF_LABEL_V6;
2836         }
2837         return (err);
2838 }
2839 
2840 /*
2841  * Inherit all options settings from the parent/listener to the eager.
2842  * Returns zero on success; ENOMEM if memory allocation failed.
2843  *
2844  * We assume that the eager has not had any work done i.e., the conn_ixa
2845  * and conn_xmit_ipp are all zero.
2846  * Furthermore we assume that no other thread can access the eager (because
2847  * it isn't inserted in any fanout list).
2848  */
2849 int
2850 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2851 {
2852         cred_t  *credp;
2853         int     err;
2854         void    *notify_cookie;
2855         uint32_t xmit_hint;
2856 
2857         econnp->conn_family = lconnp->conn_family;
2858         econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2859         econnp->conn_wq = lconnp->conn_wq;
2860         econnp->conn_rq = lconnp->conn_rq;
2861 
2862         /*
2863          * Make a safe copy of the transmit attributes.
2864          * conn_connect will later be used by the caller to setup the ire etc.
2865          */
2866         ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2867         ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2868         ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2869         ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2870 
2871         /* Preserve ixa_notify_cookie and xmit_hint */
2872         notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2873         xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2874         ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2875         econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2876         econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2877 
2878         econnp->conn_bound_if = lconnp->conn_bound_if;
2879         econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2880 
2881         /* Inherit all RECV options */
2882         econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2883 
2884         err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2885             KM_NOSLEEP);
2886         if (err != 0)
2887                 return (err);
2888 
2889         econnp->conn_zoneid = lconnp->conn_zoneid;
2890         econnp->conn_allzones = lconnp->conn_allzones;
2891 
2892         /* This is odd. Pick a flowlabel for each connection instead? */
2893         econnp->conn_flowinfo = lconnp->conn_flowinfo;
2894 
2895         econnp->conn_default_ttl = lconnp->conn_default_ttl;
2896 
2897         /*
2898          * TSOL: tsol_input_proc() needs the eager's cred before the
2899          * eager is accepted
2900          */
2901         ASSERT(lconnp->conn_cred != NULL);
2902         econnp->conn_cred = credp = lconnp->conn_cred;
2903         crhold(credp);
2904         econnp->conn_cpid = lconnp->conn_cpid;
2905         econnp->conn_open_time = ddi_get_lbolt64();
2906 
2907         /*
2908          * Cache things in the ixa without any refhold.
2909          * Listener might not have set up ixa_cred
2910          */
2911         ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2912         econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2913         econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2914         if (is_system_labeled())
2915                 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2916 
2917         /*
2918          * If the caller has the process-wide flag set, then default to MAC
2919          * exempt mode.  This allows read-down to unlabeled hosts.
2920          */
2921         if (getpflags(NET_MAC_AWARE, credp) != 0)
2922                 econnp->conn_mac_mode = CONN_MAC_AWARE;
2923 
2924         econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2925 
2926         /*
2927          * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2928          * via soaccept()->soinheritoptions() which essentially applies
2929          * all the listener options to the new connection. The options that we
2930          * need to take care of are:
2931          * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2932          * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2933          * SO_SNDBUF, SO_RCVBUF.
2934          *
2935          * SO_RCVBUF:   conn_rcvbuf is set.
2936          * SO_SNDBUF:   conn_sndbuf is set.
2937          */
2938 
2939         /* Could we define a struct and use a struct copy for this? */
2940         econnp->conn_sndbuf = lconnp->conn_sndbuf;
2941         econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2942         econnp->conn_sndlowat = lconnp->conn_sndlowat;
2943         econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2944         econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2945         econnp->conn_oobinline = lconnp->conn_oobinline;
2946         econnp->conn_debug = lconnp->conn_debug;
2947         econnp->conn_keepalive = lconnp->conn_keepalive;
2948         econnp->conn_linger = lconnp->conn_linger;
2949         econnp->conn_lingertime = lconnp->conn_lingertime;
2950 
2951         /* Set the IP options */
2952         econnp->conn_broadcast = lconnp->conn_broadcast;
2953         econnp->conn_useloopback = lconnp->conn_useloopback;
2954         econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2955         return (0);
2956 }