1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 /* Copyright (c) 1990 Mentat Inc. */
  26 
  27 #include <sys/types.h>
  28 #include <sys/stream.h>
  29 #include <sys/strsun.h>
  30 #define _SUN_TPI_VERSION 2
  31 #include <sys/tihdr.h>
  32 #include <sys/xti_inet.h>
  33 #include <sys/ucred.h>
  34 #include <sys/zone.h>
  35 #include <sys/ddi.h>
  36 #include <sys/sunddi.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/debug.h>
  39 #include <sys/atomic.h>
  40 #include <sys/policy.h>
  41 
  42 #include <sys/systm.h>
  43 #include <sys/param.h>
  44 #include <sys/kmem.h>
  45 #include <sys/sdt.h>
  46 #include <sys/socket.h>
  47 #include <sys/ethernet.h>
  48 #include <sys/mac.h>
  49 #include <net/if.h>
  50 #include <net/if_types.h>
  51 #include <net/if_arp.h>
  52 #include <net/route.h>
  53 #include <sys/sockio.h>
  54 #include <netinet/in.h>
  55 #include <net/if_dl.h>
  56 
  57 #include <inet/common.h>
  58 #include <inet/mi.h>
  59 #include <inet/mib2.h>
  60 #include <inet/nd.h>
  61 #include <inet/arp.h>
  62 #include <inet/snmpcom.h>
  63 #include <inet/kstatcom.h>
  64 
  65 #include <netinet/igmp_var.h>
  66 #include <netinet/ip6.h>
  67 #include <netinet/icmp6.h>
  68 #include <netinet/sctp.h>
  69 
  70 #include <inet/ip.h>
  71 #include <inet/ip_impl.h>
  72 #include <inet/ip6.h>
  73 #include <inet/ip6_asp.h>
  74 #include <inet/tcp.h>
  75 #include <inet/ip_multi.h>
  76 #include <inet/ip_if.h>
  77 #include <inet/ip_ire.h>
  78 #include <inet/ip_ftable.h>
  79 #include <inet/ip_rts.h>
  80 #include <inet/optcom.h>
  81 #include <inet/ip_ndp.h>
  82 #include <inet/ip_listutils.h>
  83 #include <netinet/igmp.h>
  84 #include <netinet/ip_mroute.h>
  85 #include <netinet/udp.h>
  86 #include <inet/ipp_common.h>
  87 
  88 #include <net/pfkeyv2.h>
  89 #include <inet/sadb.h>
  90 #include <inet/ipsec_impl.h>
  91 #include <inet/ipdrop.h>
  92 #include <inet/ip_netinfo.h>
  93 
  94 #include <inet/ipclassifier.h>
  95 #include <inet/sctp_ip.h>
  96 #include <inet/sctp/sctp_impl.h>
  97 #include <inet/udp_impl.h>
  98 #include <sys/sunddi.h>
  99 
 100 #include <sys/tsol/label.h>
 101 #include <sys/tsol/tnet.h>
 102 
 103 /*
 104  * Return how much size is needed for the different ancillary data items
 105  */
 106 uint_t
 107 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
 108     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
 109 {
 110         uint_t          ancil_size;
 111         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
 112 
 113         /*
 114          * If IP_RECVDSTADDR is set we include the destination IP
 115          * address as an option. With IP_RECVOPTS we include all
 116          * the IP options.
 117          */
 118         ancil_size = 0;
 119         if (recv_ancillary.crb_recvdstaddr &&
 120             (ira->ira_flags & IRAF_IS_IPV4)) {
 121                 ancil_size += sizeof (struct T_opthdr) +
 122                     sizeof (struct in_addr);
 123                 IP_STAT(ipst, conn_in_recvdstaddr);
 124         }
 125 
 126         /*
 127          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 128          * are different
 129          */
 130         if (recv_ancillary.crb_ip_recvpktinfo &&
 131             connp->conn_family == AF_INET) {
 132                 ancil_size += sizeof (struct T_opthdr) +
 133                     sizeof (struct in_pktinfo);
 134                 IP_STAT(ipst, conn_in_recvpktinfo);
 135         }
 136 
 137         if ((recv_ancillary.crb_recvopts) &&
 138             (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 139                 ancil_size += sizeof (struct T_opthdr) +
 140                     ipp->ipp_ipv4_options_len;
 141                 IP_STAT(ipst, conn_in_recvopts);
 142         }
 143 
 144         if (recv_ancillary.crb_recvslla) {
 145                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 146                 ill_t *ill;
 147 
 148                 /* Make sure ira_l2src is setup if not already */
 149                 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
 150                         ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
 151                             ipst);
 152                         if (ill != NULL) {
 153                                 ip_setl2src(mp, ira, ill);
 154                                 ill_refrele(ill);
 155                         }
 156                 }
 157                 ancil_size += sizeof (struct T_opthdr) +
 158                     sizeof (struct sockaddr_dl);
 159                 IP_STAT(ipst, conn_in_recvslla);
 160         }
 161 
 162         if (recv_ancillary.crb_recvif) {
 163                 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
 164                 IP_STAT(ipst, conn_in_recvif);
 165         }
 166 
 167         /*
 168          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 169          * are different
 170          */
 171         if (recv_ancillary.crb_ip_recvpktinfo &&
 172             connp->conn_family == AF_INET6) {
 173                 ancil_size += sizeof (struct T_opthdr) +
 174                     sizeof (struct in6_pktinfo);
 175                 IP_STAT(ipst, conn_in_recvpktinfo);
 176         }
 177 
 178         if (recv_ancillary.crb_ipv6_recvhoplimit) {
 179                 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 180                 IP_STAT(ipst, conn_in_recvhoplimit);
 181         }
 182 
 183         if (recv_ancillary.crb_ipv6_recvtclass) {
 184                 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 185                 IP_STAT(ipst, conn_in_recvtclass);
 186         }
 187 
 188         if (recv_ancillary.crb_ipv6_recvhopopts &&
 189             (ipp->ipp_fields & IPPF_HOPOPTS)) {
 190                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 191                 IP_STAT(ipst, conn_in_recvhopopts);
 192         }
 193         /*
 194          * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 195          * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 196          * options that appear before a routing header.
 197          * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 198          */
 199         if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 200                 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 201                     (recv_ancillary.crb_ipv6_recvdstopts &&
 202                     recv_ancillary.crb_ipv6_recvrthdr)) {
 203                         ancil_size += sizeof (struct T_opthdr) +
 204                             ipp->ipp_rthdrdstoptslen;
 205                         IP_STAT(ipst, conn_in_recvrthdrdstopts);
 206                 }
 207         }
 208         if ((recv_ancillary.crb_ipv6_recvrthdr) &&
 209             (ipp->ipp_fields & IPPF_RTHDR)) {
 210                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 211                 IP_STAT(ipst, conn_in_recvrthdr);
 212         }
 213         if ((recv_ancillary.crb_ipv6_recvdstopts ||
 214             recv_ancillary.crb_old_ipv6_recvdstopts) &&
 215             (ipp->ipp_fields & IPPF_DSTOPTS)) {
 216                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 217                 IP_STAT(ipst, conn_in_recvdstopts);
 218         }
 219         if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 220                 ancil_size += sizeof (struct T_opthdr) +
 221                     ucredminsize(ira->ira_cred);
 222                 IP_STAT(ipst, conn_in_recvucred);
 223         }
 224 
 225         /*
 226          * If SO_TIMESTAMP is set allocate the appropriate sized
 227          * buffer. Since gethrestime() expects a pointer aligned
 228          * argument, we allocate space necessary for extra
 229          * alignment (even though it might not be used).
 230          */
 231         if (recv_ancillary.crb_timestamp) {
 232                 ancil_size += sizeof (struct T_opthdr) +
 233                     sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 234                 IP_STAT(ipst, conn_in_timestamp);
 235         }
 236 
 237         /*
 238          * If IP_RECVTTL is set allocate the appropriate sized buffer
 239          */
 240         if (recv_ancillary.crb_recvttl &&
 241             (ira->ira_flags & IRAF_IS_IPV4)) {
 242                 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
 243                 IP_STAT(ipst, conn_in_recvttl);
 244         }
 245 
 246         return (ancil_size);
 247 }
 248 
 249 /*
 250  * Lay down the ancillary data items at "ancil_buf".
 251  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
 252  * large buffer - ancil_size.
 253  */
 254 void
 255 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
 256     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
 257 {
 258         /*
 259          * Copy in destination address before options to avoid
 260          * any padding issues.
 261          */
 262         if (recv_ancillary.crb_recvdstaddr &&
 263             (ira->ira_flags & IRAF_IS_IPV4)) {
 264                 struct T_opthdr *toh;
 265                 ipaddr_t *dstptr;
 266 
 267                 toh = (struct T_opthdr *)ancil_buf;
 268                 toh->level = IPPROTO_IP;
 269                 toh->name = IP_RECVDSTADDR;
 270                 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
 271                 toh->status = 0;
 272                 ancil_buf += sizeof (struct T_opthdr);
 273                 dstptr = (ipaddr_t *)ancil_buf;
 274                 *dstptr = ipp->ipp_addr_v4;
 275                 ancil_buf += sizeof (ipaddr_t);
 276                 ancil_size -= toh->len;
 277         }
 278 
 279         /*
 280          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 281          * are different
 282          */
 283         if (recv_ancillary.crb_ip_recvpktinfo &&
 284             connp->conn_family == AF_INET) {
 285                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 286                 struct T_opthdr *toh;
 287                 struct in_pktinfo *pktinfop;
 288                 ill_t *ill;
 289                 ipif_t *ipif;
 290 
 291                 toh = (struct T_opthdr *)ancil_buf;
 292                 toh->level = IPPROTO_IP;
 293                 toh->name = IP_PKTINFO;
 294                 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
 295                 toh->status = 0;
 296                 ancil_buf += sizeof (struct T_opthdr);
 297                 pktinfop = (struct in_pktinfo *)ancil_buf;
 298 
 299                 pktinfop->ipi_ifindex = ira->ira_ruifindex;
 300                 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
 301 
 302                 /* Find a good address to report */
 303                 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
 304                 if (ill != NULL) {
 305                         ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
 306                         if (ipif != NULL) {
 307                                 pktinfop->ipi_spec_dst.s_addr =
 308                                     ipif->ipif_lcl_addr;
 309                                 ipif_refrele(ipif);
 310                         }
 311                         ill_refrele(ill);
 312                 }
 313                 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
 314                 ancil_buf += sizeof (struct in_pktinfo);
 315                 ancil_size -= toh->len;
 316         }
 317 
 318         if ((recv_ancillary.crb_recvopts) &&
 319             (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 320                 struct T_opthdr *toh;
 321 
 322                 toh = (struct T_opthdr *)ancil_buf;
 323                 toh->level = IPPROTO_IP;
 324                 toh->name = IP_RECVOPTS;
 325                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
 326                 toh->status = 0;
 327                 ancil_buf += sizeof (struct T_opthdr);
 328                 bcopy(ipp->ipp_ipv4_options, ancil_buf,
 329                     ipp->ipp_ipv4_options_len);
 330                 ancil_buf += ipp->ipp_ipv4_options_len;
 331                 ancil_size -= toh->len;
 332         }
 333 
 334         if (recv_ancillary.crb_recvslla) {
 335                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 336                 struct T_opthdr *toh;
 337                 struct sockaddr_dl *dstptr;
 338                 ill_t *ill;
 339                 int alen = 0;
 340 
 341                 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
 342                 if (ill != NULL)
 343                         alen = ill->ill_phys_addr_length;
 344 
 345                 /*
 346                  * For loopback multicast and broadcast the packet arrives
 347                  * with ira_ruifdex being the physical interface, but
 348                  * ira_l2src is all zero since ip_postfrag_loopback doesn't
 349                  * know our l2src. We don't report the address in that case.
 350                  */
 351                 if (ira->ira_flags & IRAF_LOOPBACK)
 352                         alen = 0;
 353 
 354                 toh = (struct T_opthdr *)ancil_buf;
 355                 toh->level = IPPROTO_IP;
 356                 toh->name = IP_RECVSLLA;
 357                 toh->len = sizeof (struct T_opthdr) +
 358                     sizeof (struct sockaddr_dl);
 359                 toh->status = 0;
 360                 ancil_buf += sizeof (struct T_opthdr);
 361                 dstptr = (struct sockaddr_dl *)ancil_buf;
 362                 dstptr->sdl_family = AF_LINK;
 363                 dstptr->sdl_index = ira->ira_ruifindex;
 364                 if (ill != NULL)
 365                         dstptr->sdl_type = ill->ill_type;
 366                 else
 367                         dstptr->sdl_type = 0;
 368                 dstptr->sdl_nlen = 0;
 369                 dstptr->sdl_alen = alen;
 370                 dstptr->sdl_slen = 0;
 371                 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
 372                 ancil_buf += sizeof (struct sockaddr_dl);
 373                 ancil_size -= toh->len;
 374                 if (ill != NULL)
 375                         ill_refrele(ill);
 376         }
 377 
 378         if (recv_ancillary.crb_recvif) {
 379                 struct T_opthdr *toh;
 380                 uint_t          *dstptr;
 381 
 382                 toh = (struct T_opthdr *)ancil_buf;
 383                 toh->level = IPPROTO_IP;
 384                 toh->name = IP_RECVIF;
 385                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 386                 toh->status = 0;
 387                 ancil_buf += sizeof (struct T_opthdr);
 388                 dstptr = (uint_t *)ancil_buf;
 389                 *dstptr = ira->ira_ruifindex;
 390                 ancil_buf += sizeof (uint_t);
 391                 ancil_size -= toh->len;
 392         }
 393 
 394         /*
 395          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 396          * are different
 397          */
 398         if (recv_ancillary.crb_ip_recvpktinfo &&
 399             connp->conn_family == AF_INET6) {
 400                 struct T_opthdr *toh;
 401                 struct in6_pktinfo *pkti;
 402 
 403                 toh = (struct T_opthdr *)ancil_buf;
 404                 toh->level = IPPROTO_IPV6;
 405                 toh->name = IPV6_PKTINFO;
 406                 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
 407                 toh->status = 0;
 408                 ancil_buf += sizeof (struct T_opthdr);
 409                 pkti = (struct in6_pktinfo *)ancil_buf;
 410                 if (ira->ira_flags & IRAF_IS_IPV4) {
 411                         IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
 412                             &pkti->ipi6_addr);
 413                 } else {
 414                         pkti->ipi6_addr = ipp->ipp_addr;
 415                 }
 416                 pkti->ipi6_ifindex = ira->ira_ruifindex;
 417 
 418                 ancil_buf += sizeof (*pkti);
 419                 ancil_size -= toh->len;
 420         }
 421         if (recv_ancillary.crb_ipv6_recvhoplimit) {
 422                 struct T_opthdr *toh;
 423 
 424                 toh = (struct T_opthdr *)ancil_buf;
 425                 toh->level = IPPROTO_IPV6;
 426                 toh->name = IPV6_HOPLIMIT;
 427                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 428                 toh->status = 0;
 429                 ancil_buf += sizeof (struct T_opthdr);
 430                 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
 431                 ancil_buf += sizeof (uint_t);
 432                 ancil_size -= toh->len;
 433         }
 434         if (recv_ancillary.crb_ipv6_recvtclass) {
 435                 struct T_opthdr *toh;
 436 
 437                 toh = (struct T_opthdr *)ancil_buf;
 438                 toh->level = IPPROTO_IPV6;
 439                 toh->name = IPV6_TCLASS;
 440                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 441                 toh->status = 0;
 442                 ancil_buf += sizeof (struct T_opthdr);
 443 
 444                 if (ira->ira_flags & IRAF_IS_IPV4)
 445                         *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
 446                 else
 447                         *(uint_t *)ancil_buf = ipp->ipp_tclass;
 448                 ancil_buf += sizeof (uint_t);
 449                 ancil_size -= toh->len;
 450         }
 451         if (recv_ancillary.crb_ipv6_recvhopopts &&
 452             (ipp->ipp_fields & IPPF_HOPOPTS)) {
 453                 struct T_opthdr *toh;
 454 
 455                 toh = (struct T_opthdr *)ancil_buf;
 456                 toh->level = IPPROTO_IPV6;
 457                 toh->name = IPV6_HOPOPTS;
 458                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 459                 toh->status = 0;
 460                 ancil_buf += sizeof (struct T_opthdr);
 461                 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
 462                 ancil_buf += ipp->ipp_hopoptslen;
 463                 ancil_size -= toh->len;
 464         }
 465         /*
 466          * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 467          * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 468          * options that appear before a routing header.
 469          * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 470          */
 471         if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 472                 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 473                     (recv_ancillary.crb_ipv6_recvdstopts &&
 474                     recv_ancillary.crb_ipv6_recvrthdr)) {
 475                         struct T_opthdr *toh;
 476 
 477                         toh = (struct T_opthdr *)ancil_buf;
 478                         toh->level = IPPROTO_IPV6;
 479                         toh->name = IPV6_DSTOPTS;
 480                         toh->len = sizeof (struct T_opthdr) +
 481                             ipp->ipp_rthdrdstoptslen;
 482                         toh->status = 0;
 483                         ancil_buf += sizeof (struct T_opthdr);
 484                         bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
 485                             ipp->ipp_rthdrdstoptslen);
 486                         ancil_buf += ipp->ipp_rthdrdstoptslen;
 487                         ancil_size -= toh->len;
 488                 }
 489         }
 490         if (recv_ancillary.crb_ipv6_recvrthdr &&
 491             (ipp->ipp_fields & IPPF_RTHDR)) {
 492                 struct T_opthdr *toh;
 493 
 494                 toh = (struct T_opthdr *)ancil_buf;
 495                 toh->level = IPPROTO_IPV6;
 496                 toh->name = IPV6_RTHDR;
 497                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 498                 toh->status = 0;
 499                 ancil_buf += sizeof (struct T_opthdr);
 500                 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
 501                 ancil_buf += ipp->ipp_rthdrlen;
 502                 ancil_size -= toh->len;
 503         }
 504         if ((recv_ancillary.crb_ipv6_recvdstopts ||
 505             recv_ancillary.crb_old_ipv6_recvdstopts) &&
 506             (ipp->ipp_fields & IPPF_DSTOPTS)) {
 507                 struct T_opthdr *toh;
 508 
 509                 toh = (struct T_opthdr *)ancil_buf;
 510                 toh->level = IPPROTO_IPV6;
 511                 toh->name = IPV6_DSTOPTS;
 512                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 513                 toh->status = 0;
 514                 ancil_buf += sizeof (struct T_opthdr);
 515                 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
 516                 ancil_buf += ipp->ipp_dstoptslen;
 517                 ancil_size -= toh->len;
 518         }
 519 
 520         if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 521                 struct T_opthdr *toh;
 522                 cred_t          *rcr = connp->conn_cred;
 523 
 524                 toh = (struct T_opthdr *)ancil_buf;
 525                 toh->level = SOL_SOCKET;
 526                 toh->name = SCM_UCRED;
 527                 toh->len = sizeof (struct T_opthdr) +
 528                     ucredminsize(ira->ira_cred);
 529                 toh->status = 0;
 530                 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
 531                 ancil_buf += toh->len;
 532                 ancil_size -= toh->len;
 533         }
 534         if (recv_ancillary.crb_timestamp) {
 535                 struct  T_opthdr *toh;
 536 
 537                 toh = (struct T_opthdr *)ancil_buf;
 538                 toh->level = SOL_SOCKET;
 539                 toh->name = SCM_TIMESTAMP;
 540                 toh->len = sizeof (struct T_opthdr) +
 541                     sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 542                 toh->status = 0;
 543                 ancil_buf += sizeof (struct T_opthdr);
 544                 /* Align for gethrestime() */
 545                 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
 546                     sizeof (intptr_t));
 547                 gethrestime((timestruc_t *)ancil_buf);
 548                 ancil_buf = (uchar_t *)toh + toh->len;
 549                 ancil_size -= toh->len;
 550         }
 551 
 552         /*
 553          * CAUTION:
 554          * Due to aligment issues
 555          * Processing of IP_RECVTTL option
 556          * should always be the last. Adding
 557          * any option processing after this will
 558          * cause alignment panic.
 559          */
 560         if (recv_ancillary.crb_recvttl &&
 561             (ira->ira_flags & IRAF_IS_IPV4)) {
 562                 struct  T_opthdr *toh;
 563                 uint8_t *dstptr;
 564 
 565                 toh = (struct T_opthdr *)ancil_buf;
 566                 toh->level = IPPROTO_IP;
 567                 toh->name = IP_RECVTTL;
 568                 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
 569                 toh->status = 0;
 570                 ancil_buf += sizeof (struct T_opthdr);
 571                 dstptr = (uint8_t *)ancil_buf;
 572                 *dstptr = ipp->ipp_hoplimit;
 573                 ancil_buf += sizeof (uint8_t);
 574                 ancil_size -= toh->len;
 575         }
 576 
 577         /* Consumed all of allocated space */
 578         ASSERT(ancil_size == 0);
 579 
 580 }
 581 
 582 /*
 583  * This routine retrieves the current status of socket options.
 584  * It returns the size of the option retrieved, or -1.
 585  */
 586 int
 587 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
 588     uchar_t *ptr)
 589 {
 590         int             *i1 = (int *)ptr;
 591         conn_t          *connp = coa->coa_connp;
 592         ip_xmit_attr_t  *ixa = coa->coa_ixa;
 593         ip_pkt_t        *ipp = coa->coa_ipp;
 594         ip_stack_t      *ipst = ixa->ixa_ipst;
 595         uint_t          len;
 596 
 597         ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
 598 
 599         switch (level) {
 600         case SOL_SOCKET:
 601                 switch (name) {
 602                 case SO_DEBUG:
 603                         *i1 = connp->conn_debug ? SO_DEBUG : 0;
 604                         break;  /* goto sizeof (int) option return */
 605                 case SO_KEEPALIVE:
 606                         *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
 607                         break;
 608                 case SO_LINGER: {
 609                         struct linger *lgr = (struct linger *)ptr;
 610 
 611                         lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
 612                         lgr->l_linger = connp->conn_lingertime;
 613                         }
 614                         return (sizeof (struct linger));
 615 
 616                 case SO_OOBINLINE:
 617                         *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
 618                         break;
 619                 case SO_REUSEADDR:
 620                         *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
 621                         break;  /* goto sizeof (int) option return */
 622                 case SO_TYPE:
 623                         *i1 = connp->conn_so_type;
 624                         break;  /* goto sizeof (int) option return */
 625                 case SO_DONTROUTE:
 626                         *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
 627                             SO_DONTROUTE : 0;
 628                         break;  /* goto sizeof (int) option return */
 629                 case SO_USELOOPBACK:
 630                         *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
 631                         break;  /* goto sizeof (int) option return */
 632                 case SO_BROADCAST:
 633                         *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
 634                         break;  /* goto sizeof (int) option return */
 635 
 636                 case SO_SNDBUF:
 637                         *i1 = connp->conn_sndbuf;
 638                         break;  /* goto sizeof (int) option return */
 639                 case SO_RCVBUF:
 640                         *i1 = connp->conn_rcvbuf;
 641                         break;  /* goto sizeof (int) option return */
 642                 case SO_RCVTIMEO:
 643                 case SO_SNDTIMEO:
                        /*
                         * Accept these two options so that third-party
                         * protocols can use them. Here we just return 0.
                         */
 648                         *i1 = 0;
 649                         break;
 650                 case SO_DGRAM_ERRIND:
 651                         *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
 652                         break;  /* goto sizeof (int) option return */
 653                 case SO_RECVUCRED:
 654                         *i1 = connp->conn_recv_ancillary.crb_recvucred;
 655                         break;  /* goto sizeof (int) option return */
 656                 case SO_TIMESTAMP:
 657                         *i1 = connp->conn_recv_ancillary.crb_timestamp;
 658                         break;  /* goto sizeof (int) option return */
 659                 case SO_VRRP:
 660                         *i1 = connp->conn_isvrrp;
 661                         break;  /* goto sizeof (int) option return */
 662                 case SO_ANON_MLP:
 663                         *i1 = connp->conn_anon_mlp;
 664                         break;  /* goto sizeof (int) option return */
 665                 case SO_MAC_EXEMPT:
 666                         *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
 667                         break;  /* goto sizeof (int) option return */
 668                 case SO_MAC_IMPLICIT:
 669                         *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
 670                         break;  /* goto sizeof (int) option return */
 671                 case SO_ALLZONES:
 672                         *i1 = connp->conn_allzones;
 673                         break;  /* goto sizeof (int) option return */
 674                 case SO_EXCLBIND:
 675                         *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
 676                         break;
 677                 case SO_PROTOTYPE:
 678                         *i1 = connp->conn_proto;
 679                         break;
 680 
 681                 case SO_DOMAIN:
 682                         *i1 = connp->conn_family;
 683                         break;
 684                 default:
 685                         return (-1);
 686                 }
 687                 break;
 688         case IPPROTO_IP:
 689                 if (connp->conn_family != AF_INET)
 690                         return (-1);
 691                 switch (name) {
 692                 case IP_OPTIONS:
 693                 case T_IP_OPTIONS:
 694                         if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
 695                                 return (0);
 696 
 697                         len = ipp->ipp_ipv4_options_len;
 698                         if (len > 0) {
 699                                 bcopy(ipp->ipp_ipv4_options, ptr, len);
 700                         }
 701                         return (len);
 702 
 703                 case IP_PKTINFO: {
 704                         /*
 705                          * This also handles IP_RECVPKTINFO.
 706                          * IP_PKTINFO and IP_RECVPKTINFO have same value.
 707                          * Differentiation is based on the size of the
 708                          * argument passed in.
 709                          */
 710                         struct in_pktinfo *pktinfo;
 711 
 712 #ifdef notdef
 713                         /* optcom doesn't provide a length with "get" */
 714                         if (inlen == sizeof (int)) {
 715                                 /* This is IP_RECVPKTINFO option. */
 716                                 *i1 = connp->conn_recv_ancillary.
 717                                     crb_ip_recvpktinfo;
 718                                 return (sizeof (int));
 719                         }
 720 #endif
 721                         /* XXX assumes that caller has room for max size! */
 722 
 723                         pktinfo = (struct in_pktinfo *)ptr;
 724                         pktinfo->ipi_ifindex = ixa->ixa_ifindex;
 725                         if (ipp->ipp_fields & IPPF_ADDR)
 726                                 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
 727                         else
 728                                 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
 729                         return (sizeof (struct in_pktinfo));
 730                 }
 731                 case IP_DONTFRAG:
 732                         *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 733                         return (sizeof (int));
 734                 case IP_TOS:
 735                 case T_IP_TOS:
 736                         *i1 = (int)ipp->ipp_type_of_service;
 737                         break;  /* goto sizeof (int) option return */
 738                 case IP_TTL:
 739                         *i1 = (int)ipp->ipp_unicast_hops;
 740                         break;  /* goto sizeof (int) option return */
 741                 case IP_DHCPINIT_IF:
 742                         return (-1);
 743                 case IP_NEXTHOP:
 744                         if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
 745                                 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
 746                                 return (sizeof (ipaddr_t));
 747                         } else {
 748                                 return (0);
 749                         }
 750 
 751                 case IP_MULTICAST_IF:
 752                         /* 0 address if not set */
 753                         *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
 754                         return (sizeof (ipaddr_t));
 755                 case IP_MULTICAST_TTL:
 756                         *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
 757                         return (sizeof (uchar_t));
 758                 case IP_MULTICAST_LOOP:
 759                         *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 760                         return (sizeof (uint8_t));
 761                 case IP_RECVOPTS:
 762                         *i1 = connp->conn_recv_ancillary.crb_recvopts;
 763                         break;  /* goto sizeof (int) option return */
 764                 case IP_RECVDSTADDR:
 765                         *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 766                         break;  /* goto sizeof (int) option return */
 767                 case IP_RECVIF:
 768                         *i1 = connp->conn_recv_ancillary.crb_recvif;
 769                         break;  /* goto sizeof (int) option return */
 770                 case IP_RECVSLLA:
 771                         *i1 = connp->conn_recv_ancillary.crb_recvslla;
 772                         break;  /* goto sizeof (int) option return */
 773                 case IP_RECVTTL:
 774                         *i1 = connp->conn_recv_ancillary.crb_recvttl;
 775                         break;  /* goto sizeof (int) option return */
 776                 case IP_ADD_MEMBERSHIP:
 777                 case IP_DROP_MEMBERSHIP:
 778                 case MCAST_JOIN_GROUP:
 779                 case MCAST_LEAVE_GROUP:
 780                 case IP_BLOCK_SOURCE:
 781                 case IP_UNBLOCK_SOURCE:
 782                 case IP_ADD_SOURCE_MEMBERSHIP:
 783                 case IP_DROP_SOURCE_MEMBERSHIP:
 784                 case MCAST_BLOCK_SOURCE:
 785                 case MCAST_UNBLOCK_SOURCE:
 786                 case MCAST_JOIN_SOURCE_GROUP:
 787                 case MCAST_LEAVE_SOURCE_GROUP:
 788                 case MRT_INIT:
 789                 case MRT_DONE:
 790                 case MRT_ADD_VIF:
 791                 case MRT_DEL_VIF:
 792                 case MRT_ADD_MFC:
 793                 case MRT_DEL_MFC:
 794                         /* cannot "get" the value for these */
 795                         return (-1);
 796                 case MRT_VERSION:
 797                 case MRT_ASSERT:
 798                         (void) ip_mrouter_get(name, connp, ptr);
 799                         return (sizeof (int));
 800                 case IP_SEC_OPT:
 801                         return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 802                             IPSEC_AF_V4));
 803                 case IP_BOUND_IF:
 804                         /* Zero if not set */
 805                         *i1 = connp->conn_bound_if;
 806                         break;  /* goto sizeof (int) option return */
 807                 case IP_UNSPEC_SRC:
 808                         *i1 = connp->conn_unspec_src;
 809                         break;  /* goto sizeof (int) option return */
 810                 case IP_BROADCAST_TTL:
 811                         if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
 812                                 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
 813                         else
 814                                 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
 815                         return (sizeof (uchar_t));
 816                 default:
 817                         return (-1);
 818                 }
 819                 break;
 820         case IPPROTO_IPV6:
 821                 if (connp->conn_family != AF_INET6)
 822                         return (-1);
 823                 switch (name) {
 824                 case IPV6_UNICAST_HOPS:
 825                         *i1 = (int)ipp->ipp_unicast_hops;
 826                         break;  /* goto sizeof (int) option return */
 827                 case IPV6_MULTICAST_IF:
 828                         /* 0 index if not set */
 829                         *i1 = ixa->ixa_multicast_ifindex;
 830                         break;  /* goto sizeof (int) option return */
 831                 case IPV6_MULTICAST_HOPS:
 832                         *i1 = ixa->ixa_multicast_ttl;
 833                         break;  /* goto sizeof (int) option return */
 834                 case IPV6_MULTICAST_LOOP:
 835                         *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 836                         break;  /* goto sizeof (int) option return */
 837                 case IPV6_JOIN_GROUP:
 838                 case IPV6_LEAVE_GROUP:
 839                 case MCAST_JOIN_GROUP:
 840                 case MCAST_LEAVE_GROUP:
 841                 case MCAST_BLOCK_SOURCE:
 842                 case MCAST_UNBLOCK_SOURCE:
 843                 case MCAST_JOIN_SOURCE_GROUP:
 844                 case MCAST_LEAVE_SOURCE_GROUP:
 845                         /* cannot "get" the value for these */
 846                         return (-1);
 847                 case IPV6_BOUND_IF:
 848                         /* Zero if not set */
 849                         *i1 = connp->conn_bound_if;
 850                         break;  /* goto sizeof (int) option return */
 851                 case IPV6_UNSPEC_SRC:
 852                         *i1 = connp->conn_unspec_src;
 853                         break;  /* goto sizeof (int) option return */
 854                 case IPV6_RECVPKTINFO:
 855                         *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
 856                         break;  /* goto sizeof (int) option return */
 857                 case IPV6_RECVTCLASS:
 858                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
 859                         break;  /* goto sizeof (int) option return */
 860                 case IPV6_RECVPATHMTU:
 861                         *i1 = connp->conn_ipv6_recvpathmtu;
 862                         break;  /* goto sizeof (int) option return */
 863                 case IPV6_RECVHOPLIMIT:
 864                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
 865                         break;  /* goto sizeof (int) option return */
 866                 case IPV6_RECVHOPOPTS:
 867                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
 868                         break;  /* goto sizeof (int) option return */
 869                 case IPV6_RECVDSTOPTS:
 870                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
 871                         break;  /* goto sizeof (int) option return */
 872                 case _OLD_IPV6_RECVDSTOPTS:
 873                         *i1 =
 874                             connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
 875                         break;  /* goto sizeof (int) option return */
 876                 case IPV6_RECVRTHDRDSTOPTS:
 877                         *i1 = connp->conn_recv_ancillary.
 878                             crb_ipv6_recvrthdrdstopts;
 879                         break;  /* goto sizeof (int) option return */
 880                 case IPV6_RECVRTHDR:
 881                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
 882                         break;  /* goto sizeof (int) option return */
 883                 case IPV6_PKTINFO: {
 884                         /* XXX assumes that caller has room for max size! */
 885                         struct in6_pktinfo *pkti;
 886 
 887                         pkti = (struct in6_pktinfo *)ptr;
 888                         pkti->ipi6_ifindex = ixa->ixa_ifindex;
 889                         if (ipp->ipp_fields & IPPF_ADDR)
 890                                 pkti->ipi6_addr = ipp->ipp_addr;
 891                         else
 892                                 pkti->ipi6_addr = ipv6_all_zeros;
 893                         return (sizeof (struct in6_pktinfo));
 894                 }
 895                 case IPV6_TCLASS:
 896                         *i1 = ipp->ipp_tclass;
 897                         break;  /* goto sizeof (int) option return */
 898                 case IPV6_NEXTHOP: {
 899                         sin6_t *sin6 = (sin6_t *)ptr;
 900 
 901                         if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
 902                                 return (0);
 903 
 904                         *sin6 = sin6_null;
 905                         sin6->sin6_family = AF_INET6;
 906                         sin6->sin6_addr = ixa->ixa_nexthop_v6;
 907 
 908                         return (sizeof (sin6_t));
 909                 }
 910                 case IPV6_HOPOPTS:
 911                         if (!(ipp->ipp_fields & IPPF_HOPOPTS))
 912                                 return (0);
 913                         bcopy(ipp->ipp_hopopts, ptr,
 914                             ipp->ipp_hopoptslen);
 915                         return (ipp->ipp_hopoptslen);
 916                 case IPV6_RTHDRDSTOPTS:
 917                         if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
 918                                 return (0);
 919                         bcopy(ipp->ipp_rthdrdstopts, ptr,
 920                             ipp->ipp_rthdrdstoptslen);
 921                         return (ipp->ipp_rthdrdstoptslen);
 922                 case IPV6_RTHDR:
 923                         if (!(ipp->ipp_fields & IPPF_RTHDR))
 924                                 return (0);
 925                         bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
 926                         return (ipp->ipp_rthdrlen);
 927                 case IPV6_DSTOPTS:
 928                         if (!(ipp->ipp_fields & IPPF_DSTOPTS))
 929                                 return (0);
 930                         bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
 931                         return (ipp->ipp_dstoptslen);
 932                 case IPV6_PATHMTU:
 933                         return (ip_fill_mtuinfo(connp, ixa,
 934                             (struct ip6_mtuinfo *)ptr));
 935                 case IPV6_SEC_OPT:
 936                         return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 937                             IPSEC_AF_V6));
 938                 case IPV6_SRC_PREFERENCES:
 939                         return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
 940                 case IPV6_DONTFRAG:
 941                         *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 942                         return (sizeof (int));
 943                 case IPV6_USE_MIN_MTU:
 944                         if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
 945                                 *i1 = ixa->ixa_use_min_mtu;
 946                         else
 947                                 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
 948                         break;
 949                 case IPV6_V6ONLY:
 950                         *i1 = connp->conn_ipv6_v6only;
 951                         return (sizeof (int));
 952                 default:
 953                         return (-1);
 954                 }
 955                 break;
 956         case IPPROTO_UDP:
 957                 switch (name) {
 958                 case UDP_ANONPRIVBIND:
 959                         *i1 = connp->conn_anon_priv_bind;
 960                         break;
 961                 case UDP_EXCLBIND:
 962                         *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
 963                         break;
 964                 default:
 965                         return (-1);
 966                 }
 967                 break;
 968         case IPPROTO_TCP:
 969                 switch (name) {
 970                 case TCP_RECVDSTADDR:
 971                         *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 972                         break;
 973                 case TCP_ANONPRIVBIND:
 974                         *i1 = connp->conn_anon_priv_bind;
 975                         break;
 976                 case TCP_EXCLBIND:
 977                         *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
 978                         break;
 979                 default:
 980                         return (-1);
 981                 }
 982                 break;
 983         default:
 984                 return (-1);
 985         }
 986         return (sizeof (int));
 987 }
 988 
 989 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
 990     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 991 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
 992     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 993 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
 994     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 995 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
 996     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 997 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
 998     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 999 
1000 /*
1001  * This routine sets the most common socket options including some
1002  * that are transport/ULP specific.
1003  * It returns errno or zero.
1004  *
1005  * For fixed length options, there is no sanity check
1006  * of passed in length is done. It is assumed *_optcom_req()
1007  * routines do the right thing.
1008  */
1009 int
1010 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1011     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1012 {
1013         ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1014 
1015         /* We have different functions for different levels */
1016         switch (level) {
1017         case SOL_SOCKET:
1018                 return (conn_opt_set_socket(coa, name, inlen, invalp,
1019                     checkonly, cr));
1020         case IPPROTO_IP:
1021                 return (conn_opt_set_ip(coa, name, inlen, invalp,
1022                     checkonly, cr));
1023         case IPPROTO_IPV6:
1024                 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1025                     checkonly, cr));
1026         case IPPROTO_UDP:
1027                 return (conn_opt_set_udp(coa, name, inlen, invalp,
1028                     checkonly, cr));
1029         case IPPROTO_TCP:
1030                 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1031                     checkonly, cr));
1032         default:
1033                 return (0);
1034         }
1035 }
1036 
1037 /*
1038  * Handle SOL_SOCKET
1039  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1040  * it implement their own checks and setting of conn_proto.
1041  */
1042 /* ARGSUSED1 */
1043 static int
1044 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1045     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1046 {
1047         conn_t          *connp = coa->coa_connp;
1048         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1049         int             *i1 = (int *)invalp;
1050         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1051 
1052         switch (name) {
1053         case SO_ALLZONES:
1054                 if (IPCL_IS_BOUND(connp))
1055                         return (EINVAL);
1056                 break;
1057         case SO_VRRP:
1058                 if (secpolicy_ip_config(cr, checkonly) != 0)
1059                         return (EACCES);
1060                 break;
1061         case SO_MAC_EXEMPT:
1062                 if (secpolicy_net_mac_aware(cr) != 0)
1063                         return (EACCES);
1064                 if (IPCL_IS_BOUND(connp))
1065                         return (EINVAL);
1066                 break;
1067         case SO_MAC_IMPLICIT:
1068                 if (secpolicy_net_mac_implicit(cr) != 0)
1069                         return (EACCES);
1070                 break;
1071         }
1072         if (checkonly)
1073                 return (0);
1074 
1075         mutex_enter(&connp->conn_lock);
1076         /* Here we set the actual option value */
1077         switch (name) {
1078         case SO_DEBUG:
1079                 connp->conn_debug = onoff;
1080                 break;
1081         case SO_KEEPALIVE:
1082                 connp->conn_keepalive = onoff;
1083                 break;
1084         case SO_LINGER: {
1085                 struct linger *lgr = (struct linger *)invalp;
1086 
1087                 if (lgr->l_onoff) {
1088                         connp->conn_linger = 1;
1089                         connp->conn_lingertime = lgr->l_linger;
1090                 } else {
1091                         connp->conn_linger = 0;
1092                         connp->conn_lingertime = 0;
1093                 }
1094                 break;
1095         }
1096         case SO_OOBINLINE:
1097                 connp->conn_oobinline = onoff;
1098                 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1099                 break;
1100         case SO_REUSEADDR:
1101                 connp->conn_reuseaddr = onoff;
1102                 break;
1103         case SO_DONTROUTE:
1104                 if (onoff)
1105                         ixa->ixa_flags |= IXAF_DONTROUTE;
1106                 else
1107                         ixa->ixa_flags &= ~IXAF_DONTROUTE;
1108                 coa->coa_changed |= COA_ROUTE_CHANGED;
1109                 break;
1110         case SO_USELOOPBACK:
1111                 connp->conn_useloopback = onoff;
1112                 break;
1113         case SO_BROADCAST:
1114                 connp->conn_broadcast = onoff;
1115                 break;
1116         case SO_SNDBUF:
1117                 /* ULP has range checked the value */
1118                 connp->conn_sndbuf = *i1;
1119                 coa->coa_changed |= COA_SNDBUF_CHANGED;
1120                 break;
1121         case SO_RCVBUF:
1122                 /* ULP has range checked the value */
1123                 connp->conn_rcvbuf = *i1;
1124                 coa->coa_changed |= COA_RCVBUF_CHANGED;
1125                 break;
1126         case SO_RCVTIMEO:
1127         case SO_SNDTIMEO:
1128                 /*
1129                  * Pass these two options in order for third part
1130                  * protocol usage.
1131                  */
1132                 break;
1133         case SO_DGRAM_ERRIND:
1134                 connp->conn_dgram_errind = onoff;
1135                 break;
1136         case SO_RECVUCRED:
1137                 connp->conn_recv_ancillary.crb_recvucred = onoff;
1138                 break;
1139         case SO_ALLZONES:
1140                 connp->conn_allzones = onoff;
1141                 coa->coa_changed |= COA_ROUTE_CHANGED;
1142                 if (onoff)
1143                         ixa->ixa_zoneid = ALL_ZONES;
1144                 else
1145                         ixa->ixa_zoneid = connp->conn_zoneid;
1146                 break;
1147         case SO_TIMESTAMP:
1148                 connp->conn_recv_ancillary.crb_timestamp = onoff;
1149                 break;
1150         case SO_VRRP:
1151                 connp->conn_isvrrp = onoff;
1152                 break;
1153         case SO_ANON_MLP:
1154                 connp->conn_anon_mlp = onoff;
1155                 break;
1156         case SO_MAC_EXEMPT:
1157                 connp->conn_mac_mode = onoff ?
1158                     CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1159                 break;
1160         case SO_MAC_IMPLICIT:
1161                 connp->conn_mac_mode = onoff ?
1162                     CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1163                 break;
1164         case SO_EXCLBIND:
1165                 connp->conn_exclbind = onoff;
1166                 break;
1167         }
1168         mutex_exit(&connp->conn_lock);
1169         return (0);
1170 }
1171 
1172 /* Handle IPPROTO_IP */
1173 static int
1174 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1175     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1176 {
1177         conn_t          *connp = coa->coa_connp;
1178         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1179         ip_pkt_t        *ipp = coa->coa_ipp;
1180         int             *i1 = (int *)invalp;
1181         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1182         ipaddr_t        addr = (ipaddr_t)*i1;
1183         uint_t          ifindex;
1184         zoneid_t        zoneid = IPCL_ZONEID(connp);
1185         ipif_t          *ipif;
1186         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1187         int             error;
1188 
1189         if (connp->conn_family != AF_INET)
1190                 return (EINVAL);
1191 
1192         switch (name) {
1193         case IP_TTL:
1194                 /* Don't allow zero */
1195                 if (*i1 < 1 || *i1 > 255)
1196                         return (EINVAL);
1197                 break;
1198         case IP_MULTICAST_IF:
1199                 if (addr == INADDR_ANY) {
1200                         /* Clear */
1201                         ifindex = 0;
1202                         break;
1203                 }
1204                 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1205                 if (ipif == NULL)
1206                         return (EHOSTUNREACH);
1207                 /* not supported by the virtual network iface */
1208                 if (IS_VNI(ipif->ipif_ill)) {
1209                         ipif_refrele(ipif);
1210                         return (EINVAL);
1211                 }
1212                 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1213                 ipif_refrele(ipif);
1214                 break;
1215         case IP_NEXTHOP: {
1216                 ire_t   *ire;
1217 
1218                 if (addr == INADDR_ANY) {
1219                         /* Clear */
1220                         break;
1221                 }
1222                 /* Verify that the next-hop is on-link */
1223                 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1224                     NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1225                 if (ire == NULL)
1226                         return (EHOSTUNREACH);
1227                 ire_refrele(ire);
1228                 break;
1229         }
1230         case IP_OPTIONS:
1231         case T_IP_OPTIONS: {
1232                 uint_t newlen;
1233 
1234                 if (ipp->ipp_fields & IPPF_LABEL_V4)
1235                         newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1236                 else
1237                         newlen = inlen;
1238                 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1239                         return (EINVAL);
1240                 }
1241                 break;
1242         }
1243         case IP_PKTINFO: {
1244                 struct in_pktinfo *pktinfo;
1245 
1246                 /* Two different valid lengths */
1247                 if (inlen != sizeof (int) &&
1248                     inlen != sizeof (struct in_pktinfo))
1249                         return (EINVAL);
1250                 if (inlen == sizeof (int))
1251                         break;
1252 
1253                 pktinfo = (struct in_pktinfo *)invalp;
1254                 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1255                         switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1256                             zoneid, ipst, B_FALSE)) {
1257                         case IPVL_UNICAST_UP:
1258                         case IPVL_UNICAST_DOWN:
1259                                 break;
1260                         default:
1261                                 return (EADDRNOTAVAIL);
1262                         }
1263                 }
1264                 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1265                     B_FALSE, ipst))
1266                         return (ENXIO);
1267                 break;
1268         }
1269         case IP_BOUND_IF:
1270                 ifindex = *(uint_t *)i1;
1271 
1272                 /* Just check it is ok. */
1273                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1274                         return (ENXIO);
1275                 break;
1276         }
1277         if (checkonly)
1278                 return (0);
1279 
1280         /* Here we set the actual option value */
1281         /*
1282          * conn_lock protects the bitfields, and is used to
1283          * set the fields atomically. Not needed for ixa settings since
1284          * the caller has an exclusive copy of the ixa.
1285          * We can not hold conn_lock across the multicast options though.
1286          */
1287         switch (name) {
1288         case IP_OPTIONS:
1289         case T_IP_OPTIONS:
1290                 /* Save options for use by IP. */
1291                 mutex_enter(&connp->conn_lock);
1292                 error = optcom_pkt_set(invalp, inlen,
1293                     (uchar_t **)&ipp->ipp_ipv4_options,
1294                     &ipp->ipp_ipv4_options_len);
1295                 if (error != 0) {
1296                         mutex_exit(&connp->conn_lock);
1297                         return (error);
1298                 }
1299                 if (ipp->ipp_ipv4_options_len == 0) {
1300                         ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1301                 } else {
1302                         ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1303                 }
1304                 mutex_exit(&connp->conn_lock);
1305                 coa->coa_changed |= COA_HEADER_CHANGED;
1306                 coa->coa_changed |= COA_WROFF_CHANGED;
1307                 break;
1308 
1309         case IP_TTL:
1310                 mutex_enter(&connp->conn_lock);
1311                 ipp->ipp_unicast_hops = *i1;
1312                 mutex_exit(&connp->conn_lock);
1313                 coa->coa_changed |= COA_HEADER_CHANGED;
1314                 break;
1315         case IP_TOS:
1316         case T_IP_TOS:
1317                 mutex_enter(&connp->conn_lock);
1318                 if (*i1 == -1) {
1319                         ipp->ipp_type_of_service = 0;
1320                 } else {
1321                         ipp->ipp_type_of_service = *i1;
1322                 }
1323                 mutex_exit(&connp->conn_lock);
1324                 coa->coa_changed |= COA_HEADER_CHANGED;
1325                 break;
1326         case IP_MULTICAST_IF:
1327                 ixa->ixa_multicast_ifindex = ifindex;
1328                 ixa->ixa_multicast_ifaddr = addr;
1329                 coa->coa_changed |= COA_ROUTE_CHANGED;
1330                 break;
1331         case IP_MULTICAST_TTL:
1332                 ixa->ixa_multicast_ttl = *invalp;
1333                 /* Handled automatically by ip_output */
1334                 break;
1335         case IP_MULTICAST_LOOP:
1336                 if (*invalp != 0)
1337                         ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1338                 else
1339                         ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1340                 /* Handled automatically by ip_output */
1341                 break;
1342         case IP_RECVOPTS:
1343                 mutex_enter(&connp->conn_lock);
1344                 connp->conn_recv_ancillary.crb_recvopts = onoff;
1345                 mutex_exit(&connp->conn_lock);
1346                 break;
1347         case IP_RECVDSTADDR:
1348                 mutex_enter(&connp->conn_lock);
1349                 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1350                 mutex_exit(&connp->conn_lock);
1351                 break;
1352         case IP_RECVIF:
1353                 mutex_enter(&connp->conn_lock);
1354                 connp->conn_recv_ancillary.crb_recvif = onoff;
1355                 mutex_exit(&connp->conn_lock);
1356                 break;
1357         case IP_RECVSLLA:
1358                 mutex_enter(&connp->conn_lock);
1359                 connp->conn_recv_ancillary.crb_recvslla = onoff;
1360                 mutex_exit(&connp->conn_lock);
1361                 break;
1362         case IP_RECVTTL:
1363                 mutex_enter(&connp->conn_lock);
1364                 connp->conn_recv_ancillary.crb_recvttl = onoff;
1365                 mutex_exit(&connp->conn_lock);
1366                 break;
1367         case IP_PKTINFO: {
1368                 /*
1369                  * This also handles IP_RECVPKTINFO.
1370                  * IP_PKTINFO and IP_RECVPKTINFO have same value.
1371                  * Differentiation is based on the size of the
1372                  * argument passed in.
1373                  */
1374                 struct in_pktinfo *pktinfo;
1375 
1376                 if (inlen == sizeof (int)) {
1377                         /* This is IP_RECVPKTINFO option. */
1378                         mutex_enter(&connp->conn_lock);
1379                         connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1380                             onoff;
1381                         mutex_exit(&connp->conn_lock);
1382                         break;
1383                 }
1384 
1385                 /* This is IP_PKTINFO option. */
1386                 mutex_enter(&connp->conn_lock);
1387                 pktinfo = (struct in_pktinfo *)invalp;
1388                 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1389                         ipp->ipp_fields |= IPPF_ADDR;
1390                         IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1391                             &ipp->ipp_addr);
1392                 } else {
1393                         ipp->ipp_fields &= ~IPPF_ADDR;
1394                         ipp->ipp_addr = ipv6_all_zeros;
1395                 }
1396                 mutex_exit(&connp->conn_lock);
1397                 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1398                 coa->coa_changed |= COA_ROUTE_CHANGED;
1399                 coa->coa_changed |= COA_HEADER_CHANGED;
1400                 break;
1401         }
1402         case IP_DONTFRAG:
1403                 if (onoff) {
1404                         ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1405                         ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1406                 } else {
1407                         ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1408                         ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1409                 }
1410                 /* Need to redo ip_attr_connect */
1411                 coa->coa_changed |= COA_ROUTE_CHANGED;
1412                 break;
1413         case IP_ADD_MEMBERSHIP:
1414         case IP_DROP_MEMBERSHIP:
1415         case MCAST_JOIN_GROUP:
1416         case MCAST_LEAVE_GROUP:
1417                 return (ip_opt_set_multicast_group(connp, name,
1418                     invalp, B_FALSE, checkonly));
1419 
1420         case IP_BLOCK_SOURCE:
1421         case IP_UNBLOCK_SOURCE:
1422         case IP_ADD_SOURCE_MEMBERSHIP:
1423         case IP_DROP_SOURCE_MEMBERSHIP:
1424         case MCAST_BLOCK_SOURCE:
1425         case MCAST_UNBLOCK_SOURCE:
1426         case MCAST_JOIN_SOURCE_GROUP:
1427         case MCAST_LEAVE_SOURCE_GROUP:
1428                 return (ip_opt_set_multicast_sources(connp, name,
1429                     invalp, B_FALSE, checkonly));
1430 
1431         case IP_SEC_OPT:
1432                 mutex_enter(&connp->conn_lock);
1433                 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1434                 mutex_exit(&connp->conn_lock);
1435                 if (error != 0) {
1436                         return (error);
1437                 }
1438                 /* This is an IPsec policy change - redo ip_attr_connect */
1439                 coa->coa_changed |= COA_ROUTE_CHANGED;
1440                 break;
1441         case IP_NEXTHOP:
1442                 ixa->ixa_nexthop_v4 = addr;
1443                 if (addr != INADDR_ANY)
1444                         ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1445                 else
1446                         ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1447                 coa->coa_changed |= COA_ROUTE_CHANGED;
1448                 break;
1449 
1450         case IP_BOUND_IF:
1451                 ixa->ixa_ifindex = ifindex;          /* Send */
1452                 mutex_enter(&connp->conn_lock);
1453                 connp->conn_incoming_ifindex = ifindex;      /* Receive */
1454                 connp->conn_bound_if = ifindex;              /* getsockopt */
1455                 mutex_exit(&connp->conn_lock);
1456                 coa->coa_changed |= COA_ROUTE_CHANGED;
1457                 break;
1458         case IP_UNSPEC_SRC:
1459                 mutex_enter(&connp->conn_lock);
1460                 connp->conn_unspec_src = onoff;
1461                 if (onoff)
1462                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1463                 else
1464                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1465 
1466                 mutex_exit(&connp->conn_lock);
1467                 break;
1468         case IP_BROADCAST_TTL:
1469                 ixa->ixa_broadcast_ttl = *invalp;
1470                 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1471                 /* Handled automatically by ip_output */
1472                 break;
1473         case MRT_INIT:
1474         case MRT_DONE:
1475         case MRT_ADD_VIF:
1476         case MRT_DEL_VIF:
1477         case MRT_ADD_MFC:
1478         case MRT_DEL_MFC:
1479         case MRT_ASSERT:
1480                 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1481                         return (error);
1482                 }
1483                 error = ip_mrouter_set((int)name, connp, checkonly,
1484                     (uchar_t *)invalp, inlen);
1485                 if (error) {
1486                         return (error);
1487                 }
1488                 return (0);
1489 
1490         }
1491         return (0);
1492 }
1493 
1494 /* Handle IPPROTO_IPV6 */
1495 static int
1496 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1497     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1498 {
1499         conn_t          *connp = coa->coa_connp;
1500         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1501         ip_pkt_t        *ipp = coa->coa_ipp;
1502         int             *i1 = (int *)invalp;
1503         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1504         uint_t          ifindex;
1505         zoneid_t        zoneid = IPCL_ZONEID(connp);
1506         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1507         int             error;
1508 
1509         if (connp->conn_family != AF_INET6)
1510                 return (EINVAL);
1511 
1512         switch (name) {
1513         case IPV6_MULTICAST_IF:
1514                 /*
1515                  * The only possible error is EINVAL.
1516                  * We call this option on both V4 and V6
1517                  * If both fail, then this call returns
1518                  * EINVAL. If at least one of them succeeds we
1519                  * return success.
1520                  */
1521                 ifindex = *(uint_t *)i1;
1522 
1523                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1524                     !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1525                         return (EINVAL);
1526                 break;
1527         case IPV6_UNICAST_HOPS:
1528                 /* Don't allow zero. -1 means to use default */
1529                 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1530                         return (EINVAL);
1531                 break;
1532         case IPV6_MULTICAST_HOPS:
1533                 /* -1 means use default */
1534                 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1535                         return (EINVAL);
1536                 break;
1537         case IPV6_MULTICAST_LOOP:
1538                 if (*i1 != 0 && *i1 != 1)
1539                         return (EINVAL);
1540                 break;
1541         case IPV6_BOUND_IF:
1542                 ifindex = *(uint_t *)i1;
1543 
1544                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1545                         return (ENXIO);
1546                 break;
1547         case IPV6_PKTINFO: {
1548                 struct in6_pktinfo *pkti;
1549                 boolean_t isv6;
1550 
1551                 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1552                         return (EINVAL);
1553                 if (inlen == 0)
1554                         break;  /* Clear values below */
1555 
1556                 /*
1557                  * Verify the source address and ifindex. Privileged users
1558                  * can use any source address.
1559                  */
1560                 pkti = (struct in6_pktinfo *)invalp;
1561 
1562                 /*
1563                  * For link-local addresses we use the ipi6_ifindex when
1564                  * we verify the local address.
1565                  * If net_rawaccess then any source address can be used.
1566                  */
1567                 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1568                     secpolicy_net_rawaccess(cr) != 0) {
1569                         uint_t scopeid = 0;
1570                         in6_addr_t *v6src = &pkti->ipi6_addr;
1571                         ipaddr_t v4src;
1572                         ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1573 
1574                         if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1575                                 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1576                                 if (v4src != INADDR_ANY) {
1577                                         laddr_type = ip_laddr_verify_v4(v4src,
1578                                             zoneid, ipst, B_FALSE);
1579                                 }
1580                         } else {
1581                                 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1582                                         scopeid = pkti->ipi6_ifindex;
1583 
1584                                 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1585                                     ipst, B_FALSE, scopeid);
1586                         }
1587                         switch (laddr_type) {
1588                         case IPVL_UNICAST_UP:
1589                         case IPVL_UNICAST_DOWN:
1590                                 break;
1591                         default:
1592                                 return (EADDRNOTAVAIL);
1593                         }
1594                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1595                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1596                         /* Allow any source */
1597                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1598                 }
1599                 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1600                 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1601                     ipst))
1602                         return (ENXIO);
1603                 break;
1604         }
1605         case IPV6_HOPLIMIT:
1606                 /* It is only allowed as ancilary data */
1607                 if (!coa->coa_ancillary)
1608                         return (EINVAL);
1609 
1610                 if (inlen != 0 && inlen != sizeof (int))
1611                         return (EINVAL);
1612                 if (inlen == sizeof (int)) {
1613                         if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1614                                 return (EINVAL);
1615                 }
1616                 break;
1617         case IPV6_TCLASS:
1618                 if (inlen != 0 && inlen != sizeof (int))
1619                         return (EINVAL);
1620                 if (inlen == sizeof (int)) {
1621                         if (*i1 > 255 || *i1 < -1)
1622                                 return (EINVAL);
1623                 }
1624                 break;
1625         case IPV6_NEXTHOP:
1626                 if (inlen != 0 && inlen != sizeof (sin6_t))
1627                         return (EINVAL);
1628                 if (inlen == sizeof (sin6_t)) {
1629                         sin6_t *sin6 = (sin6_t *)invalp;
1630                         ire_t   *ire;
1631 
1632                         if (sin6->sin6_family != AF_INET6)
1633                                 return (EAFNOSUPPORT);
1634                         if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1635                                 return (EADDRNOTAVAIL);
1636 
1637                         /* Verify that the next-hop is on-link */
1638                         ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1639                             0, 0, IRE_ONLINK, NULL, zoneid,
1640                             NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1641                         if (ire == NULL)
1642                                 return (EHOSTUNREACH);
1643                         ire_refrele(ire);
1644                         break;
1645                 }
1646                 break;
1647         case IPV6_RTHDR:
1648         case IPV6_DSTOPTS:
1649         case IPV6_RTHDRDSTOPTS:
1650         case IPV6_HOPOPTS: {
1651                 /* All have the length field in the same place */
1652                 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1653                 /*
1654                  * Sanity checks - minimum size, size a multiple of
1655                  * eight bytes, and matching size passed in.
1656                  */
1657                 if (inlen != 0 &&
1658                     inlen != (8 * (hopts->ip6h_len + 1)))
1659                         return (EINVAL);
1660                 break;
1661         }
1662         case IPV6_PATHMTU:
1663                 /* Can't be set */
1664                 return (EINVAL);
1665 
1666         case IPV6_USE_MIN_MTU:
1667                 if (inlen != sizeof (int))
1668                         return (EINVAL);
1669                 if (*i1 < -1 || *i1 > 1)
1670                         return (EINVAL);
1671                 break;
1672         case IPV6_SRC_PREFERENCES:
1673                 if (inlen != sizeof (uint32_t))
1674                         return (EINVAL);
1675                 break;
1676         case IPV6_V6ONLY:
1677                 if (*i1 < 0 || *i1 > 1) {
1678                         return (EINVAL);
1679                 }
1680                 break;
1681         }
1682         if (checkonly)
1683                 return (0);
1684 
1685         /* Here we set the actual option value */
1686         /*
1687          * conn_lock protects the bitfields, and is used to
1688          * set the fields atomically. Not needed for ixa settings since
1689          * the caller has an exclusive copy of the ixa.
1690          * We can not hold conn_lock across the multicast options though.
1691          */
1692         ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1693         switch (name) {
1694         case IPV6_MULTICAST_IF:
1695                 ixa->ixa_multicast_ifindex = ifindex;
1696                 /* Need to redo ip_attr_connect */
1697                 coa->coa_changed |= COA_ROUTE_CHANGED;
1698                 break;
1699         case IPV6_UNICAST_HOPS:
1700                 /* -1 means use default */
1701                 mutex_enter(&connp->conn_lock);
1702                 if (*i1 == -1) {
1703                         ipp->ipp_unicast_hops = connp->conn_default_ttl;
1704                 } else {
1705                         ipp->ipp_unicast_hops = (uint8_t)*i1;
1706                 }
1707                 mutex_exit(&connp->conn_lock);
1708                 coa->coa_changed |= COA_HEADER_CHANGED;
1709                 break;
1710         case IPV6_MULTICAST_HOPS:
1711                 /* -1 means use default */
1712                 if (*i1 == -1) {
1713                         ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1714                 } else {
1715                         ixa->ixa_multicast_ttl = (uint8_t)*i1;
1716                 }
1717                 /* Handled automatically by ip_output */
1718                 break;
1719         case IPV6_MULTICAST_LOOP:
1720                 if (*i1 != 0)
1721                         ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1722                 else
1723                         ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1724                 /* Handled automatically by ip_output */
1725                 break;
1726         case IPV6_JOIN_GROUP:
1727         case IPV6_LEAVE_GROUP:
1728         case MCAST_JOIN_GROUP:
1729         case MCAST_LEAVE_GROUP:
1730                 return (ip_opt_set_multicast_group(connp, name,
1731                     invalp, B_TRUE, checkonly));
1732 
1733         case MCAST_BLOCK_SOURCE:
1734         case MCAST_UNBLOCK_SOURCE:
1735         case MCAST_JOIN_SOURCE_GROUP:
1736         case MCAST_LEAVE_SOURCE_GROUP:
1737                 return (ip_opt_set_multicast_sources(connp, name,
1738                     invalp, B_TRUE, checkonly));
1739 
1740         case IPV6_BOUND_IF:
1741                 ixa->ixa_ifindex = ifindex;          /* Send */
1742                 mutex_enter(&connp->conn_lock);
1743                 connp->conn_incoming_ifindex = ifindex;      /* Receive */
1744                 connp->conn_bound_if = ifindex;              /* getsockopt */
1745                 mutex_exit(&connp->conn_lock);
1746                 coa->coa_changed |= COA_ROUTE_CHANGED;
1747                 break;
1748         case IPV6_UNSPEC_SRC:
1749                 mutex_enter(&connp->conn_lock);
1750                 connp->conn_unspec_src = onoff;
1751                 if (onoff)
1752                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1753                 else
1754                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1755                 mutex_exit(&connp->conn_lock);
1756                 break;
1757         case IPV6_RECVPKTINFO:
1758                 mutex_enter(&connp->conn_lock);
1759                 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1760                 mutex_exit(&connp->conn_lock);
1761                 break;
1762         case IPV6_RECVTCLASS:
1763                 mutex_enter(&connp->conn_lock);
1764                 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1765                 mutex_exit(&connp->conn_lock);
1766                 break;
1767         case IPV6_RECVPATHMTU:
1768                 mutex_enter(&connp->conn_lock);
1769                 connp->conn_ipv6_recvpathmtu = onoff;
1770                 mutex_exit(&connp->conn_lock);
1771                 break;
1772         case IPV6_RECVHOPLIMIT:
1773                 mutex_enter(&connp->conn_lock);
1774                 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1775                     onoff;
1776                 mutex_exit(&connp->conn_lock);
1777                 break;
1778         case IPV6_RECVHOPOPTS:
1779                 mutex_enter(&connp->conn_lock);
1780                 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1781                 mutex_exit(&connp->conn_lock);
1782                 break;
1783         case IPV6_RECVDSTOPTS:
1784                 mutex_enter(&connp->conn_lock);
1785                 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1786                 mutex_exit(&connp->conn_lock);
1787                 break;
1788         case _OLD_IPV6_RECVDSTOPTS:
1789                 mutex_enter(&connp->conn_lock);
1790                 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1791                     onoff;
1792                 mutex_exit(&connp->conn_lock);
1793                 break;
1794         case IPV6_RECVRTHDRDSTOPTS:
1795                 mutex_enter(&connp->conn_lock);
1796                 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1797                     onoff;
1798                 mutex_exit(&connp->conn_lock);
1799                 break;
1800         case IPV6_RECVRTHDR:
1801                 mutex_enter(&connp->conn_lock);
1802                 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1803                 mutex_exit(&connp->conn_lock);
1804                 break;
1805         case IPV6_PKTINFO:
1806                 mutex_enter(&connp->conn_lock);
1807                 if (inlen == 0) {
1808                         ipp->ipp_fields &= ~IPPF_ADDR;
1809                         ipp->ipp_addr = ipv6_all_zeros;
1810                         ixa->ixa_ifindex = 0;
1811                 } else {
1812                         struct in6_pktinfo *pkti;
1813 
1814                         pkti = (struct in6_pktinfo *)invalp;
1815                         ipp->ipp_addr = pkti->ipi6_addr;
1816                         if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1817                                 ipp->ipp_fields |= IPPF_ADDR;
1818                         else
1819                                 ipp->ipp_fields &= ~IPPF_ADDR;
1820                         ixa->ixa_ifindex = pkti->ipi6_ifindex;
1821                 }
1822                 mutex_exit(&connp->conn_lock);
1823                 /* Source and ifindex might have changed */
1824                 coa->coa_changed |= COA_HEADER_CHANGED;
1825                 coa->coa_changed |= COA_ROUTE_CHANGED;
1826                 break;
1827         case IPV6_HOPLIMIT:
1828                 mutex_enter(&connp->conn_lock);
1829                 if (inlen == 0 || *i1 == -1) {
1830                         /* Revert to default */
1831                         ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1832                         ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1833                 } else {
1834                         ipp->ipp_hoplimit = *i1;
1835                         ipp->ipp_fields |= IPPF_HOPLIMIT;
1836                         /* Ensure that it sticks for multicast packets */
1837                         ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1838                 }
1839                 mutex_exit(&connp->conn_lock);
1840                 coa->coa_changed |= COA_HEADER_CHANGED;
1841                 break;
1842         case IPV6_TCLASS:
1843                 /*
1844                  * IPV6_TCLASS accepts -1 as use kernel default
1845                  * and [0, 255] as the actualy traffic class.
1846                  */
1847                 mutex_enter(&connp->conn_lock);
1848                 if (inlen == 0 || *i1 == -1) {
1849                         ipp->ipp_tclass = 0;
1850                         ipp->ipp_fields &= ~IPPF_TCLASS;
1851                 } else {
1852                         ipp->ipp_tclass = *i1;
1853                         ipp->ipp_fields |= IPPF_TCLASS;
1854                 }
1855                 mutex_exit(&connp->conn_lock);
1856                 coa->coa_changed |= COA_HEADER_CHANGED;
1857                 break;
1858         case IPV6_NEXTHOP:
1859                 if (inlen == 0) {
1860                         ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1861                 } else {
1862                         sin6_t *sin6 = (sin6_t *)invalp;
1863 
1864                         ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1865                         if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1866                                 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1867                         else
1868                                 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1869                 }
1870                 coa->coa_changed |= COA_ROUTE_CHANGED;
1871                 break;
1872         case IPV6_HOPOPTS:
1873                 mutex_enter(&connp->conn_lock);
1874                 error = optcom_pkt_set(invalp, inlen,
1875                     (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1876                 if (error != 0) {
1877                         mutex_exit(&connp->conn_lock);
1878                         return (error);
1879                 }
1880                 if (ipp->ipp_hopoptslen == 0) {
1881                         ipp->ipp_fields &= ~IPPF_HOPOPTS;
1882                 } else {
1883                         ipp->ipp_fields |= IPPF_HOPOPTS;
1884                 }
1885                 mutex_exit(&connp->conn_lock);
1886                 coa->coa_changed |= COA_HEADER_CHANGED;
1887                 coa->coa_changed |= COA_WROFF_CHANGED;
1888                 break;
1889         case IPV6_RTHDRDSTOPTS:
1890                 mutex_enter(&connp->conn_lock);
1891                 error = optcom_pkt_set(invalp, inlen,
1892                     (uchar_t **)&ipp->ipp_rthdrdstopts,
1893                     &ipp->ipp_rthdrdstoptslen);
1894                 if (error != 0) {
1895                         mutex_exit(&connp->conn_lock);
1896                         return (error);
1897                 }
1898                 if (ipp->ipp_rthdrdstoptslen == 0) {
1899                         ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1900                 } else {
1901                         ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1902                 }
1903                 mutex_exit(&connp->conn_lock);
1904                 coa->coa_changed |= COA_HEADER_CHANGED;
1905                 coa->coa_changed |= COA_WROFF_CHANGED;
1906                 break;
1907         case IPV6_DSTOPTS:
1908                 mutex_enter(&connp->conn_lock);
1909                 error = optcom_pkt_set(invalp, inlen,
1910                     (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1911                 if (error != 0) {
1912                         mutex_exit(&connp->conn_lock);
1913                         return (error);
1914                 }
1915                 if (ipp->ipp_dstoptslen == 0) {
1916                         ipp->ipp_fields &= ~IPPF_DSTOPTS;
1917                 } else {
1918                         ipp->ipp_fields |= IPPF_DSTOPTS;
1919                 }
1920                 mutex_exit(&connp->conn_lock);
1921                 coa->coa_changed |= COA_HEADER_CHANGED;
1922                 coa->coa_changed |= COA_WROFF_CHANGED;
1923                 break;
1924         case IPV6_RTHDR:
1925                 mutex_enter(&connp->conn_lock);
1926                 error = optcom_pkt_set(invalp, inlen,
1927                     (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1928                 if (error != 0) {
1929                         mutex_exit(&connp->conn_lock);
1930                         return (error);
1931                 }
1932                 if (ipp->ipp_rthdrlen == 0) {
1933                         ipp->ipp_fields &= ~IPPF_RTHDR;
1934                 } else {
1935                         ipp->ipp_fields |= IPPF_RTHDR;
1936                 }
1937                 mutex_exit(&connp->conn_lock);
1938                 coa->coa_changed |= COA_HEADER_CHANGED;
1939                 coa->coa_changed |= COA_WROFF_CHANGED;
1940                 break;
1941 
1942         case IPV6_DONTFRAG:
1943                 if (onoff) {
1944                         ixa->ixa_flags |= IXAF_DONTFRAG;
1945                         ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1946                 } else {
1947                         ixa->ixa_flags &= ~IXAF_DONTFRAG;
1948                         ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1949                 }
1950                 /* Need to redo ip_attr_connect */
1951                 coa->coa_changed |= COA_ROUTE_CHANGED;
1952                 break;
1953 
1954         case IPV6_USE_MIN_MTU:
1955                 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1956                 ixa->ixa_use_min_mtu = *i1;
1957                 /* Need to redo ip_attr_connect */
1958                 coa->coa_changed |= COA_ROUTE_CHANGED;
1959                 break;
1960 
1961         case IPV6_SEC_OPT:
1962                 mutex_enter(&connp->conn_lock);
1963                 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1964                 mutex_exit(&connp->conn_lock);
1965                 if (error != 0) {
1966                         return (error);
1967                 }
1968                 /* This is an IPsec policy change - redo ip_attr_connect */
1969                 coa->coa_changed |= COA_ROUTE_CHANGED;
1970                 break;
1971         case IPV6_SRC_PREFERENCES:
1972                 /*
1973                  * This socket option only affects connected
1974                  * sockets that haven't already bound to a specific
1975                  * IPv6 address.  In other words, sockets that
1976                  * don't call bind() with an address other than the
1977                  * unspecified address and that call connect().
1978                  * ip_set_destination_v6() passes these preferences
1979                  * to the ipif_select_source_v6() function.
1980                  */
1981                 mutex_enter(&connp->conn_lock);
1982                 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1983                 mutex_exit(&connp->conn_lock);
1984                 if (error != 0) {
1985                         return (error);
1986                 }
1987                 break;
1988         case IPV6_V6ONLY:
1989                 mutex_enter(&connp->conn_lock);
1990                 connp->conn_ipv6_v6only = onoff;
1991                 mutex_exit(&connp->conn_lock);
1992                 break;
1993         }
1994         return (0);
1995 }
1996 
1997 /* Handle IPPROTO_UDP */
1998 /* ARGSUSED1 */
1999 static int
2000 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2001     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2002 {
2003         conn_t          *connp = coa->coa_connp;
2004         int             *i1 = (int *)invalp;
2005         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2006         int             error;
2007 
2008         switch (name) {
2009         case UDP_ANONPRIVBIND:
2010                 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2011                         return (error);
2012                 }
2013                 break;
2014         }
2015         if (checkonly)
2016                 return (0);
2017 
2018         /* Here we set the actual option value */
2019         mutex_enter(&connp->conn_lock);
2020         switch (name) {
2021         case UDP_ANONPRIVBIND:
2022                 connp->conn_anon_priv_bind = onoff;
2023                 break;
2024         case UDP_EXCLBIND:
2025                 connp->conn_exclbind = onoff;
2026                 break;
2027         }
2028         mutex_exit(&connp->conn_lock);
2029         return (0);
2030 }
2031 
2032 /* Handle IPPROTO_TCP */
2033 /* ARGSUSED1 */
2034 static int
2035 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2036     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2037 {
2038         conn_t          *connp = coa->coa_connp;
2039         int             *i1 = (int *)invalp;
2040         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2041         int             error;
2042 
2043         switch (name) {
2044         case TCP_ANONPRIVBIND:
2045                 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2046                         return (error);
2047                 }
2048                 break;
2049         }
2050         if (checkonly)
2051                 return (0);
2052 
2053         /* Here we set the actual option value */
2054         mutex_enter(&connp->conn_lock);
2055         switch (name) {
2056         case TCP_ANONPRIVBIND:
2057                 connp->conn_anon_priv_bind = onoff;
2058                 break;
2059         case TCP_EXCLBIND:
2060                 connp->conn_exclbind = onoff;
2061                 break;
2062         case TCP_RECVDSTADDR:
2063                 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2064                 break;
2065         }
2066         mutex_exit(&connp->conn_lock);
2067         return (0);
2068 }
2069 
2070 int
2071 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2072 {
2073         sin_t           *sin;
2074         sin6_t          *sin6;
2075 
2076         if (connp->conn_family == AF_INET) {
2077                 if (*salenp < sizeof (sin_t))
2078                         return (EINVAL);
2079 
2080                 *salenp = sizeof (sin_t);
2081                 /* Fill zeroes and then initialize non-zero fields */
2082                 sin = (sin_t *)sa;
2083                 *sin = sin_null;
2084                 sin->sin_family = AF_INET;
2085                 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2086                     !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2087                         sin->sin_addr.s_addr = connp->conn_saddr_v4;
2088                 } else {
2089                         /*
2090                          * INADDR_ANY
2091                          * conn_saddr is not set, we might be bound to
2092                          * broadcast/multicast. Use conn_bound_addr as
2093                          * local address instead (that could
2094                          * also still be INADDR_ANY)
2095                          */
2096                         sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2097                 }
2098                 sin->sin_port = connp->conn_lport;
2099         } else {
2100                 if (*salenp < sizeof (sin6_t))
2101                         return (EINVAL);
2102 
2103                 *salenp = sizeof (sin6_t);
2104                 /* Fill zeroes and then initialize non-zero fields */
2105                 sin6 = (sin6_t *)sa;
2106                 *sin6 = sin6_null;
2107                 sin6->sin6_family = AF_INET6;
2108                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2109                         sin6->sin6_addr = connp->conn_saddr_v6;
2110                 } else {
2111                         /*
2112                          * conn_saddr is not set, we might be bound to
2113                          * broadcast/multicast. Use conn_bound_addr as
2114                          * local address instead (which could
2115                          * also still be unspecified)
2116                          */
2117                         sin6->sin6_addr = connp->conn_bound_addr_v6;
2118                 }
2119                 sin6->sin6_port = connp->conn_lport;
2120                 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2121                     (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2122                         sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2123         }
2124         return (0);
2125 }
2126 
2127 int
2128 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2129 {
2130         struct sockaddr_in      *sin;
2131         struct sockaddr_in6     *sin6;
2132 
2133         if (connp->conn_family == AF_INET) {
2134                 if (*salenp < sizeof (sin_t))
2135                         return (EINVAL);
2136 
2137                 *salenp = sizeof (sin_t);
2138                 /* initialize */
2139                 sin = (sin_t *)sa;
2140                 *sin = sin_null;
2141                 sin->sin_family = AF_INET;
2142                 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2143                 sin->sin_port = connp->conn_fport;
2144         } else {
2145                 if (*salenp < sizeof (sin6_t))
2146                         return (EINVAL);
2147 
2148                 *salenp = sizeof (sin6_t);
2149                 /* initialize */
2150                 sin6 = (sin6_t *)sa;
2151                 *sin6 = sin6_null;
2152                 sin6->sin6_family = AF_INET6;
2153                 sin6->sin6_addr = connp->conn_faddr_v6;
2154                 sin6->sin6_port =  connp->conn_fport;
2155                 sin6->sin6_flowinfo = connp->conn_flowinfo;
2156                 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2157                     (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2158                         sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2159         }
2160         return (0);
2161 }
2162 
2163 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2164 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2165 
2166 /*
2167  * Allocate and fill in conn_ht_iphc based on the current information
2168  * in the conn.
2169  * Normally used when we bind() and connect().
2170  * Returns failure if can't allocate memory, or if there is a problem
2171  * with a routing header/option.
2172  *
2173  * We allocate space for the transport header (ulp_hdr_len + extra) and
2174  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2175  * The extra is there for transports that want some spare room for future
2176  * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2177  * excludes the extra part.
2178  *
2179  * We massage an routing option/header and store the ckecksum difference
2180  * in conn_sum.
2181  *
2182  * Caller needs to update conn_wroff if desired.
2183  */
2184 int
2185 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2186     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2187 {
2188         ip_xmit_attr_t  *ixa = connp->conn_ixa;
2189         ip_pkt_t        *ipp = &connp->conn_xmit_ipp;
2190         uint_t          ip_hdr_length;
2191         uchar_t         *hdrs;
2192         uint_t          hdrs_len;
2193 
2194         ASSERT(MUTEX_HELD(&connp->conn_lock));
2195 
2196         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2197                 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2198                 /* In case of TX label and IP options it can be too much */
2199                 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2200                         /* Preserves existing TX errno for this */
2201                         return (EHOSTUNREACH);
2202                 }
2203         } else {
2204                 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2205         }
2206         ixa->ixa_ip_hdr_length = ip_hdr_length;
2207         hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2208         ASSERT(hdrs_len != 0);
2209 
2210         if (hdrs_len != connp->conn_ht_iphc_allocated) {
2211                 /* Allocate new before we free any old */
2212                 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2213                 if (hdrs == NULL)
2214                         return (ENOMEM);
2215 
2216                 if (connp->conn_ht_iphc != NULL) {
2217                         kmem_free(connp->conn_ht_iphc,
2218                             connp->conn_ht_iphc_allocated);
2219                 }
2220                 connp->conn_ht_iphc = hdrs;
2221                 connp->conn_ht_iphc_allocated = hdrs_len;
2222         } else {
2223                 hdrs = connp->conn_ht_iphc;
2224         }
2225         hdrs_len -= extra;
2226         connp->conn_ht_iphc_len = hdrs_len;
2227 
2228         connp->conn_ht_ulp = hdrs + ip_hdr_length;
2229         connp->conn_ht_ulp_len = ulp_hdr_length;
2230 
2231         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2232                 ipha_t  *ipha = (ipha_t *)hdrs;
2233 
2234                 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2235                 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2236                 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2237                 ipha->ipha_length = htons(hdrs_len);
2238                 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2239                         ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2240                 else
2241                         ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2242 
2243                 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2244                         connp->conn_sum = cksum_massage_options_v4(ipha,
2245                             connp->conn_netstack);
2246                 } else {
2247                         connp->conn_sum = 0;
2248                 }
2249         } else {
2250                 ip6_t   *ip6h = (ip6_t *)hdrs;
2251 
2252                 ip6h->ip6_src = *v6src;
2253                 ip6h->ip6_dst = *v6dst;
2254                 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2255                     flowinfo);
2256                 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2257 
2258                 if (ipp->ipp_fields & IPPF_RTHDR) {
2259                         connp->conn_sum = cksum_massage_options_v6(ip6h,
2260                             ip_hdr_length, connp->conn_netstack);
2261 
2262                         /*
2263                          * Verify that the first hop isn't a mapped address.
2264                          * Routers along the path need to do this verification
2265                          * for subsequent hops.
2266                          */
2267                         if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2268                                 return (EADDRNOTAVAIL);
2269 
2270                 } else {
2271                         connp->conn_sum = 0;
2272                 }
2273         }
2274         return (0);
2275 }
2276 
2277 /*
2278  * Prepend a header template to data_mp based on the ip_pkt_t
2279  * and the passed in source, destination and protocol.
2280  *
2281  * Returns failure if can't allocate memory, in which case data_mp is freed.
2282  * We allocate space for the transport header (ulp_hdr_len) and
2283  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2284  *
2285  * We massage an routing option/header and return the ckecksum difference
2286  * in *sump. This is in host byte order.
2287  *
2288  * Caller needs to update conn_wroff if desired.
2289  */
2290 mblk_t *
2291 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2292     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2293     uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2294     uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2295 {
2296         uint_t          ip_hdr_length;
2297         uchar_t         *hdrs;
2298         uint_t          hdrs_len;
2299         mblk_t          *mp;
2300 
2301         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2302                 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2303                 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2304         } else {
2305                 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2306         }
2307         hdrs_len = ip_hdr_length + ulp_hdr_length;
2308         ASSERT(hdrs_len != 0);
2309 
2310         ixa->ixa_ip_hdr_length = ip_hdr_length;
2311 
2312         /* Can we prepend to data_mp? */
2313         if (data_mp != NULL &&
2314             data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2315             data_mp->b_datap->db_ref == 1) {
2316                 hdrs = data_mp->b_rptr - hdrs_len;
2317                 data_mp->b_rptr = hdrs;
2318                 mp = data_mp;
2319         } else {
2320                 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2321                 if (mp == NULL) {
2322                         freemsg(data_mp);
2323                         *errorp = ENOMEM;
2324                         return (NULL);
2325                 }
2326                 mp->b_wptr = mp->b_datap->db_lim;
2327                 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2328                 mp->b_cont = data_mp;
2329         }
2330 
2331         /*
2332          * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2333          * if PKTINFO (aka IPPF_ADDR) was set.
2334          */
2335         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2336                 ipha_t *ipha = (ipha_t *)hdrs;
2337 
2338                 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2339                 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2340                 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2341                 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2342                 ipha->ipha_length = htons(hdrs_len + data_length);
2343                 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2344                         ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2345                 else
2346                         ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2347 
2348                 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2349                         *sump = cksum_massage_options_v4(ipha,
2350                             ixa->ixa_ipst->ips_netstack);
2351                 } else {
2352                         *sump = 0;
2353                 }
2354         } else {
2355                 ip6_t *ip6h = (ip6_t *)hdrs;
2356 
2357                 ip6h->ip6_src = *v6src;
2358                 ip6h->ip6_dst = *v6dst;
2359                 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2360                 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2361 
2362                 if (ipp->ipp_fields & IPPF_RTHDR) {
2363                         *sump = cksum_massage_options_v6(ip6h,
2364                             ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2365 
2366                         /*
2367                          * Verify that the first hop isn't a mapped address.
2368                          * Routers along the path need to do this verification
2369                          * for subsequent hops.
2370                          */
2371                         if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2372                                 *errorp = EADDRNOTAVAIL;
2373                                 freemsg(mp);
2374                                 return (NULL);
2375                         }
2376                 } else {
2377                         *sump = 0;
2378                 }
2379         }
2380         return (mp);
2381 }
2382 
2383 /*
2384  * Massage a source route if any putting the first hop
2385  * in ipha_dst. Compute a starting value for the checksum which
2386  * takes into account that the original ipha_dst should be
2387  * included in the checksum but that IP will include the
2388  * first hop from the source route in the tcp checksum.
2389  */
2390 static uint32_t
2391 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2392 {
2393         in_addr_t       dst;
2394         uint32_t        cksum;
2395 
2396         /* Get last hop then diff against first hop */
2397         cksum = ip_massage_options(ipha, ns);
2398         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2399         dst = ipha->ipha_dst;
2400         cksum -= ((dst >> 16) + (dst & 0xffff));
2401         if ((int)cksum < 0)
2402                 cksum--;
2403         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2404         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2405         ASSERT(cksum < 0x10000);
2406         return (ntohs(cksum));
2407 }
2408 
2409 static uint32_t
2410 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2411 {
2412         uint8_t         *end;
2413         ip6_rthdr_t     *rth;
2414         uint32_t        cksum;
2415 
2416         end = (uint8_t *)ip6h + ip_hdr_len;
2417         rth = ip_find_rthdr_v6(ip6h, end);
2418         if (rth == NULL)
2419                 return (0);
2420 
2421         cksum = ip_massage_options_v6(ip6h, rth, ns);
2422         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2423         ASSERT(cksum < 0x10000);
2424         return (ntohs(cksum));
2425 }
2426 
2427 /*
2428  * ULPs that change the destination address need to call this for each
2429  * change to discard any state about a previous destination that might
2430  * have been multicast or multirt.
2431  */
2432 void
2433 ip_attr_newdst(ip_xmit_attr_t *ixa)
2434 {
2435         ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2436             IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2437             IXAF_NO_LOOP_ZONEID_SET);
2438 }
2439 
2440 /*
2441  * Determine the nexthop which will be used.
2442  * Normally this is just the destination, but if a IPv4 source route, or
2443  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2444  * there.
2445  */
2446 void
2447 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2448     const in6_addr_t *dst, in6_addr_t *nexthop)
2449 {
2450         if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2451                 *nexthop = *dst;
2452                 return;
2453         }
2454         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2455                 ipaddr_t v4dst;
2456                 ipaddr_t v4nexthop;
2457 
2458                 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2459                 v4nexthop = ip_pkt_source_route_v4(ipp);
2460                 if (v4nexthop == INADDR_ANY)
2461                         v4nexthop = v4dst;
2462 
2463                 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2464         } else {
2465                 const in6_addr_t *v6nexthop;
2466 
2467                 v6nexthop = ip_pkt_source_route_v6(ipp);
2468                 if (v6nexthop == NULL)
2469                         v6nexthop = dst;
2470 
2471                 *nexthop = *v6nexthop;
2472         }
2473 }
2474 
2475 /*
2476  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2477  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2478  * case (connected latching is done in conn_connect).
2479  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2480  * set, but doesn't otherwise use the conn_t.
2481  *
2482  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2483  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2484  *
2485  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2486  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2487  *
2488  * Updates laddrp and uinfo if they are non-NULL.
2489  *
2490  * TSOL notes: The callers if ip_attr_connect must check if the destination
2491  * is different than before and in that case redo conn_update_label.
2492  * The callers of conn_connect do not need that since conn_connect
2493  * performs the conn_update_label.
2494  */
2495 int
2496 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2497     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2498     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2499     iulp_t *uinfo, uint32_t flags)
2500 {
2501         in6_addr_t              laddr = *v6src;
2502         int                     error;
2503 
2504         ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2505 
2506         if (connp->conn_zone_is_global)
2507                 flags |= IPDF_ZONE_IS_GLOBAL;
2508         else
2509                 flags &= ~IPDF_ZONE_IS_GLOBAL;
2510 
2511         /*
2512          * Lookup the route to determine a source address and the uinfo.
2513          * If the ULP has a source route option then the caller will
2514          * have set v6nexthop to be the first hop.
2515          */
2516         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2517                 ipaddr_t v4dst;
2518                 ipaddr_t v4src, v4nexthop;
2519 
2520                 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2521                 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2522                 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2523 
2524                 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2525                         flags &= ~IPDF_SELECT_SRC;
2526                 else
2527                         flags |= IPDF_SELECT_SRC;
2528 
2529                 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2530                     uinfo, flags, connp->conn_mac_mode);
2531                 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2532         } else {
2533                 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2534                         flags &= ~IPDF_SELECT_SRC;
2535                 else
2536                         flags |= IPDF_SELECT_SRC;
2537 
2538                 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2539                     uinfo, flags, connp->conn_mac_mode);
2540         }
2541         /* Pass out some address even if we hit a RTF_REJECT etc */
2542         if (laddrp != NULL)
2543                 *laddrp = laddr;
2544 
2545         if (error != 0)
2546                 return (error);
2547 
2548         if (flags & IPDF_IPSEC) {
2549                 /*
2550                  * Set any IPsec policy in ixa. Routine also looks at ULP
2551                  * ports.
2552                  */
2553                 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2554         }
2555         return (0);
2556 }
2557 
2558 /*
2559  * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2560  * Assumes that conn_faddr and conn_fport are already set. As such it is not
2561  * usable for SCTP, since SCTP has multiple faddrs.
2562  *
2563  * Caller must hold conn_lock to provide atomic constency between the
2564  * conn_t's addresses and the ixa.
2565  * NOTE: this function drops and reaquires conn_lock since it can't be
2566  * held across ip_attr_connect/ip_set_destination.
2567  *
2568  * The caller needs to handle inserting in the receive-side fanout when
2569  * appropriate after conn_connect returns.
2570  */
2571 int
2572 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2573 {
2574         ip_xmit_attr_t  *ixa = connp->conn_ixa;
2575         in6_addr_t      nexthop;
2576         in6_addr_t      saddr, faddr;
2577         in_port_t       fport;
2578         int             error;
2579 
2580         ASSERT(MUTEX_HELD(&connp->conn_lock));
2581 
2582         if (connp->conn_ipversion == IPV4_VERSION)
2583                 ixa->ixa_flags |= IXAF_IS_IPV4;
2584         else
2585                 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2586 
2587         /* We do IPsec latching below - hence no caching in ip_attr_connect */
2588         flags &= ~IPDF_IPSEC;
2589 
2590         /* In case we had previously done an ip_attr_connect */
2591         ip_attr_newdst(ixa);
2592 
2593         /*
2594          * Determine the nexthop and copy the addresses before dropping
2595          * conn_lock.
2596          */
2597         ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2598             &connp->conn_faddr_v6, &nexthop);
2599         saddr = connp->conn_saddr_v6;
2600         faddr = connp->conn_faddr_v6;
2601         fport = connp->conn_fport;
2602 
2603         mutex_exit(&connp->conn_lock);
2604         error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2605             &saddr, uinfo, flags | IPDF_VERIFY_DST);
2606         mutex_enter(&connp->conn_lock);
2607 
2608         /* Could have changed even if an error */
2609         connp->conn_saddr_v6 = saddr;
2610         if (error != 0)
2611                 return (error);
2612 
2613         /*
2614          * Check whether Trusted Solaris policy allows communication with this
2615          * host, and pretend that the destination is unreachable if not.
2616          * Compute any needed label and place it in ipp_label_v4/v6.
2617          *
2618          * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2619          * the packet.
2620          *
2621          * TSOL Note: Any concurrent threads would pick a different ixa
2622          * (and ipp if they are to change the ipp)  so we
2623          * don't have to worry about concurrent threads.
2624          */
2625         if (is_system_labeled()) {
2626                 if (connp->conn_mlp_type != mlptSingle)
2627                         return (ECONNREFUSED);
2628 
2629                 /*
2630                  * conn_update_label will set ipp_label* which will later
2631                  * be used by conn_build_hdr_template.
2632                  */
2633                 error = conn_update_label(connp, ixa,
2634                     &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2635                 if (error != 0)
2636                         return (error);
2637         }
2638 
2639         /*
2640          * Ensure that we match on the selected local address.
2641          * This overrides conn_laddr in the case we had earlier bound to a
2642          * multicast or broadcast address.
2643          */
2644         connp->conn_laddr_v6 = connp->conn_saddr_v6;
2645 
2646         /*
2647          * Allow setting new policies.
2648          * The addresses/ports are already set, thus the IPsec policy calls
2649          * can handle their passed-in conn's.
2650          */
2651         connp->conn_policy_cached = B_FALSE;
2652 
2653         /*
2654          * Cache IPsec policy in this conn.  If we have per-socket policy,
2655          * we'll cache that.  If we don't, we'll inherit global policy.
2656          *
2657          * This is done before the caller inserts in the receive-side fanout.
2658          * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2659          * for connections where we don't have a policy. This is to prevent
2660          * global policy lookups in the inbound path.
2661          *
2662          * If we insert before we set conn_policy_cached,
2663          * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2664          * because global policy cound be non-empty. We normally call
2665          * ipsec_check_policy() for conn_policy_cached connections only if
2666          * conn_in_enforce_policy is set. But in this case,
2667          * conn_policy_cached can get set anytime since we made the
2668          * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2669          * called, which will make the above assumption false.  Thus, we
2670          * need to insert after we set conn_policy_cached.
2671          */
2672         error = ipsec_conn_cache_policy(connp,
2673             connp->conn_ipversion == IPV4_VERSION);
2674         if (error != 0)
2675                 return (error);
2676 
2677         /*
2678          * We defer to do LSO check until here since now we have better idea
2679          * whether IPsec is present. If the underlying ill is LSO capable,
2680          * copy its capability in so the ULP can decide whether to enable LSO
2681          * on this connection. So far, only TCP/IPv4 is implemented, so won't
2682          * claim LSO for IPv6.
2683          *
2684          * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2685          * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2686          */
2687         ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2688 
2689         ASSERT(ixa->ixa_ire != NULL);
2690         if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2691             !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2692             !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2693             !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2694             (ixa->ixa_nce != NULL) &&
2695             ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2696             ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2697             ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2698                 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2699                 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2700         }
2701 
2702         /* Check whether ZEROCOPY capability is usable for this connection. */
2703         ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2704 
2705         if ((flags & IPDF_ZCOPY) &&
2706             !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2707             !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2708             !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2709             (ixa->ixa_nce != NULL) &&
2710             ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2711                 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2712         }
2713         return (0);
2714 }
2715 
2716 /*
2717  * Predicates to check if the addresses match conn_last*
2718  */
2719 
2720 /*
2721  * Compare the conn against an address.
2722  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2723  */
2724 boolean_t
2725 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2726 {
2727         ASSERT(connp->conn_family == AF_INET);
2728         return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2729             sin->sin_port == connp->conn_lastdstport);
2730 }
2731 
2732 /*
2733  * Compare, including for mapped addresses
2734  */
2735 boolean_t
2736 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2737 {
2738         return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2739             sin6->sin6_port == connp->conn_lastdstport &&
2740             sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2741             sin6->sin6_scope_id == connp->conn_lastscopeid);
2742 }
2743 
2744 /*
2745  * Compute a label and place it in the ip_packet_t.
2746  * Handles IPv4 and IPv6.
2747  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2748  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2749  * has been called.
2750  */
2751 int
2752 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2753     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2754 {
2755         int             err;
2756         ipaddr_t        v4dst;
2757 
2758         if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2759                 uchar_t         opt_storage[IP_MAX_OPT_LENGTH];
2760 
2761                 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2762 
2763                 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2764                     v4dst, opt_storage, ixa->ixa_ipst);
2765                 if (err == 0) {
2766                         /* Length contained in opt_storage[IPOPT_OLEN] */
2767                         err = optcom_pkt_set(opt_storage,
2768                             opt_storage[IPOPT_OLEN],
2769                             (uchar_t **)&ipp->ipp_label_v4,
2770                             &ipp->ipp_label_len_v4);
2771                 }
2772                 if (err != 0) {
2773                         DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2774                             char *, "conn(1) failed to update options(2) "
2775                             "on ixa(3)",
2776                             conn_t *, connp, char *, opt_storage,
2777                             ip_xmit_attr_t *, ixa);
2778                 }
2779                 if (ipp->ipp_label_len_v4 != 0)
2780                         ipp->ipp_fields |= IPPF_LABEL_V4;
2781                 else
2782                         ipp->ipp_fields &= ~IPPF_LABEL_V4;
2783         } else {
2784                 uchar_t         opt_storage[TSOL_MAX_IPV6_OPTION];
2785                 uint_t          optlen;
2786 
2787                 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2788                     v6dst, opt_storage, ixa->ixa_ipst);
2789                 if (err == 0) {
2790                         /*
2791                          * Note that ipp_label_v6 is just the option - not
2792                          * the hopopts extension header.
2793                          *
2794                          * Length contained in opt_storage[IPOPT_OLEN], but
2795                          * that doesn't include the two byte options header.
2796                          */
2797                         optlen = opt_storage[IPOPT_OLEN];
2798                         if (optlen != 0)
2799                                 optlen += 2;
2800 
2801                         err = optcom_pkt_set(opt_storage, optlen,
2802                             (uchar_t **)&ipp->ipp_label_v6,
2803                             &ipp->ipp_label_len_v6);
2804                 }
2805                 if (err != 0) {
2806                         DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2807                             char *, "conn(1) failed to update options(2) "
2808                             "on ixa(3)",
2809                             conn_t *, connp, char *, opt_storage,
2810                             ip_xmit_attr_t *, ixa);
2811                 }
2812                 if (ipp->ipp_label_len_v6 != 0)
2813                         ipp->ipp_fields |= IPPF_LABEL_V6;
2814                 else
2815                         ipp->ipp_fields &= ~IPPF_LABEL_V6;
2816         }
2817         return (err);
2818 }
2819 
2820 /*
2821  * Inherit all options settings from the parent/listener to the eager.
2822  * Returns zero on success; ENOMEM if memory allocation failed.
2823  *
2824  * We assume that the eager has not had any work done i.e., the conn_ixa
2825  * and conn_xmit_ipp are all zero.
2826  * Furthermore we assume that no other thread can access the eager (because
2827  * it isn't inserted in any fanout list).
2828  */
2829 int
2830 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2831 {
2832         cred_t  *credp;
2833         int     err;
2834         void    *notify_cookie;
2835         uint32_t xmit_hint;
2836 
2837         econnp->conn_family = lconnp->conn_family;
2838         econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2839         econnp->conn_wq = lconnp->conn_wq;
2840         econnp->conn_rq = lconnp->conn_rq;
2841 
2842         /*
2843          * Make a safe copy of the transmit attributes.
2844          * conn_connect will later be used by the caller to setup the ire etc.
2845          */
2846         ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2847         ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2848         ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2849         ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2850 
2851         /* Preserve ixa_notify_cookie and xmit_hint */
2852         notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2853         xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2854         ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2855         econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2856         econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2857 
2858         econnp->conn_bound_if = lconnp->conn_bound_if;
2859         econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2860 
2861         /* Inherit all RECV options */
2862         econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2863 
2864         err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2865             KM_NOSLEEP);
2866         if (err != 0)
2867                 return (err);
2868 
2869         econnp->conn_zoneid = lconnp->conn_zoneid;
2870         econnp->conn_allzones = lconnp->conn_allzones;
2871 
2872         /* This is odd. Pick a flowlabel for each connection instead? */
2873         econnp->conn_flowinfo = lconnp->conn_flowinfo;
2874 
2875         econnp->conn_default_ttl = lconnp->conn_default_ttl;
2876 
2877         /*
2878          * TSOL: tsol_input_proc() needs the eager's cred before the
2879          * eager is accepted
2880          */
2881         ASSERT(lconnp->conn_cred != NULL);
2882         econnp->conn_cred = credp = lconnp->conn_cred;
2883         crhold(credp);
2884         econnp->conn_cpid = lconnp->conn_cpid;
2885         econnp->conn_open_time = ddi_get_lbolt64();
2886 
2887         /*
2888          * Cache things in the ixa without any refhold.
2889          * Listener might not have set up ixa_cred
2890          */
2891         ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2892         econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2893         econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2894         if (is_system_labeled())
2895                 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2896 
2897         /*
2898          * If the caller has the process-wide flag set, then default to MAC
2899          * exempt mode.  This allows read-down to unlabeled hosts.
2900          */
2901         if (getpflags(NET_MAC_AWARE, credp) != 0)
2902                 econnp->conn_mac_mode = CONN_MAC_AWARE;
2903 
2904         econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2905 
2906         /*
2907          * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2908          * via soaccept()->soinheritoptions() which essentially applies
2909          * all the listener options to the new connection. The options that we
2910          * need to take care of are:
2911          * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2912          * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2913          * SO_SNDBUF, SO_RCVBUF.
2914          *
2915          * SO_RCVBUF:   conn_rcvbuf is set.
2916          * SO_SNDBUF:   conn_sndbuf is set.
2917          */
2918 
2919         /* Could we define a struct and use a struct copy for this? */
2920         econnp->conn_sndbuf = lconnp->conn_sndbuf;
2921         econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2922         econnp->conn_sndlowat = lconnp->conn_sndlowat;
2923         econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2924         econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2925         econnp->conn_oobinline = lconnp->conn_oobinline;
2926         econnp->conn_debug = lconnp->conn_debug;
2927         econnp->conn_keepalive = lconnp->conn_keepalive;
2928         econnp->conn_linger = lconnp->conn_linger;
2929         econnp->conn_lingertime = lconnp->conn_lingertime;
2930 
2931         /* Set the IP options */
2932         econnp->conn_broadcast = lconnp->conn_broadcast;
2933         econnp->conn_useloopback = lconnp->conn_useloopback;
2934         econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2935         return (0);
2936 }