1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
  24  */
  25 
  26 /*
  27  * IP PACKET CLASSIFIER
  28  *
  29  * The IP packet classifier provides mapping between IP packets and persistent
 * connection state for connection-oriented protocols. It also provides an
 * interface for managing connection states.
  32  *
  33  * The connection state is kept in conn_t data structure and contains, among
  34  * other things:
  35  *
  36  *      o local/remote address and ports
  37  *      o Transport protocol
  38  *      o squeue for the connection (for TCP only)
  39  *      o reference counter
  40  *      o Connection state
  41  *      o hash table linkage
  42  *      o interface/ire information
  43  *      o credentials
  44  *      o ipsec policy
  45  *      o send and receive functions.
  46  *      o mutex lock.
  47  *
  48  * Connections use a reference counting scheme. They are freed when the
  49  * reference counter drops to zero. A reference is incremented when connection
  50  * is placed in a list or table, when incoming packet for the connection arrives
  51  * and when connection is processed via squeue (squeue processing may be
  52  * asynchronous and the reference protects the connection from being destroyed
  53  * before its processing is finished).
  54  *
  55  * conn_recv is used to pass up packets to the ULP.
  56  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
  57  * a listener, and changes to tcp_input_listener as the listener has picked a
  58  * good squeue. For other cases it is set to tcp_input_data.
  59  *
  60  * conn_recvicmp is used to pass up ICMP errors to the ULP.
  61  *
  62  * Classifier uses several hash tables:
  63  *
  64  *      ipcl_conn_fanout:       contains all TCP connections in CONNECTED state
  65  *      ipcl_bind_fanout:       contains all connections in BOUND state
  66  *      ipcl_proto_fanout:      IPv4 protocol fanout
  67  *      ipcl_proto_fanout_v6:   IPv6 protocol fanout
  68  *      ipcl_udp_fanout:        contains all UDP connections
  69  *      ipcl_iptun_fanout:      contains all IP tunnel connections
  70  *      ipcl_globalhash_fanout: contains all connections
  71  *
  72  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
  73  * which need to view all existing connections.
  74  *
  75  * All tables are protected by per-bucket locks. When both per-bucket lock and
  76  * connection lock need to be held, the per-bucket lock should be acquired
  77  * first, followed by the connection lock.
  78  *
  79  * All functions doing search in one of these tables increment a reference
  80  * counter on the connection found (if any). This reference should be dropped
  81  * when the caller has finished processing the connection.
  82  *
  83  *
  84  * INTERFACES:
  85  * ===========
  86  *
  87  * Connection Lookup:
  88  * ------------------
  89  *
  90  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
  91  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
  92  *
  93  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
  94  * it can't find any associated connection. If the connection is found, its
  95  * reference counter is incremented.
  96  *
  97  *      mp:     mblock, containing packet header. The full header should fit
  98  *              into a single mblock. It should also contain at least full IP
  99  *              and TCP or UDP header.
 100  *
 101  *      protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 102  *
 103  *      hdr_len: The size of IP header. It is used to find TCP or UDP header in
 104  *               the packet.
 105  *
 106  *      ira->ira_zoneid: The zone in which the returned connection must be; the
 107  *              zoneid corresponding to the ire_zoneid on the IRE located for
 108  *              the packet's destination address.
 109  *
 110  *      ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
 111  *              IRAF_TX_SHARED_ADDR flags
 112  *
 113  *      For TCP connections, the lookup order is as follows:
 114  *              5-tuple {src, dst, protocol, local port, remote port}
 115  *                      lookup in ipcl_conn_fanout table.
 116  *              3-tuple {dst, remote port, protocol} lookup in
 117  *                      ipcl_bind_fanout table.
 118  *
 *      For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *      remote port} lookup is done on ipcl_udp_fanout. Note that
 *      these interfaces do not handle cases where a packet belongs
 *      to multiple UDP clients; that is handled in IP itself.
 123  *
 124  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 125  * determine which actual zone gets the segment.  This is used only in a
 126  * labeled environment.  The matching rules are:
 127  *
 128  *      - If it's not a multilevel port, then the label on the packet selects
 129  *        the zone.  Unlabeled packets are delivered to the global zone.
 130  *
 131  *      - If it's a multilevel port, then only the zone registered to receive
 132  *        packets on that port matches.
 133  *
 134  * Also, in a labeled environment, packet labels need to be checked.  For fully
 135  * bound TCP connections, we can assume that the packet label was checked
 136  * during connection establishment, and doesn't need to be checked on each
 137  * packet.  For others, though, we need to check for strict equality or, for
 138  * multilevel ports, membership in the range or set.  This part currently does
 139  * a tnrh lookup on each packet, but could be optimized to use cached results
 140  * if that were necessary.  (SCTP doesn't come through here, but if it did,
 141  * we would apply the same rules as TCP.)
 142  *
 143  * An implication of the above is that fully-bound TCP sockets must always use
 144  * distinct 4-tuples; they can't be discriminated by label alone.
 145  *
 146  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 147  * as there's no connection set-up handshake and no shared state.
 148  *
 149  * Labels on looped-back packets within a single zone do not need to be
 150  * checked, as all processes in the same zone have the same label.
 151  *
 152  * Finally, for unlabeled packets received by a labeled system, special rules
 153  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 154  * socket in the zone whose label matches the default label of the sender, if
 155  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 156  * receiver's label must dominate the sender's default label.
 157  *
 158  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
 159  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 160  *                                       ip_stack);
 161  *
 *      Lookup routine to find an exact match for {src, dst, local port,
 *      remote port} for TCP connections in ipcl_conn_fanout. The addresses
 *      and ports are read from the IP and TCP headers respectively.
 165  *
 166  * conn_t       *ipcl_lookup_listener_v4(lport, laddr, protocol,
 167  *                                       zoneid, ip_stack);
 168  * conn_t       *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 169  *                                       zoneid, ip_stack);
 170  *
 171  *      Lookup routine to find a listener with the tuple {lport, laddr,
 172  *      protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 173  *      parameter interface index is also compared.
 174  *
 175  * void ipcl_walk(func, arg, ip_stack)
 176  *
 177  *      Apply 'func' to every connection available. The 'func' is called as
 178  *      (*func)(connp, arg). The walk is non-atomic so connections may be
 179  *      created and destroyed during the walk. The CONN_CONDEMNED and
 180  *      CONN_INCIPIENT flags ensure that connections which are newly created
 181  *      or being destroyed are not selected by the walker.
 182  *
 183  * Table Updates
 184  * -------------
 185  *
 186  * int ipcl_conn_insert(connp);
 187  * int ipcl_conn_insert_v4(connp);
 188  * int ipcl_conn_insert_v6(connp);
 189  *
 190  *      Insert 'connp' in the ipcl_conn_fanout.
 191  *      Arguments :
 192  *              connp           conn_t to be inserted
 193  *
 194  *      Return value :
 195  *              0               if connp was inserted
 196  *              EADDRINUSE      if the connection with the same tuple
 197  *                              already exists.
 198  *
 199  * int ipcl_bind_insert(connp);
 200  * int ipcl_bind_insert_v4(connp);
 201  * int ipcl_bind_insert_v6(connp);
 202  *
 203  *      Insert 'connp' in ipcl_bind_fanout.
 204  *      Arguments :
 205  *              connp           conn_t to be inserted
 206  *
 207  *
 208  * void ipcl_hash_remove(connp);
 209  *
 210  *      Removes the 'connp' from the connection fanout table.
 211  *
 212  * Connection Creation/Destruction
 213  * -------------------------------
 214  *
 215  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 216  *
 217  *      Creates a new conn based on the type flag, inserts it into
 218  *      globalhash table.
 219  *
 220  *      type:   This flag determines the type of conn_t which needs to be
 221  *              created i.e., which kmem_cache it comes from.
 222  *              IPCL_TCPCONN    indicates a TCP connection
 223  *              IPCL_SCTPCONN   indicates a SCTP connection
 224  *              IPCL_UDPCONN    indicates a UDP conn_t.
 225  *              IPCL_RAWIPCONN  indicates a RAWIP/ICMP conn_t.
 226  *              IPCL_RTSCONN    indicates a RTS conn_t.
 227  *              IPCL_IPCCONN    indicates all other connections.
 228  *
 229  * void ipcl_conn_destroy(connp)
 230  *
 231  *      Destroys the connection state, removes it from the global
 232  *      connection hash table and frees its memory.
 233  */
 234 
 235 #include <sys/types.h>
 236 #include <sys/stream.h>
 237 #include <sys/stropts.h>
 238 #include <sys/sysmacros.h>
 239 #include <sys/strsubr.h>
 240 #include <sys/strsun.h>
 241 #define _SUN_TPI_VERSION 2
 242 #include <sys/ddi.h>
 243 #include <sys/cmn_err.h>
 244 #include <sys/debug.h>
 245 
 246 #include <sys/systm.h>
 247 #include <sys/param.h>
 248 #include <sys/kmem.h>
 249 #include <sys/isa_defs.h>
 250 #include <inet/common.h>
 251 #include <netinet/ip6.h>
 252 #include <netinet/icmp6.h>
 253 
 254 #include <inet/ip.h>
 255 #include <inet/ip_if.h>
 256 #include <inet/ip_ire.h>
 257 #include <inet/ip6.h>
 258 #include <inet/ip_ndp.h>
 259 #include <inet/ip_impl.h>
 260 #include <inet/udp_impl.h>
 261 #include <inet/sctp_ip.h>
 262 #include <inet/sctp/sctp_impl.h>
 263 #include <inet/rawip_impl.h>
 264 #include <inet/rts_impl.h>
 265 #include <inet/iptun/iptun_impl.h>
 266 
 267 #include <sys/cpuvar.h>
 268 
 269 #include <inet/ipclassifier.h>
 270 #include <inet/tcp.h>
 271 #include <inet/ipsec_impl.h>
 272 
 273 #include <sys/tsol/tnet.h>
 274 #include <sys/sockio.h>
 275 
 276 /* Old value for compatibility. Setable in /etc/system */
 277 uint_t tcp_conn_hash_size = 0;
 278 
 279 /* New value. Zero means choose automatically.  Setable in /etc/system */
 280 uint_t ipcl_conn_hash_size = 0;
 281 uint_t ipcl_conn_hash_memfactor = 8192;
 282 uint_t ipcl_conn_hash_maxsize = 82500;
 283 
 284 /* bind/udp fanout table size */
 285 uint_t ipcl_bind_fanout_size = 512;
 286 uint_t ipcl_udp_fanout_size = 16384;
 287 
 288 /* Raw socket fanout size.  Must be a power of 2. */
 289 uint_t ipcl_raw_fanout_size = 256;
 290 
 291 /*
 292  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 293  * expect that most large deployments would have hundreds of tunnels, and
 294  * thousands in the extreme case.
 295  */
 296 uint_t ipcl_iptun_fanout_size = 6143;
 297 
 298 /*
 299  * Power of 2^N Primes useful for hashing for N of 0-28,
 300  * these primes are the nearest prime <= 2^N - 2^(N-2).
 301  */
 302 
 303 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,  \
 304                 6143, 12281, 24571, 49139, 98299, 196597, 393209,       \
 305                 786431, 1572853, 3145721, 6291449, 12582893, 25165813,  \
 306                 50331599, 100663291, 201326557, 0}
 307 
 308 /*
 309  * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 310  * are aligned on cache lines.
 311  */
 312 typedef union itc_s {
 313         conn_t  itc_conn;
 314         char    itcu_filler[CACHE_ALIGN(conn_s)];
 315 } itc_t;
 316 
 317 struct kmem_cache  *tcp_conn_cache;
 318 struct kmem_cache  *ip_conn_cache;
 319 extern struct kmem_cache  *sctp_conn_cache;
 320 struct kmem_cache  *udp_conn_cache;
 321 struct kmem_cache  *rawip_conn_cache;
 322 struct kmem_cache  *rts_conn_cache;
 323 
 324 extern void     tcp_timermp_free(tcp_t *);
 325 extern mblk_t   *tcp_timermp_alloc(int);
 326 
 327 static int      ip_conn_constructor(void *, void *, int);
 328 static void     ip_conn_destructor(void *, void *);
 329 
 330 static int      tcp_conn_constructor(void *, void *, int);
 331 static void     tcp_conn_destructor(void *, void *);
 332 
 333 static int      udp_conn_constructor(void *, void *, int);
 334 static void     udp_conn_destructor(void *, void *);
 335 
 336 static int      rawip_conn_constructor(void *, void *, int);
 337 static void     rawip_conn_destructor(void *, void *);
 338 
 339 static int      rts_conn_constructor(void *, void *, int);
 340 static void     rts_conn_destructor(void *, void *);
 341 
 342 /*
 343  * Global (for all stack instances) init routine
 344  */
 345 void
 346 ipcl_g_init(void)
 347 {
 348         ip_conn_cache = kmem_cache_create("ip_conn_cache",
 349             sizeof (conn_t), CACHE_ALIGN_SIZE,
 350             ip_conn_constructor, ip_conn_destructor,
 351             NULL, NULL, NULL, 0);
 352 
 353         tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
 354             sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
 355             tcp_conn_constructor, tcp_conn_destructor,
 356             tcp_conn_reclaim, NULL, NULL, 0);
 357 
 358         udp_conn_cache = kmem_cache_create("udp_conn_cache",
 359             sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
 360             udp_conn_constructor, udp_conn_destructor,
 361             NULL, NULL, NULL, 0);
 362 
 363         rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
 364             sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
 365             rawip_conn_constructor, rawip_conn_destructor,
 366             NULL, NULL, NULL, 0);
 367 
 368         rts_conn_cache = kmem_cache_create("rts_conn_cache",
 369             sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
 370             rts_conn_constructor, rts_conn_destructor,
 371             NULL, NULL, NULL, 0);
 372 }
 373 
 374 /*
 375  * ipclassifier intialization routine, sets up hash tables.
 376  */
 377 void
 378 ipcl_init(ip_stack_t *ipst)
 379 {
 380         int i;
 381         int sizes[] = P2Ps();
 382 
 383         /*
 384          * Calculate size of conn fanout table from /etc/system settings
 385          */
 386         if (ipcl_conn_hash_size != 0) {
 387                 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
 388         } else if (tcp_conn_hash_size != 0) {
 389                 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
 390         } else {
 391                 extern pgcnt_t freemem;
 392 
 393                 ipst->ips_ipcl_conn_fanout_size =
 394                     (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
 395 
 396                 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
 397                         ipst->ips_ipcl_conn_fanout_size =
 398                             ipcl_conn_hash_maxsize;
 399                 }
 400         }
 401 
 402         for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
 403                 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
 404                         break;
 405                 }
 406         }
 407         if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
 408                 /* Out of range, use the 2^16 value */
 409                 ipst->ips_ipcl_conn_fanout_size = sizes[16];
 410         }
 411 
 412         /* Take values from /etc/system */
 413         ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
 414         ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
 415         ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
 416         ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
 417 
 418         ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
 419 
 420         ipst->ips_ipcl_conn_fanout = kmem_zalloc(
 421             ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
 422 
 423         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 424                 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
 425                     MUTEX_DEFAULT, NULL);
 426         }
 427 
 428         ipst->ips_ipcl_bind_fanout = kmem_zalloc(
 429             ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
 430 
 431         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 432                 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
 433                     MUTEX_DEFAULT, NULL);
 434         }
 435 
 436         ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
 437             sizeof (connf_t), KM_SLEEP);
 438         for (i = 0; i < IPPROTO_MAX; i++) {
 439                 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
 440                     MUTEX_DEFAULT, NULL);
 441         }
 442 
 443         ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
 444             sizeof (connf_t), KM_SLEEP);
 445         for (i = 0; i < IPPROTO_MAX; i++) {
 446                 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
 447                     MUTEX_DEFAULT, NULL);
 448         }
 449 
 450         ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
 451         mutex_init(&ipst->ips_rts_clients->connf_lock,
 452             NULL, MUTEX_DEFAULT, NULL);
 453 
 454         ipst->ips_ipcl_udp_fanout = kmem_zalloc(
 455             ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
 456         for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 457                 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
 458                     MUTEX_DEFAULT, NULL);
 459         }
 460 
 461         ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
 462             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
 463         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 464                 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
 465                     MUTEX_DEFAULT, NULL);
 466         }
 467 
 468         ipst->ips_ipcl_raw_fanout = kmem_zalloc(
 469             ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
 470         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 471                 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
 472                     MUTEX_DEFAULT, NULL);
 473         }
 474 
 475         ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
 476             sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
 477         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 478                 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
 479                     NULL, MUTEX_DEFAULT, NULL);
 480         }
 481 }
 482 
 483 void
 484 ipcl_g_destroy(void)
 485 {
 486         kmem_cache_destroy(ip_conn_cache);
 487         kmem_cache_destroy(tcp_conn_cache);
 488         kmem_cache_destroy(udp_conn_cache);
 489         kmem_cache_destroy(rawip_conn_cache);
 490         kmem_cache_destroy(rts_conn_cache);
 491 }
 492 
 493 /*
 494  * All user-level and kernel use of the stack must be gone
 495  * by now.
 496  */
 497 void
 498 ipcl_destroy(ip_stack_t *ipst)
 499 {
 500         int i;
 501 
 502         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 503                 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
 504                 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
 505         }
 506         kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
 507             sizeof (connf_t));
 508         ipst->ips_ipcl_conn_fanout = NULL;
 509 
 510         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 511                 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
 512                 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
 513         }
 514         kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
 515             sizeof (connf_t));
 516         ipst->ips_ipcl_bind_fanout = NULL;
 517 
 518         for (i = 0; i < IPPROTO_MAX; i++) {
 519                 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
 520                 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
 521         }
 522         kmem_free(ipst->ips_ipcl_proto_fanout_v4,
 523             IPPROTO_MAX * sizeof (connf_t));
 524         ipst->ips_ipcl_proto_fanout_v4 = NULL;
 525 
 526         for (i = 0; i < IPPROTO_MAX; i++) {
 527                 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
 528                 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
 529         }
 530         kmem_free(ipst->ips_ipcl_proto_fanout_v6,
 531             IPPROTO_MAX * sizeof (connf_t));
 532         ipst->ips_ipcl_proto_fanout_v6 = NULL;
 533 
 534         for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 535                 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
 536                 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
 537         }
 538         kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
 539             sizeof (connf_t));
 540         ipst->ips_ipcl_udp_fanout = NULL;
 541 
 542         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 543                 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
 544                 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
 545         }
 546         kmem_free(ipst->ips_ipcl_iptun_fanout,
 547             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
 548         ipst->ips_ipcl_iptun_fanout = NULL;
 549 
 550         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 551                 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
 552                 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
 553         }
 554         kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
 555             sizeof (connf_t));
 556         ipst->ips_ipcl_raw_fanout = NULL;
 557 
 558         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 559                 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
 560                 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
 561         }
 562         kmem_free(ipst->ips_ipcl_globalhash_fanout,
 563             sizeof (connf_t) * CONN_G_HASH_SIZE);
 564         ipst->ips_ipcl_globalhash_fanout = NULL;
 565 
 566         ASSERT(ipst->ips_rts_clients->connf_head == NULL);
 567         mutex_destroy(&ipst->ips_rts_clients->connf_lock);
 568         kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
 569         ipst->ips_rts_clients = NULL;
 570 }
 571 
 572 /*
 573  * conn creation routine. initialize the conn, sets the reference
 574  * and inserts it in the global hash table.
 575  */
 576 conn_t *
 577 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
 578 {
 579         conn_t  *connp;
 580         struct kmem_cache *conn_cache;
 581 
 582         switch (type) {
 583         case IPCL_SCTPCONN:
 584                 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
 585                         return (NULL);
 586                 sctp_conn_init(connp);
 587                 netstack_hold(ns);
 588                 connp->conn_netstack = ns;
 589                 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 590                 connp->conn_ixa->ixa_conn_id = (long)connp;
 591                 ipcl_globalhash_insert(connp);
 592                 return (connp);
 593 
 594         case IPCL_TCPCONN:
 595                 conn_cache = tcp_conn_cache;
 596                 break;
 597 
 598         case IPCL_UDPCONN:
 599                 conn_cache = udp_conn_cache;
 600                 break;
 601 
 602         case IPCL_RAWIPCONN:
 603                 conn_cache = rawip_conn_cache;
 604                 break;
 605 
 606         case IPCL_RTSCONN:
 607                 conn_cache = rts_conn_cache;
 608                 break;
 609 
 610         case IPCL_IPCCONN:
 611                 conn_cache = ip_conn_cache;
 612                 break;
 613 
 614         default:
 615                 conn_cache = NULL;
 616                 connp = NULL;
 617                 ASSERT(0);
 618         }
 619 
 620         if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
 621                 return (NULL);
 622 
 623         connp->conn_ref = 1;
 624         netstack_hold(ns);
 625         connp->conn_netstack = ns;
 626         connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 627         connp->conn_ixa->ixa_conn_id = (long)connp;
 628         ipcl_globalhash_insert(connp);
 629         return (connp);
 630 }
 631 
 632 void
 633 ipcl_conn_destroy(conn_t *connp)
 634 {
 635         mblk_t  *mp;
 636         netstack_t      *ns = connp->conn_netstack;
 637 
 638         ASSERT(!MUTEX_HELD(&connp->conn_lock));
 639         ASSERT(connp->conn_ref == 0);
 640         ASSERT(connp->conn_ioctlref == 0);
 641 
 642         DTRACE_PROBE1(conn__destroy, conn_t *, connp);
 643 
 644         if (connp->conn_cred != NULL) {
 645                 crfree(connp->conn_cred);
 646                 connp->conn_cred = NULL;
 647                 /* ixa_cred done in ipcl_conn_cleanup below */
 648         }
 649 
 650         if (connp->conn_ht_iphc != NULL) {
 651                 kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
 652                 connp->conn_ht_iphc = NULL;
 653                 connp->conn_ht_iphc_allocated = 0;
 654                 connp->conn_ht_iphc_len = 0;
 655                 connp->conn_ht_ulp = NULL;
 656                 connp->conn_ht_ulp_len = 0;
 657         }
 658         ip_pkt_free(&connp->conn_xmit_ipp);
 659 
 660         ipcl_globalhash_remove(connp);
 661 
 662         if (connp->conn_latch != NULL) {
 663                 IPLATCH_REFRELE(connp->conn_latch);
 664                 connp->conn_latch = NULL;
 665         }
 666         if (connp->conn_latch_in_policy != NULL) {
 667                 IPPOL_REFRELE(connp->conn_latch_in_policy);
 668                 connp->conn_latch_in_policy = NULL;
 669         }
 670         if (connp->conn_latch_in_action != NULL) {
 671                 IPACT_REFRELE(connp->conn_latch_in_action);
 672                 connp->conn_latch_in_action = NULL;
 673         }
 674         if (connp->conn_policy != NULL) {
 675                 IPPH_REFRELE(connp->conn_policy, ns);
 676                 connp->conn_policy = NULL;
 677         }
 678 
 679         if (connp->conn_ipsec_opt_mp != NULL) {
 680                 freemsg(connp->conn_ipsec_opt_mp);
 681                 connp->conn_ipsec_opt_mp = NULL;
 682         }
 683 
 684         if (connp->conn_flags & IPCL_TCPCONN) {
 685                 tcp_t *tcp = connp->conn_tcp;
 686 
 687                 tcp_free(tcp);
 688                 mp = tcp->tcp_timercache;
 689 
 690                 tcp->tcp_tcps = NULL;
 691 
 692                 /*
 693                  * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
 694                  * the mblk.
 695                  */
 696                 if (tcp->tcp_rsrv_mp != NULL) {
 697                         freeb(tcp->tcp_rsrv_mp);
 698                         tcp->tcp_rsrv_mp = NULL;
 699                         mutex_destroy(&tcp->tcp_rsrv_mp_lock);
 700                 }
 701 
 702                 ipcl_conn_cleanup(connp);
 703                 connp->conn_flags = IPCL_TCPCONN;
 704                 if (ns != NULL) {
 705                         ASSERT(tcp->tcp_tcps == NULL);
 706                         connp->conn_netstack = NULL;
 707                         connp->conn_ixa->ixa_ipst = NULL;
 708                         netstack_rele(ns);
 709                 }
 710 
 711                 bzero(tcp, sizeof (tcp_t));
 712 
 713                 tcp->tcp_timercache = mp;
 714                 tcp->tcp_connp = connp;
 715                 kmem_cache_free(tcp_conn_cache, connp);
 716                 return;
 717         }
 718 
 719         if (connp->conn_flags & IPCL_SCTPCONN) {
 720                 ASSERT(ns != NULL);
 721                 sctp_free(connp);
 722                 return;
 723         }
 724 
 725         ipcl_conn_cleanup(connp);
 726         if (ns != NULL) {
 727                 connp->conn_netstack = NULL;
 728                 connp->conn_ixa->ixa_ipst = NULL;
 729                 netstack_rele(ns);
 730         }
 731 
 732         /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
 733         if (connp->conn_flags & IPCL_UDPCONN) {
 734                 connp->conn_flags = IPCL_UDPCONN;
 735                 kmem_cache_free(udp_conn_cache, connp);
 736         } else if (connp->conn_flags & IPCL_RAWIPCONN) {
 737                 connp->conn_flags = IPCL_RAWIPCONN;
 738                 connp->conn_proto = IPPROTO_ICMP;
 739                 connp->conn_ixa->ixa_protocol = connp->conn_proto;
 740                 kmem_cache_free(rawip_conn_cache, connp);
 741         } else if (connp->conn_flags & IPCL_RTSCONN) {
 742                 connp->conn_flags = IPCL_RTSCONN;
 743                 kmem_cache_free(rts_conn_cache, connp);
 744         } else {
 745                 connp->conn_flags = IPCL_IPCCONN;
 746                 ASSERT(connp->conn_flags & IPCL_IPCCONN);
 747                 ASSERT(connp->conn_priv == NULL);
 748                 kmem_cache_free(ip_conn_cache, connp);
 749         }
 750 }
 751 
 752 /*
 753  * Running in cluster mode - deregister listener information
 754  */
 755 static void
 756 ipcl_conn_unlisten(conn_t *connp)
 757 {
 758         ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
 759         ASSERT(connp->conn_lport != 0);
 760 
 761         if (cl_inet_unlisten != NULL) {
 762                 sa_family_t     addr_family;
 763                 uint8_t         *laddrp;
 764 
 765                 if (connp->conn_ipversion == IPV6_VERSION) {
 766                         addr_family = AF_INET6;
 767                         laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
 768                 } else {
 769                         addr_family = AF_INET;
 770                         laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
 771                 }
 772                 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
 773                     IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
 774         }
 775         connp->conn_flags &= ~IPCL_CL_LISTENER;
 776 }
 777 
 778 /*
 779  * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 780  * which table the conn belonged to). So for debugging we can see which hash
 781  * table this connection was in.
 782  */
 783 #define IPCL_HASH_REMOVE(connp) {                                       \
 784         connf_t *connfp = (connp)->conn_fanout;                              \
 785         ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));                      \
 786         if (connfp != NULL) {                                           \
 787                 mutex_enter(&connfp->connf_lock);                        \
 788                 if ((connp)->conn_next != NULL)                              \
 789                         (connp)->conn_next->conn_prev =                   \
 790                             (connp)->conn_prev;                              \
 791                 if ((connp)->conn_prev != NULL)                              \
 792                         (connp)->conn_prev->conn_next =                   \
 793                             (connp)->conn_next;                              \
 794                 else                                                    \
 795                         connfp->connf_head = (connp)->conn_next;  \
 796                 (connp)->conn_fanout = NULL;                         \
 797                 (connp)->conn_next = NULL;                           \
 798                 (connp)->conn_prev = NULL;                           \
 799                 (connp)->conn_flags |= IPCL_REMOVED;                 \
 800                 if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)       \
 801                         ipcl_conn_unlisten((connp));                    \
 802                 CONN_DEC_REF((connp));                                  \
 803                 mutex_exit(&connfp->connf_lock);                 \
 804         }                                                               \
 805 }
 806 
 807 void
 808 ipcl_hash_remove(conn_t *connp)
 809 {
 810         uint8_t         protocol = connp->conn_proto;
 811 
 812         IPCL_HASH_REMOVE(connp);
 813         if (protocol == IPPROTO_RSVP)
 814                 ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
 815 }
 816 
 817 /*
 818  * The whole purpose of this function is allow removal of
 819  * a conn_t from the connected hash for timewait reclaim.
 820  * This is essentially a TW reclaim fastpath where timewait
 821  * collector checks under fanout lock (so no one else can
 822  * get access to the conn_t) that refcnt is 2 i.e. one for
 823  * TCP and one for the classifier hash list. If ref count
 824  * is indeed 2, we can just remove the conn under lock and
 825  * avoid cleaning up the conn under squeue. This gives us
 826  * improved performance.
 827  */
 828 void
 829 ipcl_hash_remove_locked(conn_t *connp, connf_t  *connfp)
 830 {
 831         ASSERT(MUTEX_HELD(&connfp->connf_lock));
 832         ASSERT(MUTEX_HELD(&connp->conn_lock));
 833         ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
 834 
 835         if ((connp)->conn_next != NULL) {
 836                 (connp)->conn_next->conn_prev = (connp)->conn_prev;
 837         }
 838         if ((connp)->conn_prev != NULL) {
 839                 (connp)->conn_prev->conn_next = (connp)->conn_next;
 840         } else {
 841                 connfp->connf_head = (connp)->conn_next;
 842         }
 843         (connp)->conn_fanout = NULL;
 844         (connp)->conn_next = NULL;
 845         (connp)->conn_prev = NULL;
 846         (connp)->conn_flags |= IPCL_REMOVED;
 847         ASSERT((connp)->conn_ref == 2);
 848         (connp)->conn_ref--;
 849 }
 850 
 851 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {              \
 852         ASSERT((connp)->conn_fanout == NULL);                                \
 853         ASSERT((connp)->conn_next == NULL);                          \
 854         ASSERT((connp)->conn_prev == NULL);                          \
 855         if ((connfp)->connf_head != NULL) {                          \
 856                 (connfp)->connf_head->conn_prev = (connp);                \
 857                 (connp)->conn_next = (connfp)->connf_head;                \
 858         }                                                               \
 859         (connp)->conn_fanout = (connfp);                             \
 860         (connfp)->connf_head = (connp);                                      \
 861         (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
 862             IPCL_CONNECTED;                                             \
 863         CONN_INC_REF(connp);                                            \
 864 }
 865 
 866 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) {                     \
 867         IPCL_HASH_REMOVE((connp));                                      \
 868         mutex_enter(&(connfp)->connf_lock);                              \
 869         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);               \
 870         mutex_exit(&(connfp)->connf_lock);                               \
 871 }
 872 
 873 #define IPCL_HASH_INSERT_BOUND(connfp, connp) {                         \
 874         conn_t *pconnp = NULL, *nconnp;                                 \
 875         IPCL_HASH_REMOVE((connp));                                      \
 876         mutex_enter(&(connfp)->connf_lock);                              \
 877         nconnp = (connfp)->connf_head;                                       \
 878         while (nconnp != NULL &&                                        \
 879             !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {            \
 880                 pconnp = nconnp;                                        \
 881                 nconnp = nconnp->conn_next;                          \
 882         }                                                               \
 883         if (pconnp != NULL) {                                           \
 884                 pconnp->conn_next = (connp);                         \
 885                 (connp)->conn_prev = pconnp;                         \
 886         } else {                                                        \
 887                 (connfp)->connf_head = (connp);                              \
 888         }                                                               \
 889         if (nconnp != NULL) {                                           \
 890                 (connp)->conn_next = nconnp;                         \
 891                 nconnp->conn_prev = (connp);                         \
 892         }                                                               \
 893         (connp)->conn_fanout = (connfp);                             \
 894         (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
 895             IPCL_BOUND;                                                 \
 896         CONN_INC_REF(connp);                                            \
 897         mutex_exit(&(connfp)->connf_lock);                               \
 898 }
 899 
 900 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) {                      \
 901         conn_t **list, *prev, *next;                                    \
 902         boolean_t isv4mapped =                                          \
 903             IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);               \
 904         IPCL_HASH_REMOVE((connp));                                      \
 905         mutex_enter(&(connfp)->connf_lock);                              \
 906         list = &(connfp)->connf_head;                                    \
 907         prev = NULL;                                                    \
 908         while ((next = *list) != NULL) {                                \
 909                 if (isv4mapped &&                                       \
 910                     IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&     \
 911                     connp->conn_zoneid == next->conn_zoneid) {            \
 912                         (connp)->conn_next = next;                   \
 913                         if (prev != NULL)                               \
 914                                 prev = next->conn_prev;                      \
 915                         next->conn_prev = (connp);                   \
 916                         break;                                          \
 917                 }                                                       \
 918                 list = &next->conn_next;                         \
 919                 prev = next;                                            \
 920         }                                                               \
 921         (connp)->conn_prev = prev;                                   \
 922         *list = (connp);                                                \
 923         (connp)->conn_fanout = (connfp);                             \
 924         (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
 925             IPCL_BOUND;                                                 \
 926         CONN_INC_REF((connp));                                          \
 927         mutex_exit(&(connfp)->connf_lock);                               \
 928 }
 929 
 930 void
 931 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
 932 {
 933         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
 934 }
 935 
 936 /*
 937  * Because the classifier is used to classify inbound packets, the destination
 938  * address is meant to be our local tunnel address (tunnel source), and the
 939  * source the remote tunnel address (tunnel destination).
 940  *
 941  * Note that conn_proto can't be used for fanout since the upper protocol
 942  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
 943  */
 944 conn_t *
 945 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
 946 {
 947         connf_t *connfp;
 948         conn_t  *connp;
 949 
 950         /* first look for IPv4 tunnel links */
 951         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
 952         mutex_enter(&connfp->connf_lock);
 953         for (connp = connfp->connf_head; connp != NULL;
 954             connp = connp->conn_next) {
 955                 if (IPCL_IPTUN_MATCH(connp, *dst, *src))
 956                         break;
 957         }
 958         if (connp != NULL)
 959                 goto done;
 960 
 961         mutex_exit(&connfp->connf_lock);
 962 
 963         /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
 964         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
 965             INADDR_ANY)];
 966         mutex_enter(&connfp->connf_lock);
 967         for (connp = connfp->connf_head; connp != NULL;
 968             connp = connp->conn_next) {
 969                 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
 970                         break;
 971         }
 972 done:
 973         if (connp != NULL)
 974                 CONN_INC_REF(connp);
 975         mutex_exit(&connfp->connf_lock);
 976         return (connp);
 977 }
 978 
 979 conn_t *
 980 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
 981 {
 982         connf_t *connfp;
 983         conn_t  *connp;
 984 
 985         /* Look for an IPv6 tunnel link */
 986         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
 987         mutex_enter(&connfp->connf_lock);
 988         for (connp = connfp->connf_head; connp != NULL;
 989             connp = connp->conn_next) {
 990                 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
 991                         CONN_INC_REF(connp);
 992                         break;
 993                 }
 994         }
 995         mutex_exit(&connfp->connf_lock);
 996         return (connp);
 997 }
 998 
 999 /*
1000  * This function is used only for inserting SCTP raw socket now.
1001  * This may change later.
1002  *
1003  * Note that only one raw socket can be bound to a port.  The param
1004  * lport is in network byte order.
1005  */
1006 static int
1007 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1008 {
1009         connf_t *connfp;
1010         conn_t  *oconnp;
1011         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1012 
1013         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1014 
1015         /* Check for existing raw socket already bound to the port. */
1016         mutex_enter(&connfp->connf_lock);
1017         for (oconnp = connfp->connf_head; oconnp != NULL;
1018             oconnp = oconnp->conn_next) {
1019                 if (oconnp->conn_lport == lport &&
1020                     oconnp->conn_zoneid == connp->conn_zoneid &&
1021                     oconnp->conn_family == connp->conn_family &&
1022                     ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1023                     IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1024                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1025                     IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1026                     IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1027                     &connp->conn_laddr_v6))) {
1028                         break;
1029                 }
1030         }
1031         mutex_exit(&connfp->connf_lock);
1032         if (oconnp != NULL)
1033                 return (EADDRNOTAVAIL);
1034 
1035         if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1036             IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1037                 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1038                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1039                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1040                 } else {
1041                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1042                 }
1043         } else {
1044                 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1045         }
1046         return (0);
1047 }
1048 
1049 static int
1050 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1051 {
1052         connf_t *connfp;
1053         conn_t  *tconnp;
1054         ipaddr_t laddr = connp->conn_laddr_v4;
1055         ipaddr_t faddr = connp->conn_faddr_v4;
1056 
1057         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1058         mutex_enter(&connfp->connf_lock);
1059         for (tconnp = connfp->connf_head; tconnp != NULL;
1060             tconnp = tconnp->conn_next) {
1061                 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1062                         /* A tunnel is already bound to these addresses. */
1063                         mutex_exit(&connfp->connf_lock);
1064                         return (EADDRINUSE);
1065                 }
1066         }
1067         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1068         mutex_exit(&connfp->connf_lock);
1069         return (0);
1070 }
1071 
1072 static int
1073 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1074 {
1075         connf_t *connfp;
1076         conn_t  *tconnp;
1077         in6_addr_t *laddr = &connp->conn_laddr_v6;
1078         in6_addr_t *faddr = &connp->conn_faddr_v6;
1079 
1080         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1081         mutex_enter(&connfp->connf_lock);
1082         for (tconnp = connfp->connf_head; tconnp != NULL;
1083             tconnp = tconnp->conn_next) {
1084                 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1085                         /* A tunnel is already bound to these addresses. */
1086                         mutex_exit(&connfp->connf_lock);
1087                         return (EADDRINUSE);
1088                 }
1089         }
1090         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1091         mutex_exit(&connfp->connf_lock);
1092         return (0);
1093 }
1094 
1095 /*
1096  * Check for a MAC exemption conflict on a labeled system.  Note that for
1097  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1098  * transport layer.  This check is for binding all other protocols.
1099  *
1100  * Returns true if there's a conflict.
1101  */
1102 static boolean_t
1103 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1104 {
1105         connf_t *connfp;
1106         conn_t *tconn;
1107 
1108         connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1109         mutex_enter(&connfp->connf_lock);
1110         for (tconn = connfp->connf_head; tconn != NULL;
1111             tconn = tconn->conn_next) {
1112                 /* We don't allow v4 fallback for v6 raw socket */
1113                 if (connp->conn_family != tconn->conn_family)
1114                         continue;
1115                 /* If neither is exempt, then there's no conflict */
1116                 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1117                     (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1118                         continue;
1119                 /* We are only concerned about sockets for a different zone */
1120                 if (connp->conn_zoneid == tconn->conn_zoneid)
1121                         continue;
1122                 /* If both are bound to different specific addrs, ok */
1123                 if (connp->conn_laddr_v4 != INADDR_ANY &&
1124                     tconn->conn_laddr_v4 != INADDR_ANY &&
1125                     connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1126                         continue;
1127                 /* These two conflict; fail */
1128                 break;
1129         }
1130         mutex_exit(&connfp->connf_lock);
1131         return (tconn != NULL);
1132 }
1133 
1134 static boolean_t
1135 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1136 {
1137         connf_t *connfp;
1138         conn_t *tconn;
1139 
1140         connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1141         mutex_enter(&connfp->connf_lock);
1142         for (tconn = connfp->connf_head; tconn != NULL;
1143             tconn = tconn->conn_next) {
1144                 /* We don't allow v4 fallback for v6 raw socket */
1145                 if (connp->conn_family != tconn->conn_family)
1146                         continue;
1147                 /* If neither is exempt, then there's no conflict */
1148                 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1149                     (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1150                         continue;
1151                 /* We are only concerned about sockets for a different zone */
1152                 if (connp->conn_zoneid == tconn->conn_zoneid)
1153                         continue;
1154                 /* If both are bound to different addrs, ok */
1155                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1156                     !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1157                     !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1158                     &tconn->conn_laddr_v6))
1159                         continue;
1160                 /* These two conflict; fail */
1161                 break;
1162         }
1163         mutex_exit(&connfp->connf_lock);
1164         return (tconn != NULL);
1165 }
1166 
1167 /*
1168  * (v4, v6) bind hash insertion routines
1169  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1170  */
1171 
1172 int
1173 ipcl_bind_insert(conn_t *connp)
1174 {
1175         if (connp->conn_ipversion == IPV6_VERSION)
1176                 return (ipcl_bind_insert_v6(connp));
1177         else
1178                 return (ipcl_bind_insert_v4(connp));
1179 }
1180 
1181 int
1182 ipcl_bind_insert_v4(conn_t *connp)
1183 {
1184         connf_t *connfp;
1185         int     ret = 0;
1186         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1187         uint16_t        lport = connp->conn_lport;
1188         uint8_t         protocol = connp->conn_proto;
1189 
1190         if (IPCL_IS_IPTUN(connp))
1191                 return (ipcl_iptun_hash_insert(connp, ipst));
1192 
1193         switch (protocol) {
1194         default:
1195                 if (is_system_labeled() &&
1196                     check_exempt_conflict_v4(connp, ipst))
1197                         return (EADDRINUSE);
1198                 /* FALLTHROUGH */
1199         case IPPROTO_UDP:
1200                 if (protocol == IPPROTO_UDP) {
1201                         connfp = &ipst->ips_ipcl_udp_fanout[
1202                             IPCL_UDP_HASH(lport, ipst)];
1203                 } else {
1204                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1205                 }
1206 
1207                 if (connp->conn_faddr_v4 != INADDR_ANY) {
1208                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1209                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1210                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1211                 } else {
1212                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1213                 }
1214                 if (protocol == IPPROTO_RSVP)
1215                         ill_set_inputfn_all(ipst);
1216                 break;
1217 
1218         case IPPROTO_TCP:
1219                 /* Insert it in the Bind Hash */
1220                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1221                 connfp = &ipst->ips_ipcl_bind_fanout[
1222                     IPCL_BIND_HASH(lport, ipst)];
1223                 if (connp->conn_laddr_v4 != INADDR_ANY) {
1224                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1225                 } else {
1226                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1227                 }
1228                 if (cl_inet_listen != NULL) {
1229                         ASSERT(connp->conn_ipversion == IPV4_VERSION);
1230                         connp->conn_flags |= IPCL_CL_LISTENER;
1231                         (*cl_inet_listen)(
1232                             connp->conn_netstack->netstack_stackid,
1233                             IPPROTO_TCP, AF_INET,
1234                             (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1235                 }
1236                 break;
1237 
1238         case IPPROTO_SCTP:
1239                 ret = ipcl_sctp_hash_insert(connp, lport);
1240                 break;
1241         }
1242 
1243         return (ret);
1244 }
1245 
1246 int
1247 ipcl_bind_insert_v6(conn_t *connp)
1248 {
1249         connf_t         *connfp;
1250         int             ret = 0;
1251         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1252         uint16_t        lport = connp->conn_lport;
1253         uint8_t         protocol = connp->conn_proto;
1254 
1255         if (IPCL_IS_IPTUN(connp)) {
1256                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1257         }
1258 
1259         switch (protocol) {
1260         default:
1261                 if (is_system_labeled() &&
1262                     check_exempt_conflict_v6(connp, ipst))
1263                         return (EADDRINUSE);
1264                 /* FALLTHROUGH */
1265         case IPPROTO_UDP:
1266                 if (protocol == IPPROTO_UDP) {
1267                         connfp = &ipst->ips_ipcl_udp_fanout[
1268                             IPCL_UDP_HASH(lport, ipst)];
1269                 } else {
1270                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1271                 }
1272 
1273                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1274                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1275                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1276                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1277                 } else {
1278                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1279                 }
1280                 break;
1281 
1282         case IPPROTO_TCP:
1283                 /* Insert it in the Bind Hash */
1284                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1285                 connfp = &ipst->ips_ipcl_bind_fanout[
1286                     IPCL_BIND_HASH(lport, ipst)];
1287                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1288                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1289                 } else {
1290                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1291                 }
1292                 if (cl_inet_listen != NULL) {
1293                         sa_family_t     addr_family;
1294                         uint8_t         *laddrp;
1295 
1296                         if (connp->conn_ipversion == IPV6_VERSION) {
1297                                 addr_family = AF_INET6;
1298                                 laddrp =
1299                                     (uint8_t *)&connp->conn_bound_addr_v6;
1300                         } else {
1301                                 addr_family = AF_INET;
1302                                 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1303                         }
1304                         connp->conn_flags |= IPCL_CL_LISTENER;
1305                         (*cl_inet_listen)(
1306                             connp->conn_netstack->netstack_stackid,
1307                             IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1308                 }
1309                 break;
1310 
1311         case IPPROTO_SCTP:
1312                 ret = ipcl_sctp_hash_insert(connp, lport);
1313                 break;
1314         }
1315 
1316         return (ret);
1317 }
1318 
1319 /*
1320  * ipcl_conn_hash insertion routines.
1321  * The caller has already set conn_proto and the addresses/ports in the conn_t.
1322  */
1323 
1324 int
1325 ipcl_conn_insert(conn_t *connp)
1326 {
1327         if (connp->conn_ipversion == IPV6_VERSION)
1328                 return (ipcl_conn_insert_v6(connp));
1329         else
1330                 return (ipcl_conn_insert_v4(connp));
1331 }
1332 
1333 int
1334 ipcl_conn_insert_v4(conn_t *connp)
1335 {
1336         connf_t         *connfp;
1337         conn_t          *tconnp;
1338         int             ret = 0;
1339         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1340         uint16_t        lport = connp->conn_lport;
1341         uint8_t         protocol = connp->conn_proto;
1342 
1343         if (IPCL_IS_IPTUN(connp))
1344                 return (ipcl_iptun_hash_insert(connp, ipst));
1345 
1346         switch (protocol) {
1347         case IPPROTO_TCP:
1348                 /*
1349                  * For TCP, we check whether the connection tuple already
1350                  * exists before allowing the connection to proceed.  We
1351                  * also allow indexing on the zoneid. This is to allow
1352                  * multiple shared stack zones to have the same tcp
1353                  * connection tuple. In practice this only happens for
1354                  * INADDR_LOOPBACK as it's the only local address which
1355                  * doesn't have to be unique.
1356                  */
1357                 connfp = &ipst->ips_ipcl_conn_fanout[
1358                     IPCL_CONN_HASH(connp->conn_faddr_v4,
1359                     connp->conn_ports, ipst)];
1360                 mutex_enter(&connfp->connf_lock);
1361                 for (tconnp = connfp->connf_head; tconnp != NULL;
1362                     tconnp = tconnp->conn_next) {
1363                         if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1364                             connp->conn_faddr_v4, connp->conn_laddr_v4,
1365                             connp->conn_ports) &&
1366                             IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1367                                 /* Already have a conn. bail out */
1368                                 mutex_exit(&connfp->connf_lock);
1369                                 return (EADDRINUSE);
1370                         }
1371                 }
1372                 if (connp->conn_fanout != NULL) {
1373                         /*
1374                          * Probably a XTI/TLI application trying to do a
1375                          * rebind. Let it happen.
1376                          */
1377                         mutex_exit(&connfp->connf_lock);
1378                         IPCL_HASH_REMOVE(connp);
1379                         mutex_enter(&connfp->connf_lock);
1380                 }
1381 
1382                 ASSERT(connp->conn_recv != NULL);
1383                 ASSERT(connp->conn_recvicmp != NULL);
1384 
1385                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1386                 mutex_exit(&connfp->connf_lock);
1387                 break;
1388 
1389         case IPPROTO_SCTP:
1390                 /*
1391                  * The raw socket may have already been bound, remove it
1392                  * from the hash first.
1393                  */
1394                 IPCL_HASH_REMOVE(connp);
1395                 ret = ipcl_sctp_hash_insert(connp, lport);
1396                 break;
1397 
1398         default:
1399                 /*
1400                  * Check for conflicts among MAC exempt bindings.  For
1401                  * transports with port numbers, this is done by the upper
1402                  * level per-transport binding logic.  For all others, it's
1403                  * done here.
1404                  */
1405                 if (is_system_labeled() &&
1406                     check_exempt_conflict_v4(connp, ipst))
1407                         return (EADDRINUSE);
1408                 /* FALLTHROUGH */
1409 
1410         case IPPROTO_UDP:
1411                 if (protocol == IPPROTO_UDP) {
1412                         connfp = &ipst->ips_ipcl_udp_fanout[
1413                             IPCL_UDP_HASH(lport, ipst)];
1414                 } else {
1415                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1416                 }
1417 
1418                 if (connp->conn_faddr_v4 != INADDR_ANY) {
1419                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1420                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1421                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1422                 } else {
1423                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1424                 }
1425                 break;
1426         }
1427 
1428         return (ret);
1429 }
1430 
1431 int
1432 ipcl_conn_insert_v6(conn_t *connp)
1433 {
1434         connf_t         *connfp;
1435         conn_t          *tconnp;
1436         int             ret = 0;
1437         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1438         uint16_t        lport = connp->conn_lport;
1439         uint8_t         protocol = connp->conn_proto;
1440         uint_t          ifindex = connp->conn_bound_if;
1441 
1442         if (IPCL_IS_IPTUN(connp))
1443                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1444 
1445         switch (protocol) {
1446         case IPPROTO_TCP:
1447 
1448                 /*
1449                  * For tcp, we check whether the connection tuple already
1450                  * exists before allowing the connection to proceed.  We
1451                  * also allow indexing on the zoneid. This is to allow
1452                  * multiple shared stack zones to have the same tcp
1453                  * connection tuple. In practice this only happens for
1454                  * ipv6_loopback as it's the only local address which
1455                  * doesn't have to be unique.
1456                  */
1457                 connfp = &ipst->ips_ipcl_conn_fanout[
1458                     IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1459                     ipst)];
1460                 mutex_enter(&connfp->connf_lock);
1461                 for (tconnp = connfp->connf_head; tconnp != NULL;
1462                     tconnp = tconnp->conn_next) {
1463                         /* NOTE: need to match zoneid. Bug in onnv-gate */
1464                         if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1465                             connp->conn_faddr_v6, connp->conn_laddr_v6,
1466                             connp->conn_ports) &&
1467                             (tconnp->conn_bound_if == 0 ||
1468                             tconnp->conn_bound_if == ifindex) &&
1469                             IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1470                                 /* Already have a conn. bail out */
1471                                 mutex_exit(&connfp->connf_lock);
1472                                 return (EADDRINUSE);
1473                         }
1474                 }
1475                 if (connp->conn_fanout != NULL) {
1476                         /*
1477                          * Probably a XTI/TLI application trying to do a
1478                          * rebind. Let it happen.
1479                          */
1480                         mutex_exit(&connfp->connf_lock);
1481                         IPCL_HASH_REMOVE(connp);
1482                         mutex_enter(&connfp->connf_lock);
1483                 }
1484                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1485                 mutex_exit(&connfp->connf_lock);
1486                 break;
1487 
1488         case IPPROTO_SCTP:
1489                 IPCL_HASH_REMOVE(connp);
1490                 ret = ipcl_sctp_hash_insert(connp, lport);
1491                 break;
1492 
1493         default:
1494                 if (is_system_labeled() &&
1495                     check_exempt_conflict_v6(connp, ipst))
1496                         return (EADDRINUSE);
1497                 /* FALLTHROUGH */
1498         case IPPROTO_UDP:
1499                 if (protocol == IPPROTO_UDP) {
1500                         connfp = &ipst->ips_ipcl_udp_fanout[
1501                             IPCL_UDP_HASH(lport, ipst)];
1502                 } else {
1503                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1504                 }
1505 
1506                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1507                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1508                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1509                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1510                 } else {
1511                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1512                 }
1513                 break;
1514         }
1515 
1516         return (ret);
1517 }
1518 
1519 /*
1520  * v4 packet classifying function. looks up the fanout table to
1521  * find the conn, the packet belongs to. returns the conn with
1522  * the reference held, null otherwise.
1523  *
1524  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1525  * Lookup" comment block are applied.  Labels are also checked as described
1526  * above.  If the packet is from the inside (looped back), and is from the same
1527  * zone, then label checks are omitted.
1528  */
1529 conn_t *
1530 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1531     ip_recv_attr_t *ira, ip_stack_t *ipst)
1532 {
1533         ipha_t  *ipha;
1534         connf_t *connfp, *bind_connfp;
1535         uint16_t lport;
1536         uint16_t fport;
1537         uint32_t ports;
1538         conn_t  *connp;
1539         uint16_t  *up;
1540         zoneid_t        zoneid = ira->ira_zoneid;
1541 
1542         ipha = (ipha_t *)mp->b_rptr;
1543         up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1544 
1545         switch (protocol) {
1546         case IPPROTO_TCP:
1547                 ports = *(uint32_t *)up;
1548                 connfp =
1549                     &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1550                     ports, ipst)];
1551                 mutex_enter(&connfp->connf_lock);
1552                 for (connp = connfp->connf_head; connp != NULL;
1553                     connp = connp->conn_next) {
1554                         if (IPCL_CONN_MATCH(connp, protocol,
1555                             ipha->ipha_src, ipha->ipha_dst, ports) &&
1556                             (connp->conn_zoneid == zoneid ||
1557                             connp->conn_allzones ||
1558                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1559                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1560                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1561                                 break;
1562                 }
1563 
1564                 if (connp != NULL) {
1565                         /*
1566                          * We have a fully-bound TCP connection.
1567                          *
1568                          * For labeled systems, there's no need to check the
1569                          * label here.  It's known to be good as we checked
1570                          * before allowing the connection to become bound.
1571                          */
1572                         CONN_INC_REF(connp);
1573                         mutex_exit(&connfp->connf_lock);
1574                         return (connp);
1575                 }
1576 
1577                 mutex_exit(&connfp->connf_lock);
1578                 lport = up[1];
1579                 bind_connfp =
1580                     &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1581                 mutex_enter(&bind_connfp->connf_lock);
1582                 for (connp = bind_connfp->connf_head; connp != NULL;
1583                     connp = connp->conn_next) {
1584                         if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1585                             lport) &&
1586                             (connp->conn_zoneid == zoneid ||
1587                             connp->conn_allzones ||
1588                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1589                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1590                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1591                                 break;
1592                 }
1593 
1594                 /*
1595                  * If the matching connection is SLP on a private address, then
1596                  * the label on the packet must match the local zone's label.
1597                  * Otherwise, it must be in the label range defined by tnrh.
1598                  * This is ensured by tsol_receive_local.
1599                  *
1600                  * Note that we don't check tsol_receive_local for
1601                  * the connected case.
1602                  */
1603                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1604                     !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1605                     ira, connp)) {
1606                         DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1607                             char *, "connp(1) could not receive mp(2)",
1608                             conn_t *, connp, mblk_t *, mp);
1609                         connp = NULL;
1610                 }
1611 
1612                 if (connp != NULL) {
1613                         /* Have a listener at least */
1614                         CONN_INC_REF(connp);
1615                         mutex_exit(&bind_connfp->connf_lock);
1616                         return (connp);
1617                 }
1618 
1619                 mutex_exit(&bind_connfp->connf_lock);
1620                 break;
1621 
1622         case IPPROTO_UDP:
1623                 lport = up[1];
1624                 fport = up[0];
1625                 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1626                 mutex_enter(&connfp->connf_lock);
1627                 for (connp = connfp->connf_head; connp != NULL;
1628                     connp = connp->conn_next) {
1629                         if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1630                             fport, ipha->ipha_src) &&
1631                             (connp->conn_zoneid == zoneid ||
1632                             connp->conn_allzones ||
1633                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1634                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1635                                 break;
1636                 }
1637 
1638                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1639                     !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1640                     ira, connp)) {
1641                         DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1642                             char *, "connp(1) could not receive mp(2)",
1643                             conn_t *, connp, mblk_t *, mp);
1644                         connp = NULL;
1645                 }
1646 
1647                 if (connp != NULL) {
1648                         CONN_INC_REF(connp);
1649                         mutex_exit(&connfp->connf_lock);
1650                         return (connp);
1651                 }
1652 
1653                 /*
1654                  * We shouldn't come here for multicast/broadcast packets
1655                  */
1656                 mutex_exit(&connfp->connf_lock);
1657 
1658                 break;
1659 
1660         case IPPROTO_ENCAP:
1661         case IPPROTO_IPV6:
1662                 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1663                     &ipha->ipha_dst, ipst));
1664         }
1665 
1666         return (NULL);
1667 }
1668 
1669 conn_t *
1670 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1671     ip_recv_attr_t *ira, ip_stack_t *ipst)
1672 {
1673         ip6_t           *ip6h;
1674         connf_t         *connfp, *bind_connfp;
1675         uint16_t        lport;
1676         uint16_t        fport;
1677         tcpha_t         *tcpha;
1678         uint32_t        ports;
1679         conn_t          *connp;
1680         uint16_t        *up;
1681         zoneid_t        zoneid = ira->ira_zoneid;
1682 
1683         ip6h = (ip6_t *)mp->b_rptr;
1684 
1685         switch (protocol) {
1686         case IPPROTO_TCP:
1687                 tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1688                 up = &tcpha->tha_lport;
1689                 ports = *(uint32_t *)up;
1690 
1691                 connfp =
1692                     &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1693                     ports, ipst)];
1694                 mutex_enter(&connfp->connf_lock);
1695                 for (connp = connfp->connf_head; connp != NULL;
1696                     connp = connp->conn_next) {
1697                         if (IPCL_CONN_MATCH_V6(connp, protocol,
1698                             ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1699                             (connp->conn_zoneid == zoneid ||
1700                             connp->conn_allzones ||
1701                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1702                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1703                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1704                                 break;
1705                 }
1706 
1707                 if (connp != NULL) {
1708                         /*
1709                          * We have a fully-bound TCP connection.
1710                          *
1711                          * For labeled systems, there's no need to check the
1712                          * label here.  It's known to be good as we checked
1713                          * before allowing the connection to become bound.
1714                          */
1715                         CONN_INC_REF(connp);
1716                         mutex_exit(&connfp->connf_lock);
1717                         return (connp);
1718                 }
1719 
1720                 mutex_exit(&connfp->connf_lock);
1721 
1722                 lport = up[1];
1723                 bind_connfp =
1724                     &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1725                 mutex_enter(&bind_connfp->connf_lock);
1726                 for (connp = bind_connfp->connf_head; connp != NULL;
1727                     connp = connp->conn_next) {
1728                         if (IPCL_BIND_MATCH_V6(connp, protocol,
1729                             ip6h->ip6_dst, lport) &&
1730                             (connp->conn_zoneid == zoneid ||
1731                             connp->conn_allzones ||
1732                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1733                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1734                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1735                                 break;
1736                 }
1737 
1738                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1739                     !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1740                     ira, connp)) {
1741                         DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1742                             char *, "connp(1) could not receive mp(2)",
1743                             conn_t *, connp, mblk_t *, mp);
1744                         connp = NULL;
1745                 }
1746 
1747                 if (connp != NULL) {
1748                         /* Have a listner at least */
1749                         CONN_INC_REF(connp);
1750                         mutex_exit(&bind_connfp->connf_lock);
1751                         return (connp);
1752                 }
1753 
1754                 mutex_exit(&bind_connfp->connf_lock);
1755                 break;
1756 
1757         case IPPROTO_UDP:
1758                 up = (uint16_t *)&mp->b_rptr[hdr_len];
1759                 lport = up[1];
1760                 fport = up[0];
1761                 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1762                 mutex_enter(&connfp->connf_lock);
1763                 for (connp = connfp->connf_head; connp != NULL;
1764                     connp = connp->conn_next) {
1765                         if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1766                             fport, ip6h->ip6_src) &&
1767                             (connp->conn_zoneid == zoneid ||
1768                             connp->conn_allzones ||
1769                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1770                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1771                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1772                                 break;
1773                 }
1774 
1775                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1776                     !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1777                     ira, connp)) {
1778                         DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1779                             char *, "connp(1) could not receive mp(2)",
1780                             conn_t *, connp, mblk_t *, mp);
1781                         connp = NULL;
1782                 }
1783 
1784                 if (connp != NULL) {
1785                         CONN_INC_REF(connp);
1786                         mutex_exit(&connfp->connf_lock);
1787                         return (connp);
1788                 }
1789 
1790                 /*
1791                  * We shouldn't come here for multicast/broadcast packets
1792                  */
1793                 mutex_exit(&connfp->connf_lock);
1794                 break;
1795         case IPPROTO_ENCAP:
1796         case IPPROTO_IPV6:
1797                 return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1798                     &ip6h->ip6_dst, ipst));
1799         }
1800 
1801         return (NULL);
1802 }
1803 
1804 /*
1805  * wrapper around ipcl_classify_(v4,v6) routines.
1806  */
1807 conn_t *
1808 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1809 {
1810         if (ira->ira_flags & IRAF_IS_IPV4) {
1811                 return (ipcl_classify_v4(mp, ira->ira_protocol,
1812                     ira->ira_ip_hdr_length, ira, ipst));
1813         } else {
1814                 return (ipcl_classify_v6(mp, ira->ira_protocol,
1815                     ira->ira_ip_hdr_length, ira, ipst));
1816         }
1817 }
1818 
1819 /*
1820  * Only used to classify SCTP RAW sockets
1821  */
1822 conn_t *
1823 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1824     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1825 {
1826         connf_t         *connfp;
1827         conn_t          *connp;
1828         in_port_t       lport;
1829         int             ipversion;
1830         const void      *dst;
1831         zoneid_t        zoneid = ira->ira_zoneid;
1832 
1833         lport = ((uint16_t *)&ports)[1];
1834         if (ira->ira_flags & IRAF_IS_IPV4) {
1835                 dst = (const void *)&ipha->ipha_dst;
1836                 ipversion = IPV4_VERSION;
1837         } else {
1838                 dst = (const void *)&ip6h->ip6_dst;
1839                 ipversion = IPV6_VERSION;
1840         }
1841 
1842         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1843         mutex_enter(&connfp->connf_lock);
1844         for (connp = connfp->connf_head; connp != NULL;
1845             connp = connp->conn_next) {
1846                 /* We don't allow v4 fallback for v6 raw socket. */
1847                 if (ipversion != connp->conn_ipversion)
1848                         continue;
1849                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1850                     !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1851                         if (ipversion == IPV4_VERSION) {
1852                                 if (!IPCL_CONN_MATCH(connp, protocol,
1853                                     ipha->ipha_src, ipha->ipha_dst, ports))
1854                                         continue;
1855                         } else {
1856                                 if (!IPCL_CONN_MATCH_V6(connp, protocol,
1857                                     ip6h->ip6_src, ip6h->ip6_dst, ports))
1858                                         continue;
1859                         }
1860                 } else {
1861                         if (ipversion == IPV4_VERSION) {
1862                                 if (!IPCL_BIND_MATCH(connp, protocol,
1863                                     ipha->ipha_dst, lport))
1864                                         continue;
1865                         } else {
1866                                 if (!IPCL_BIND_MATCH_V6(connp, protocol,
1867                                     ip6h->ip6_dst, lport))
1868                                         continue;
1869                         }
1870                 }
1871 
1872                 if (connp->conn_zoneid == zoneid ||
1873                     connp->conn_allzones ||
1874                     ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1875                     (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1876                     (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1877                         break;
1878         }
1879 
1880         if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1881             !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1882                 DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1883                     char *, "connp(1) could not receive mp(2)",
1884                     conn_t *, connp, mblk_t *, mp);
1885                 connp = NULL;
1886         }
1887 
1888         if (connp != NULL)
1889                 goto found;
1890         mutex_exit(&connfp->connf_lock);
1891 
1892         /* Try to look for a wildcard SCTP RAW socket match. */
1893         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1894         mutex_enter(&connfp->connf_lock);
1895         for (connp = connfp->connf_head; connp != NULL;
1896             connp = connp->conn_next) {
1897                 /* We don't allow v4 fallback for v6 raw socket. */
1898                 if (ipversion != connp->conn_ipversion)
1899                         continue;
1900                 if (!IPCL_ZONE_MATCH(connp, zoneid))
1901                         continue;
1902 
1903                 if (ipversion == IPV4_VERSION) {
1904                         if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1905                                 break;
1906                 } else {
1907                         if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1908                                 break;
1909                         }
1910                 }
1911         }
1912 
1913         if (connp != NULL)
1914                 goto found;
1915 
1916         mutex_exit(&connfp->connf_lock);
1917         return (NULL);
1918 
1919 found:
1920         ASSERT(connp != NULL);
1921         CONN_INC_REF(connp);
1922         mutex_exit(&connfp->connf_lock);
1923         return (connp);
1924 }
1925 
1926 /* ARGSUSED */
1927 static int
1928 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1929 {
1930         itc_t   *itc = (itc_t *)buf;
1931         conn_t  *connp = &itc->itc_conn;
1932         tcp_t   *tcp = (tcp_t *)&itc[1];
1933 
1934         bzero(connp, sizeof (conn_t));
1935         bzero(tcp, sizeof (tcp_t));
1936 
1937         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1938         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1939         cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1940         tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1941         if (tcp->tcp_timercache == NULL)
1942                 return (ENOMEM);
1943         connp->conn_tcp = tcp;
1944         connp->conn_flags = IPCL_TCPCONN;
1945         connp->conn_proto = IPPROTO_TCP;
1946         tcp->tcp_connp = connp;
1947         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1948 
1949         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1950         if (connp->conn_ixa == NULL) {
1951                 tcp_timermp_free(tcp);
1952                 return (ENOMEM);
1953         }
1954         connp->conn_ixa->ixa_refcnt = 1;
1955         connp->conn_ixa->ixa_protocol = connp->conn_proto;
1956         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1957         return (0);
1958 }
1959 
1960 /* ARGSUSED */
1961 static void
1962 tcp_conn_destructor(void *buf, void *cdrarg)
1963 {
1964         itc_t   *itc = (itc_t *)buf;
1965         conn_t  *connp = &itc->itc_conn;
1966         tcp_t   *tcp = (tcp_t *)&itc[1];
1967 
1968         ASSERT(connp->conn_flags & IPCL_TCPCONN);
1969         ASSERT(tcp->tcp_connp == connp);
1970         ASSERT(connp->conn_tcp == tcp);
1971         tcp_timermp_free(tcp);
1972         mutex_destroy(&connp->conn_lock);
1973         cv_destroy(&connp->conn_cv);
1974         cv_destroy(&connp->conn_sq_cv);
1975         rw_destroy(&connp->conn_ilg_lock);
1976 
1977         /* Can be NULL if constructor failed */
1978         if (connp->conn_ixa != NULL) {
1979                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1980                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
1981                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
1982                 ixa_refrele(connp->conn_ixa);
1983         }
1984 }
1985 
1986 /* ARGSUSED */
1987 static int
1988 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1989 {
1990         itc_t   *itc = (itc_t *)buf;
1991         conn_t  *connp = &itc->itc_conn;
1992 
1993         bzero(connp, sizeof (conn_t));
1994         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1995         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1996         connp->conn_flags = IPCL_IPCCONN;
1997         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1998 
1999         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2000         if (connp->conn_ixa == NULL)
2001                 return (ENOMEM);
2002         connp->conn_ixa->ixa_refcnt = 1;
2003         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2004         return (0);
2005 }
2006 
2007 /* ARGSUSED */
2008 static void
2009 ip_conn_destructor(void *buf, void *cdrarg)
2010 {
2011         itc_t   *itc = (itc_t *)buf;
2012         conn_t  *connp = &itc->itc_conn;
2013 
2014         ASSERT(connp->conn_flags & IPCL_IPCCONN);
2015         ASSERT(connp->conn_priv == NULL);
2016         mutex_destroy(&connp->conn_lock);
2017         cv_destroy(&connp->conn_cv);
2018         rw_destroy(&connp->conn_ilg_lock);
2019 
2020         /* Can be NULL if constructor failed */
2021         if (connp->conn_ixa != NULL) {
2022                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2023                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2024                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2025                 ixa_refrele(connp->conn_ixa);
2026         }
2027 }
2028 
2029 /* ARGSUSED */
2030 static int
2031 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2032 {
2033         itc_t   *itc = (itc_t *)buf;
2034         conn_t  *connp = &itc->itc_conn;
2035         udp_t   *udp = (udp_t *)&itc[1];
2036 
2037         bzero(connp, sizeof (conn_t));
2038         bzero(udp, sizeof (udp_t));
2039 
2040         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2041         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2042         connp->conn_udp = udp;
2043         connp->conn_flags = IPCL_UDPCONN;
2044         connp->conn_proto = IPPROTO_UDP;
2045         udp->udp_connp = connp;
2046         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2047         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2048         if (connp->conn_ixa == NULL)
2049                 return (ENOMEM);
2050         connp->conn_ixa->ixa_refcnt = 1;
2051         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2052         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2053         return (0);
2054 }
2055 
2056 /* ARGSUSED */
2057 static void
2058 udp_conn_destructor(void *buf, void *cdrarg)
2059 {
2060         itc_t   *itc = (itc_t *)buf;
2061         conn_t  *connp = &itc->itc_conn;
2062         udp_t   *udp = (udp_t *)&itc[1];
2063 
2064         ASSERT(connp->conn_flags & IPCL_UDPCONN);
2065         ASSERT(udp->udp_connp == connp);
2066         ASSERT(connp->conn_udp == udp);
2067         mutex_destroy(&connp->conn_lock);
2068         cv_destroy(&connp->conn_cv);
2069         rw_destroy(&connp->conn_ilg_lock);
2070 
2071         /* Can be NULL if constructor failed */
2072         if (connp->conn_ixa != NULL) {
2073                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2074                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2075                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2076                 ixa_refrele(connp->conn_ixa);
2077         }
2078 }
2079 
2080 /* ARGSUSED */
2081 static int
2082 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2083 {
2084         itc_t   *itc = (itc_t *)buf;
2085         conn_t  *connp = &itc->itc_conn;
2086         icmp_t  *icmp = (icmp_t *)&itc[1];
2087 
2088         bzero(connp, sizeof (conn_t));
2089         bzero(icmp, sizeof (icmp_t));
2090 
2091         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2092         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2093         connp->conn_icmp = icmp;
2094         connp->conn_flags = IPCL_RAWIPCONN;
2095         connp->conn_proto = IPPROTO_ICMP;
2096         icmp->icmp_connp = connp;
2097         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2098         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2099         if (connp->conn_ixa == NULL)
2100                 return (ENOMEM);
2101         connp->conn_ixa->ixa_refcnt = 1;
2102         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2103         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2104         return (0);
2105 }
2106 
2107 /* ARGSUSED */
2108 static void
2109 rawip_conn_destructor(void *buf, void *cdrarg)
2110 {
2111         itc_t   *itc = (itc_t *)buf;
2112         conn_t  *connp = &itc->itc_conn;
2113         icmp_t  *icmp = (icmp_t *)&itc[1];
2114 
2115         ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2116         ASSERT(icmp->icmp_connp == connp);
2117         ASSERT(connp->conn_icmp == icmp);
2118         mutex_destroy(&connp->conn_lock);
2119         cv_destroy(&connp->conn_cv);
2120         rw_destroy(&connp->conn_ilg_lock);
2121 
2122         /* Can be NULL if constructor failed */
2123         if (connp->conn_ixa != NULL) {
2124                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2125                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2126                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2127                 ixa_refrele(connp->conn_ixa);
2128         }
2129 }
2130 
2131 /* ARGSUSED */
2132 static int
2133 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2134 {
2135         itc_t   *itc = (itc_t *)buf;
2136         conn_t  *connp = &itc->itc_conn;
2137         rts_t   *rts = (rts_t *)&itc[1];
2138 
2139         bzero(connp, sizeof (conn_t));
2140         bzero(rts, sizeof (rts_t));
2141 
2142         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2143         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2144         connp->conn_rts = rts;
2145         connp->conn_flags = IPCL_RTSCONN;
2146         rts->rts_connp = connp;
2147         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2148         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2149         if (connp->conn_ixa == NULL)
2150                 return (ENOMEM);
2151         connp->conn_ixa->ixa_refcnt = 1;
2152         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2153         return (0);
2154 }
2155 
2156 /* ARGSUSED */
2157 static void
2158 rts_conn_destructor(void *buf, void *cdrarg)
2159 {
2160         itc_t   *itc = (itc_t *)buf;
2161         conn_t  *connp = &itc->itc_conn;
2162         rts_t   *rts = (rts_t *)&itc[1];
2163 
2164         ASSERT(connp->conn_flags & IPCL_RTSCONN);
2165         ASSERT(rts->rts_connp == connp);
2166         ASSERT(connp->conn_rts == rts);
2167         mutex_destroy(&connp->conn_lock);
2168         cv_destroy(&connp->conn_cv);
2169         rw_destroy(&connp->conn_ilg_lock);
2170 
2171         /* Can be NULL if constructor failed */
2172         if (connp->conn_ixa != NULL) {
2173                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2174                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2175                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2176                 ixa_refrele(connp->conn_ixa);
2177         }
2178 }
2179 
2180 /*
2181  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2182  * in the conn_t.
2183  *
2184  * Below we list all the pointers in the conn_t as a documentation aid.
2185  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2186  * If you add any pointers to the conn_t please add an ASSERT here
2187  * and #ifdef it out if it can't be actually asserted to be NULL.
2188  * In any case, we bzero most of the conn_t at the end of the function.
2189  */
2190 void
2191 ipcl_conn_cleanup(conn_t *connp)
2192 {
2193         ip_xmit_attr_t  *ixa;
2194 
2195         ASSERT(connp->conn_latch == NULL);
2196         ASSERT(connp->conn_latch_in_policy == NULL);
2197         ASSERT(connp->conn_latch_in_action == NULL);
2198 #ifdef notdef
2199         ASSERT(connp->conn_rq == NULL);
2200         ASSERT(connp->conn_wq == NULL);
2201 #endif
2202         ASSERT(connp->conn_cred == NULL);
2203         ASSERT(connp->conn_g_fanout == NULL);
2204         ASSERT(connp->conn_g_next == NULL);
2205         ASSERT(connp->conn_g_prev == NULL);
2206         ASSERT(connp->conn_policy == NULL);
2207         ASSERT(connp->conn_fanout == NULL);
2208         ASSERT(connp->conn_next == NULL);
2209         ASSERT(connp->conn_prev == NULL);
2210         ASSERT(connp->conn_oper_pending_ill == NULL);
2211         ASSERT(connp->conn_ilg == NULL);
2212         ASSERT(connp->conn_drain_next == NULL);
2213         ASSERT(connp->conn_drain_prev == NULL);
2214 #ifdef notdef
2215         /* conn_idl is not cleared when removed from idl list */
2216         ASSERT(connp->conn_idl == NULL);
2217 #endif
2218         ASSERT(connp->conn_ipsec_opt_mp == NULL);
2219 #ifdef notdef
2220         /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2221         ASSERT(connp->conn_netstack == NULL);
2222 #endif
2223 
2224         ASSERT(connp->conn_helper_info == NULL);
2225         ASSERT(connp->conn_ixa != NULL);
2226         ixa = connp->conn_ixa;
2227         ASSERT(ixa->ixa_refcnt == 1);
2228         /* Need to preserve ixa_protocol */
2229         ixa_cleanup(ixa);
2230         ixa->ixa_flags = 0;
2231 
2232         /* Clear out the conn_t fields that are not preserved */
2233         bzero(&connp->conn_start_clr,
2234             sizeof (conn_t) -
2235             ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2236 }
2237 
2238 /*
2239  * All conns are inserted in a global multi-list for the benefit of
2240  * walkers. The walk is guaranteed to walk all open conns at the time
2241  * of the start of the walk exactly once. This property is needed to
2242  * achieve some cleanups during unplumb of interfaces. This is achieved
2243  * as follows.
2244  *
2245  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2246  * call the insert and delete functions below at creation and deletion
2247  * time respectively. The conn never moves or changes its position in this
2248  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2249  * won't increase due to walkers, once the conn deletion has started. Note
2250  * that we can't remove the conn from the global list and then wait for
2251  * the refcnt to drop to zero, since walkers would then see a truncated
2252  * list. CONN_INCIPIENT ensures that walkers don't start looking at
2253  * conns until ip_open is ready to make them globally visible.
2254  * The global round robin multi-list locks are held only to get the
2255  * next member/insertion/deletion and contention should be negligible
2256  * if the multi-list is much greater than the number of cpus.
2257  */
2258 void
2259 ipcl_globalhash_insert(conn_t *connp)
2260 {
2261         int     index;
2262         struct connf_s  *connfp;
2263         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
2264 
2265         /*
2266          * No need for atomic here. Approximate even distribution
2267          * in the global lists is sufficient.
2268          */
2269         ipst->ips_conn_g_index++;
2270         index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2271 
2272         connp->conn_g_prev = NULL;
2273         /*
2274          * Mark as INCIPIENT, so that walkers will ignore this
2275          * for now, till ip_open is ready to make it visible globally.
2276          */
2277         connp->conn_state_flags |= CONN_INCIPIENT;
2278 
2279         connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2280         /* Insert at the head of the list */
2281         mutex_enter(&connfp->connf_lock);
2282         connp->conn_g_next = connfp->connf_head;
2283         if (connp->conn_g_next != NULL)
2284                 connp->conn_g_next->conn_g_prev = connp;
2285         connfp->connf_head = connp;
2286 
2287         /* The fanout bucket this conn points to */
2288         connp->conn_g_fanout = connfp;
2289 
2290         mutex_exit(&connfp->connf_lock);
2291 }
2292 
2293 void
2294 ipcl_globalhash_remove(conn_t *connp)
2295 {
2296         struct connf_s  *connfp;
2297 
2298         /*
2299          * We were never inserted in the global multi list.
2300          * IPCL_NONE variety is never inserted in the global multilist
2301          * since it is presumed to not need any cleanup and is transient.
2302          */
2303         if (connp->conn_g_fanout == NULL)
2304                 return;
2305 
2306         connfp = connp->conn_g_fanout;
2307         mutex_enter(&connfp->connf_lock);
2308         if (connp->conn_g_prev != NULL)
2309                 connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2310         else
2311                 connfp->connf_head = connp->conn_g_next;
2312         if (connp->conn_g_next != NULL)
2313                 connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2314         mutex_exit(&connfp->connf_lock);
2315 
2316         /* Better to stumble on a null pointer than to corrupt memory */
2317         connp->conn_g_next = NULL;
2318         connp->conn_g_prev = NULL;
2319         connp->conn_g_fanout = NULL;
2320 }
2321 
2322 /*
2323  * Walk the list of all conn_t's in the system, calling the function provided
2324  * With the specified argument for each.
2325  * Applies to both IPv4 and IPv6.
2326  *
2327  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2328  * conn_oper_pending_ill). To guard against stale pointers
2329  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2330  * unplumbed or removed. New conn_t's that are created while we are walking
2331  * may be missed by this walk, because they are not necessarily inserted
2332  * at the tail of the list. They are new conn_t's and thus don't have any
2333  * stale pointers. The CONN_CLOSING flag ensures that no new reference
2334  * is created to the struct that is going away.
2335  */
2336 void
2337 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2338 {
2339         int     i;
2340         conn_t  *connp;
2341         conn_t  *prev_connp;
2342 
2343         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2344                 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2345                 prev_connp = NULL;
2346                 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2347                 while (connp != NULL) {
2348                         mutex_enter(&connp->conn_lock);
2349                         if (connp->conn_state_flags &
2350                             (CONN_CONDEMNED | CONN_INCIPIENT)) {
2351                                 mutex_exit(&connp->conn_lock);
2352                                 connp = connp->conn_g_next;
2353                                 continue;
2354                         }
2355                         CONN_INC_REF_LOCKED(connp);
2356                         mutex_exit(&connp->conn_lock);
2357                         mutex_exit(
2358                             &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2359                         (*func)(connp, arg);
2360                         if (prev_connp != NULL)
2361                                 CONN_DEC_REF(prev_connp);
2362                         mutex_enter(
2363                             &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2364                         prev_connp = connp;
2365                         connp = connp->conn_g_next;
2366                 }
2367                 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2368                 if (prev_connp != NULL)
2369                         CONN_DEC_REF(prev_connp);
2370         }
2371 }
2372 
2373 /*
2374  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2375  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2376  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2377  * (peer tcp in ESTABLISHED state).
2378  */
2379 conn_t *
2380 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2381     ip_stack_t *ipst)
2382 {
2383         uint32_t ports;
2384         uint16_t *pports = (uint16_t *)&ports;
2385         connf_t *connfp;
2386         conn_t  *tconnp;
2387         boolean_t zone_chk;
2388 
2389         /*
2390          * If either the source of destination address is loopback, then
2391          * both endpoints must be in the same Zone.  Otherwise, both of
2392          * the addresses are system-wide unique (tcp is in ESTABLISHED
2393          * state) and the endpoints may reside in different Zones.
2394          */
2395         zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2396             ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2397 
2398         pports[0] = tcpha->tha_fport;
2399         pports[1] = tcpha->tha_lport;
2400 
2401         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2402             ports, ipst)];
2403 
2404         mutex_enter(&connfp->connf_lock);
2405         for (tconnp = connfp->connf_head; tconnp != NULL;
2406             tconnp = tconnp->conn_next) {
2407 
2408                 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2409                     ipha->ipha_dst, ipha->ipha_src, ports) &&
2410                     tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2411                     (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2412 
2413                         ASSERT(tconnp != connp);
2414                         CONN_INC_REF(tconnp);
2415                         mutex_exit(&connfp->connf_lock);
2416                         return (tconnp);
2417                 }
2418         }
2419         mutex_exit(&connfp->connf_lock);
2420         return (NULL);
2421 }
2422 
2423 /*
2424  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2425  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2426  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2427  * (peer tcp in ESTABLISHED state).
2428  */
2429 conn_t *
2430 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2431     ip_stack_t *ipst)
2432 {
2433         uint32_t ports;
2434         uint16_t *pports = (uint16_t *)&ports;
2435         connf_t *connfp;
2436         conn_t  *tconnp;
2437         boolean_t zone_chk;
2438 
2439         /*
2440          * If either the source of destination address is loopback, then
2441          * both endpoints must be in the same Zone.  Otherwise, both of
2442          * the addresses are system-wide unique (tcp is in ESTABLISHED
2443          * state) and the endpoints may reside in different Zones.  We
2444          * don't do Zone check for link local address(es) because the
2445          * current Zone implementation treats each link local address as
2446          * being unique per system node, i.e. they belong to global Zone.
2447          */
2448         zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2449             IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2450 
2451         pports[0] = tcpha->tha_fport;
2452         pports[1] = tcpha->tha_lport;
2453 
2454         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2455             ports, ipst)];
2456 
2457         mutex_enter(&connfp->connf_lock);
2458         for (tconnp = connfp->connf_head; tconnp != NULL;
2459             tconnp = tconnp->conn_next) {
2460 
2461                 /* We skip conn_bound_if check here as this is loopback tcp */
2462                 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2463                     ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2464                     tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2465                     (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2466 
2467                         ASSERT(tconnp != connp);
2468                         CONN_INC_REF(tconnp);
2469                         mutex_exit(&connfp->connf_lock);
2470                         return (tconnp);
2471                 }
2472         }
2473         mutex_exit(&connfp->connf_lock);
2474         return (NULL);
2475 }
2476 
2477 /*
2478  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2479  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2480  * Only checks for connected entries i.e. no INADDR_ANY checks.
2481  */
2482 conn_t *
2483 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2484     ip_stack_t *ipst)
2485 {
2486         uint32_t ports;
2487         uint16_t *pports;
2488         connf_t *connfp;
2489         conn_t  *tconnp;
2490 
2491         pports = (uint16_t *)&ports;
2492         pports[0] = tcpha->tha_fport;
2493         pports[1] = tcpha->tha_lport;
2494 
2495         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2496             ports, ipst)];
2497 
2498         mutex_enter(&connfp->connf_lock);
2499         for (tconnp = connfp->connf_head; tconnp != NULL;
2500             tconnp = tconnp->conn_next) {
2501 
2502                 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2503                     ipha->ipha_dst, ipha->ipha_src, ports) &&
2504                     tconnp->conn_tcp->tcp_state >= min_state) {
2505 
2506                         CONN_INC_REF(tconnp);
2507                         mutex_exit(&connfp->connf_lock);
2508                         return (tconnp);
2509                 }
2510         }
2511         mutex_exit(&connfp->connf_lock);
2512         return (NULL);
2513 }
2514 
2515 /*
2516  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2517  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2518  * Only checks for connected entries i.e. no INADDR_ANY checks.
2519  * Match on ifindex in addition to addresses.
2520  */
2521 conn_t *
2522 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2523     uint_t ifindex, ip_stack_t *ipst)
2524 {
2525         tcp_t   *tcp;
2526         uint32_t ports;
2527         uint16_t *pports;
2528         connf_t *connfp;
2529         conn_t  *tconnp;
2530 
2531         pports = (uint16_t *)&ports;
2532         pports[0] = tcpha->tha_fport;
2533         pports[1] = tcpha->tha_lport;
2534 
2535         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2536             ports, ipst)];
2537 
2538         mutex_enter(&connfp->connf_lock);
2539         for (tconnp = connfp->connf_head; tconnp != NULL;
2540             tconnp = tconnp->conn_next) {
2541 
2542                 tcp = tconnp->conn_tcp;
2543                 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2544                     ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2545                     tcp->tcp_state >= min_state &&
2546                     (tconnp->conn_bound_if == 0 ||
2547                     tconnp->conn_bound_if == ifindex)) {
2548 
2549                         CONN_INC_REF(tconnp);
2550                         mutex_exit(&connfp->connf_lock);
2551                         return (tconnp);
2552                 }
2553         }
2554         mutex_exit(&connfp->connf_lock);
2555         return (NULL);
2556 }
2557 
2558 /*
2559  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2560  * a listener when changing state.
2561  */
2562 conn_t *
2563 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2564     ip_stack_t *ipst)
2565 {
2566         connf_t         *bind_connfp;
2567         conn_t          *connp;
2568         tcp_t           *tcp;
2569 
2570         /*
2571          * Avoid false matches for packets sent to an IP destination of
2572          * all zeros.
2573          */
2574         if (laddr == 0)
2575                 return (NULL);
2576 
2577         ASSERT(zoneid != ALL_ZONES);
2578 
2579         bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2580         mutex_enter(&bind_connfp->connf_lock);
2581         for (connp = bind_connfp->connf_head; connp != NULL;
2582             connp = connp->conn_next) {
2583                 tcp = connp->conn_tcp;
2584                 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2585                     IPCL_ZONE_MATCH(connp, zoneid) &&
2586                     (tcp->tcp_listener == NULL)) {
2587                         CONN_INC_REF(connp);
2588                         mutex_exit(&bind_connfp->connf_lock);
2589                         return (connp);
2590                 }
2591         }
2592         mutex_exit(&bind_connfp->connf_lock);
2593         return (NULL);
2594 }
2595 
2596 /*
2597  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2598  * a listener when changing state.
2599  */
2600 conn_t *
2601 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2602     zoneid_t zoneid, ip_stack_t *ipst)
2603 {
2604         connf_t         *bind_connfp;
2605         conn_t          *connp = NULL;
2606         tcp_t           *tcp;
2607 
2608         /*
2609          * Avoid false matches for packets sent to an IP destination of
2610          * all zeros.
2611          */
2612         if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2613                 return (NULL);
2614 
2615         ASSERT(zoneid != ALL_ZONES);
2616 
2617         bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2618         mutex_enter(&bind_connfp->connf_lock);
2619         for (connp = bind_connfp->connf_head; connp != NULL;
2620             connp = connp->conn_next) {
2621                 tcp = connp->conn_tcp;
2622                 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2623                     IPCL_ZONE_MATCH(connp, zoneid) &&
2624                     (connp->conn_bound_if == 0 ||
2625                     connp->conn_bound_if == ifindex) &&
2626                     tcp->tcp_listener == NULL) {
2627                         CONN_INC_REF(connp);
2628                         mutex_exit(&bind_connfp->connf_lock);
2629                         return (connp);
2630                 }
2631         }
2632         mutex_exit(&bind_connfp->connf_lock);
2633         return (NULL);
2634 }
2635 
2636 /*
2637  * ipcl_get_next_conn
2638  *      get the next entry in the conn global list
2639  *      and put a reference on the next_conn.
2640  *      decrement the reference on the current conn.
2641  *
2642  * This is an iterator based walker function that also provides for
2643  * some selection by the caller. It walks through the conn_hash bucket
2644  * searching for the next valid connp in the list, and selects connections
2645  * that are neither closed nor condemned. It also REFHOLDS the conn
2646  * thus ensuring that the conn exists when the caller uses the conn.
2647  */
2648 conn_t *
2649 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2650 {
2651         conn_t  *next_connp;
2652 
2653         if (connfp == NULL)
2654                 return (NULL);
2655 
2656         mutex_enter(&connfp->connf_lock);
2657 
2658         next_connp = (connp == NULL) ?
2659             connfp->connf_head : connp->conn_g_next;
2660 
2661         while (next_connp != NULL) {
2662                 mutex_enter(&next_connp->conn_lock);
2663                 if (!(next_connp->conn_flags & conn_flags) ||
2664                     (next_connp->conn_state_flags &
2665                     (CONN_CONDEMNED | CONN_INCIPIENT))) {
2666                         /*
2667                          * This conn has been condemned or
2668                          * is closing, or the flags don't match
2669                          */
2670                         mutex_exit(&next_connp->conn_lock);
2671                         next_connp = next_connp->conn_g_next;
2672                         continue;
2673                 }
2674                 CONN_INC_REF_LOCKED(next_connp);
2675                 mutex_exit(&next_connp->conn_lock);
2676                 break;
2677         }
2678 
2679         mutex_exit(&connfp->connf_lock);
2680 
2681         if (connp != NULL)
2682                 CONN_DEC_REF(connp);
2683 
2684         return (next_connp);
2685 }
2686 
2687 #ifdef CONN_DEBUG
2688 /*
2689  * Trace of the last NBUF refhold/refrele
2690  */
2691 int
2692 conn_trace_ref(conn_t *connp)
2693 {
2694         int     last;
2695         conn_trace_t    *ctb;
2696 
2697         ASSERT(MUTEX_HELD(&connp->conn_lock));
2698         last = connp->conn_trace_last;
2699         last++;
2700         if (last == CONN_TRACE_MAX)
2701                 last = 0;
2702 
2703         ctb = &connp->conn_trace_buf[last];
2704         ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2705         connp->conn_trace_last = last;
2706         return (1);
2707 }
2708 
2709 int
2710 conn_untrace_ref(conn_t *connp)
2711 {
2712         int     last;
2713         conn_trace_t    *ctb;
2714 
2715         ASSERT(MUTEX_HELD(&connp->conn_lock));
2716         last = connp->conn_trace_last;
2717         last++;
2718         if (last == CONN_TRACE_MAX)
2719                 last = 0;
2720 
2721         ctb = &connp->conn_trace_buf[last];
2722         ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2723         connp->conn_trace_last = last;
2724         return (1);
2725 }
2726 #endif
2727 
2728 mib2_socketInfoEntry_t *
2729 conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie)
2730 {
2731         vnode_t *vn = NULL;
2732         vattr_t attr;
2733         uint64_t flags = 0;
2734 
2735         /*
2736          * If the connection is closing, it is not safe to make an upcall or
2737          * access the stream associated with the connection.
2738          * The callers of this function have a reference on connp itself
2739          * so, as long as it is not closing, it's safe to continue.
2740          */
2741         mutex_enter(&connp->conn_lock);
2742 
2743         if ((connp->conn_state_flags & CONN_CLOSING)) {
2744                 mutex_exit(&connp->conn_lock);
2745                 return (NULL);
2746         }
2747 
2748         mutex_exit(&connp->conn_lock);
2749 
2750         if (connp->conn_upper_handle != NULL) {
2751                 vn = (*connp->conn_upcalls->su_get_vnode)
2752                     (connp->conn_upper_handle);
2753         } else if (!IPCL_IS_NONSTR(connp) && connp->conn_rq != NULL) {
2754                 vn = STREAM(connp->conn_rq)->sd_pvnode;
2755                 if (vn != NULL)
2756                         VN_HOLD(vn);
2757                 flags |= MIB2_SOCKINFO_STREAM;
2758         }
2759 
2760         if (vn == NULL || VOP_GETATTR(vn, &attr, 0, CRED(), NULL) != 0) {
2761                 if (vn != NULL)
2762                         VN_RELE(vn);
2763                 return (NULL);
2764         }
2765 
2766         VN_RELE(vn);
2767 
2768         bzero(sie, sizeof (*sie));
2769 
2770         sie->sie_flags = flags;
2771         sie->sie_inode = attr.va_nodeid;
2772         sie->sie_dev = attr.va_rdev;
2773 
2774         return (sie);
2775 }