1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 
  26 /*
  27  * IP PACKET CLASSIFIER
  28  *
  29  * The IP packet classifier provides mapping between IP packets and persistent
  30  * connection state for connection-oriented protocols. It also provides
  31  * interface for managing connection states.
  32  *
  33  * The connection state is kept in conn_t data structure and contains, among
  34  * other things:
  35  *
  36  *      o local/remote address and ports
  37  *      o Transport protocol
  38  *      o squeue for the connection (for TCP only)
  39  *      o reference counter
  40  *      o Connection state
  41  *      o hash table linkage
  42  *      o interface/ire information
  43  *      o credentials
  44  *      o ipsec policy
  45  *      o send and receive functions.
  46  *      o mutex lock.
  47  *
  48  * Connections use a reference counting scheme. They are freed when the
  49  * reference counter drops to zero. A reference is incremented when connection
  50  * is placed in a list or table, when incoming packet for the connection arrives
  51  * and when connection is processed via squeue (squeue processing may be
  52  * asynchronous and the reference protects the connection from being destroyed
  53  * before its processing is finished).
  54  *
  55  * conn_recv is used to pass up packets to the ULP.
  56  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
  57  * a listener, and changes to tcp_input_listener as the listener has picked a
  58  * good squeue. For other cases it is set to tcp_input_data.
  59  *
  60  * conn_recvicmp is used to pass up ICMP errors to the ULP.
  61  *
  62  * Classifier uses several hash tables:
  63  *
  64  *      ipcl_conn_fanout:       contains all TCP connections in CONNECTED state
  65  *      ipcl_bind_fanout:       contains all connections in BOUND state
  66  *      ipcl_proto_fanout:      IPv4 protocol fanout
  67  *      ipcl_proto_fanout_v6:   IPv6 protocol fanout
  68  *      ipcl_udp_fanout:        contains all UDP connections
  69  *      ipcl_iptun_fanout:      contains all IP tunnel connections
  70  *      ipcl_globalhash_fanout: contains all connections
  71  *
  72  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
  73  * which need to view all existing connections.
  74  *
  75  * All tables are protected by per-bucket locks. When both per-bucket lock and
  76  * connection lock need to be held, the per-bucket lock should be acquired
  77  * first, followed by the connection lock.
  78  *
  79  * All functions doing search in one of these tables increment a reference
  80  * counter on the connection found (if any). This reference should be dropped
  81  * when the caller has finished processing the connection.
  82  *
  83  *
  84  * INTERFACES:
  85  * ===========
  86  *
  87  * Connection Lookup:
  88  * ------------------
  89  *
  90  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
  91  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
  92  *
  93  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
  94  * it can't find any associated connection. If the connection is found, its
  95  * reference counter is incremented.
  96  *
  97  *      mp:     mblock, containing packet header. The full header should fit
  98  *              into a single mblock. It should also contain at least full IP
  99  *              and TCP or UDP header.
 100  *
 101  *      protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 102  *
 103  *      hdr_len: The size of IP header. It is used to find TCP or UDP header in
 104  *               the packet.
 105  *
 106  *      ira->ira_zoneid: The zone in which the returned connection must be; the
 107  *              zoneid corresponding to the ire_zoneid on the IRE located for
 108  *              the packet's destination address.
 109  *
 110  *      ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
 111  *              IRAF_TX_SHARED_ADDR flags
 112  *
 113  *      For TCP connections, the lookup order is as follows:
 114  *              5-tuple {src, dst, protocol, local port, remote port}
 115  *                      lookup in ipcl_conn_fanout table.
 116  *              3-tuple {dst, local port, protocol} lookup in
 117  *                      ipcl_bind_fanout table.
 118  *
 119  *      For UDP connections, a 5-tuple {src, dst, protocol, local port,
 120  *      remote port} lookup is done on ipcl_udp_fanout. Note that,
 121  *      these interfaces do not handle cases where a packet belongs
 122  *      to multiple UDP clients, which is handled in IP itself.
 123  *
 124  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 125  * determine which actual zone gets the segment.  This is used only in a
 126  * labeled environment.  The matching rules are:
 127  *
 128  *      - If it's not a multilevel port, then the label on the packet selects
 129  *        the zone.  Unlabeled packets are delivered to the global zone.
 130  *
 131  *      - If it's a multilevel port, then only the zone registered to receive
 132  *        packets on that port matches.
 133  *
 134  * Also, in a labeled environment, packet labels need to be checked.  For fully
 135  * bound TCP connections, we can assume that the packet label was checked
 136  * during connection establishment, and doesn't need to be checked on each
 137  * packet.  For others, though, we need to check for strict equality or, for
 138  * multilevel ports, membership in the range or set.  This part currently does
 139  * a tnrh lookup on each packet, but could be optimized to use cached results
 140  * if that were necessary.  (SCTP doesn't come through here, but if it did,
 141  * we would apply the same rules as TCP.)
 142  *
 143  * An implication of the above is that fully-bound TCP sockets must always use
 144  * distinct 4-tuples; they can't be discriminated by label alone.
 145  *
 146  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 147  * as there's no connection set-up handshake and no shared state.
 148  *
 149  * Labels on looped-back packets within a single zone do not need to be
 150  * checked, as all processes in the same zone have the same label.
 151  *
 152  * Finally, for unlabeled packets received by a labeled system, special rules
 153  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 154  * socket in the zone whose label matches the default label of the sender, if
 155  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 156  * receiver's label must dominate the sender's default label.
 157  *
 158  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
 159  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 160  *                                       ip_stack);
 161  *
 162  *      Lookup routine to find an exact match for {src, dst, local port,
 163  *      remote port} for TCP connections in ipcl_conn_fanout. The address and
 164  *      ports are read from the IP and TCP header respectively.
 165  *
 166  * conn_t       *ipcl_lookup_listener_v4(lport, laddr, protocol,
 167  *                                       zoneid, ip_stack);
 168  * conn_t       *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 169  *                                       zoneid, ip_stack);
 170  *
 171  *      Lookup routine to find a listener with the tuple {lport, laddr,
 172  *      protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 173  *      parameter interface index is also compared.
 174  *
 175  * void ipcl_walk(func, arg, ip_stack)
 176  *
 177  *      Apply 'func' to every connection available. The 'func' is called as
 178  *      (*func)(connp, arg). The walk is non-atomic so connections may be
 179  *      created and destroyed during the walk. The CONN_CONDEMNED and
 180  *      CONN_INCIPIENT flags ensure that connections which are newly created
 181  *      or being destroyed are not selected by the walker.
 182  *
 183  * Table Updates
 184  * -------------
 185  *
 186  * int ipcl_conn_insert(connp);
 187  * int ipcl_conn_insert_v4(connp);
 188  * int ipcl_conn_insert_v6(connp);
 189  *
 190  *      Insert 'connp' in the ipcl_conn_fanout.
 191  *      Arguments :
 192  *              connp           conn_t to be inserted
 193  *
 194  *      Return value :
 195  *              0               if connp was inserted
 196  *              EADDRINUSE      if the connection with the same tuple
 197  *                              already exists.
 198  *
 199  * int ipcl_bind_insert(connp);
 200  * int ipcl_bind_insert_v4(connp);
 201  * int ipcl_bind_insert_v6(connp);
 202  *
 203  *      Insert 'connp' in ipcl_bind_fanout.
 204  *      Arguments :
 205  *              connp           conn_t to be inserted
 206  *
 207  *
 208  * void ipcl_hash_remove(connp);
 209  *
 210  *      Removes the 'connp' from the connection fanout table.
 211  *
 212  * Connection Creation/Destruction
 213  * -------------------------------
 214  *
 215  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 216  *
 217  *      Creates a new conn based on the type flag, inserts it into
 218  *      globalhash table.
 219  *
 220  *      type:   This flag determines the type of conn_t which needs to be
 221  *              created i.e., which kmem_cache it comes from.
 222  *              IPCL_TCPCONN    indicates a TCP connection
 223  *              IPCL_SCTPCONN   indicates a SCTP connection
 224  *              IPCL_UDPCONN    indicates a UDP conn_t.
 225  *              IPCL_RAWIPCONN  indicates a RAWIP/ICMP conn_t.
 226  *              IPCL_RTSCONN    indicates a RTS conn_t.
 227  *              IPCL_IPCCONN    indicates all other connections.
 228  *
 229  * void ipcl_conn_destroy(connp)
 230  *
 231  *      Destroys the connection state, removes it from the global
 232  *      connection hash table and frees its memory.
 233  */
 234 
 235 #include <sys/types.h>
 236 #include <sys/stream.h>
 237 #include <sys/stropts.h>
 238 #include <sys/sysmacros.h>
 239 #include <sys/strsubr.h>
 240 #include <sys/strsun.h>
 241 #define _SUN_TPI_VERSION 2
 242 #include <sys/ddi.h>
 243 #include <sys/cmn_err.h>
 244 #include <sys/debug.h>
 245 
 246 #include <sys/systm.h>
 247 #include <sys/param.h>
 248 #include <sys/kmem.h>
 249 #include <sys/isa_defs.h>
 250 #include <inet/common.h>
 251 #include <netinet/ip6.h>
 252 #include <netinet/icmp6.h>
 253 
 254 #include <inet/ip.h>
 255 #include <inet/ip_if.h>
 256 #include <inet/ip_ire.h>
 257 #include <inet/ip6.h>
 258 #include <inet/ip_ndp.h>
 259 #include <inet/ip_impl.h>
 260 #include <inet/udp_impl.h>
 261 #include <inet/sctp_ip.h>
 262 #include <inet/sctp/sctp_impl.h>
 263 #include <inet/rawip_impl.h>
 264 #include <inet/rts_impl.h>
 265 #include <inet/iptun/iptun_impl.h>
 266 
 267 #include <sys/cpuvar.h>
 268 
 269 #include <inet/ipclassifier.h>
 270 #include <inet/tcp.h>
 271 #include <inet/ipsec_impl.h>
 272 
 273 #include <sys/tsol/tnet.h>
 274 #include <sys/sockio.h>
 275 
 276 /* Old value for compatibility. Settable in /etc/system */
 277 uint_t tcp_conn_hash_size = 0;
 278 
 279 /* New value. Zero means choose automatically.  Settable in /etc/system */
 280 uint_t ipcl_conn_hash_size = 0;
     /*
      * Used by ipcl_init() only when both size tunables above are zero: the
      * requested conn fanout size is then (freemem * PAGESIZE) divided by
      * ipcl_conn_hash_memfactor, capped at ipcl_conn_hash_maxsize.  In all
      * cases ipcl_init() then picks the final size from the P2Ps() table.
      */
 281 uint_t ipcl_conn_hash_memfactor = 8192;
 282 uint_t ipcl_conn_hash_maxsize = 82500;
 283 
 284 /* bind/udp fanout table size */
     /* ipcl_init() copies these into each ip_stack_t without adjusting them. */
 285 uint_t ipcl_bind_fanout_size = 512;
 286 uint_t ipcl_udp_fanout_size = 16384;
 287 
 288 /* Raw socket fanout size.  Must be a power of 2. */
     /* NOTE(review): ipcl_init() does not verify the power-of-2 requirement. */
 289 uint_t ipcl_raw_fanout_size = 256;
 290 
 291 /*
 292  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 293  * expect that most large deployments would have hundreds of tunnels, and
 294  * thousands in the extreme case.
 295  */
 296 uint_t ipcl_iptun_fanout_size = 6143;
 297 
 /*
  * Prime hash-table sizes indexed by N, for N of 0-28.  Entry N is the
  * nearest prime <= 2^N - 2^(N-2).  Entries 0-2 are 0, and a trailing 0
  * (index 29) terminates the table.
  */

 #define P2Ps() {                                                       \
         0, 0, 0, 5,                                     /* N  0-3  */  \
         11, 23, 47, 89,                                 /* N  4-7  */  \
         191, 383, 761, 1531,                            /* N  8-11 */  \
         3067, 6143, 12281, 24571,                       /* N 12-15 */  \
         49139, 98299, 196597, 393209,                   /* N 16-19 */  \
         786431, 1572853, 3145721, 6291449,              /* N 20-23 */  \
         12582893, 25165813, 50331599, 100663291,        /* N 24-27 */  \
         201326557,                                      /* N 28    */  \
         0                                               /* end     */  \
 }
 307 
 308 /*
 309  * Wrapper union to ensure that conn and what follows it (tcp_t, etc)
 310  * are aligned on cache lines.
      *
      * ipcl_g_init() sizes the tcp, udp, rawip and rts caches as
      * sizeof (itc_t) plus the size of the protocol structure, so each cache
      * object has room for this union and the protocol structure
      * (presumably laid out back to back; the constructors are not shown
      * here).  The filler member gives the union the size
      * CACHE_ALIGN(conn_s).  CACHE_ALIGN is defined outside this file;
      * presumably it rounds the conn size up to a cache-line multiple --
      * confirm against its definition.
 311  */
 312 typedef union itc_s {
 313         conn_t  itc_conn;       /* the connection itself */
 314         char    itcu_filler[CACHE_ALIGN(conn_s)];       /* size padding only */
 315 } itc_t;
 316 
 317 struct kmem_cache  *tcp_conn_cache;     /* created in ipcl_g_init() */
 318 struct kmem_cache  *ip_conn_cache;      /* created in ipcl_g_init() */
     /*
      * Defined outside this file; ipcl_conn_create() allocates SCTP conns
      * from it, but ipcl_g_init()/ipcl_g_destroy() do not manage it.
      */
 319 extern struct kmem_cache  *sctp_conn_cache;
 320 struct kmem_cache  *udp_conn_cache;     /* created in ipcl_g_init() */
 321 struct kmem_cache  *rawip_conn_cache;   /* created in ipcl_g_init() */
 322 struct kmem_cache  *rts_conn_cache;     /* created in ipcl_g_init() */
 323 
     /* TCP timer mblk helpers, defined outside this file. */
 324 extern void     tcp_timermp_free(tcp_t *);
 325 extern mblk_t   *tcp_timermp_alloc(int);
 326 
     /*
      * Constructor/destructor pairs that ipcl_g_init() hands to
      * kmem_cache_create() for the cache of the matching name.
      */
 327 static int      ip_conn_constructor(void *, void *, int);
 328 static void     ip_conn_destructor(void *, void *);
 329 
 330 static int      tcp_conn_constructor(void *, void *, int);
 331 static void     tcp_conn_destructor(void *, void *);
 332 
 333 static int      udp_conn_constructor(void *, void *, int);
 334 static void     udp_conn_destructor(void *, void *);
 335 
 336 static int      rawip_conn_constructor(void *, void *, int);
 337 static void     rawip_conn_destructor(void *, void *);
 338 
 339 static int      rts_conn_constructor(void *, void *, int);
 340 static void     rts_conn_destructor(void *, void *);
 341 
 342 /*
 343  * Global (for all stack instances) init routine
 344  */
 345 void
 346 ipcl_g_init(void)
 347 {
 348         ip_conn_cache = kmem_cache_create("ip_conn_cache",
 349             sizeof (conn_t), CACHE_ALIGN_SIZE,
 350             ip_conn_constructor, ip_conn_destructor,
 351             NULL, NULL, NULL, 0);
 352 
 353         tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
 354             sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
 355             tcp_conn_constructor, tcp_conn_destructor,
 356             tcp_conn_reclaim, NULL, NULL, 0);
 357 
 358         udp_conn_cache = kmem_cache_create("udp_conn_cache",
 359             sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
 360             udp_conn_constructor, udp_conn_destructor,
 361             NULL, NULL, NULL, 0);
 362 
 363         rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
 364             sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
 365             rawip_conn_constructor, rawip_conn_destructor,
 366             NULL, NULL, NULL, 0);
 367 
 368         rts_conn_cache = kmem_cache_create("rts_conn_cache",
 369             sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
 370             rts_conn_constructor, rts_conn_destructor,
 371             NULL, NULL, NULL, 0);
 372 }
 373 
 374 /*
 375  * ipclassifier intialization routine, sets up hash tables.
 376  */
 377 void
 378 ipcl_init(ip_stack_t *ipst)
 379 {
 380         int i;
 381         int sizes[] = P2Ps();
 382 
 383         /*
 384          * Calculate size of conn fanout table from /etc/system settings
 385          */
 386         if (ipcl_conn_hash_size != 0) {
 387                 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
 388         } else if (tcp_conn_hash_size != 0) {
 389                 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
 390         } else {
 391                 extern pgcnt_t freemem;
 392 
 393                 ipst->ips_ipcl_conn_fanout_size =
 394                     (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
 395 
 396                 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
 397                         ipst->ips_ipcl_conn_fanout_size =
 398                             ipcl_conn_hash_maxsize;
 399                 }
 400         }
 401 
 402         for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
 403                 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
 404                         break;
 405                 }
 406         }
 407         if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
 408                 /* Out of range, use the 2^16 value */
 409                 ipst->ips_ipcl_conn_fanout_size = sizes[16];
 410         }
 411 
 412         /* Take values from /etc/system */
 413         ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
 414         ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
 415         ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
 416         ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
 417 
 418         ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
 419 
 420         ipst->ips_ipcl_conn_fanout = kmem_zalloc(
 421             ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
 422 
 423         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 424                 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
 425                     MUTEX_DEFAULT, NULL);
 426         }
 427 
 428         ipst->ips_ipcl_bind_fanout = kmem_zalloc(
 429             ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
 430 
 431         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 432                 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
 433                     MUTEX_DEFAULT, NULL);
 434         }
 435 
 436         ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
 437             sizeof (connf_t), KM_SLEEP);
 438         for (i = 0; i < IPPROTO_MAX; i++) {
 439                 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
 440                     MUTEX_DEFAULT, NULL);
 441         }
 442 
 443         ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
 444             sizeof (connf_t), KM_SLEEP);
 445         for (i = 0; i < IPPROTO_MAX; i++) {
 446                 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
 447                     MUTEX_DEFAULT, NULL);
 448         }
 449 
 450         ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
 451         mutex_init(&ipst->ips_rts_clients->connf_lock,
 452             NULL, MUTEX_DEFAULT, NULL);
 453 
 454         ipst->ips_ipcl_udp_fanout = kmem_zalloc(
 455             ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
 456         for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 457                 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
 458                     MUTEX_DEFAULT, NULL);
 459         }
 460 
 461         ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
 462             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
 463         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 464                 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
 465                     MUTEX_DEFAULT, NULL);
 466         }
 467 
 468         ipst->ips_ipcl_raw_fanout = kmem_zalloc(
 469             ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
 470         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 471                 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
 472                     MUTEX_DEFAULT, NULL);
 473         }
 474 
 475         ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
 476             sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
 477         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 478                 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
 479                     NULL, MUTEX_DEFAULT, NULL);
 480         }
 481 }
 482 
 483 void
 484 ipcl_g_destroy(void)
 485 {
 486         kmem_cache_destroy(ip_conn_cache);
 487         kmem_cache_destroy(tcp_conn_cache);
 488         kmem_cache_destroy(udp_conn_cache);
 489         kmem_cache_destroy(rawip_conn_cache);
 490         kmem_cache_destroy(rts_conn_cache);
 491 }
 492 
 493 /*
 494  * All user-level and kernel use of the stack must be gone
 495  * by now.
 496  */
 497 void
 498 ipcl_destroy(ip_stack_t *ipst)
 499 {
 500         int i;
 501 
 502         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 503                 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
 504                 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
 505         }
 506         kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
 507             sizeof (connf_t));
 508         ipst->ips_ipcl_conn_fanout = NULL;
 509 
 510         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 511                 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
 512                 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
 513         }
 514         kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
 515             sizeof (connf_t));
 516         ipst->ips_ipcl_bind_fanout = NULL;
 517 
 518         for (i = 0; i < IPPROTO_MAX; i++) {
 519                 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
 520                 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
 521         }
 522         kmem_free(ipst->ips_ipcl_proto_fanout_v4,
 523             IPPROTO_MAX * sizeof (connf_t));
 524         ipst->ips_ipcl_proto_fanout_v4 = NULL;
 525 
 526         for (i = 0; i < IPPROTO_MAX; i++) {
 527                 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
 528                 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
 529         }
 530         kmem_free(ipst->ips_ipcl_proto_fanout_v6,
 531             IPPROTO_MAX * sizeof (connf_t));
 532         ipst->ips_ipcl_proto_fanout_v6 = NULL;
 533 
 534         for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 535                 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
 536                 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
 537         }
 538         kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
 539             sizeof (connf_t));
 540         ipst->ips_ipcl_udp_fanout = NULL;
 541 
 542         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 543                 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
 544                 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
 545         }
 546         kmem_free(ipst->ips_ipcl_iptun_fanout,
 547             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
 548         ipst->ips_ipcl_iptun_fanout = NULL;
 549 
 550         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 551                 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
 552                 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
 553         }
 554         kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
 555             sizeof (connf_t));
 556         ipst->ips_ipcl_raw_fanout = NULL;
 557 
 558         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 559                 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
 560                 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
 561         }
 562         kmem_free(ipst->ips_ipcl_globalhash_fanout,
 563             sizeof (connf_t) * CONN_G_HASH_SIZE);
 564         ipst->ips_ipcl_globalhash_fanout = NULL;
 565 
 566         ASSERT(ipst->ips_rts_clients->connf_head == NULL);
 567         mutex_destroy(&ipst->ips_rts_clients->connf_lock);
 568         kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
 569         ipst->ips_rts_clients = NULL;
 570 }
 571 
 572 /*
 573  * conn creation routine. initialize the conn, sets the reference
 574  * and inserts it in the global hash table.
 575  */
 576 conn_t *
 577 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
 578 {
 579         conn_t  *connp;
 580         struct kmem_cache *conn_cache;
 581 
 582         switch (type) {
 583         case IPCL_SCTPCONN:
 584                 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
 585                         return (NULL);
 586                 sctp_conn_init(connp);
 587                 netstack_hold(ns);
 588                 connp->conn_netstack = ns;
 589                 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 590                 connp->conn_ixa->ixa_conn_id = (long)connp;
 591                 ipcl_globalhash_insert(connp);
 592                 return (connp);
 593 
 594         case IPCL_TCPCONN:
 595                 conn_cache = tcp_conn_cache;
 596                 break;
 597 
 598         case IPCL_UDPCONN:
 599                 conn_cache = udp_conn_cache;
 600                 break;
 601 
 602         case IPCL_RAWIPCONN:
 603                 conn_cache = rawip_conn_cache;
 604                 break;
 605 
 606         case IPCL_RTSCONN:
 607                 conn_cache = rts_conn_cache;
 608                 break;
 609 
 610         case IPCL_IPCCONN:
 611                 conn_cache = ip_conn_cache;
 612                 break;
 613 
 614         default:
 615                 connp = NULL;
 616                 ASSERT(0);
 617         }
 618 
 619         if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
 620                 return (NULL);
 621 
 622         connp->conn_ref = 1;
 623         netstack_hold(ns);
 624         connp->conn_netstack = ns;
 625         connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 626         connp->conn_ixa->ixa_conn_id = (long)connp;
 627         ipcl_globalhash_insert(connp);
 628         return (connp);
 629 }
 630 
 631 void
 632 ipcl_conn_destroy(conn_t *connp)
 633 {
 634         mblk_t  *mp;
 635         netstack_t      *ns = connp->conn_netstack;
 636 
 637         ASSERT(!MUTEX_HELD(&connp->conn_lock));
 638         ASSERT(connp->conn_ref == 0);
 639         ASSERT(connp->conn_ioctlref == 0);
 640 
 641         DTRACE_PROBE1(conn__destroy, conn_t *, connp);
 642 
 643         if (connp->conn_cred != NULL) {
 644                 crfree(connp->conn_cred);
 645                 connp->conn_cred = NULL;
 646                 /* ixa_cred done in ipcl_conn_cleanup below */
 647         }
 648 
 649         if (connp->conn_ht_iphc != NULL) {
 650                 kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
 651                 connp->conn_ht_iphc = NULL;
 652                 connp->conn_ht_iphc_allocated = 0;
 653                 connp->conn_ht_iphc_len = 0;
 654                 connp->conn_ht_ulp = NULL;
 655                 connp->conn_ht_ulp_len = 0;
 656         }
 657         ip_pkt_free(&connp->conn_xmit_ipp);
 658 
 659         ipcl_globalhash_remove(connp);
 660 
 661         if (connp->conn_latch != NULL) {
 662                 IPLATCH_REFRELE(connp->conn_latch);
 663                 connp->conn_latch = NULL;
 664         }
 665         if (connp->conn_latch_in_policy != NULL) {
 666                 IPPOL_REFRELE(connp->conn_latch_in_policy);
 667                 connp->conn_latch_in_policy = NULL;
 668         }
 669         if (connp->conn_latch_in_action != NULL) {
 670                 IPACT_REFRELE(connp->conn_latch_in_action);
 671                 connp->conn_latch_in_action = NULL;
 672         }
 673         if (connp->conn_policy != NULL) {
 674                 IPPH_REFRELE(connp->conn_policy, ns);
 675                 connp->conn_policy = NULL;
 676         }
 677 
 678         if (connp->conn_ipsec_opt_mp != NULL) {
 679                 freemsg(connp->conn_ipsec_opt_mp);
 680                 connp->conn_ipsec_opt_mp = NULL;
 681         }
 682 
 683         if (connp->conn_flags & IPCL_TCPCONN) {
 684                 tcp_t *tcp = connp->conn_tcp;
 685 
 686                 tcp_free(tcp);
 687                 mp = tcp->tcp_timercache;
 688 
 689                 tcp->tcp_tcps = NULL;
 690 
 691                 /*
 692                  * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
 693                  * the mblk.
 694                  */
 695                 if (tcp->tcp_rsrv_mp != NULL) {
 696                         freeb(tcp->tcp_rsrv_mp);
 697                         tcp->tcp_rsrv_mp = NULL;
 698                         mutex_destroy(&tcp->tcp_rsrv_mp_lock);
 699                 }
 700 
 701                 ipcl_conn_cleanup(connp);
 702                 connp->conn_flags = IPCL_TCPCONN;
 703                 if (ns != NULL) {
 704                         ASSERT(tcp->tcp_tcps == NULL);
 705                         connp->conn_netstack = NULL;
 706                         connp->conn_ixa->ixa_ipst = NULL;
 707                         netstack_rele(ns);
 708                 }
 709 
 710                 bzero(tcp, sizeof (tcp_t));
 711 
 712                 tcp->tcp_timercache = mp;
 713                 tcp->tcp_connp = connp;
 714                 kmem_cache_free(tcp_conn_cache, connp);
 715                 return;
 716         }
 717 
 718         if (connp->conn_flags & IPCL_SCTPCONN) {
 719                 ASSERT(ns != NULL);
 720                 sctp_free(connp);
 721                 return;
 722         }
 723 
 724         ipcl_conn_cleanup(connp);
 725         if (ns != NULL) {
 726                 connp->conn_netstack = NULL;
 727                 connp->conn_ixa->ixa_ipst = NULL;
 728                 netstack_rele(ns);
 729         }
 730 
 731         /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
 732         if (connp->conn_flags & IPCL_UDPCONN) {
 733                 connp->conn_flags = IPCL_UDPCONN;
 734                 kmem_cache_free(udp_conn_cache, connp);
 735         } else if (connp->conn_flags & IPCL_RAWIPCONN) {
 736                 connp->conn_flags = IPCL_RAWIPCONN;
 737                 connp->conn_proto = IPPROTO_ICMP;
 738                 connp->conn_ixa->ixa_protocol = connp->conn_proto;
 739                 kmem_cache_free(rawip_conn_cache, connp);
 740         } else if (connp->conn_flags & IPCL_RTSCONN) {
 741                 connp->conn_flags = IPCL_RTSCONN;
 742                 kmem_cache_free(rts_conn_cache, connp);
 743         } else {
 744                 connp->conn_flags = IPCL_IPCCONN;
 745                 ASSERT(connp->conn_flags & IPCL_IPCCONN);
 746                 ASSERT(connp->conn_priv == NULL);
 747                 kmem_cache_free(ip_conn_cache, connp);
 748         }
 749 }
 750 
 751 /*
 752  * Running in cluster mode - deregister listener information
 753  */
 754 static void
 755 ipcl_conn_unlisten(conn_t *connp)
 756 {
 757         ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
 758         ASSERT(connp->conn_lport != 0);
 759 
 760         if (cl_inet_unlisten != NULL) {
 761                 sa_family_t     addr_family;
 762                 uint8_t         *laddrp;
 763 
 764                 if (connp->conn_ipversion == IPV6_VERSION) {
 765                         addr_family = AF_INET6;
 766                         laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
 767                 } else {
 768                         addr_family = AF_INET;
 769                         laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
 770                 }
 771                 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
 772                     IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
 773         }
 774         connp->conn_flags &= ~IPCL_CL_LISTENER;
 775 }
 776 
 777 /*
 778  * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 779  * which table the conn belonged to). So for debugging we can see which hash
 780  * table this connection was in.
 781  */
 782 #define IPCL_HASH_REMOVE(connp) {                                       \
 783         connf_t *connfp = (connp)->conn_fanout;                              \
 784         ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));                      \
 785         if (connfp != NULL) {                                           \
 786                 mutex_enter(&connfp->connf_lock);                        \
 787                 if ((connp)->conn_next != NULL)                              \
 788                         (connp)->conn_next->conn_prev =                   \
 789                             (connp)->conn_prev;                              \
 790                 if ((connp)->conn_prev != NULL)                              \
 791                         (connp)->conn_prev->conn_next =                   \
 792                             (connp)->conn_next;                              \
 793                 else                                                    \
 794                         connfp->connf_head = (connp)->conn_next;  \
 795                 (connp)->conn_fanout = NULL;                         \
 796                 (connp)->conn_next = NULL;                           \
 797                 (connp)->conn_prev = NULL;                           \
 798                 (connp)->conn_flags |= IPCL_REMOVED;                 \
 799                 if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)       \
 800                         ipcl_conn_unlisten((connp));                    \
 801                 CONN_DEC_REF((connp));                                  \
 802                 mutex_exit(&connfp->connf_lock);                 \
 803         }                                                               \
 804 }
 805 
 806 void
 807 ipcl_hash_remove(conn_t *connp)
 808 {
 809         uint8_t         protocol = connp->conn_proto;
 810 
 811         IPCL_HASH_REMOVE(connp);
 812         if (protocol == IPPROTO_RSVP)
 813                 ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
 814 }
 815 
 816 /*
 817  * The whole purpose of this function is allow removal of
 818  * a conn_t from the connected hash for timewait reclaim.
 819  * This is essentially a TW reclaim fastpath where timewait
 820  * collector checks under fanout lock (so no one else can
 821  * get access to the conn_t) that refcnt is 2 i.e. one for
 822  * TCP and one for the classifier hash list. If ref count
 823  * is indeed 2, we can just remove the conn under lock and
 824  * avoid cleaning up the conn under squeue. This gives us
 825  * improved performance.
 826  */
 827 void
 828 ipcl_hash_remove_locked(conn_t *connp, connf_t  *connfp)
 829 {
 830         ASSERT(MUTEX_HELD(&connfp->connf_lock));
 831         ASSERT(MUTEX_HELD(&connp->conn_lock));
 832         ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
 833 
 834         if ((connp)->conn_next != NULL) {
 835                 (connp)->conn_next->conn_prev = (connp)->conn_prev;
 836         }
 837         if ((connp)->conn_prev != NULL) {
 838                 (connp)->conn_prev->conn_next = (connp)->conn_next;
 839         } else {
 840                 connfp->connf_head = (connp)->conn_next;
 841         }
 842         (connp)->conn_fanout = NULL;
 843         (connp)->conn_next = NULL;
 844         (connp)->conn_prev = NULL;
 845         (connp)->conn_flags |= IPCL_REMOVED;
 846         ASSERT((connp)->conn_ref == 2);
 847         (connp)->conn_ref--;
 848 }
 849 
 850 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {              \
 851         ASSERT((connp)->conn_fanout == NULL);                                \
 852         ASSERT((connp)->conn_next == NULL);                          \
 853         ASSERT((connp)->conn_prev == NULL);                          \
 854         if ((connfp)->connf_head != NULL) {                          \
 855                 (connfp)->connf_head->conn_prev = (connp);                \
 856                 (connp)->conn_next = (connfp)->connf_head;                \
 857         }                                                               \
 858         (connp)->conn_fanout = (connfp);                             \
 859         (connfp)->connf_head = (connp);                                      \
 860         (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
 861             IPCL_CONNECTED;                                             \
 862         CONN_INC_REF(connp);                                            \
 863 }
 864 
 865 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) {                     \
 866         IPCL_HASH_REMOVE((connp));                                      \
 867         mutex_enter(&(connfp)->connf_lock);                              \
 868         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);               \
 869         mutex_exit(&(connfp)->connf_lock);                               \
 870 }
 871 
 872 /*
 873  * When inserting bound or wildcard entries into the hash, ordering rules are
 874  * used to facilitate timely and correct lookups.  The order is as follows:
 875  * 1. Entries bound to a specific address
 876  * 2. Entries bound to INADDR_ANY
 877  * 3. Entries bound to ADDR_UNSPECIFIED
 878  * Entries in a category which share conn_lport (such as those using
 879  * SO_REUSEPORT) will be ordered such that the newest inserted is first.
 880  */
 881 
 882 void
 883 ipcl_hash_insert_bound(connf_t *connfp, conn_t *connp)
 884 {
 885         conn_t *pconnp, *nconnp;
 886 
 887         IPCL_HASH_REMOVE(connp);
 888         mutex_enter(&connfp->connf_lock);
 889         nconnp = connfp->connf_head;
 890         pconnp = NULL;
 891         while (nconnp != NULL) {
 892                 /*
 893                  * Walk though entries associated with the fanout until one is
 894                  * found which fulfills any of these conditions:
 895                  * 1. Listen address of ADDR_ANY/ADDR_UNSPECIFIED
 896                  * 2. Listen port the same as connp
 897                  */
 898                 if (_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6) ||
 899                     connp->conn_lport == nconnp->conn_lport)
 900                         break;
 901                 pconnp = nconnp;
 902                 nconnp = nconnp->conn_next;
 903         }
 904         if (pconnp != NULL) {
 905                 pconnp->conn_next = connp;
 906                 connp->conn_prev = pconnp;
 907         } else {
 908                 connfp->connf_head = connp;
 909         }
 910         if (nconnp != NULL) {
 911                 connp->conn_next = nconnp;
 912                 nconnp->conn_prev = connp;
 913         }
 914         connp->conn_fanout = connfp;
 915         connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
 916         CONN_INC_REF(connp);
 917         mutex_exit(&connfp->connf_lock);
 918 }
 919 
 920 void
 921 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
 922 {
 923         conn_t *pconnp = NULL, *nconnp;
 924         boolean_t isv4mapped = IN6_IS_ADDR_V4MAPPED(&connp->conn_laddr_v6);
 925 
 926         IPCL_HASH_REMOVE(connp);
 927         mutex_enter(&connfp->connf_lock);
 928         nconnp = connfp->connf_head;
 929         pconnp = NULL;
 930         while (nconnp != NULL) {
 931                 if (IN6_IS_ADDR_V4MAPPED_ANY(&nconnp->conn_laddr_v6) &&
 932                     isv4mapped && connp->conn_lport == nconnp->conn_lport)
 933                         break;
 934                 if (IN6_IS_ADDR_UNSPECIFIED(&nconnp->conn_laddr_v6) &&
 935                     (isv4mapped ||
 936                     connp->conn_lport == nconnp->conn_lport))
 937                         break;
 938 
 939                 pconnp = nconnp;
 940                 nconnp = nconnp->conn_next;
 941         }
 942         if (pconnp != NULL) {
 943                 pconnp->conn_next = connp;
 944                 connp->conn_prev = pconnp;
 945         } else {
 946                 connfp->connf_head = connp;
 947         }
 948         if (nconnp != NULL) {
 949                 connp->conn_next = nconnp;
 950                 nconnp->conn_prev = connp;
 951         }
 952         connp->conn_fanout = connfp;
 953         connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
 954         CONN_INC_REF(connp);
 955         mutex_exit(&connfp->connf_lock);
 956 }
 957 
 958 /*
 959  * Because the classifier is used to classify inbound packets, the destination
 960  * address is meant to be our local tunnel address (tunnel source), and the
 961  * source the remote tunnel address (tunnel destination).
 962  *
 963  * Note that conn_proto can't be used for fanout since the upper protocol
 964  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
 965  */
 966 conn_t *
 967 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
 968 {
 969         connf_t *connfp;
 970         conn_t  *connp;
 971 
 972         /* first look for IPv4 tunnel links */
 973         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
 974         mutex_enter(&connfp->connf_lock);
 975         for (connp = connfp->connf_head; connp != NULL;
 976             connp = connp->conn_next) {
 977                 if (IPCL_IPTUN_MATCH(connp, *dst, *src))
 978                         break;
 979         }
 980         if (connp != NULL)
 981                 goto done;
 982 
 983         mutex_exit(&connfp->connf_lock);
 984 
 985         /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
 986         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
 987             INADDR_ANY)];
 988         mutex_enter(&connfp->connf_lock);
 989         for (connp = connfp->connf_head; connp != NULL;
 990             connp = connp->conn_next) {
 991                 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
 992                         break;
 993         }
 994 done:
 995         if (connp != NULL)
 996                 CONN_INC_REF(connp);
 997         mutex_exit(&connfp->connf_lock);
 998         return (connp);
 999 }
1000 
1001 conn_t *
1002 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
1003 {
1004         connf_t *connfp;
1005         conn_t  *connp;
1006 
1007         /* Look for an IPv6 tunnel link */
1008         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
1009         mutex_enter(&connfp->connf_lock);
1010         for (connp = connfp->connf_head; connp != NULL;
1011             connp = connp->conn_next) {
1012                 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
1013                         CONN_INC_REF(connp);
1014                         break;
1015                 }
1016         }
1017         mutex_exit(&connfp->connf_lock);
1018         return (connp);
1019 }
1020 
1021 /*
1022  * This function is used only for inserting SCTP raw socket now.
1023  * This may change later.
1024  *
1025  * Note that only one raw socket can be bound to a port.  The param
1026  * lport is in network byte order.
1027  */
1028 static int
1029 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1030 {
1031         connf_t *connfp;
1032         conn_t  *oconnp;
1033         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1034 
1035         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1036 
1037         /* Check for existing raw socket already bound to the port. */
1038         mutex_enter(&connfp->connf_lock);
1039         for (oconnp = connfp->connf_head; oconnp != NULL;
1040             oconnp = oconnp->conn_next) {
1041                 if (oconnp->conn_lport == lport &&
1042                     oconnp->conn_zoneid == connp->conn_zoneid &&
1043                     oconnp->conn_family == connp->conn_family &&
1044                     ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1045                     IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1046                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1047                     IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1048                     IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1049                     &connp->conn_laddr_v6))) {
1050                         break;
1051                 }
1052         }
1053         mutex_exit(&connfp->connf_lock);
1054         if (oconnp != NULL)
1055                 return (EADDRNOTAVAIL);
1056 
1057         if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1058             IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1059                 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1060                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1061                         ipcl_hash_insert_wildcard(connfp, connp);
1062                 } else {
1063                         ipcl_hash_insert_bound(connfp, connp);
1064                 }
1065         } else {
1066                 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1067         }
1068         return (0);
1069 }
1070 
1071 static int
1072 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1073 {
1074         connf_t *connfp;
1075         conn_t  *tconnp;
1076         ipaddr_t laddr = connp->conn_laddr_v4;
1077         ipaddr_t faddr = connp->conn_faddr_v4;
1078 
1079         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1080         mutex_enter(&connfp->connf_lock);
1081         for (tconnp = connfp->connf_head; tconnp != NULL;
1082             tconnp = tconnp->conn_next) {
1083                 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1084                         /* A tunnel is already bound to these addresses. */
1085                         mutex_exit(&connfp->connf_lock);
1086                         return (EADDRINUSE);
1087                 }
1088         }
1089         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1090         mutex_exit(&connfp->connf_lock);
1091         return (0);
1092 }
1093 
1094 static int
1095 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1096 {
1097         connf_t *connfp;
1098         conn_t  *tconnp;
1099         in6_addr_t *laddr = &connp->conn_laddr_v6;
1100         in6_addr_t *faddr = &connp->conn_faddr_v6;
1101 
1102         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1103         mutex_enter(&connfp->connf_lock);
1104         for (tconnp = connfp->connf_head; tconnp != NULL;
1105             tconnp = tconnp->conn_next) {
1106                 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1107                         /* A tunnel is already bound to these addresses. */
1108                         mutex_exit(&connfp->connf_lock);
1109                         return (EADDRINUSE);
1110                 }
1111         }
1112         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1113         mutex_exit(&connfp->connf_lock);
1114         return (0);
1115 }
1116 
1117 /*
1118  * Check for a MAC exemption conflict on a labeled system.  Note that for
1119  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1120  * transport layer.  This check is for binding all other protocols.
1121  *
1122  * Returns true if there's a conflict.
1123  */
1124 static boolean_t
1125 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1126 {
1127         connf_t *connfp;
1128         conn_t *tconn;
1129 
1130         connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1131         mutex_enter(&connfp->connf_lock);
1132         for (tconn = connfp->connf_head; tconn != NULL;
1133             tconn = tconn->conn_next) {
1134                 /* We don't allow v4 fallback for v6 raw socket */
1135                 if (connp->conn_family != tconn->conn_family)
1136                         continue;
1137                 /* If neither is exempt, then there's no conflict */
1138                 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1139                     (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1140                         continue;
1141                 /* We are only concerned about sockets for a different zone */
1142                 if (connp->conn_zoneid == tconn->conn_zoneid)
1143                         continue;
1144                 /* If both are bound to different specific addrs, ok */
1145                 if (connp->conn_laddr_v4 != INADDR_ANY &&
1146                     tconn->conn_laddr_v4 != INADDR_ANY &&
1147                     connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1148                         continue;
1149                 /* These two conflict; fail */
1150                 break;
1151         }
1152         mutex_exit(&connfp->connf_lock);
1153         return (tconn != NULL);
1154 }
1155 
1156 static boolean_t
1157 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1158 {
1159         connf_t *connfp;
1160         conn_t *tconn;
1161 
1162         connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1163         mutex_enter(&connfp->connf_lock);
1164         for (tconn = connfp->connf_head; tconn != NULL;
1165             tconn = tconn->conn_next) {
1166                 /* We don't allow v4 fallback for v6 raw socket */
1167                 if (connp->conn_family != tconn->conn_family)
1168                         continue;
1169                 /* If neither is exempt, then there's no conflict */
1170                 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1171                     (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1172                         continue;
1173                 /* We are only concerned about sockets for a different zone */
1174                 if (connp->conn_zoneid == tconn->conn_zoneid)
1175                         continue;
1176                 /* If both are bound to different addrs, ok */
1177                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1178                     !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1179                     !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1180                     &tconn->conn_laddr_v6))
1181                         continue;
1182                 /* These two conflict; fail */
1183                 break;
1184         }
1185         mutex_exit(&connfp->connf_lock);
1186         return (tconn != NULL);
1187 }
1188 
1189 /*
1190  * (v4, v6) bind hash insertion routines
1191  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1192  */
1193 
1194 int
1195 ipcl_bind_insert(conn_t *connp)
1196 {
1197         if (connp->conn_ipversion == IPV6_VERSION)
1198                 return (ipcl_bind_insert_v6(connp));
1199         else
1200                 return (ipcl_bind_insert_v4(connp));
1201 }
1202 
1203 int
1204 ipcl_bind_insert_v4(conn_t *connp)
1205 {
1206         connf_t *connfp;
1207         int     ret = 0;
1208         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1209         uint16_t        lport = connp->conn_lport;
1210         uint8_t         protocol = connp->conn_proto;
1211 
1212         if (IPCL_IS_IPTUN(connp))
1213                 return (ipcl_iptun_hash_insert(connp, ipst));
1214 
1215         switch (protocol) {
1216         default:
1217                 if (is_system_labeled() &&
1218                     check_exempt_conflict_v4(connp, ipst))
1219                         return (EADDRINUSE);
1220                 /* FALLTHROUGH */
1221         case IPPROTO_UDP:
1222                 if (protocol == IPPROTO_UDP) {
1223                         connfp = &ipst->ips_ipcl_udp_fanout[
1224                             IPCL_UDP_HASH(lport, ipst)];
1225                 } else {
1226                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1227                 }
1228 
1229                 if (connp->conn_faddr_v4 != INADDR_ANY) {
1230                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1231                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1232                         ipcl_hash_insert_bound(connfp, connp);
1233                 } else {
1234                         ipcl_hash_insert_wildcard(connfp, connp);
1235                 }
1236                 if (protocol == IPPROTO_RSVP)
1237                         ill_set_inputfn_all(ipst);
1238                 break;
1239 
1240         case IPPROTO_TCP:
1241                 /* Insert it in the Bind Hash */
1242                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1243                 connfp = &ipst->ips_ipcl_bind_fanout[
1244                     IPCL_BIND_HASH(lport, ipst)];
1245                 if (connp->conn_laddr_v4 != INADDR_ANY) {
1246                         ipcl_hash_insert_bound(connfp, connp);
1247                 } else {
1248                         ipcl_hash_insert_wildcard(connfp, connp);
1249                 }
1250                 if (cl_inet_listen != NULL) {
1251                         ASSERT(connp->conn_ipversion == IPV4_VERSION);
1252                         connp->conn_flags |= IPCL_CL_LISTENER;
1253                         (*cl_inet_listen)(
1254                             connp->conn_netstack->netstack_stackid,
1255                             IPPROTO_TCP, AF_INET,
1256                             (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1257                 }
1258                 break;
1259 
1260         case IPPROTO_SCTP:
1261                 ret = ipcl_sctp_hash_insert(connp, lport);
1262                 break;
1263         }
1264 
1265         return (ret);
1266 }
1267 
1268 int
1269 ipcl_bind_insert_v6(conn_t *connp)
1270 {
1271         connf_t         *connfp;
1272         int             ret = 0;
1273         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1274         uint16_t        lport = connp->conn_lport;
1275         uint8_t         protocol = connp->conn_proto;
1276 
1277         if (IPCL_IS_IPTUN(connp)) {
1278                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1279         }
1280 
1281         switch (protocol) {
1282         default:
1283                 if (is_system_labeled() &&
1284                     check_exempt_conflict_v6(connp, ipst))
1285                         return (EADDRINUSE);
1286                 /* FALLTHROUGH */
1287         case IPPROTO_UDP:
1288                 if (protocol == IPPROTO_UDP) {
1289                         connfp = &ipst->ips_ipcl_udp_fanout[
1290                             IPCL_UDP_HASH(lport, ipst)];
1291                 } else {
1292                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1293                 }
1294 
1295                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1296                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1297                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1298                         ipcl_hash_insert_bound(connfp, connp);
1299                 } else {
1300                         ipcl_hash_insert_wildcard(connfp, connp);
1301                 }
1302                 break;
1303 
1304         case IPPROTO_TCP:
1305                 /* Insert it in the Bind Hash */
1306                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1307                 connfp = &ipst->ips_ipcl_bind_fanout[
1308                     IPCL_BIND_HASH(lport, ipst)];
1309                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1310                         ipcl_hash_insert_bound(connfp, connp);
1311                 } else {
1312                         ipcl_hash_insert_wildcard(connfp, connp);
1313                 }
1314                 if (cl_inet_listen != NULL) {
1315                         sa_family_t     addr_family;
1316                         uint8_t         *laddrp;
1317 
1318                         if (connp->conn_ipversion == IPV6_VERSION) {
1319                                 addr_family = AF_INET6;
1320                                 laddrp =
1321                                     (uint8_t *)&connp->conn_bound_addr_v6;
1322                         } else {
1323                                 addr_family = AF_INET;
1324                                 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1325                         }
1326                         connp->conn_flags |= IPCL_CL_LISTENER;
1327                         (*cl_inet_listen)(
1328                             connp->conn_netstack->netstack_stackid,
1329                             IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1330                 }
1331                 break;
1332 
1333         case IPPROTO_SCTP:
1334                 ret = ipcl_sctp_hash_insert(connp, lport);
1335                 break;
1336         }
1337 
1338         return (ret);
1339 }
1340 
1341 /*
1342  * ipcl_conn_hash insertion routines.
1343  * The caller has already set conn_proto and the addresses/ports in the conn_t.
1344  */
1345 
1346 int
1347 ipcl_conn_insert(conn_t *connp)
1348 {
1349         if (connp->conn_ipversion == IPV6_VERSION)
1350                 return (ipcl_conn_insert_v6(connp));
1351         else
1352                 return (ipcl_conn_insert_v4(connp));
1353 }
1354 
1355 int
1356 ipcl_conn_insert_v4(conn_t *connp)
1357 {
1358         connf_t         *connfp;
1359         conn_t          *tconnp;
1360         int             ret = 0;
1361         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1362         uint16_t        lport = connp->conn_lport;
1363         uint8_t         protocol = connp->conn_proto;
1364 
1365         if (IPCL_IS_IPTUN(connp))
1366                 return (ipcl_iptun_hash_insert(connp, ipst));
1367 
1368         switch (protocol) {
1369         case IPPROTO_TCP:
1370                 /*
1371                  * For TCP, we check whether the connection tuple already
1372                  * exists before allowing the connection to proceed.  We
1373                  * also allow indexing on the zoneid. This is to allow
1374                  * multiple shared stack zones to have the same tcp
1375                  * connection tuple. In practice this only happens for
1376                  * INADDR_LOOPBACK as it's the only local address which
1377                  * doesn't have to be unique.
1378                  */
1379                 connfp = &ipst->ips_ipcl_conn_fanout[
1380                     IPCL_CONN_HASH(connp->conn_faddr_v4,
1381                     connp->conn_ports, ipst)];
1382                 mutex_enter(&connfp->connf_lock);
1383                 for (tconnp = connfp->connf_head; tconnp != NULL;
1384                     tconnp = tconnp->conn_next) {
1385                         if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1386                             connp->conn_faddr_v4, connp->conn_laddr_v4,
1387                             connp->conn_ports) &&
1388                             IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1389                                 /* Already have a conn. bail out */
1390                                 mutex_exit(&connfp->connf_lock);
1391                                 return (EADDRINUSE);
1392                         }
1393                 }
1394                 if (connp->conn_fanout != NULL) {
1395                         /*
1396                          * Probably a XTI/TLI application trying to do a
1397                          * rebind. Let it happen.
1398                          */
1399                         mutex_exit(&connfp->connf_lock);
1400                         IPCL_HASH_REMOVE(connp);
1401                         mutex_enter(&connfp->connf_lock);
1402                 }
1403 
1404                 ASSERT(connp->conn_recv != NULL);
1405                 ASSERT(connp->conn_recvicmp != NULL);
1406 
1407                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1408                 mutex_exit(&connfp->connf_lock);
1409                 break;
1410 
1411         case IPPROTO_SCTP:
1412                 /*
1413                  * The raw socket may have already been bound, remove it
1414                  * from the hash first.
1415                  */
1416                 IPCL_HASH_REMOVE(connp);
1417                 ret = ipcl_sctp_hash_insert(connp, lport);
1418                 break;
1419 
1420         default:
1421                 /*
1422                  * Check for conflicts among MAC exempt bindings.  For
1423                  * transports with port numbers, this is done by the upper
1424                  * level per-transport binding logic.  For all others, it's
1425                  * done here.
1426                  */
1427                 if (is_system_labeled() &&
1428                     check_exempt_conflict_v4(connp, ipst))
1429                         return (EADDRINUSE);
1430                 /* FALLTHROUGH */
1431 
1432         case IPPROTO_UDP:
1433                 if (protocol == IPPROTO_UDP) {
1434                         connfp = &ipst->ips_ipcl_udp_fanout[
1435                             IPCL_UDP_HASH(lport, ipst)];
1436                 } else {
1437                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1438                 }
1439 
1440                 if (connp->conn_faddr_v4 != INADDR_ANY) {
1441                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1442                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1443                         ipcl_hash_insert_bound(connfp, connp);
1444                 } else {
1445                         ipcl_hash_insert_wildcard(connfp, connp);
1446                 }
1447                 break;
1448         }
1449 
1450         return (ret);
1451 }
1452 
1453 int
1454 ipcl_conn_insert_v6(conn_t *connp)
1455 {
1456         connf_t         *connfp;
1457         conn_t          *tconnp;
1458         int             ret = 0;
1459         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1460         uint16_t        lport = connp->conn_lport;
1461         uint8_t         protocol = connp->conn_proto;
1462         uint_t          ifindex = connp->conn_bound_if;
1463 
1464         if (IPCL_IS_IPTUN(connp))
1465                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1466 
1467         switch (protocol) {
1468         case IPPROTO_TCP:
1469 
1470                 /*
1471                  * For tcp, we check whether the connection tuple already
1472                  * exists before allowing the connection to proceed.  We
1473                  * also allow indexing on the zoneid. This is to allow
1474                  * multiple shared stack zones to have the same tcp
1475                  * connection tuple. In practice this only happens for
1476                  * ipv6_loopback as it's the only local address which
1477                  * doesn't have to be unique.
1478                  */
1479                 connfp = &ipst->ips_ipcl_conn_fanout[
1480                     IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1481                     ipst)];
1482                 mutex_enter(&connfp->connf_lock);
1483                 for (tconnp = connfp->connf_head; tconnp != NULL;
1484                     tconnp = tconnp->conn_next) {
1485                         /* NOTE: need to match zoneid. Bug in onnv-gate */
1486                         if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1487                             connp->conn_faddr_v6, connp->conn_laddr_v6,
1488                             connp->conn_ports) &&
1489                             (tconnp->conn_bound_if == 0 ||
1490                             tconnp->conn_bound_if == ifindex) &&
1491                             IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1492                                 /* Already have a conn. bail out */
1493                                 mutex_exit(&connfp->connf_lock);
1494                                 return (EADDRINUSE);
1495                         }
1496                 }
1497                 if (connp->conn_fanout != NULL) {
1498                         /*
1499                          * Probably a XTI/TLI application trying to do a
1500                          * rebind. Let it happen.
1501                          */
1502                         mutex_exit(&connfp->connf_lock);
1503                         IPCL_HASH_REMOVE(connp);
1504                         mutex_enter(&connfp->connf_lock);
1505                 }
1506                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1507                 mutex_exit(&connfp->connf_lock);
1508                 break;
1509 
1510         case IPPROTO_SCTP:
1511                 IPCL_HASH_REMOVE(connp);
1512                 ret = ipcl_sctp_hash_insert(connp, lport);
1513                 break;
1514 
1515         default:
1516                 if (is_system_labeled() &&
1517                     check_exempt_conflict_v6(connp, ipst))
1518                         return (EADDRINUSE);
1519                 /* FALLTHROUGH */
1520         case IPPROTO_UDP:
1521                 if (protocol == IPPROTO_UDP) {
1522                         connfp = &ipst->ips_ipcl_udp_fanout[
1523                             IPCL_UDP_HASH(lport, ipst)];
1524                 } else {
1525                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1526                 }
1527 
1528                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1529                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1530                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1531                         ipcl_hash_insert_bound(connfp, connp);
1532                 } else {
1533                         ipcl_hash_insert_wildcard(connfp, connp);
1534                 }
1535                 break;
1536         }
1537 
1538         return (ret);
1539 }
1540 
1541 /*
1542  * v4 packet classifying function. looks up the fanout table to
1543  * find the conn, the packet belongs to. returns the conn with
1544  * the reference held, null otherwise.
1545  *
1546  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1547  * Lookup" comment block are applied.  Labels are also checked as described
1548  * above.  If the packet is from the inside (looped back), and is from the same
1549  * zone, then label checks are omitted.
1550  */
1551 conn_t *
1552 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1553     ip_recv_attr_t *ira, ip_stack_t *ipst)
1554 {
1555         ipha_t  *ipha;
1556         connf_t *connfp, *bind_connfp;
1557         uint16_t lport;
1558         uint16_t fport;
1559         uint32_t ports;
1560         conn_t  *connp;
1561         uint16_t  *up;
1562         zoneid_t        zoneid = ira->ira_zoneid;
1563 
1564         ipha = (ipha_t *)mp->b_rptr;
1565         up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1566 
1567         switch (protocol) {
1568         case IPPROTO_TCP:
1569                 ports = *(uint32_t *)up;
1570                 connfp =
1571                     &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1572                     ports, ipst)];
1573                 mutex_enter(&connfp->connf_lock);
1574                 for (connp = connfp->connf_head; connp != NULL;
1575                     connp = connp->conn_next) {
1576                         if (IPCL_CONN_MATCH(connp, protocol,
1577                             ipha->ipha_src, ipha->ipha_dst, ports) &&
1578                             (connp->conn_zoneid == zoneid ||
1579                             connp->conn_allzones ||
1580                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1581                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1582                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1583                                 break;
1584                 }
1585 
1586                 if (connp != NULL) {
1587                         /*
1588                          * We have a fully-bound TCP connection.
1589                          *
1590                          * For labeled systems, there's no need to check the
1591                          * label here.  It's known to be good as we checked
1592                          * before allowing the connection to become bound.
1593                          */
1594                         CONN_INC_REF(connp);
1595                         mutex_exit(&connfp->connf_lock);
1596                         return (connp);
1597                 }
1598 
1599                 mutex_exit(&connfp->connf_lock);
1600                 lport = up[1];
1601                 bind_connfp =
1602                     &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1603                 mutex_enter(&bind_connfp->connf_lock);
1604                 for (connp = bind_connfp->connf_head; connp != NULL;
1605                     connp = connp->conn_next) {
1606                         if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1607                             lport) &&
1608                             (connp->conn_zoneid == zoneid ||
1609                             connp->conn_allzones ||
1610                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1611                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1612                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1613                                 break;
1614                 }
1615 
1616                 /*
1617                  * If the matching connection is SLP on a private address, then
1618                  * the label on the packet must match the local zone's label.
1619                  * Otherwise, it must be in the label range defined by tnrh.
1620                  * This is ensured by tsol_receive_local.
1621                  *
1622                  * Note that we don't check tsol_receive_local for
1623                  * the connected case.
1624                  */
1625                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1626                     !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1627                     ira, connp)) {
1628                         DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1629                             char *, "connp(1) could not receive mp(2)",
1630                             conn_t *, connp, mblk_t *, mp);
1631                         connp = NULL;
1632                 }
1633 
1634                 if (connp != NULL) {
1635                         /* Have a listener at least */
1636                         CONN_INC_REF(connp);
1637                         mutex_exit(&bind_connfp->connf_lock);
1638                         return (connp);
1639                 }
1640 
1641                 mutex_exit(&bind_connfp->connf_lock);
1642                 break;
1643 
1644         case IPPROTO_UDP:
1645                 lport = up[1];
1646                 fport = up[0];
1647                 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1648                 mutex_enter(&connfp->connf_lock);
1649                 for (connp = connfp->connf_head; connp != NULL;
1650                     connp = connp->conn_next) {
1651                         if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1652                             fport, ipha->ipha_src) &&
1653                             (connp->conn_zoneid == zoneid ||
1654                             connp->conn_allzones ||
1655                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1656                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1657                                 break;
1658                 }
1659 
1660                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1661                     !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1662                     ira, connp)) {
1663                         DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1664                             char *, "connp(1) could not receive mp(2)",
1665                             conn_t *, connp, mblk_t *, mp);
1666                         connp = NULL;
1667                 }
1668 
1669                 if (connp != NULL) {
1670                         CONN_INC_REF(connp);
1671                         mutex_exit(&connfp->connf_lock);
1672                         return (connp);
1673                 }
1674 
1675                 /*
1676                  * We shouldn't come here for multicast/broadcast packets
1677                  */
1678                 mutex_exit(&connfp->connf_lock);
1679 
1680                 break;
1681 
1682         case IPPROTO_ENCAP:
1683         case IPPROTO_IPV6:
1684                 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1685                     &ipha->ipha_dst, ipst));
1686         }
1687 
1688         return (NULL);
1689 }
1690 
1691 conn_t *
1692 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1693     ip_recv_attr_t *ira, ip_stack_t *ipst)
1694 {
1695         ip6_t           *ip6h;
1696         connf_t         *connfp, *bind_connfp;
1697         uint16_t        lport;
1698         uint16_t        fport;
1699         tcpha_t         *tcpha;
1700         uint32_t        ports;
1701         conn_t          *connp;
1702         uint16_t        *up;
1703         zoneid_t        zoneid = ira->ira_zoneid;
1704 
1705         ip6h = (ip6_t *)mp->b_rptr;
1706 
1707         switch (protocol) {
1708         case IPPROTO_TCP:
1709                 tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1710                 up = &tcpha->tha_lport;
1711                 ports = *(uint32_t *)up;
1712 
1713                 connfp =
1714                     &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1715                     ports, ipst)];
1716                 mutex_enter(&connfp->connf_lock);
1717                 for (connp = connfp->connf_head; connp != NULL;
1718                     connp = connp->conn_next) {
1719                         if (IPCL_CONN_MATCH_V6(connp, protocol,
1720                             ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1721                             (connp->conn_zoneid == zoneid ||
1722                             connp->conn_allzones ||
1723                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1724                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1725                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1726                                 break;
1727                 }
1728 
1729                 if (connp != NULL) {
1730                         /*
1731                          * We have a fully-bound TCP connection.
1732                          *
1733                          * For labeled systems, there's no need to check the
1734                          * label here.  It's known to be good as we checked
1735                          * before allowing the connection to become bound.
1736                          */
1737                         CONN_INC_REF(connp);
1738                         mutex_exit(&connfp->connf_lock);
1739                         return (connp);
1740                 }
1741 
1742                 mutex_exit(&connfp->connf_lock);
1743 
1744                 lport = up[1];
1745                 bind_connfp =
1746                     &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1747                 mutex_enter(&bind_connfp->connf_lock);
1748                 for (connp = bind_connfp->connf_head; connp != NULL;
1749                     connp = connp->conn_next) {
1750                         if (IPCL_BIND_MATCH_V6(connp, protocol,
1751                             ip6h->ip6_dst, lport) &&
1752                             (connp->conn_zoneid == zoneid ||
1753                             connp->conn_allzones ||
1754                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1755                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1756                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1757                                 break;
1758                 }
1759 
1760                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1761                     !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1762                     ira, connp)) {
1763                         DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1764                             char *, "connp(1) could not receive mp(2)",
1765                             conn_t *, connp, mblk_t *, mp);
1766                         connp = NULL;
1767                 }
1768 
1769                 if (connp != NULL) {
1770                         /* Have a listner at least */
1771                         CONN_INC_REF(connp);
1772                         mutex_exit(&bind_connfp->connf_lock);
1773                         return (connp);
1774                 }
1775 
1776                 mutex_exit(&bind_connfp->connf_lock);
1777                 break;
1778 
1779         case IPPROTO_UDP:
1780                 up = (uint16_t *)&mp->b_rptr[hdr_len];
1781                 lport = up[1];
1782                 fport = up[0];
1783                 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1784                 mutex_enter(&connfp->connf_lock);
1785                 for (connp = connfp->connf_head; connp != NULL;
1786                     connp = connp->conn_next) {
1787                         if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1788                             fport, ip6h->ip6_src) &&
1789                             (connp->conn_zoneid == zoneid ||
1790                             connp->conn_allzones ||
1791                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1792                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1793                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1794                                 break;
1795                 }
1796 
1797                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1798                     !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1799                     ira, connp)) {
1800                         DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1801                             char *, "connp(1) could not receive mp(2)",
1802                             conn_t *, connp, mblk_t *, mp);
1803                         connp = NULL;
1804                 }
1805 
1806                 if (connp != NULL) {
1807                         CONN_INC_REF(connp);
1808                         mutex_exit(&connfp->connf_lock);
1809                         return (connp);
1810                 }
1811 
1812                 /*
1813                  * We shouldn't come here for multicast/broadcast packets
1814                  */
1815                 mutex_exit(&connfp->connf_lock);
1816                 break;
1817         case IPPROTO_ENCAP:
1818         case IPPROTO_IPV6:
1819                 return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1820                     &ip6h->ip6_dst, ipst));
1821         }
1822 
1823         return (NULL);
1824 }
1825 
1826 /*
1827  * wrapper around ipcl_classify_(v4,v6) routines.
1828  */
1829 conn_t *
1830 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1831 {
1832         if (ira->ira_flags & IRAF_IS_IPV4) {
1833                 return (ipcl_classify_v4(mp, ira->ira_protocol,
1834                     ira->ira_ip_hdr_length, ira, ipst));
1835         } else {
1836                 return (ipcl_classify_v6(mp, ira->ira_protocol,
1837                     ira->ira_ip_hdr_length, ira, ipst));
1838         }
1839 }
1840 
1841 /*
1842  * Only used to classify SCTP RAW sockets
1843  */
1844 conn_t *
1845 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1846     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1847 {
1848         connf_t         *connfp;
1849         conn_t          *connp;
1850         in_port_t       lport;
1851         int             ipversion;
1852         const void      *dst;
1853         zoneid_t        zoneid = ira->ira_zoneid;
1854 
1855         lport = ((uint16_t *)&ports)[1];
1856         if (ira->ira_flags & IRAF_IS_IPV4) {
1857                 dst = (const void *)&ipha->ipha_dst;
1858                 ipversion = IPV4_VERSION;
1859         } else {
1860                 dst = (const void *)&ip6h->ip6_dst;
1861                 ipversion = IPV6_VERSION;
1862         }
1863 
1864         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1865         mutex_enter(&connfp->connf_lock);
1866         for (connp = connfp->connf_head; connp != NULL;
1867             connp = connp->conn_next) {
1868                 /* We don't allow v4 fallback for v6 raw socket. */
1869                 if (ipversion != connp->conn_ipversion)
1870                         continue;
1871                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1872                     !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1873                         if (ipversion == IPV4_VERSION) {
1874                                 if (!IPCL_CONN_MATCH(connp, protocol,
1875                                     ipha->ipha_src, ipha->ipha_dst, ports))
1876                                         continue;
1877                         } else {
1878                                 if (!IPCL_CONN_MATCH_V6(connp, protocol,
1879                                     ip6h->ip6_src, ip6h->ip6_dst, ports))
1880                                         continue;
1881                         }
1882                 } else {
1883                         if (ipversion == IPV4_VERSION) {
1884                                 if (!IPCL_BIND_MATCH(connp, protocol,
1885                                     ipha->ipha_dst, lport))
1886                                         continue;
1887                         } else {
1888                                 if (!IPCL_BIND_MATCH_V6(connp, protocol,
1889                                     ip6h->ip6_dst, lport))
1890                                         continue;
1891                         }
1892                 }
1893 
1894                 if (connp->conn_zoneid == zoneid ||
1895                     connp->conn_allzones ||
1896                     ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1897                     (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1898                     (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1899                         break;
1900         }
1901 
1902         if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1903             !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1904                 DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1905                     char *, "connp(1) could not receive mp(2)",
1906                     conn_t *, connp, mblk_t *, mp);
1907                 connp = NULL;
1908         }
1909 
1910         if (connp != NULL)
1911                 goto found;
1912         mutex_exit(&connfp->connf_lock);
1913 
1914         /* Try to look for a wildcard SCTP RAW socket match. */
1915         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1916         mutex_enter(&connfp->connf_lock);
1917         for (connp = connfp->connf_head; connp != NULL;
1918             connp = connp->conn_next) {
1919                 /* We don't allow v4 fallback for v6 raw socket. */
1920                 if (ipversion != connp->conn_ipversion)
1921                         continue;
1922                 if (!IPCL_ZONE_MATCH(connp, zoneid))
1923                         continue;
1924 
1925                 if (ipversion == IPV4_VERSION) {
1926                         if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1927                                 break;
1928                 } else {
1929                         if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1930                                 break;
1931                         }
1932                 }
1933         }
1934 
1935         if (connp != NULL)
1936                 goto found;
1937 
1938         mutex_exit(&connfp->connf_lock);
1939         return (NULL);
1940 
1941 found:
1942         ASSERT(connp != NULL);
1943         CONN_INC_REF(connp);
1944         mutex_exit(&connfp->connf_lock);
1945         return (connp);
1946 }
1947 
1948 /* ARGSUSED */
1949 static int
1950 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1951 {
1952         itc_t   *itc = (itc_t *)buf;
1953         conn_t  *connp = &itc->itc_conn;
1954         tcp_t   *tcp = (tcp_t *)&itc[1];
1955 
1956         bzero(connp, sizeof (conn_t));
1957         bzero(tcp, sizeof (tcp_t));
1958 
1959         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1960         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1961         cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1962         tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1963         if (tcp->tcp_timercache == NULL)
1964                 return (ENOMEM);
1965         connp->conn_tcp = tcp;
1966         connp->conn_flags = IPCL_TCPCONN;
1967         connp->conn_proto = IPPROTO_TCP;
1968         tcp->tcp_connp = connp;
1969         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1970 
1971         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1972         if (connp->conn_ixa == NULL) {
1973                 tcp_timermp_free(tcp);
1974                 return (ENOMEM);
1975         }
1976         connp->conn_ixa->ixa_refcnt = 1;
1977         connp->conn_ixa->ixa_protocol = connp->conn_proto;
1978         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1979         return (0);
1980 }
1981 
1982 /* ARGSUSED */
1983 static void
1984 tcp_conn_destructor(void *buf, void *cdrarg)
1985 {
1986         itc_t   *itc = (itc_t *)buf;
1987         conn_t  *connp = &itc->itc_conn;
1988         tcp_t   *tcp = (tcp_t *)&itc[1];
1989 
1990         ASSERT(connp->conn_flags & IPCL_TCPCONN);
1991         ASSERT(tcp->tcp_connp == connp);
1992         ASSERT(connp->conn_tcp == tcp);
1993         tcp_timermp_free(tcp);
1994         mutex_destroy(&connp->conn_lock);
1995         cv_destroy(&connp->conn_cv);
1996         cv_destroy(&connp->conn_sq_cv);
1997         rw_destroy(&connp->conn_ilg_lock);
1998 
1999         /* Can be NULL if constructor failed */
2000         if (connp->conn_ixa != NULL) {
2001                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2002                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2003                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2004                 ixa_refrele(connp->conn_ixa);
2005         }
2006 }
2007 
2008 /* ARGSUSED */
2009 static int
2010 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2011 {
2012         itc_t   *itc = (itc_t *)buf;
2013         conn_t  *connp = &itc->itc_conn;
2014 
2015         bzero(connp, sizeof (conn_t));
2016         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2017         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2018         connp->conn_flags = IPCL_IPCCONN;
2019         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2020 
2021         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2022         if (connp->conn_ixa == NULL)
2023                 return (ENOMEM);
2024         connp->conn_ixa->ixa_refcnt = 1;
2025         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2026         return (0);
2027 }
2028 
2029 /* ARGSUSED */
2030 static void
2031 ip_conn_destructor(void *buf, void *cdrarg)
2032 {
2033         itc_t   *itc = (itc_t *)buf;
2034         conn_t  *connp = &itc->itc_conn;
2035 
2036         ASSERT(connp->conn_flags & IPCL_IPCCONN);
2037         ASSERT(connp->conn_priv == NULL);
2038         mutex_destroy(&connp->conn_lock);
2039         cv_destroy(&connp->conn_cv);
2040         rw_destroy(&connp->conn_ilg_lock);
2041 
2042         /* Can be NULL if constructor failed */
2043         if (connp->conn_ixa != NULL) {
2044                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2045                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2046                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2047                 ixa_refrele(connp->conn_ixa);
2048         }
2049 }
2050 
2051 /* ARGSUSED */
2052 static int
2053 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2054 {
2055         itc_t   *itc = (itc_t *)buf;
2056         conn_t  *connp = &itc->itc_conn;
2057         udp_t   *udp = (udp_t *)&itc[1];
2058 
2059         bzero(connp, sizeof (conn_t));
2060         bzero(udp, sizeof (udp_t));
2061 
2062         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2063         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2064         connp->conn_udp = udp;
2065         connp->conn_flags = IPCL_UDPCONN;
2066         connp->conn_proto = IPPROTO_UDP;
2067         udp->udp_connp = connp;
2068         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2069         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2070         if (connp->conn_ixa == NULL)
2071                 return (ENOMEM);
2072         connp->conn_ixa->ixa_refcnt = 1;
2073         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2074         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2075         return (0);
2076 }
2077 
2078 /* ARGSUSED */
2079 static void
2080 udp_conn_destructor(void *buf, void *cdrarg)
2081 {
2082         itc_t   *itc = (itc_t *)buf;
2083         conn_t  *connp = &itc->itc_conn;
2084         udp_t   *udp = (udp_t *)&itc[1];
2085 
2086         ASSERT(connp->conn_flags & IPCL_UDPCONN);
2087         ASSERT(udp->udp_connp == connp);
2088         ASSERT(connp->conn_udp == udp);
2089         mutex_destroy(&connp->conn_lock);
2090         cv_destroy(&connp->conn_cv);
2091         rw_destroy(&connp->conn_ilg_lock);
2092 
2093         /* Can be NULL if constructor failed */
2094         if (connp->conn_ixa != NULL) {
2095                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2096                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2097                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2098                 ixa_refrele(connp->conn_ixa);
2099         }
2100 }
2101 
2102 /* ARGSUSED */
2103 static int
2104 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2105 {
2106         itc_t   *itc = (itc_t *)buf;
2107         conn_t  *connp = &itc->itc_conn;
2108         icmp_t  *icmp = (icmp_t *)&itc[1];
2109 
2110         bzero(connp, sizeof (conn_t));
2111         bzero(icmp, sizeof (icmp_t));
2112 
2113         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2114         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2115         connp->conn_icmp = icmp;
2116         connp->conn_flags = IPCL_RAWIPCONN;
2117         connp->conn_proto = IPPROTO_ICMP;
2118         icmp->icmp_connp = connp;
2119         rw_init(&icmp->icmp_bpf_lock, NULL, RW_DEFAULT, NULL);
2120         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2121         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2122         if (connp->conn_ixa == NULL)
2123                 return (ENOMEM);
2124         connp->conn_ixa->ixa_refcnt = 1;
2125         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2126         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2127         return (0);
2128 }
2129 
2130 /* ARGSUSED */
2131 static void
2132 rawip_conn_destructor(void *buf, void *cdrarg)
2133 {
2134         itc_t   *itc = (itc_t *)buf;
2135         conn_t  *connp = &itc->itc_conn;
2136         icmp_t  *icmp = (icmp_t *)&itc[1];
2137 
2138         ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2139         ASSERT(icmp->icmp_connp == connp);
2140         ASSERT(connp->conn_icmp == icmp);
2141         mutex_destroy(&connp->conn_lock);
2142         cv_destroy(&connp->conn_cv);
2143         rw_destroy(&connp->conn_ilg_lock);
2144         rw_destroy(&icmp->icmp_bpf_lock);
2145 
2146         /* Can be NULL if constructor failed */
2147         if (connp->conn_ixa != NULL) {
2148                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2149                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2150                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2151                 ixa_refrele(connp->conn_ixa);
2152         }
2153 }
2154 
2155 /* ARGSUSED */
2156 static int
2157 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2158 {
2159         itc_t   *itc = (itc_t *)buf;
2160         conn_t  *connp = &itc->itc_conn;
2161         rts_t   *rts = (rts_t *)&itc[1];
2162 
2163         bzero(connp, sizeof (conn_t));
2164         bzero(rts, sizeof (rts_t));
2165 
2166         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2167         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2168         connp->conn_rts = rts;
2169         connp->conn_flags = IPCL_RTSCONN;
2170         rts->rts_connp = connp;
2171         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2172         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2173         if (connp->conn_ixa == NULL)
2174                 return (ENOMEM);
2175         connp->conn_ixa->ixa_refcnt = 1;
2176         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2177         return (0);
2178 }
2179 
2180 /* ARGSUSED */
2181 static void
2182 rts_conn_destructor(void *buf, void *cdrarg)
2183 {
2184         itc_t   *itc = (itc_t *)buf;
2185         conn_t  *connp = &itc->itc_conn;
2186         rts_t   *rts = (rts_t *)&itc[1];
2187 
2188         ASSERT(connp->conn_flags & IPCL_RTSCONN);
2189         ASSERT(rts->rts_connp == connp);
2190         ASSERT(connp->conn_rts == rts);
2191         mutex_destroy(&connp->conn_lock);
2192         cv_destroy(&connp->conn_cv);
2193         rw_destroy(&connp->conn_ilg_lock);
2194 
2195         /* Can be NULL if constructor failed */
2196         if (connp->conn_ixa != NULL) {
2197                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2198                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2199                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2200                 ixa_refrele(connp->conn_ixa);
2201         }
2202 }
2203 
2204 /*
2205  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2206  * in the conn_t.
2207  *
2208  * Below we list all the pointers in the conn_t as a documentation aid.
2209  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2210  * If you add any pointers to the conn_t please add an ASSERT here
2211  * and #ifdef it out if it can't be actually asserted to be NULL.
2212  * In any case, we bzero most of the conn_t at the end of the function.
2213  */
2214 void
2215 ipcl_conn_cleanup(conn_t *connp)
2216 {
2217         ip_xmit_attr_t  *ixa;
2218 
2219         ASSERT(connp->conn_latch == NULL);
2220         ASSERT(connp->conn_latch_in_policy == NULL);
2221         ASSERT(connp->conn_latch_in_action == NULL);
2222 #ifdef notdef
2223         ASSERT(connp->conn_rq == NULL);
2224         ASSERT(connp->conn_wq == NULL);
2225 #endif
2226         ASSERT(connp->conn_cred == NULL);
2227         ASSERT(connp->conn_g_fanout == NULL);
2228         ASSERT(connp->conn_g_next == NULL);
2229         ASSERT(connp->conn_g_prev == NULL);
2230         ASSERT(connp->conn_policy == NULL);
2231         ASSERT(connp->conn_fanout == NULL);
2232         ASSERT(connp->conn_next == NULL);
2233         ASSERT(connp->conn_prev == NULL);
2234         ASSERT(connp->conn_oper_pending_ill == NULL);
2235         ASSERT(connp->conn_ilg == NULL);
2236         ASSERT(connp->conn_drain_next == NULL);
2237         ASSERT(connp->conn_drain_prev == NULL);
2238 #ifdef notdef
2239         /* conn_idl is not cleared when removed from idl list */
2240         ASSERT(connp->conn_idl == NULL);
2241 #endif
2242         ASSERT(connp->conn_ipsec_opt_mp == NULL);
2243 #ifdef notdef
2244         /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2245         ASSERT(connp->conn_netstack == NULL);
2246 #endif
2247 
2248         ASSERT(connp->conn_helper_info == NULL);
2249         ASSERT(connp->conn_ixa != NULL);
2250         ixa = connp->conn_ixa;
2251         ASSERT(ixa->ixa_refcnt == 1);
2252         /* Need to preserve ixa_protocol */
2253         ixa_cleanup(ixa);
2254         ixa->ixa_flags = 0;
2255 
2256         /* Clear out the conn_t fields that are not preserved */
2257         bzero(&connp->conn_start_clr,
2258             sizeof (conn_t) -
2259             ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2260 }
2261 
2262 /*
2263  * All conns are inserted in a global multi-list for the benefit of
2264  * walkers. The walk is guaranteed to walk all open conns at the time
2265  * of the start of the walk exactly once. This property is needed to
2266  * achieve some cleanups during unplumb of interfaces. This is achieved
2267  * as follows.
2268  *
2269  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2270  * call the insert and delete functions below at creation and deletion
2271  * time respectively. The conn never moves or changes its position in this
2272  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2273  * won't increase due to walkers, once the conn deletion has started. Note
2274  * that we can't remove the conn from the global list and then wait for
2275  * the refcnt to drop to zero, since walkers would then see a truncated
2276  * list. CONN_INCIPIENT ensures that walkers don't start looking at
2277  * conns until ip_open is ready to make them globally visible.
2278  * The global round robin multi-list locks are held only to get the
2279  * next member/insertion/deletion and contention should be negligible
2280  * if the multi-list is much greater than the number of cpus.
2281  */
2282 void
2283 ipcl_globalhash_insert(conn_t *connp)
2284 {
2285         int     index;
2286         struct connf_s  *connfp;
2287         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
2288 
2289         /*
2290          * No need for atomic here. Approximate even distribution
2291          * in the global lists is sufficient.
2292          */
2293         ipst->ips_conn_g_index++;
2294         index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2295 
2296         connp->conn_g_prev = NULL;
2297         /*
2298          * Mark as INCIPIENT, so that walkers will ignore this
2299          * for now, till ip_open is ready to make it visible globally.
2300          */
2301         connp->conn_state_flags |= CONN_INCIPIENT;
2302 
2303         connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2304         /* Insert at the head of the list */
2305         mutex_enter(&connfp->connf_lock);
2306         connp->conn_g_next = connfp->connf_head;
2307         if (connp->conn_g_next != NULL)
2308                 connp->conn_g_next->conn_g_prev = connp;
2309         connfp->connf_head = connp;
2310 
2311         /* The fanout bucket this conn points to */
2312         connp->conn_g_fanout = connfp;
2313 
2314         mutex_exit(&connfp->connf_lock);
2315 }
2316 
2317 void
2318 ipcl_globalhash_remove(conn_t *connp)
2319 {
2320         struct connf_s  *connfp;
2321 
2322         /*
2323          * We were never inserted in the global multi list.
2324          * IPCL_NONE variety is never inserted in the global multilist
2325          * since it is presumed to not need any cleanup and is transient.
2326          */
2327         if (connp->conn_g_fanout == NULL)
2328                 return;
2329 
2330         connfp = connp->conn_g_fanout;
2331         mutex_enter(&connfp->connf_lock);
2332         if (connp->conn_g_prev != NULL)
2333                 connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2334         else
2335                 connfp->connf_head = connp->conn_g_next;
2336         if (connp->conn_g_next != NULL)
2337                 connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2338         mutex_exit(&connfp->connf_lock);
2339 
2340         /* Better to stumble on a null pointer than to corrupt memory */
2341         connp->conn_g_next = NULL;
2342         connp->conn_g_prev = NULL;
2343         connp->conn_g_fanout = NULL;
2344 }
2345 
2346 /*
2347  * Walk the list of all conn_t's in the system, calling the function provided
2348  * With the specified argument for each.
2349  * Applies to both IPv4 and IPv6.
2350  *
2351  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2352  * conn_oper_pending_ill). To guard against stale pointers
2353  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2354  * unplumbed or removed. New conn_t's that are created while we are walking
2355  * may be missed by this walk, because they are not necessarily inserted
2356  * at the tail of the list. They are new conn_t's and thus don't have any
2357  * stale pointers. The CONN_CLOSING flag ensures that no new reference
2358  * is created to the struct that is going away.
2359  */
2360 void
2361 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2362 {
2363         int     i;
2364         conn_t  *connp;
2365         conn_t  *prev_connp;
2366 
2367         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2368                 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2369                 prev_connp = NULL;
2370                 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2371                 while (connp != NULL) {
2372                         mutex_enter(&connp->conn_lock);
2373                         if (connp->conn_state_flags &
2374                             (CONN_CONDEMNED | CONN_INCIPIENT)) {
2375                                 mutex_exit(&connp->conn_lock);
2376                                 connp = connp->conn_g_next;
2377                                 continue;
2378                         }
2379                         CONN_INC_REF_LOCKED(connp);
2380                         mutex_exit(&connp->conn_lock);
2381                         mutex_exit(
2382                             &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2383                         (*func)(connp, arg);
2384                         if (prev_connp != NULL)
2385                                 CONN_DEC_REF(prev_connp);
2386                         mutex_enter(
2387                             &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2388                         prev_connp = connp;
2389                         connp = connp->conn_g_next;
2390                 }
2391                 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2392                 if (prev_connp != NULL)
2393                         CONN_DEC_REF(prev_connp);
2394         }
2395 }
2396 
2397 /*
2398  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2399  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2400  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2401  * (peer tcp in ESTABLISHED state).
2402  */
2403 conn_t *
2404 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2405     ip_stack_t *ipst)
2406 {
2407         uint32_t ports;
2408         uint16_t *pports = (uint16_t *)&ports;
2409         connf_t *connfp;
2410         conn_t  *tconnp;
2411         boolean_t zone_chk;
2412 
2413         /*
2414          * If either the source of destination address is loopback, then
2415          * both endpoints must be in the same Zone.  Otherwise, both of
2416          * the addresses are system-wide unique (tcp is in ESTABLISHED
2417          * state) and the endpoints may reside in different Zones.
2418          */
2419         zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2420             ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2421 
2422         pports[0] = tcpha->tha_fport;
2423         pports[1] = tcpha->tha_lport;
2424 
2425         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2426             ports, ipst)];
2427 
2428         mutex_enter(&connfp->connf_lock);
2429         for (tconnp = connfp->connf_head; tconnp != NULL;
2430             tconnp = tconnp->conn_next) {
2431 
2432                 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2433                     ipha->ipha_dst, ipha->ipha_src, ports) &&
2434                     tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2435                     (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2436 
2437                         ASSERT(tconnp != connp);
2438                         CONN_INC_REF(tconnp);
2439                         mutex_exit(&connfp->connf_lock);
2440                         return (tconnp);
2441                 }
2442         }
2443         mutex_exit(&connfp->connf_lock);
2444         return (NULL);
2445 }
2446 
2447 /*
2448  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2449  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2450  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2451  * (peer tcp in ESTABLISHED state).
2452  */
2453 conn_t *
2454 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2455     ip_stack_t *ipst)
2456 {
2457         uint32_t ports;
2458         uint16_t *pports = (uint16_t *)&ports;
2459         connf_t *connfp;
2460         conn_t  *tconnp;
2461         boolean_t zone_chk;
2462 
2463         /*
2464          * If either the source of destination address is loopback, then
2465          * both endpoints must be in the same Zone.  Otherwise, both of
2466          * the addresses are system-wide unique (tcp is in ESTABLISHED
2467          * state) and the endpoints may reside in different Zones.  We
2468          * don't do Zone check for link local address(es) because the
2469          * current Zone implementation treats each link local address as
2470          * being unique per system node, i.e. they belong to global Zone.
2471          */
2472         zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2473             IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2474 
2475         pports[0] = tcpha->tha_fport;
2476         pports[1] = tcpha->tha_lport;
2477 
2478         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2479             ports, ipst)];
2480 
2481         mutex_enter(&connfp->connf_lock);
2482         for (tconnp = connfp->connf_head; tconnp != NULL;
2483             tconnp = tconnp->conn_next) {
2484 
2485                 /* We skip conn_bound_if check here as this is loopback tcp */
2486                 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2487                     ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2488                     tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2489                     (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2490 
2491                         ASSERT(tconnp != connp);
2492                         CONN_INC_REF(tconnp);
2493                         mutex_exit(&connfp->connf_lock);
2494                         return (tconnp);
2495                 }
2496         }
2497         mutex_exit(&connfp->connf_lock);
2498         return (NULL);
2499 }
2500 
2501 /*
2502  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2503  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2504  * Only checks for connected entries i.e. no INADDR_ANY checks.
2505  */
2506 conn_t *
2507 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2508     ip_stack_t *ipst)
2509 {
2510         uint32_t ports;
2511         uint16_t *pports;
2512         connf_t *connfp;
2513         conn_t  *tconnp;
2514 
2515         pports = (uint16_t *)&ports;
2516         pports[0] = tcpha->tha_fport;
2517         pports[1] = tcpha->tha_lport;
2518 
2519         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2520             ports, ipst)];
2521 
2522         mutex_enter(&connfp->connf_lock);
2523         for (tconnp = connfp->connf_head; tconnp != NULL;
2524             tconnp = tconnp->conn_next) {
2525 
2526                 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2527                     ipha->ipha_dst, ipha->ipha_src, ports) &&
2528                     tconnp->conn_tcp->tcp_state >= min_state) {
2529 
2530                         CONN_INC_REF(tconnp);
2531                         mutex_exit(&connfp->connf_lock);
2532                         return (tconnp);
2533                 }
2534         }
2535         mutex_exit(&connfp->connf_lock);
2536         return (NULL);
2537 }
2538 
2539 /*
2540  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2541  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2542  * Only checks for connected entries i.e. no INADDR_ANY checks.
2543  * Match on ifindex in addition to addresses.
2544  */
2545 conn_t *
2546 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2547     uint_t ifindex, ip_stack_t *ipst)
2548 {
2549         tcp_t   *tcp;
2550         uint32_t ports;
2551         uint16_t *pports;
2552         connf_t *connfp;
2553         conn_t  *tconnp;
2554 
2555         pports = (uint16_t *)&ports;
2556         pports[0] = tcpha->tha_fport;
2557         pports[1] = tcpha->tha_lport;
2558 
2559         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2560             ports, ipst)];
2561 
2562         mutex_enter(&connfp->connf_lock);
2563         for (tconnp = connfp->connf_head; tconnp != NULL;
2564             tconnp = tconnp->conn_next) {
2565 
2566                 tcp = tconnp->conn_tcp;
2567                 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2568                     ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2569                     tcp->tcp_state >= min_state &&
2570                     (tconnp->conn_bound_if == 0 ||
2571                     tconnp->conn_bound_if == ifindex)) {
2572 
2573                         CONN_INC_REF(tconnp);
2574                         mutex_exit(&connfp->connf_lock);
2575                         return (tconnp);
2576                 }
2577         }
2578         mutex_exit(&connfp->connf_lock);
2579         return (NULL);
2580 }
2581 
2582 /*
2583  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2584  * a listener when changing state.
2585  */
2586 conn_t *
2587 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2588     ip_stack_t *ipst)
2589 {
2590         connf_t         *bind_connfp;
2591         conn_t          *connp;
2592         tcp_t           *tcp;
2593 
2594         /*
2595          * Avoid false matches for packets sent to an IP destination of
2596          * all zeros.
2597          */
2598         if (laddr == 0)
2599                 return (NULL);
2600 
2601         ASSERT(zoneid != ALL_ZONES);
2602 
2603         bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2604         mutex_enter(&bind_connfp->connf_lock);
2605         for (connp = bind_connfp->connf_head; connp != NULL;
2606             connp = connp->conn_next) {
2607                 tcp = connp->conn_tcp;
2608                 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2609                     IPCL_ZONE_MATCH(connp, zoneid) &&
2610                     (tcp->tcp_listener == NULL)) {
2611                         CONN_INC_REF(connp);
2612                         mutex_exit(&bind_connfp->connf_lock);
2613                         return (connp);
2614                 }
2615         }
2616         mutex_exit(&bind_connfp->connf_lock);
2617         return (NULL);
2618 }
2619 
2620 /*
2621  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2622  * a listener when changing state.
2623  */
2624 conn_t *
2625 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2626     zoneid_t zoneid, ip_stack_t *ipst)
2627 {
2628         connf_t         *bind_connfp;
2629         conn_t          *connp = NULL;
2630         tcp_t           *tcp;
2631 
2632         /*
2633          * Avoid false matches for packets sent to an IP destination of
2634          * all zeros.
2635          */
2636         if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2637                 return (NULL);
2638 
2639         ASSERT(zoneid != ALL_ZONES);
2640 
2641         bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2642         mutex_enter(&bind_connfp->connf_lock);
2643         for (connp = bind_connfp->connf_head; connp != NULL;
2644             connp = connp->conn_next) {
2645                 tcp = connp->conn_tcp;
2646                 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2647                     IPCL_ZONE_MATCH(connp, zoneid) &&
2648                     (connp->conn_bound_if == 0 ||
2649                     connp->conn_bound_if == ifindex) &&
2650                     tcp->tcp_listener == NULL) {
2651                         CONN_INC_REF(connp);
2652                         mutex_exit(&bind_connfp->connf_lock);
2653                         return (connp);
2654                 }
2655         }
2656         mutex_exit(&bind_connfp->connf_lock);
2657         return (NULL);
2658 }
2659 
2660 /*
2661  * ipcl_get_next_conn
2662  *      get the next entry in the conn global list
2663  *      and put a reference on the next_conn.
2664  *      decrement the reference on the current conn.
2665  *
2666  * This is an iterator based walker function that also provides for
2667  * some selection by the caller. It walks through the conn_hash bucket
2668  * searching for the next valid connp in the list, and selects connections
2669  * that are neither closed nor condemned. It also REFHOLDS the conn
2670  * thus ensuring that the conn exists when the caller uses the conn.
2671  */
2672 conn_t *
2673 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2674 {
2675         conn_t  *next_connp;
2676 
2677         if (connfp == NULL)
2678                 return (NULL);
2679 
2680         mutex_enter(&connfp->connf_lock);
2681 
2682         next_connp = (connp == NULL) ?
2683             connfp->connf_head : connp->conn_g_next;
2684 
2685         while (next_connp != NULL) {
2686                 mutex_enter(&next_connp->conn_lock);
2687                 if (!(next_connp->conn_flags & conn_flags) ||
2688                     (next_connp->conn_state_flags &
2689                     (CONN_CONDEMNED | CONN_INCIPIENT))) {
2690                         /*
2691                          * This conn has been condemned or
2692                          * is closing, or the flags don't match
2693                          */
2694                         mutex_exit(&next_connp->conn_lock);
2695                         next_connp = next_connp->conn_g_next;
2696                         continue;
2697                 }
2698                 CONN_INC_REF_LOCKED(next_connp);
2699                 mutex_exit(&next_connp->conn_lock);
2700                 break;
2701         }
2702 
2703         mutex_exit(&connfp->connf_lock);
2704 
2705         if (connp != NULL)
2706                 CONN_DEC_REF(connp);
2707 
2708         return (next_connp);
2709 }
2710 
2711 #ifdef CONN_DEBUG
2712 /*
2713  * Trace of the last NBUF refhold/refrele
2714  */
2715 int
2716 conn_trace_ref(conn_t *connp)
2717 {
2718         int     last;
2719         conn_trace_t    *ctb;
2720 
2721         ASSERT(MUTEX_HELD(&connp->conn_lock));
2722         last = connp->conn_trace_last;
2723         last++;
2724         if (last == CONN_TRACE_MAX)
2725                 last = 0;
2726 
2727         ctb = &connp->conn_trace_buf[last];
2728         ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2729         connp->conn_trace_last = last;
2730         return (1);
2731 }
2732 
2733 int
2734 conn_untrace_ref(conn_t *connp)
2735 {
2736         int     last;
2737         conn_trace_t    *ctb;
2738 
2739         ASSERT(MUTEX_HELD(&connp->conn_lock));
2740         last = connp->conn_trace_last;
2741         last++;
2742         if (last == CONN_TRACE_MAX)
2743                 last = 0;
2744 
2745         ctb = &connp->conn_trace_buf[last];
2746         ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2747         connp->conn_trace_last = last;
2748         return (1);
2749 }
2750 #endif