1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 
  26 /*
  27  * IP PACKET CLASSIFIER
  28  *
  29  * The IP packet classifier provides mapping between IP packets and persistent
  30  * connection state for connection-oriented protocols. It also provides
  31  * interface for managing connection states.
  32  *
  33  * The connection state is kept in conn_t data structure and contains, among
  34  * other things:
  35  *
  36  *      o local/remote address and ports
  37  *      o Transport protocol
  38  *      o squeue for the connection (for TCP only)
  39  *      o reference counter
  40  *      o Connection state
  41  *      o hash table linkage
  42  *      o interface/ire information
  43  *      o credentials
  44  *      o ipsec policy
  45  *      o send and receive functions.
  46  *      o mutex lock.
  47  *
  48  * Connections use a reference counting scheme. They are freed when the
  49  * reference counter drops to zero. A reference is incremented when connection
  50  * is placed in a list or table, when incoming packet for the connection arrives
  51  * and when connection is processed via squeue (squeue processing may be
  52  * asynchronous and the reference protects the connection from being destroyed
  53  * before its processing is finished).
  54  *
  55  * conn_recv is used to pass up packets to the ULP.
  56  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
  57  * a listener, and changes to tcp_input_listener as the listener has picked a
  58  * good squeue. For other cases it is set to tcp_input_data.
  59  *
  60  * conn_recvicmp is used to pass up ICMP errors to the ULP.
  61  *
  62  * Classifier uses several hash tables:
  63  *
  64  *      ipcl_conn_fanout:       contains all TCP connections in CONNECTED state
  65  *      ipcl_bind_fanout:       contains all connections in BOUND state
  66  *      ipcl_proto_fanout_v4:   IPv4 protocol fanout
  67  *      ipcl_proto_fanout_v6:   IPv6 protocol fanout
  68  *      ipcl_udp_fanout:        contains all UDP connections
  69  *      ipcl_iptun_fanout:      contains all IP tunnel connections
  70  *      ipcl_globalhash_fanout: contains all connections
  71  *
  72  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
  73  * which need to view all existing connections.
  74  *
  75  * All tables are protected by per-bucket locks. When both per-bucket lock and
  76  * connection lock need to be held, the per-bucket lock should be acquired
  77  * first, followed by the connection lock.
  78  *
  79  * All functions doing search in one of these tables increment a reference
  80  * counter on the connection found (if any). This reference should be dropped
  81  * when the caller has finished processing the connection.
  82  *
  83  *
  84  * INTERFACES:
  85  * ===========
  86  *
  87  * Connection Lookup:
  88  * ------------------
  89  *
  90  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
  91  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
  92  *
  93  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
  94  * it can't find any associated connection. If the connection is found, its
  95  * reference counter is incremented.
  96  *
  97  *      mp:     mblock, containing packet header. The full header should fit
  98  *              into a single mblock. It should also contain at least full IP
  99  *              and TCP or UDP header.
 100  *
 101  *      protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 102  *
 103  *      hdr_len: The size of IP header. It is used to find TCP or UDP header in
 104  *               the packet.
 105  *
 106  *      ira->ira_zoneid: The zone in which the returned connection must be; the
 107  *              zoneid corresponding to the ire_zoneid on the IRE located for
 108  *              the packet's destination address.
 109  *
 110  *      ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
 111  *              IRAF_TX_SHARED_ADDR flags
 112  *
 113  *      For TCP connections, the lookup order is as follows:
 114  *              5-tuple {src, dst, protocol, local port, remote port}
 115  *                      lookup in ipcl_conn_fanout table.
 116  *              3-tuple {dst, remote port, protocol} lookup in
 117  *                      ipcl_bind_fanout table.
 118  *
 119  *      For UDP connections, a 5-tuple {src, dst, protocol, local port,
 120  *      remote port} lookup is done on ipcl_udp_fanout. Note that,
 121  *      these interfaces do not handle cases where a packet belongs
 122  *      to multiple UDP clients, which is handled in IP itself.
 123  *
 124  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 125  * determine which actual zone gets the segment.  This is used only in a
 126  * labeled environment.  The matching rules are:
 127  *
 128  *      - If it's not a multilevel port, then the label on the packet selects
 129  *        the zone.  Unlabeled packets are delivered to the global zone.
 130  *
 131  *      - If it's a multilevel port, then only the zone registered to receive
 132  *        packets on that port matches.
 133  *
 134  * Also, in a labeled environment, packet labels need to be checked.  For fully
 135  * bound TCP connections, we can assume that the packet label was checked
 136  * during connection establishment, and doesn't need to be checked on each
 137  * packet.  For others, though, we need to check for strict equality or, for
 138  * multilevel ports, membership in the range or set.  This part currently does
 139  * a tnrh lookup on each packet, but could be optimized to use cached results
 140  * if that were necessary.  (SCTP doesn't come through here, but if it did,
 141  * we would apply the same rules as TCP.)
 142  *
 143  * An implication of the above is that fully-bound TCP sockets must always use
 144  * distinct 4-tuples; they can't be discriminated by label alone.
 145  *
 146  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 147  * as there's no connection set-up handshake and no shared state.
 148  *
 149  * Labels on looped-back packets within a single zone do not need to be
 150  * checked, as all processes in the same zone have the same label.
 151  *
 152  * Finally, for unlabeled packets received by a labeled system, special rules
 153  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 154  * socket in the zone whose label matches the default label of the sender, if
 155  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 156  * receiver's label must dominate the sender's default label.
 157  *
 158  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
 159  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 160  *                                       ip_stack);
 161  *
 162  *      Lookup routine to find an exact match for {src, dst, local port,
 163  *      remote port} for TCP connections in ipcl_conn_fanout. The address and
 164  *      ports are read from the IP and TCP header respectively.
 165  *
 166  * conn_t       *ipcl_lookup_listener_v4(lport, laddr, protocol,
 167  *                                       zoneid, ip_stack);
 168  * conn_t       *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 169  *                                       zoneid, ip_stack);
 170  *
 171  *      Lookup routine to find a listener with the tuple {lport, laddr,
 172  *      protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 173  *      parameter interface index is also compared.
 174  *
 175  * void ipcl_walk(func, arg, ip_stack)
 176  *
 177  *      Apply 'func' to every connection available. The 'func' is called as
 178  *      (*func)(connp, arg). The walk is non-atomic so connections may be
 179  *      created and destroyed during the walk. The CONN_CONDEMNED and
 180  *      CONN_INCIPIENT flags ensure that connections which are newly created
 181  *      or being destroyed are not selected by the walker.
 182  *
 183  * Table Updates
 184  * -------------
 185  *
 186  * int ipcl_conn_insert(connp);
 187  * int ipcl_conn_insert_v4(connp);
 188  * int ipcl_conn_insert_v6(connp);
 189  *
 190  *      Insert 'connp' in the ipcl_conn_fanout.
 191  *      Arguments :
 192  *              connp           conn_t to be inserted
 193  *
 194  *      Return value :
 195  *              0               if connp was inserted
 196  *              EADDRINUSE      if the connection with the same tuple
 197  *                              already exists.
 198  *
 199  * int ipcl_bind_insert(connp);
 200  * int ipcl_bind_insert_v4(connp);
 201  * int ipcl_bind_insert_v6(connp);
 202  *
 203  *      Insert 'connp' in ipcl_bind_fanout.
 204  *      Arguments :
 205  *              connp           conn_t to be inserted
 206  *
 207  *
 208  * void ipcl_hash_remove(connp);
 209  *
 210  *      Removes the 'connp' from the connection fanout table.
 211  *
 212  * Connection Creation/Destruction
 213  * -------------------------------
 214  *
 215  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 216  *
 217  *      Creates a new conn based on the type flag, inserts it into
 218  *      globalhash table.
 219  *
 220  *      type:   This flag determines the type of conn_t which needs to be
 221  *              created i.e., which kmem_cache it comes from.
 222  *              IPCL_TCPCONN    indicates a TCP connection
 223  *              IPCL_SCTPCONN   indicates a SCTP connection
 224  *              IPCL_UDPCONN    indicates a UDP conn_t.
 225  *              IPCL_RAWIPCONN  indicates a RAWIP/ICMP conn_t.
 226  *              IPCL_RTSCONN    indicates a RTS conn_t.
 227  *              IPCL_IPCCONN    indicates all other connections.
 228  *
 229  * void ipcl_conn_destroy(connp)
 230  *
 231  *      Destroys the connection state, removes it from the global
 232  *      connection hash table and frees its memory.
 233  */
 234 
 235 #include <sys/types.h>
 236 #include <sys/stream.h>
 237 #include <sys/stropts.h>
 238 #include <sys/sysmacros.h>
 239 #include <sys/strsubr.h>
 240 #include <sys/strsun.h>
 241 #define _SUN_TPI_VERSION 2
 242 #include <sys/ddi.h>
 243 #include <sys/cmn_err.h>
 244 #include <sys/debug.h>
 245 
 246 #include <sys/systm.h>
 247 #include <sys/param.h>
 248 #include <sys/kmem.h>
 249 #include <sys/isa_defs.h>
 250 #include <inet/common.h>
 251 #include <netinet/ip6.h>
 252 #include <netinet/icmp6.h>
 253 
 254 #include <inet/ip.h>
 255 #include <inet/ip_if.h>
 256 #include <inet/ip_ire.h>
 257 #include <inet/ip6.h>
 258 #include <inet/ip_ndp.h>
 259 #include <inet/ip_impl.h>
 260 #include <inet/udp_impl.h>
 261 #include <inet/sctp_ip.h>
 262 #include <inet/sctp/sctp_impl.h>
 263 #include <inet/rawip_impl.h>
 264 #include <inet/rts_impl.h>
 265 #include <inet/iptun/iptun_impl.h>
 266 
 267 #include <sys/cpuvar.h>
 268 
 269 #include <inet/ipclassifier.h>
 270 #include <inet/tcp.h>
 271 #include <inet/ipsec_impl.h>
 272 
 273 #include <sys/tsol/tnet.h>
 274 #include <sys/sockio.h>
 275 
 276 /* Old value for compatibility. Settable in /etc/system */
 277 uint_t tcp_conn_hash_size = 0;  /* used only if ipcl_conn_hash_size is 0 */
 278 
 279 /* New value. Zero means choose automatically.  Settable in /etc/system */
 280 uint_t ipcl_conn_hash_size = 0;
 281 uint_t ipcl_conn_hash_memfactor = 8192; /* free-memory bytes per bucket */
 282 uint_t ipcl_conn_hash_maxsize = 82500;  /* cap on the automatic choice */
 283 
 284 /* bind/udp fanout table size (number of buckets) */
 285 uint_t ipcl_bind_fanout_size = 512;
 286 uint_t ipcl_udp_fanout_size = 16384;
 287 
 288 /* Raw socket fanout size.  Must be a power of 2. */
 289 uint_t ipcl_raw_fanout_size = 256;
 290 
 291 /*
 292  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 293  * expect that most large deployments would have hundreds of tunnels, and
 294  * thousands in the extreme case.
 295  */
 296 uint_t ipcl_iptun_fanout_size = 6143;
 297 
 298 /*
 299  * Power of 2^N Primes useful for hashing for N of 0-28; entry N of the
 300  * list below is the nearest prime <= 2^N - 2^(N-2).
 301  */
 302 /* Entries 0-2 and the last entry are 0; ipcl_init() treats a 0 result as out of range. */
 303 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,  \
 304                 6143, 12281, 24571, 49139, 98299, 196597, 393209,       \
 305                 786431, 1572853, 3145721, 6291449, 12582893, 25165813,  \
 306                 50331599, 100663291, 201326557, 0}
 307 
 308 /*
 309  * Wrapper union to ensure that conn and what follows it (tcp_t, etc)
 310  * are aligned on cache lines; the caches in ipcl_g_init() are sized with it.
 311  */
 312 typedef union itc_s {
 313         conn_t  itc_conn;       /* the connection state itself */
 314         char    itcu_filler[CACHE_ALIGN(conn_s)]; /* sets the union's minimum size */
 315 } itc_t;
 316 
 317 struct kmem_cache  *tcp_conn_cache;         /* objects are itc_t + tcp_t */
 318 struct kmem_cache  *ip_conn_cache;          /* objects are a bare conn_t */
 319 extern struct kmem_cache  *sctp_conn_cache; /* not created by ipcl_g_init() */
 320 struct kmem_cache  *udp_conn_cache;         /* objects are itc_t + udp_t */
 321 struct kmem_cache  *rawip_conn_cache;       /* objects are itc_t + icmp_t */
 322 struct kmem_cache  *rts_conn_cache;         /* objects are itc_t + rts_t */
 323 
 324 extern void     tcp_timermp_free(tcp_t *);
 325 extern mblk_t   *tcp_timermp_alloc(int);
 326 /* kmem cache constructor/destructor pairs, registered in ipcl_g_init(). */
 327 static int      ip_conn_constructor(void *, void *, int);
 328 static void     ip_conn_destructor(void *, void *);
 329 
 330 static int      tcp_conn_constructor(void *, void *, int);
 331 static void     tcp_conn_destructor(void *, void *);
 332 
 333 static int      udp_conn_constructor(void *, void *, int);
 334 static void     udp_conn_destructor(void *, void *);
 335 
 336 static int      rawip_conn_constructor(void *, void *, int);
 337 static void     rawip_conn_destructor(void *, void *);
 338 
 339 static int      rts_conn_constructor(void *, void *, int);
 340 static void     rts_conn_destructor(void *, void *);
 341 
 342 /*
 343  * Global (for all stack instances) init routine
 344  */
 345 void
 346 ipcl_g_init(void)
 347 {
 348         ip_conn_cache = kmem_cache_create("ip_conn_cache",
 349             sizeof (conn_t), CACHE_ALIGN_SIZE,
 350             ip_conn_constructor, ip_conn_destructor,
 351             NULL, NULL, NULL, 0);
 352 
 353         tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
 354             sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
 355             tcp_conn_constructor, tcp_conn_destructor,
 356             tcp_conn_reclaim, NULL, NULL, 0);
 357 
 358         udp_conn_cache = kmem_cache_create("udp_conn_cache",
 359             sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
 360             udp_conn_constructor, udp_conn_destructor,
 361             NULL, NULL, NULL, 0);
 362 
 363         rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
 364             sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
 365             rawip_conn_constructor, rawip_conn_destructor,
 366             NULL, NULL, NULL, 0);
 367 
 368         rts_conn_cache = kmem_cache_create("rts_conn_cache",
 369             sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
 370             rts_conn_constructor, rts_conn_destructor,
 371             NULL, NULL, NULL, 0);
 372 }
 373 
 374 /*
 375  * ipclassifier intialization routine, sets up hash tables.
 376  */
 377 void
 378 ipcl_init(ip_stack_t *ipst)
 379 {
 380         int i;
 381         int sizes[] = P2Ps();
 382 
 383         /*
 384          * Calculate size of conn fanout table from /etc/system settings
 385          */
 386         if (ipcl_conn_hash_size != 0) {
 387                 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
 388         } else if (tcp_conn_hash_size != 0) {
 389                 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
 390         } else {
 391                 extern pgcnt_t freemem;
 392 
 393                 ipst->ips_ipcl_conn_fanout_size =
 394                     (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
 395 
 396                 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
 397                         ipst->ips_ipcl_conn_fanout_size =
 398                             ipcl_conn_hash_maxsize;
 399                 }
 400         }
 401 
 402         for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
 403                 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
 404                         break;
 405                 }
 406         }
 407         if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
 408                 /* Out of range, use the 2^16 value */
 409                 ipst->ips_ipcl_conn_fanout_size = sizes[16];
 410         }
 411 
 412         /* Take values from /etc/system */
 413         ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
 414         ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
 415         ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
 416         ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
 417 
 418         ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
 419 
 420         ipst->ips_ipcl_conn_fanout = kmem_zalloc(
 421             ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
 422 
 423         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 424                 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
 425                     MUTEX_DEFAULT, NULL);
 426         }
 427 
 428         ipst->ips_ipcl_bind_fanout = kmem_zalloc(
 429             ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
 430 
 431         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 432                 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
 433                     MUTEX_DEFAULT, NULL);
 434         }
 435 
 436         ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
 437             sizeof (connf_t), KM_SLEEP);
 438         for (i = 0; i < IPPROTO_MAX; i++) {
 439                 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
 440                     MUTEX_DEFAULT, NULL);
 441         }
 442 
 443         ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
 444             sizeof (connf_t), KM_SLEEP);
 445         for (i = 0; i < IPPROTO_MAX; i++) {
 446                 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
 447                     MUTEX_DEFAULT, NULL);
 448         }
 449 
 450         ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
 451         mutex_init(&ipst->ips_rts_clients->connf_lock,
 452             NULL, MUTEX_DEFAULT, NULL);
 453 
 454         ipst->ips_ipcl_udp_fanout = kmem_zalloc(
 455             ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
 456         for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 457                 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
 458                     MUTEX_DEFAULT, NULL);
 459         }
 460 
 461         ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
 462             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
 463         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 464                 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
 465                     MUTEX_DEFAULT, NULL);
 466         }
 467 
 468         ipst->ips_ipcl_raw_fanout = kmem_zalloc(
 469             ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
 470         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 471                 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
 472                     MUTEX_DEFAULT, NULL);
 473         }
 474 
 475         ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
 476             sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
 477         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 478                 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
 479                     NULL, MUTEX_DEFAULT, NULL);
 480         }
 481 }
 482 
 483 void
 484 ipcl_g_destroy(void)
 485 {
 486         kmem_cache_destroy(ip_conn_cache);
 487         kmem_cache_destroy(tcp_conn_cache);
 488         kmem_cache_destroy(udp_conn_cache);
 489         kmem_cache_destroy(rawip_conn_cache);
 490         kmem_cache_destroy(rts_conn_cache);
 491 }
 492 
 493 /*
 494  * All user-level and kernel use of the stack must be gone
 495  * by now.
 496  */
 497 void
 498 ipcl_destroy(ip_stack_t *ipst)
 499 {
 500         int i;
 501 
 502         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 503                 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
 504                 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
 505         }
 506         kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
 507             sizeof (connf_t));
 508         ipst->ips_ipcl_conn_fanout = NULL;
 509 
 510         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 511                 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
 512                 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
 513         }
 514         kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
 515             sizeof (connf_t));
 516         ipst->ips_ipcl_bind_fanout = NULL;
 517 
 518         for (i = 0; i < IPPROTO_MAX; i++) {
 519                 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
 520                 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
 521         }
 522         kmem_free(ipst->ips_ipcl_proto_fanout_v4,
 523             IPPROTO_MAX * sizeof (connf_t));
 524         ipst->ips_ipcl_proto_fanout_v4 = NULL;
 525 
 526         for (i = 0; i < IPPROTO_MAX; i++) {
 527                 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
 528                 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
 529         }
 530         kmem_free(ipst->ips_ipcl_proto_fanout_v6,
 531             IPPROTO_MAX * sizeof (connf_t));
 532         ipst->ips_ipcl_proto_fanout_v6 = NULL;
 533 
 534         for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 535                 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
 536                 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
 537         }
 538         kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
 539             sizeof (connf_t));
 540         ipst->ips_ipcl_udp_fanout = NULL;
 541 
 542         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 543                 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
 544                 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
 545         }
 546         kmem_free(ipst->ips_ipcl_iptun_fanout,
 547             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
 548         ipst->ips_ipcl_iptun_fanout = NULL;
 549 
 550         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 551                 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
 552                 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
 553         }
 554         kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
 555             sizeof (connf_t));
 556         ipst->ips_ipcl_raw_fanout = NULL;
 557 
 558         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 559                 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
 560                 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
 561         }
 562         kmem_free(ipst->ips_ipcl_globalhash_fanout,
 563             sizeof (connf_t) * CONN_G_HASH_SIZE);
 564         ipst->ips_ipcl_globalhash_fanout = NULL;
 565 
 566         ASSERT(ipst->ips_rts_clients->connf_head == NULL);
 567         mutex_destroy(&ipst->ips_rts_clients->connf_lock);
 568         kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
 569         ipst->ips_rts_clients = NULL;
 570 }
 571 
 572 /*
 573  * conn creation routine. initialize the conn, sets the reference
 574  * and inserts it in the global hash table.
 575  */
 576 conn_t *
 577 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
 578 {
 579         conn_t  *connp;
 580         struct kmem_cache *conn_cache;
 581 
 582         switch (type) {
 583         case IPCL_SCTPCONN:
 584                 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
 585                         return (NULL);
 586                 sctp_conn_init(connp);
 587                 netstack_hold(ns);
 588                 connp->conn_netstack = ns;
 589                 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 590                 connp->conn_ixa->ixa_conn_id = (long)connp;
 591                 ipcl_globalhash_insert(connp);
 592                 return (connp);
 593 
 594         case IPCL_TCPCONN:
 595                 conn_cache = tcp_conn_cache;
 596                 break;
 597 
 598         case IPCL_UDPCONN:
 599                 conn_cache = udp_conn_cache;
 600                 break;
 601 
 602         case IPCL_RAWIPCONN:
 603                 conn_cache = rawip_conn_cache;
 604                 break;
 605 
 606         case IPCL_RTSCONN:
 607                 conn_cache = rts_conn_cache;
 608                 break;
 609 
 610         case IPCL_IPCCONN:
 611                 conn_cache = ip_conn_cache;
 612                 break;
 613 
 614         default:
 615                 connp = NULL;
 616                 ASSERT(0);
 617         }
 618 
 619         if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
 620                 return (NULL);
 621 
 622         connp->conn_ref = 1;
 623         netstack_hold(ns);
 624         connp->conn_netstack = ns;
 625         connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 626         connp->conn_ixa->ixa_conn_id = (long)connp;
 627         ipcl_globalhash_insert(connp);
 628         return (connp);
 629 }
 630 
 631 void
 632 ipcl_conn_destroy(conn_t *connp)
 633 {
 634         mblk_t  *mp;
 635         netstack_t      *ns = connp->conn_netstack;
 636 
 637         ASSERT(!MUTEX_HELD(&connp->conn_lock));
 638         ASSERT(connp->conn_ref == 0);
 639         ASSERT(connp->conn_ioctlref == 0);
 640 
 641         DTRACE_PROBE1(conn__destroy, conn_t *, connp);
 642 
 643         if (connp->conn_cred != NULL) {
 644                 crfree(connp->conn_cred);
 645                 connp->conn_cred = NULL;
 646                 /* ixa_cred done in ipcl_conn_cleanup below */
 647         }
 648 
 649         if (connp->conn_ht_iphc != NULL) {
 650                 kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
 651                 connp->conn_ht_iphc = NULL;
 652                 connp->conn_ht_iphc_allocated = 0;
 653                 connp->conn_ht_iphc_len = 0;
 654                 connp->conn_ht_ulp = NULL;
 655                 connp->conn_ht_ulp_len = 0;
 656         }
 657         ip_pkt_free(&connp->conn_xmit_ipp);
 658 
 659         ipcl_globalhash_remove(connp);
 660 
 661         if (connp->conn_latch != NULL) {
 662                 IPLATCH_REFRELE(connp->conn_latch);
 663                 connp->conn_latch = NULL;
 664         }
 665         if (connp->conn_latch_in_policy != NULL) {
 666                 IPPOL_REFRELE(connp->conn_latch_in_policy);
 667                 connp->conn_latch_in_policy = NULL;
 668         }
 669         if (connp->conn_latch_in_action != NULL) {
 670                 IPACT_REFRELE(connp->conn_latch_in_action);
 671                 connp->conn_latch_in_action = NULL;
 672         }
 673         if (connp->conn_policy != NULL) {
 674                 IPPH_REFRELE(connp->conn_policy, ns);
 675                 connp->conn_policy = NULL;
 676         }
 677 
 678         if (connp->conn_ipsec_opt_mp != NULL) {
 679                 freemsg(connp->conn_ipsec_opt_mp);
 680                 connp->conn_ipsec_opt_mp = NULL;
 681         }
 682 
 683         if (connp->conn_flags & IPCL_TCPCONN) {
 684                 tcp_t *tcp = connp->conn_tcp;
 685 
 686                 tcp_free(tcp);
 687                 mp = tcp->tcp_timercache;
 688 
 689                 tcp->tcp_tcps = NULL;
 690 
 691                 /*
 692                  * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
 693                  * the mblk.
 694                  */
 695                 if (tcp->tcp_rsrv_mp != NULL) {
 696                         freeb(tcp->tcp_rsrv_mp);
 697                         tcp->tcp_rsrv_mp = NULL;
 698                         mutex_destroy(&tcp->tcp_rsrv_mp_lock);
 699                 }
 700 
 701                 ipcl_conn_cleanup(connp);
 702                 connp->conn_flags = IPCL_TCPCONN;
 703                 if (ns != NULL) {
 704                         ASSERT(tcp->tcp_tcps == NULL);
 705                         connp->conn_netstack = NULL;
 706                         connp->conn_ixa->ixa_ipst = NULL;
 707                         netstack_rele(ns);
 708                 }
 709 
 710                 bzero(tcp, sizeof (tcp_t));
 711 
 712                 tcp->tcp_timercache = mp;
 713                 tcp->tcp_connp = connp;
 714                 kmem_cache_free(tcp_conn_cache, connp);
 715                 return;
 716         }
 717 
 718         if (connp->conn_flags & IPCL_SCTPCONN) {
 719                 ASSERT(ns != NULL);
 720                 sctp_free(connp);
 721                 return;
 722         }
 723 
 724         ipcl_conn_cleanup(connp);
 725         if (ns != NULL) {
 726                 connp->conn_netstack = NULL;
 727                 connp->conn_ixa->ixa_ipst = NULL;
 728                 netstack_rele(ns);
 729         }
 730 
 731         /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
 732         if (connp->conn_flags & IPCL_UDPCONN) {
 733                 connp->conn_flags = IPCL_UDPCONN;
 734                 kmem_cache_free(udp_conn_cache, connp);
 735         } else if (connp->conn_flags & IPCL_RAWIPCONN) {
 736                 connp->conn_flags = IPCL_RAWIPCONN;
 737                 connp->conn_proto = IPPROTO_ICMP;
 738                 connp->conn_ixa->ixa_protocol = connp->conn_proto;
 739                 kmem_cache_free(rawip_conn_cache, connp);
 740         } else if (connp->conn_flags & IPCL_RTSCONN) {
 741                 connp->conn_flags = IPCL_RTSCONN;
 742                 kmem_cache_free(rts_conn_cache, connp);
 743         } else {
 744                 connp->conn_flags = IPCL_IPCCONN;
 745                 ASSERT(connp->conn_flags & IPCL_IPCCONN);
 746                 ASSERT(connp->conn_priv == NULL);
 747                 kmem_cache_free(ip_conn_cache, connp);
 748         }
 749 }
 750 
 751 /*
 752  * Running in cluster mode - deregister listener information
 753  */
 754 static void
 755 ipcl_conn_unlisten(conn_t *connp)
 756 {
 757         ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
 758         ASSERT(connp->conn_lport != 0);
 759 
 760         if (cl_inet_unlisten != NULL) {
 761                 sa_family_t     addr_family;
 762                 uint8_t         *laddrp;
 763 
 764                 if (connp->conn_ipversion == IPV6_VERSION) {
 765                         addr_family = AF_INET6;
 766                         laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
 767                 } else {
 768                         addr_family = AF_INET;
 769                         laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
 770                 }
 771                 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
 772                     IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
 773         }
 774         connp->conn_flags &= ~IPCL_CL_LISTENER;
 775 }
 776 
/*
 * Unlink a conn from the fanout bucket it is currently on, if any.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 *
 * Behavior, in order:
 *  - ASSERTs that the caller does not hold conn_lock.
 *  - Reads conn_fanout (before any lock is taken); if it is NULL the conn is
 *    not on a list and nothing else happens.
 *  - Under the bucket's connf_lock: unlinks the conn from the doubly linked
 *    list (fixing connf_head when the conn was first), clears conn_fanout,
 *    conn_next and conn_prev, and sets IPCL_REMOVED.
 *  - If the conn is flagged IPCL_CL_LISTENER, calls ipcl_conn_unlisten()
 *    while still holding connf_lock.
 *  - Releases one conn reference with CONN_DEC_REF (the insert paths in this
 *    file take one with CONN_INC_REF), then drops connf_lock.
 *
 * Usage notes: the macro expands to a bare brace block that declares its own
 * local named connfp, and it evaluates its argument many times, so pass a
 * plain side-effect-free pointer expression.
 */
#define IPCL_HASH_REMOVE(connp) {                                       \
        connf_t *connfp = (connp)->conn_fanout;                              \
        ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));                      \
        if (connfp != NULL) {                                           \
                mutex_enter(&connfp->connf_lock);                        \
                if ((connp)->conn_next != NULL)                              \
                        (connp)->conn_next->conn_prev =                   \
                            (connp)->conn_prev;                              \
                if ((connp)->conn_prev != NULL)                              \
                        (connp)->conn_prev->conn_next =                   \
                            (connp)->conn_next;                              \
                else                                                    \
                        connfp->connf_head = (connp)->conn_next;  \
                (connp)->conn_fanout = NULL;                         \
                (connp)->conn_next = NULL;                           \
                (connp)->conn_prev = NULL;                           \
                (connp)->conn_flags |= IPCL_REMOVED;                 \
                if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)       \
                        ipcl_conn_unlisten((connp));                    \
                CONN_DEC_REF((connp));                                  \
                mutex_exit(&connfp->connf_lock);                 \
        }                                                               \
}
 805 
 806 void
 807 ipcl_hash_remove(conn_t *connp)
 808 {
 809         uint8_t         protocol = connp->conn_proto;
 810 
 811         IPCL_HASH_REMOVE(connp);
 812         if (protocol == IPPROTO_RSVP)
 813                 ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
 814 }
 815 
 816 /*
 817  * The whole purpose of this function is allow removal of
 818  * a conn_t from the connected hash for timewait reclaim.
 819  * This is essentially a TW reclaim fastpath where timewait
 820  * collector checks under fanout lock (so no one else can
 821  * get access to the conn_t) that refcnt is 2 i.e. one for
 822  * TCP and one for the classifier hash list. If ref count
 823  * is indeed 2, we can just remove the conn under lock and
 824  * avoid cleaning up the conn under squeue. This gives us
 825  * improved performance.
 826  */
 827 void
 828 ipcl_hash_remove_locked(conn_t *connp, connf_t  *connfp)
 829 {
 830         ASSERT(MUTEX_HELD(&connfp->connf_lock));
 831         ASSERT(MUTEX_HELD(&connp->conn_lock));
 832         ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
 833 
 834         if ((connp)->conn_next != NULL) {
 835                 (connp)->conn_next->conn_prev = (connp)->conn_prev;
 836         }
 837         if ((connp)->conn_prev != NULL) {
 838                 (connp)->conn_prev->conn_next = (connp)->conn_next;
 839         } else {
 840                 connfp->connf_head = (connp)->conn_next;
 841         }
 842         (connp)->conn_fanout = NULL;
 843         (connp)->conn_next = NULL;
 844         (connp)->conn_prev = NULL;
 845         (connp)->conn_flags |= IPCL_REMOVED;
 846         ASSERT((connp)->conn_ref == 2);
 847         (connp)->conn_ref--;
 848 }
 849 
/*
 * Link connp at the head of the fanout bucket connfp.
 *
 * Every use in this file has connfp->connf_lock held around the macro; the
 * macro itself does not take or check that lock.  The conn must not be on
 * any list already (conn_fanout, conn_next and conn_prev are ASSERTed NULL).
 *
 * Besides the list linkage, the macro records the bucket in conn_fanout,
 * clears IPCL_REMOVED and sets IPCL_CONNECTED in conn_flags, and takes a
 * conn reference on behalf of the hash list via CONN_INC_REF.
 *
 * Both arguments are evaluated several times.
 */
#define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {              \
        ASSERT((connp)->conn_fanout == NULL);                                \
        ASSERT((connp)->conn_next == NULL);                          \
        ASSERT((connp)->conn_prev == NULL);                          \
        if ((connfp)->connf_head != NULL) {                          \
                (connfp)->connf_head->conn_prev = (connp);                \
                (connp)->conn_next = (connfp)->connf_head;                \
        }                                                               \
        (connp)->conn_fanout = (connfp);                             \
        (connfp)->connf_head = (connp);                                      \
        (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
            IPCL_CONNECTED;                                             \
        CONN_INC_REF(connp);                                            \
}
 864 
/*
 * Move connp onto the head of bucket connfp as a connected entry.
 *
 * The conn is first unlinked from whatever bucket it is on (if any) with
 * IPCL_HASH_REMOVE, then connfp->connf_lock is taken and the conn is linked
 * with IPCL_HASH_INSERT_CONNECTED_LOCKED.  The removal and the insertion are
 * two separate lock sections.  No duplicate check is made here.
 */
#define IPCL_HASH_INSERT_CONNECTED(connfp, connp) {                     \
        IPCL_HASH_REMOVE((connp));                                      \
        mutex_enter(&(connfp)->connf_lock);                              \
        IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);               \
        mutex_exit(&(connfp)->connf_lock);                               \
}
 871 
 872 /*
 873  * When inserting bound or wildcard entries into the hash, ordering rules are
 874  * used to facilitate timely and correct lookups.  The order is as follows:
 875  * 1. Entries bound to a specific address
 876  * 2. Entries bound to INADDR_ANY
 877  * 3. Entries bound to ADDR_UNSPECIFIED
 878  * Entries in a category which share conn_lport (such as those using
 879  * SO_REUSEPORT) will be ordered such that the newest inserted is first.
 880  */
 881 
 882 void
 883 ipcl_hash_insert_bound(connf_t *connfp, conn_t *connp)
 884 {
 885         conn_t *pconnp, *nconnp;
 886 
 887         IPCL_HASH_REMOVE(connp);
 888         mutex_enter(&connfp->connf_lock);
 889         nconnp = connfp->connf_head;
 890         pconnp = NULL;
 891         while (nconnp != NULL) {
 892                 /*
 893                  * Walk though entries associated with the fanout until one is
 894                  * found which fulfills any of these conditions:
 895                  * 1. Listen address of ADDR_ANY/ADDR_UNSPECIFIED
 896                  * 2. Listen port the same as connp
 897                  */
 898                 if (_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6) ||
 899                     connp->conn_lport == nconnp->conn_lport)
 900                         break;
 901                 pconnp = nconnp;
 902                 nconnp = nconnp->conn_next;
 903         }
 904         if (pconnp != NULL) {
 905                 pconnp->conn_next = connp;
 906                 connp->conn_prev = pconnp;
 907         } else {
 908                 connfp->connf_head = connp;
 909         }
 910         if (nconnp != NULL) {
 911                 connp->conn_next = nconnp;
 912                 nconnp->conn_prev = connp;
 913         }
 914         connp->conn_fanout = connfp;
 915         connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
 916         CONN_INC_REF(connp);
 917         mutex_exit(&connfp->connf_lock);
 918 }
 919 
 920 void
 921 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
 922 {
 923         conn_t **list, *prev, *next;
 924         conn_t *pconnp = NULL, *nconnp;
 925         boolean_t isv4mapped = IN6_IS_ADDR_V4MAPPED(&connp->conn_laddr_v6);
 926 
 927         IPCL_HASH_REMOVE(connp);
 928         mutex_enter(&connfp->connf_lock);
 929         nconnp = connfp->connf_head;
 930         pconnp = NULL;
 931         while (nconnp != NULL) {
 932                 if (IN6_IS_ADDR_V4MAPPED_ANY(&nconnp->conn_laddr_v6) &&
 933                     isv4mapped && connp->conn_lport == nconnp->conn_lport)
 934                         break;
 935                 if (IN6_IS_ADDR_UNSPECIFIED(&nconnp->conn_laddr_v6) &&
 936                     (isv4mapped ||
 937                     connp->conn_lport == nconnp->conn_lport))
 938                         break;
 939 
 940                 pconnp = nconnp;
 941                 nconnp = nconnp->conn_next;
 942         }
 943         if (pconnp != NULL) {
 944                 pconnp->conn_next = connp;
 945                 connp->conn_prev = pconnp;
 946         } else {
 947                 connfp->connf_head = connp;
 948         }
 949         if (nconnp != NULL) {
 950                 connp->conn_next = nconnp;
 951                 nconnp->conn_prev = connp;
 952         }
 953         connp->conn_fanout = connfp;
 954         connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
 955         CONN_INC_REF(connp);
 956         mutex_exit(&connfp->connf_lock);
 957 }
 958 
 959 /*
 960  * Because the classifier is used to classify inbound packets, the destination
 961  * address is meant to be our local tunnel address (tunnel source), and the
 962  * source the remote tunnel address (tunnel destination).
 963  *
 964  * Note that conn_proto can't be used for fanout since the upper protocol
 965  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
 966  */
 967 conn_t *
 968 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
 969 {
 970         connf_t *connfp;
 971         conn_t  *connp;
 972 
 973         /* first look for IPv4 tunnel links */
 974         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
 975         mutex_enter(&connfp->connf_lock);
 976         for (connp = connfp->connf_head; connp != NULL;
 977             connp = connp->conn_next) {
 978                 if (IPCL_IPTUN_MATCH(connp, *dst, *src))
 979                         break;
 980         }
 981         if (connp != NULL)
 982                 goto done;
 983 
 984         mutex_exit(&connfp->connf_lock);
 985 
 986         /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
 987         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
 988             INADDR_ANY)];
 989         mutex_enter(&connfp->connf_lock);
 990         for (connp = connfp->connf_head; connp != NULL;
 991             connp = connp->conn_next) {
 992                 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
 993                         break;
 994         }
 995 done:
 996         if (connp != NULL)
 997                 CONN_INC_REF(connp);
 998         mutex_exit(&connfp->connf_lock);
 999         return (connp);
1000 }
1001 
1002 conn_t *
1003 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
1004 {
1005         connf_t *connfp;
1006         conn_t  *connp;
1007 
1008         /* Look for an IPv6 tunnel link */
1009         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
1010         mutex_enter(&connfp->connf_lock);
1011         for (connp = connfp->connf_head; connp != NULL;
1012             connp = connp->conn_next) {
1013                 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
1014                         CONN_INC_REF(connp);
1015                         break;
1016                 }
1017         }
1018         mutex_exit(&connfp->connf_lock);
1019         return (connp);
1020 }
1021 
1022 /*
1023  * This function is used only for inserting SCTP raw socket now.
1024  * This may change later.
1025  *
1026  * Note that only one raw socket can be bound to a port.  The param
1027  * lport is in network byte order.
1028  */
1029 static int
1030 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1031 {
1032         connf_t *connfp;
1033         conn_t  *oconnp;
1034         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1035 
1036         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1037 
1038         /* Check for existing raw socket already bound to the port. */
1039         mutex_enter(&connfp->connf_lock);
1040         for (oconnp = connfp->connf_head; oconnp != NULL;
1041             oconnp = oconnp->conn_next) {
1042                 if (oconnp->conn_lport == lport &&
1043                     oconnp->conn_zoneid == connp->conn_zoneid &&
1044                     oconnp->conn_family == connp->conn_family &&
1045                     ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1046                     IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1047                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1048                     IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1049                     IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1050                     &connp->conn_laddr_v6))) {
1051                         break;
1052                 }
1053         }
1054         mutex_exit(&connfp->connf_lock);
1055         if (oconnp != NULL)
1056                 return (EADDRNOTAVAIL);
1057 
1058         if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1059             IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1060                 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1061                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1062                         ipcl_hash_insert_wildcard(connfp, connp);
1063                 } else {
1064                         ipcl_hash_insert_bound(connfp, connp);
1065                 }
1066         } else {
1067                 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1068         }
1069         return (0);
1070 }
1071 
1072 static int
1073 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1074 {
1075         connf_t *connfp;
1076         conn_t  *tconnp;
1077         ipaddr_t laddr = connp->conn_laddr_v4;
1078         ipaddr_t faddr = connp->conn_faddr_v4;
1079 
1080         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1081         mutex_enter(&connfp->connf_lock);
1082         for (tconnp = connfp->connf_head; tconnp != NULL;
1083             tconnp = tconnp->conn_next) {
1084                 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1085                         /* A tunnel is already bound to these addresses. */
1086                         mutex_exit(&connfp->connf_lock);
1087                         return (EADDRINUSE);
1088                 }
1089         }
1090         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1091         mutex_exit(&connfp->connf_lock);
1092         return (0);
1093 }
1094 
1095 static int
1096 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1097 {
1098         connf_t *connfp;
1099         conn_t  *tconnp;
1100         in6_addr_t *laddr = &connp->conn_laddr_v6;
1101         in6_addr_t *faddr = &connp->conn_faddr_v6;
1102 
1103         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1104         mutex_enter(&connfp->connf_lock);
1105         for (tconnp = connfp->connf_head; tconnp != NULL;
1106             tconnp = tconnp->conn_next) {
1107                 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1108                         /* A tunnel is already bound to these addresses. */
1109                         mutex_exit(&connfp->connf_lock);
1110                         return (EADDRINUSE);
1111                 }
1112         }
1113         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1114         mutex_exit(&connfp->connf_lock);
1115         return (0);
1116 }
1117 
1118 /*
1119  * Check for a MAC exemption conflict on a labeled system.  Note that for
1120  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1121  * transport layer.  This check is for binding all other protocols.
1122  *
1123  * Returns true if there's a conflict.
1124  */
1125 static boolean_t
1126 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1127 {
1128         connf_t *connfp;
1129         conn_t *tconn;
1130 
1131         connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1132         mutex_enter(&connfp->connf_lock);
1133         for (tconn = connfp->connf_head; tconn != NULL;
1134             tconn = tconn->conn_next) {
1135                 /* We don't allow v4 fallback for v6 raw socket */
1136                 if (connp->conn_family != tconn->conn_family)
1137                         continue;
1138                 /* If neither is exempt, then there's no conflict */
1139                 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1140                     (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1141                         continue;
1142                 /* We are only concerned about sockets for a different zone */
1143                 if (connp->conn_zoneid == tconn->conn_zoneid)
1144                         continue;
1145                 /* If both are bound to different specific addrs, ok */
1146                 if (connp->conn_laddr_v4 != INADDR_ANY &&
1147                     tconn->conn_laddr_v4 != INADDR_ANY &&
1148                     connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1149                         continue;
1150                 /* These two conflict; fail */
1151                 break;
1152         }
1153         mutex_exit(&connfp->connf_lock);
1154         return (tconn != NULL);
1155 }
1156 
1157 static boolean_t
1158 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1159 {
1160         connf_t *connfp;
1161         conn_t *tconn;
1162 
1163         connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1164         mutex_enter(&connfp->connf_lock);
1165         for (tconn = connfp->connf_head; tconn != NULL;
1166             tconn = tconn->conn_next) {
1167                 /* We don't allow v4 fallback for v6 raw socket */
1168                 if (connp->conn_family != tconn->conn_family)
1169                         continue;
1170                 /* If neither is exempt, then there's no conflict */
1171                 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1172                     (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1173                         continue;
1174                 /* We are only concerned about sockets for a different zone */
1175                 if (connp->conn_zoneid == tconn->conn_zoneid)
1176                         continue;
1177                 /* If both are bound to different addrs, ok */
1178                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1179                     !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1180                     !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1181                     &tconn->conn_laddr_v6))
1182                         continue;
1183                 /* These two conflict; fail */
1184                 break;
1185         }
1186         mutex_exit(&connfp->connf_lock);
1187         return (tconn != NULL);
1188 }
1189 
1190 /*
1191  * (v4, v6) bind hash insertion routines
1192  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1193  */
1194 
1195 int
1196 ipcl_bind_insert(conn_t *connp)
1197 {
1198         if (connp->conn_ipversion == IPV6_VERSION)
1199                 return (ipcl_bind_insert_v6(connp));
1200         else
1201                 return (ipcl_bind_insert_v4(connp));
1202 }
1203 
1204 int
1205 ipcl_bind_insert_v4(conn_t *connp)
1206 {
1207         connf_t *connfp;
1208         int     ret = 0;
1209         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1210         uint16_t        lport = connp->conn_lport;
1211         uint8_t         protocol = connp->conn_proto;
1212 
1213         if (IPCL_IS_IPTUN(connp))
1214                 return (ipcl_iptun_hash_insert(connp, ipst));
1215 
1216         switch (protocol) {
1217         default:
1218                 if (is_system_labeled() &&
1219                     check_exempt_conflict_v4(connp, ipst))
1220                         return (EADDRINUSE);
1221                 /* FALLTHROUGH */
1222         case IPPROTO_UDP:
1223                 if (protocol == IPPROTO_UDP) {
1224                         connfp = &ipst->ips_ipcl_udp_fanout[
1225                             IPCL_UDP_HASH(lport, ipst)];
1226                 } else {
1227                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1228                 }
1229 
1230                 if (connp->conn_faddr_v4 != INADDR_ANY) {
1231                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1232                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1233                         ipcl_hash_insert_bound(connfp, connp);
1234                 } else {
1235                         ipcl_hash_insert_wildcard(connfp, connp);
1236                 }
1237                 if (protocol == IPPROTO_RSVP)
1238                         ill_set_inputfn_all(ipst);
1239                 break;
1240 
1241         case IPPROTO_TCP:
1242                 /* Insert it in the Bind Hash */
1243                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1244                 connfp = &ipst->ips_ipcl_bind_fanout[
1245                     IPCL_BIND_HASH(lport, ipst)];
1246                 if (connp->conn_laddr_v4 != INADDR_ANY) {
1247                         ipcl_hash_insert_bound(connfp, connp);
1248                 } else {
1249                         ipcl_hash_insert_wildcard(connfp, connp);
1250                 }
1251                 if (cl_inet_listen != NULL) {
1252                         ASSERT(connp->conn_ipversion == IPV4_VERSION);
1253                         connp->conn_flags |= IPCL_CL_LISTENER;
1254                         (*cl_inet_listen)(
1255                             connp->conn_netstack->netstack_stackid,
1256                             IPPROTO_TCP, AF_INET,
1257                             (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1258                 }
1259                 break;
1260 
1261         case IPPROTO_SCTP:
1262                 ret = ipcl_sctp_hash_insert(connp, lport);
1263                 break;
1264         }
1265 
1266         return (ret);
1267 }
1268 
1269 int
1270 ipcl_bind_insert_v6(conn_t *connp)
1271 {
1272         connf_t         *connfp;
1273         int             ret = 0;
1274         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1275         uint16_t        lport = connp->conn_lport;
1276         uint8_t         protocol = connp->conn_proto;
1277 
1278         if (IPCL_IS_IPTUN(connp)) {
1279                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1280         }
1281 
1282         switch (protocol) {
1283         default:
1284                 if (is_system_labeled() &&
1285                     check_exempt_conflict_v6(connp, ipst))
1286                         return (EADDRINUSE);
1287                 /* FALLTHROUGH */
1288         case IPPROTO_UDP:
1289                 if (protocol == IPPROTO_UDP) {
1290                         connfp = &ipst->ips_ipcl_udp_fanout[
1291                             IPCL_UDP_HASH(lport, ipst)];
1292                 } else {
1293                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1294                 }
1295 
1296                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1297                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1298                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1299                         ipcl_hash_insert_bound(connfp, connp);
1300                 } else {
1301                         ipcl_hash_insert_wildcard(connfp, connp);
1302                 }
1303                 break;
1304 
1305         case IPPROTO_TCP:
1306                 /* Insert it in the Bind Hash */
1307                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1308                 connfp = &ipst->ips_ipcl_bind_fanout[
1309                     IPCL_BIND_HASH(lport, ipst)];
1310                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1311                         ipcl_hash_insert_bound(connfp, connp);
1312                 } else {
1313                         ipcl_hash_insert_wildcard(connfp, connp);
1314                 }
1315                 if (cl_inet_listen != NULL) {
1316                         sa_family_t     addr_family;
1317                         uint8_t         *laddrp;
1318 
1319                         if (connp->conn_ipversion == IPV6_VERSION) {
1320                                 addr_family = AF_INET6;
1321                                 laddrp =
1322                                     (uint8_t *)&connp->conn_bound_addr_v6;
1323                         } else {
1324                                 addr_family = AF_INET;
1325                                 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1326                         }
1327                         connp->conn_flags |= IPCL_CL_LISTENER;
1328                         (*cl_inet_listen)(
1329                             connp->conn_netstack->netstack_stackid,
1330                             IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1331                 }
1332                 break;
1333 
1334         case IPPROTO_SCTP:
1335                 ret = ipcl_sctp_hash_insert(connp, lport);
1336                 break;
1337         }
1338 
1339         return (ret);
1340 }
1341 
1342 /*
1343  * ipcl_conn_hash insertion routines.
1344  * The caller has already set conn_proto and the addresses/ports in the conn_t.
1345  */
1346 
1347 int
1348 ipcl_conn_insert(conn_t *connp)
1349 {
1350         if (connp->conn_ipversion == IPV6_VERSION)
1351                 return (ipcl_conn_insert_v6(connp));
1352         else
1353                 return (ipcl_conn_insert_v4(connp));
1354 }
1355 
1356 int
1357 ipcl_conn_insert_v4(conn_t *connp)
1358 {
1359         connf_t         *connfp;
1360         conn_t          *tconnp;
1361         int             ret = 0;
1362         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1363         uint16_t        lport = connp->conn_lport;
1364         uint8_t         protocol = connp->conn_proto;
1365 
1366         if (IPCL_IS_IPTUN(connp))
1367                 return (ipcl_iptun_hash_insert(connp, ipst));
1368 
1369         switch (protocol) {
1370         case IPPROTO_TCP:
1371                 /*
1372                  * For TCP, we check whether the connection tuple already
1373                  * exists before allowing the connection to proceed.  We
1374                  * also allow indexing on the zoneid. This is to allow
1375                  * multiple shared stack zones to have the same tcp
1376                  * connection tuple. In practice this only happens for
1377                  * INADDR_LOOPBACK as it's the only local address which
1378                  * doesn't have to be unique.
1379                  */
1380                 connfp = &ipst->ips_ipcl_conn_fanout[
1381                     IPCL_CONN_HASH(connp->conn_faddr_v4,
1382                     connp->conn_ports, ipst)];
1383                 mutex_enter(&connfp->connf_lock);
1384                 for (tconnp = connfp->connf_head; tconnp != NULL;
1385                     tconnp = tconnp->conn_next) {
1386                         if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1387                             connp->conn_faddr_v4, connp->conn_laddr_v4,
1388                             connp->conn_ports) &&
1389                             IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1390                                 /* Already have a conn. bail out */
1391                                 mutex_exit(&connfp->connf_lock);
1392                                 return (EADDRINUSE);
1393                         }
1394                 }
1395                 if (connp->conn_fanout != NULL) {
1396                         /*
1397                          * Probably a XTI/TLI application trying to do a
1398                          * rebind. Let it happen.
1399                          */
1400                         mutex_exit(&connfp->connf_lock);
1401                         IPCL_HASH_REMOVE(connp);
1402                         mutex_enter(&connfp->connf_lock);
1403                 }
1404 
1405                 ASSERT(connp->conn_recv != NULL);
1406                 ASSERT(connp->conn_recvicmp != NULL);
1407 
1408                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1409                 mutex_exit(&connfp->connf_lock);
1410                 break;
1411 
1412         case IPPROTO_SCTP:
1413                 /*
1414                  * The raw socket may have already been bound, remove it
1415                  * from the hash first.
1416                  */
1417                 IPCL_HASH_REMOVE(connp);
1418                 ret = ipcl_sctp_hash_insert(connp, lport);
1419                 break;
1420 
1421         default:
1422                 /*
1423                  * Check for conflicts among MAC exempt bindings.  For
1424                  * transports with port numbers, this is done by the upper
1425                  * level per-transport binding logic.  For all others, it's
1426                  * done here.
1427                  */
1428                 if (is_system_labeled() &&
1429                     check_exempt_conflict_v4(connp, ipst))
1430                         return (EADDRINUSE);
1431                 /* FALLTHROUGH */
1432 
1433         case IPPROTO_UDP:
1434                 if (protocol == IPPROTO_UDP) {
1435                         connfp = &ipst->ips_ipcl_udp_fanout[
1436                             IPCL_UDP_HASH(lport, ipst)];
1437                 } else {
1438                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1439                 }
1440 
1441                 if (connp->conn_faddr_v4 != INADDR_ANY) {
1442                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1443                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1444                         ipcl_hash_insert_bound(connfp, connp);
1445                 } else {
1446                         ipcl_hash_insert_wildcard(connfp, connp);
1447                 }
1448                 break;
1449         }
1450 
1451         return (ret);
1452 }
1453 
1454 int
1455 ipcl_conn_insert_v6(conn_t *connp)
1456 {
1457         connf_t         *connfp;
1458         conn_t          *tconnp;
1459         int             ret = 0;
1460         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1461         uint16_t        lport = connp->conn_lport;
1462         uint8_t         protocol = connp->conn_proto;
1463         uint_t          ifindex = connp->conn_bound_if;
1464 
1465         if (IPCL_IS_IPTUN(connp))
1466                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1467 
1468         switch (protocol) {
1469         case IPPROTO_TCP:
1470 
1471                 /*
1472                  * For tcp, we check whether the connection tuple already
1473                  * exists before allowing the connection to proceed.  We
1474                  * also allow indexing on the zoneid. This is to allow
1475                  * multiple shared stack zones to have the same tcp
1476                  * connection tuple. In practice this only happens for
1477                  * ipv6_loopback as it's the only local address which
1478                  * doesn't have to be unique.
1479                  */
1480                 connfp = &ipst->ips_ipcl_conn_fanout[
1481                     IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1482                     ipst)];
1483                 mutex_enter(&connfp->connf_lock);
1484                 for (tconnp = connfp->connf_head; tconnp != NULL;
1485                     tconnp = tconnp->conn_next) {
1486                         /* NOTE: need to match zoneid. Bug in onnv-gate */
1487                         if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1488                             connp->conn_faddr_v6, connp->conn_laddr_v6,
1489                             connp->conn_ports) &&
1490                             (tconnp->conn_bound_if == 0 ||
1491                             tconnp->conn_bound_if == ifindex) &&
1492                             IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1493                                 /* Already have a conn. bail out */
1494                                 mutex_exit(&connfp->connf_lock);
1495                                 return (EADDRINUSE);
1496                         }
1497                 }
1498                 if (connp->conn_fanout != NULL) {
1499                         /*
1500                          * Probably a XTI/TLI application trying to do a
1501                          * rebind. Let it happen.
1502                          */
1503                         mutex_exit(&connfp->connf_lock);
1504                         IPCL_HASH_REMOVE(connp);
1505                         mutex_enter(&connfp->connf_lock);
1506                 }
1507                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1508                 mutex_exit(&connfp->connf_lock);
1509                 break;
1510 
1511         case IPPROTO_SCTP:
1512                 IPCL_HASH_REMOVE(connp);
1513                 ret = ipcl_sctp_hash_insert(connp, lport);
1514                 break;
1515 
1516         default:
1517                 if (is_system_labeled() &&
1518                     check_exempt_conflict_v6(connp, ipst))
1519                         return (EADDRINUSE);
1520                 /* FALLTHROUGH */
1521         case IPPROTO_UDP:
1522                 if (protocol == IPPROTO_UDP) {
1523                         connfp = &ipst->ips_ipcl_udp_fanout[
1524                             IPCL_UDP_HASH(lport, ipst)];
1525                 } else {
1526                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1527                 }
1528 
1529                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1530                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1531                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1532                         ipcl_hash_insert_bound(connfp, connp);
1533                 } else {
1534                         ipcl_hash_insert_wildcard(connfp, connp);
1535                 }
1536                 break;
1537         }
1538 
1539         return (ret);
1540 }
1541 
1542 /*
1543  * v4 packet classifying function. looks up the fanout table to
1544  * find the conn, the packet belongs to. returns the conn with
1545  * the reference held, null otherwise.
1546  *
1547  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1548  * Lookup" comment block are applied.  Labels are also checked as described
1549  * above.  If the packet is from the inside (looped back), and is from the same
1550  * zone, then label checks are omitted.
1551  */
1552 conn_t *
1553 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1554     ip_recv_attr_t *ira, ip_stack_t *ipst)
1555 {
1556         ipha_t  *ipha;
1557         connf_t *connfp, *bind_connfp;
1558         uint16_t lport;
1559         uint16_t fport;
1560         uint32_t ports;
1561         conn_t  *connp;
1562         uint16_t  *up;
1563         zoneid_t        zoneid = ira->ira_zoneid;
1564 
1565         ipha = (ipha_t *)mp->b_rptr;
1566         up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1567 
1568         switch (protocol) {
1569         case IPPROTO_TCP:
1570                 ports = *(uint32_t *)up;
1571                 connfp =
1572                     &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1573                     ports, ipst)];
1574                 mutex_enter(&connfp->connf_lock);
1575                 for (connp = connfp->connf_head; connp != NULL;
1576                     connp = connp->conn_next) {
1577                         if (IPCL_CONN_MATCH(connp, protocol,
1578                             ipha->ipha_src, ipha->ipha_dst, ports) &&
1579                             (connp->conn_zoneid == zoneid ||
1580                             connp->conn_allzones ||
1581                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1582                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1583                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1584                                 break;
1585                 }
1586 
1587                 if (connp != NULL) {
1588                         /*
1589                          * We have a fully-bound TCP connection.
1590                          *
1591                          * For labeled systems, there's no need to check the
1592                          * label here.  It's known to be good as we checked
1593                          * before allowing the connection to become bound.
1594                          */
1595                         CONN_INC_REF(connp);
1596                         mutex_exit(&connfp->connf_lock);
1597                         return (connp);
1598                 }
1599 
1600                 mutex_exit(&connfp->connf_lock);
1601                 lport = up[1];
1602                 bind_connfp =
1603                     &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1604                 mutex_enter(&bind_connfp->connf_lock);
1605                 for (connp = bind_connfp->connf_head; connp != NULL;
1606                     connp = connp->conn_next) {
1607                         if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1608                             lport) &&
1609                             (connp->conn_zoneid == zoneid ||
1610                             connp->conn_allzones ||
1611                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1612                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1613                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1614                                 break;
1615                 }
1616 
1617                 /*
1618                  * If the matching connection is SLP on a private address, then
1619                  * the label on the packet must match the local zone's label.
1620                  * Otherwise, it must be in the label range defined by tnrh.
1621                  * This is ensured by tsol_receive_local.
1622                  *
1623                  * Note that we don't check tsol_receive_local for
1624                  * the connected case.
1625                  */
1626                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1627                     !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1628                     ira, connp)) {
1629                         DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1630                             char *, "connp(1) could not receive mp(2)",
1631                             conn_t *, connp, mblk_t *, mp);
1632                         connp = NULL;
1633                 }
1634 
1635                 if (connp != NULL) {
1636                         /* Have a listener at least */
1637                         CONN_INC_REF(connp);
1638                         mutex_exit(&bind_connfp->connf_lock);
1639                         return (connp);
1640                 }
1641 
1642                 mutex_exit(&bind_connfp->connf_lock);
1643                 break;
1644 
1645         case IPPROTO_UDP:
1646                 lport = up[1];
1647                 fport = up[0];
1648                 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1649                 mutex_enter(&connfp->connf_lock);
1650                 for (connp = connfp->connf_head; connp != NULL;
1651                     connp = connp->conn_next) {
1652                         if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1653                             fport, ipha->ipha_src) &&
1654                             (connp->conn_zoneid == zoneid ||
1655                             connp->conn_allzones ||
1656                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1657                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1658                                 break;
1659                 }
1660 
1661                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1662                     !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1663                     ira, connp)) {
1664                         DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1665                             char *, "connp(1) could not receive mp(2)",
1666                             conn_t *, connp, mblk_t *, mp);
1667                         connp = NULL;
1668                 }
1669 
1670                 if (connp != NULL) {
1671                         CONN_INC_REF(connp);
1672                         mutex_exit(&connfp->connf_lock);
1673                         return (connp);
1674                 }
1675 
1676                 /*
1677                  * We shouldn't come here for multicast/broadcast packets
1678                  */
1679                 mutex_exit(&connfp->connf_lock);
1680 
1681                 break;
1682 
1683         case IPPROTO_ENCAP:
1684         case IPPROTO_IPV6:
1685                 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1686                     &ipha->ipha_dst, ipst));
1687         }
1688 
1689         return (NULL);
1690 }
1691 
1692 conn_t *
1693 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1694     ip_recv_attr_t *ira, ip_stack_t *ipst)
1695 {
1696         ip6_t           *ip6h;
1697         connf_t         *connfp, *bind_connfp;
1698         uint16_t        lport;
1699         uint16_t        fport;
1700         tcpha_t         *tcpha;
1701         uint32_t        ports;
1702         conn_t          *connp;
1703         uint16_t        *up;
1704         zoneid_t        zoneid = ira->ira_zoneid;
1705 
1706         ip6h = (ip6_t *)mp->b_rptr;
1707 
1708         switch (protocol) {
1709         case IPPROTO_TCP:
1710                 tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1711                 up = &tcpha->tha_lport;
1712                 ports = *(uint32_t *)up;
1713 
1714                 connfp =
1715                     &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1716                     ports, ipst)];
1717                 mutex_enter(&connfp->connf_lock);
1718                 for (connp = connfp->connf_head; connp != NULL;
1719                     connp = connp->conn_next) {
1720                         if (IPCL_CONN_MATCH_V6(connp, protocol,
1721                             ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1722                             (connp->conn_zoneid == zoneid ||
1723                             connp->conn_allzones ||
1724                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1725                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1726                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1727                                 break;
1728                 }
1729 
1730                 if (connp != NULL) {
1731                         /*
1732                          * We have a fully-bound TCP connection.
1733                          *
1734                          * For labeled systems, there's no need to check the
1735                          * label here.  It's known to be good as we checked
1736                          * before allowing the connection to become bound.
1737                          */
1738                         CONN_INC_REF(connp);
1739                         mutex_exit(&connfp->connf_lock);
1740                         return (connp);
1741                 }
1742 
1743                 mutex_exit(&connfp->connf_lock);
1744 
1745                 lport = up[1];
1746                 bind_connfp =
1747                     &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1748                 mutex_enter(&bind_connfp->connf_lock);
1749                 for (connp = bind_connfp->connf_head; connp != NULL;
1750                     connp = connp->conn_next) {
1751                         if (IPCL_BIND_MATCH_V6(connp, protocol,
1752                             ip6h->ip6_dst, lport) &&
1753                             (connp->conn_zoneid == zoneid ||
1754                             connp->conn_allzones ||
1755                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1756                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1757                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1758                                 break;
1759                 }
1760 
1761                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1762                     !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1763                     ira, connp)) {
1764                         DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1765                             char *, "connp(1) could not receive mp(2)",
1766                             conn_t *, connp, mblk_t *, mp);
1767                         connp = NULL;
1768                 }
1769 
1770                 if (connp != NULL) {
1771                         /* Have a listner at least */
1772                         CONN_INC_REF(connp);
1773                         mutex_exit(&bind_connfp->connf_lock);
1774                         return (connp);
1775                 }
1776 
1777                 mutex_exit(&bind_connfp->connf_lock);
1778                 break;
1779 
1780         case IPPROTO_UDP:
1781                 up = (uint16_t *)&mp->b_rptr[hdr_len];
1782                 lport = up[1];
1783                 fport = up[0];
1784                 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1785                 mutex_enter(&connfp->connf_lock);
1786                 for (connp = connfp->connf_head; connp != NULL;
1787                     connp = connp->conn_next) {
1788                         if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1789                             fport, ip6h->ip6_src) &&
1790                             (connp->conn_zoneid == zoneid ||
1791                             connp->conn_allzones ||
1792                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1793                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1794                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1795                                 break;
1796                 }
1797 
1798                 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1799                     !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1800                     ira, connp)) {
1801                         DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1802                             char *, "connp(1) could not receive mp(2)",
1803                             conn_t *, connp, mblk_t *, mp);
1804                         connp = NULL;
1805                 }
1806 
1807                 if (connp != NULL) {
1808                         CONN_INC_REF(connp);
1809                         mutex_exit(&connfp->connf_lock);
1810                         return (connp);
1811                 }
1812 
1813                 /*
1814                  * We shouldn't come here for multicast/broadcast packets
1815                  */
1816                 mutex_exit(&connfp->connf_lock);
1817                 break;
1818         case IPPROTO_ENCAP:
1819         case IPPROTO_IPV6:
1820                 return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1821                     &ip6h->ip6_dst, ipst));
1822         }
1823 
1824         return (NULL);
1825 }
1826 
1827 /*
1828  * wrapper around ipcl_classify_(v4,v6) routines.
1829  */
1830 conn_t *
1831 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1832 {
1833         if (ira->ira_flags & IRAF_IS_IPV4) {
1834                 return (ipcl_classify_v4(mp, ira->ira_protocol,
1835                     ira->ira_ip_hdr_length, ira, ipst));
1836         } else {
1837                 return (ipcl_classify_v6(mp, ira->ira_protocol,
1838                     ira->ira_ip_hdr_length, ira, ipst));
1839         }
1840 }
1841 
1842 /*
1843  * Only used to classify SCTP RAW sockets
1844  */
1845 conn_t *
1846 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1847     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1848 {
1849         connf_t         *connfp;
1850         conn_t          *connp;
1851         in_port_t       lport;
1852         int             ipversion;
1853         const void      *dst;
1854         zoneid_t        zoneid = ira->ira_zoneid;
1855 
1856         lport = ((uint16_t *)&ports)[1];
1857         if (ira->ira_flags & IRAF_IS_IPV4) {
1858                 dst = (const void *)&ipha->ipha_dst;
1859                 ipversion = IPV4_VERSION;
1860         } else {
1861                 dst = (const void *)&ip6h->ip6_dst;
1862                 ipversion = IPV6_VERSION;
1863         }
1864 
1865         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1866         mutex_enter(&connfp->connf_lock);
1867         for (connp = connfp->connf_head; connp != NULL;
1868             connp = connp->conn_next) {
1869                 /* We don't allow v4 fallback for v6 raw socket. */
1870                 if (ipversion != connp->conn_ipversion)
1871                         continue;
1872                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1873                     !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1874                         if (ipversion == IPV4_VERSION) {
1875                                 if (!IPCL_CONN_MATCH(connp, protocol,
1876                                     ipha->ipha_src, ipha->ipha_dst, ports))
1877                                         continue;
1878                         } else {
1879                                 if (!IPCL_CONN_MATCH_V6(connp, protocol,
1880                                     ip6h->ip6_src, ip6h->ip6_dst, ports))
1881                                         continue;
1882                         }
1883                 } else {
1884                         if (ipversion == IPV4_VERSION) {
1885                                 if (!IPCL_BIND_MATCH(connp, protocol,
1886                                     ipha->ipha_dst, lport))
1887                                         continue;
1888                         } else {
1889                                 if (!IPCL_BIND_MATCH_V6(connp, protocol,
1890                                     ip6h->ip6_dst, lport))
1891                                         continue;
1892                         }
1893                 }
1894 
1895                 if (connp->conn_zoneid == zoneid ||
1896                     connp->conn_allzones ||
1897                     ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1898                     (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1899                     (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1900                         break;
1901         }
1902 
1903         if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1904             !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1905                 DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1906                     char *, "connp(1) could not receive mp(2)",
1907                     conn_t *, connp, mblk_t *, mp);
1908                 connp = NULL;
1909         }
1910 
1911         if (connp != NULL)
1912                 goto found;
1913         mutex_exit(&connfp->connf_lock);
1914 
1915         /* Try to look for a wildcard SCTP RAW socket match. */
1916         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1917         mutex_enter(&connfp->connf_lock);
1918         for (connp = connfp->connf_head; connp != NULL;
1919             connp = connp->conn_next) {
1920                 /* We don't allow v4 fallback for v6 raw socket. */
1921                 if (ipversion != connp->conn_ipversion)
1922                         continue;
1923                 if (!IPCL_ZONE_MATCH(connp, zoneid))
1924                         continue;
1925 
1926                 if (ipversion == IPV4_VERSION) {
1927                         if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1928                                 break;
1929                 } else {
1930                         if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1931                                 break;
1932                         }
1933                 }
1934         }
1935 
1936         if (connp != NULL)
1937                 goto found;
1938 
1939         mutex_exit(&connfp->connf_lock);
1940         return (NULL);
1941 
1942 found:
1943         ASSERT(connp != NULL);
1944         CONN_INC_REF(connp);
1945         mutex_exit(&connfp->connf_lock);
1946         return (connp);
1947 }
1948 
1949 /* ARGSUSED */
1950 static int
1951 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1952 {
1953         itc_t   *itc = (itc_t *)buf;
1954         conn_t  *connp = &itc->itc_conn;
1955         tcp_t   *tcp = (tcp_t *)&itc[1];
1956 
1957         bzero(connp, sizeof (conn_t));
1958         bzero(tcp, sizeof (tcp_t));
1959 
1960         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1961         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1962         cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1963         tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1964         if (tcp->tcp_timercache == NULL)
1965                 return (ENOMEM);
1966         connp->conn_tcp = tcp;
1967         connp->conn_flags = IPCL_TCPCONN;
1968         connp->conn_proto = IPPROTO_TCP;
1969         tcp->tcp_connp = connp;
1970         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1971 
1972         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1973         if (connp->conn_ixa == NULL) {
1974                 tcp_timermp_free(tcp);
1975                 return (ENOMEM);
1976         }
1977         connp->conn_ixa->ixa_refcnt = 1;
1978         connp->conn_ixa->ixa_protocol = connp->conn_proto;
1979         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1980         return (0);
1981 }
1982 
1983 /* ARGSUSED */
1984 static void
1985 tcp_conn_destructor(void *buf, void *cdrarg)
1986 {
1987         itc_t   *itc = (itc_t *)buf;
1988         conn_t  *connp = &itc->itc_conn;
1989         tcp_t   *tcp = (tcp_t *)&itc[1];
1990 
1991         ASSERT(connp->conn_flags & IPCL_TCPCONN);
1992         ASSERT(tcp->tcp_connp == connp);
1993         ASSERT(connp->conn_tcp == tcp);
1994         tcp_timermp_free(tcp);
1995         mutex_destroy(&connp->conn_lock);
1996         cv_destroy(&connp->conn_cv);
1997         cv_destroy(&connp->conn_sq_cv);
1998         rw_destroy(&connp->conn_ilg_lock);
1999 
2000         /* Can be NULL if constructor failed */
2001         if (connp->conn_ixa != NULL) {
2002                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2003                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2004                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2005                 ixa_refrele(connp->conn_ixa);
2006         }
2007 }
2008 
2009 /* ARGSUSED */
2010 static int
2011 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2012 {
2013         itc_t   *itc = (itc_t *)buf;
2014         conn_t  *connp = &itc->itc_conn;
2015 
2016         bzero(connp, sizeof (conn_t));
2017         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2018         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2019         connp->conn_flags = IPCL_IPCCONN;
2020         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2021 
2022         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2023         if (connp->conn_ixa == NULL)
2024                 return (ENOMEM);
2025         connp->conn_ixa->ixa_refcnt = 1;
2026         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2027         return (0);
2028 }
2029 
2030 /* ARGSUSED */
2031 static void
2032 ip_conn_destructor(void *buf, void *cdrarg)
2033 {
2034         itc_t   *itc = (itc_t *)buf;
2035         conn_t  *connp = &itc->itc_conn;
2036 
2037         ASSERT(connp->conn_flags & IPCL_IPCCONN);
2038         ASSERT(connp->conn_priv == NULL);
2039         mutex_destroy(&connp->conn_lock);
2040         cv_destroy(&connp->conn_cv);
2041         rw_destroy(&connp->conn_ilg_lock);
2042 
2043         /* Can be NULL if constructor failed */
2044         if (connp->conn_ixa != NULL) {
2045                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2046                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2047                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2048                 ixa_refrele(connp->conn_ixa);
2049         }
2050 }
2051 
2052 /* ARGSUSED */
2053 static int
2054 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2055 {
2056         itc_t   *itc = (itc_t *)buf;
2057         conn_t  *connp = &itc->itc_conn;
2058         udp_t   *udp = (udp_t *)&itc[1];
2059 
2060         bzero(connp, sizeof (conn_t));
2061         bzero(udp, sizeof (udp_t));
2062 
2063         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2064         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2065         connp->conn_udp = udp;
2066         connp->conn_flags = IPCL_UDPCONN;
2067         connp->conn_proto = IPPROTO_UDP;
2068         udp->udp_connp = connp;
2069         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2070         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2071         if (connp->conn_ixa == NULL)
2072                 return (ENOMEM);
2073         connp->conn_ixa->ixa_refcnt = 1;
2074         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2075         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2076         return (0);
2077 }
2078 
2079 /* ARGSUSED */
2080 static void
2081 udp_conn_destructor(void *buf, void *cdrarg)
2082 {
2083         itc_t   *itc = (itc_t *)buf;
2084         conn_t  *connp = &itc->itc_conn;
2085         udp_t   *udp = (udp_t *)&itc[1];
2086 
2087         ASSERT(connp->conn_flags & IPCL_UDPCONN);
2088         ASSERT(udp->udp_connp == connp);
2089         ASSERT(connp->conn_udp == udp);
2090         mutex_destroy(&connp->conn_lock);
2091         cv_destroy(&connp->conn_cv);
2092         rw_destroy(&connp->conn_ilg_lock);
2093 
2094         /* Can be NULL if constructor failed */
2095         if (connp->conn_ixa != NULL) {
2096                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2097                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2098                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2099                 ixa_refrele(connp->conn_ixa);
2100         }
2101 }
2102 
2103 /* ARGSUSED */
2104 static int
2105 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2106 {
2107         itc_t   *itc = (itc_t *)buf;
2108         conn_t  *connp = &itc->itc_conn;
2109         icmp_t  *icmp = (icmp_t *)&itc[1];
2110 
2111         bzero(connp, sizeof (conn_t));
2112         bzero(icmp, sizeof (icmp_t));
2113 
2114         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2115         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2116         connp->conn_icmp = icmp;
2117         connp->conn_flags = IPCL_RAWIPCONN;
2118         connp->conn_proto = IPPROTO_ICMP;
2119         icmp->icmp_connp = connp;
2120         rw_init(&icmp->icmp_bpf_lock, NULL, RW_DEFAULT, NULL);
2121         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2122         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2123         if (connp->conn_ixa == NULL)
2124                 return (ENOMEM);
2125         connp->conn_ixa->ixa_refcnt = 1;
2126         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2127         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2128         return (0);
2129 }
2130 
2131 /* ARGSUSED */
2132 static void
2133 rawip_conn_destructor(void *buf, void *cdrarg)
2134 {
2135         itc_t   *itc = (itc_t *)buf;
2136         conn_t  *connp = &itc->itc_conn;
2137         icmp_t  *icmp = (icmp_t *)&itc[1];
2138 
2139         ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2140         ASSERT(icmp->icmp_connp == connp);
2141         ASSERT(connp->conn_icmp == icmp);
2142         mutex_destroy(&connp->conn_lock);
2143         cv_destroy(&connp->conn_cv);
2144         rw_destroy(&connp->conn_ilg_lock);
2145         rw_destroy(&icmp->icmp_bpf_lock);
2146 
2147         /* Can be NULL if constructor failed */
2148         if (connp->conn_ixa != NULL) {
2149                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2150                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2151                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2152                 ixa_refrele(connp->conn_ixa);
2153         }
2154 }
2155 
2156 /* ARGSUSED */
2157 static int
2158 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2159 {
2160         itc_t   *itc = (itc_t *)buf;
2161         conn_t  *connp = &itc->itc_conn;
2162         rts_t   *rts = (rts_t *)&itc[1];
2163 
2164         bzero(connp, sizeof (conn_t));
2165         bzero(rts, sizeof (rts_t));
2166 
2167         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2168         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2169         connp->conn_rts = rts;
2170         connp->conn_flags = IPCL_RTSCONN;
2171         rts->rts_connp = connp;
2172         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2173         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2174         if (connp->conn_ixa == NULL)
2175                 return (ENOMEM);
2176         connp->conn_ixa->ixa_refcnt = 1;
2177         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2178         return (0);
2179 }
2180 
2181 /* ARGSUSED */
2182 static void
2183 rts_conn_destructor(void *buf, void *cdrarg)
2184 {
2185         itc_t   *itc = (itc_t *)buf;
2186         conn_t  *connp = &itc->itc_conn;
2187         rts_t   *rts = (rts_t *)&itc[1];
2188 
2189         ASSERT(connp->conn_flags & IPCL_RTSCONN);
2190         ASSERT(rts->rts_connp == connp);
2191         ASSERT(connp->conn_rts == rts);
2192         mutex_destroy(&connp->conn_lock);
2193         cv_destroy(&connp->conn_cv);
2194         rw_destroy(&connp->conn_ilg_lock);
2195 
2196         /* Can be NULL if constructor failed */
2197         if (connp->conn_ixa != NULL) {
2198                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2199                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2200                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2201                 ixa_refrele(connp->conn_ixa);
2202         }
2203 }
2204 
2205 /*
2206  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2207  * in the conn_t.
2208  *
2209  * Below we list all the pointers in the conn_t as a documentation aid.
2210  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2211  * If you add any pointers to the conn_t please add an ASSERT here
2212  * and #ifdef it out if it can't be actually asserted to be NULL.
2213  * In any case, we bzero most of the conn_t at the end of the function.
2214  */
2215 void
2216 ipcl_conn_cleanup(conn_t *connp)
2217 {
2218         ip_xmit_attr_t  *ixa;
2219 
2220         ASSERT(connp->conn_latch == NULL);
2221         ASSERT(connp->conn_latch_in_policy == NULL);
2222         ASSERT(connp->conn_latch_in_action == NULL);
2223 #ifdef notdef
2224         ASSERT(connp->conn_rq == NULL);
2225         ASSERT(connp->conn_wq == NULL);
2226 #endif
2227         ASSERT(connp->conn_cred == NULL);
2228         ASSERT(connp->conn_g_fanout == NULL);
2229         ASSERT(connp->conn_g_next == NULL);
2230         ASSERT(connp->conn_g_prev == NULL);
2231         ASSERT(connp->conn_policy == NULL);
2232         ASSERT(connp->conn_fanout == NULL);
2233         ASSERT(connp->conn_next == NULL);
2234         ASSERT(connp->conn_prev == NULL);
2235         ASSERT(connp->conn_oper_pending_ill == NULL);
2236         ASSERT(connp->conn_ilg == NULL);
2237         ASSERT(connp->conn_drain_next == NULL);
2238         ASSERT(connp->conn_drain_prev == NULL);
2239 #ifdef notdef
2240         /* conn_idl is not cleared when removed from idl list */
2241         ASSERT(connp->conn_idl == NULL);
2242 #endif
2243         ASSERT(connp->conn_ipsec_opt_mp == NULL);
2244 #ifdef notdef
2245         /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2246         ASSERT(connp->conn_netstack == NULL);
2247 #endif
2248 
2249         ASSERT(connp->conn_helper_info == NULL);
2250         ASSERT(connp->conn_ixa != NULL);
2251         ixa = connp->conn_ixa;
2252         ASSERT(ixa->ixa_refcnt == 1);
2253         /* Need to preserve ixa_protocol */
2254         ixa_cleanup(ixa);
2255         ixa->ixa_flags = 0;
2256 
2257         /* Clear out the conn_t fields that are not preserved */
2258         bzero(&connp->conn_start_clr,
2259             sizeof (conn_t) -
2260             ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2261 }
2262 
2263 /*
2264  * All conns are inserted in a global multi-list for the benefit of
2265  * walkers. The walk is guaranteed to walk all open conns at the time
2266  * of the start of the walk exactly once. This property is needed to
2267  * achieve some cleanups during unplumb of interfaces. This is achieved
2268  * as follows.
2269  *
2270  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2271  * call the insert and delete functions below at creation and deletion
2272  * time respectively. The conn never moves or changes its position in this
2273  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2274  * won't increase due to walkers, once the conn deletion has started. Note
2275  * that we can't remove the conn from the global list and then wait for
2276  * the refcnt to drop to zero, since walkers would then see a truncated
2277  * list. CONN_INCIPIENT ensures that walkers don't start looking at
2278  * conns until ip_open is ready to make them globally visible.
2279  * The global round robin multi-list locks are held only to get the
2280  * next member/insertion/deletion and contention should be negligible
2281  * if the multi-list is much greater than the number of cpus.
2282  */
2283 void
2284 ipcl_globalhash_insert(conn_t *connp)
2285 {
2286         int     index;
2287         struct connf_s  *connfp;
2288         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
2289 
2290         /*
2291          * No need for atomic here. Approximate even distribution
2292          * in the global lists is sufficient.
2293          */
2294         ipst->ips_conn_g_index++;
2295         index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2296 
2297         connp->conn_g_prev = NULL;
2298         /*
2299          * Mark as INCIPIENT, so that walkers will ignore this
2300          * for now, till ip_open is ready to make it visible globally.
2301          */
2302         connp->conn_state_flags |= CONN_INCIPIENT;
2303 
2304         connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2305         /* Insert at the head of the list */
2306         mutex_enter(&connfp->connf_lock);
2307         connp->conn_g_next = connfp->connf_head;
2308         if (connp->conn_g_next != NULL)
2309                 connp->conn_g_next->conn_g_prev = connp;
2310         connfp->connf_head = connp;
2311 
2312         /* The fanout bucket this conn points to */
2313         connp->conn_g_fanout = connfp;
2314 
2315         mutex_exit(&connfp->connf_lock);
2316 }
2317 
2318 void
2319 ipcl_globalhash_remove(conn_t *connp)
2320 {
2321         struct connf_s  *connfp;
2322 
2323         /*
2324          * We were never inserted in the global multi list.
2325          * IPCL_NONE variety is never inserted in the global multilist
2326          * since it is presumed to not need any cleanup and is transient.
2327          */
2328         if (connp->conn_g_fanout == NULL)
2329                 return;
2330 
2331         connfp = connp->conn_g_fanout;
2332         mutex_enter(&connfp->connf_lock);
2333         if (connp->conn_g_prev != NULL)
2334                 connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2335         else
2336                 connfp->connf_head = connp->conn_g_next;
2337         if (connp->conn_g_next != NULL)
2338                 connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2339         mutex_exit(&connfp->connf_lock);
2340 
2341         /* Better to stumble on a null pointer than to corrupt memory */
2342         connp->conn_g_next = NULL;
2343         connp->conn_g_prev = NULL;
2344         connp->conn_g_fanout = NULL;
2345 }
2346 
2347 /*
2348  * Walk the list of all conn_t's in the system, calling the function provided
2349  * With the specified argument for each.
2350  * Applies to both IPv4 and IPv6.
2351  *
2352  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2353  * conn_oper_pending_ill). To guard against stale pointers
2354  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2355  * unplumbed or removed. New conn_t's that are created while we are walking
2356  * may be missed by this walk, because they are not necessarily inserted
2357  * at the tail of the list. They are new conn_t's and thus don't have any
2358  * stale pointers. The CONN_CLOSING flag ensures that no new reference
2359  * is created to the struct that is going away.
2360  */
2361 void
2362 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2363 {
2364         int     i;
2365         conn_t  *connp;
2366         conn_t  *prev_connp;
2367 
2368         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2369                 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2370                 prev_connp = NULL;
2371                 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2372                 while (connp != NULL) {
2373                         mutex_enter(&connp->conn_lock);
2374                         if (connp->conn_state_flags &
2375                             (CONN_CONDEMNED | CONN_INCIPIENT)) {
2376                                 mutex_exit(&connp->conn_lock);
2377                                 connp = connp->conn_g_next;
2378                                 continue;
2379                         }
2380                         CONN_INC_REF_LOCKED(connp);
2381                         mutex_exit(&connp->conn_lock);
2382                         mutex_exit(
2383                             &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2384                         (*func)(connp, arg);
2385                         if (prev_connp != NULL)
2386                                 CONN_DEC_REF(prev_connp);
2387                         mutex_enter(
2388                             &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2389                         prev_connp = connp;
2390                         connp = connp->conn_g_next;
2391                 }
2392                 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2393                 if (prev_connp != NULL)
2394                         CONN_DEC_REF(prev_connp);
2395         }
2396 }
2397 
2398 /*
2399  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2400  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2401  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2402  * (peer tcp in ESTABLISHED state).
2403  */
2404 conn_t *
2405 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2406     ip_stack_t *ipst)
2407 {
2408         uint32_t ports;
2409         uint16_t *pports = (uint16_t *)&ports;
2410         connf_t *connfp;
2411         conn_t  *tconnp;
2412         boolean_t zone_chk;
2413 
2414         /*
2415          * If either the source of destination address is loopback, then
2416          * both endpoints must be in the same Zone.  Otherwise, both of
2417          * the addresses are system-wide unique (tcp is in ESTABLISHED
2418          * state) and the endpoints may reside in different Zones.
2419          */
2420         zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2421             ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2422 
2423         pports[0] = tcpha->tha_fport;
2424         pports[1] = tcpha->tha_lport;
2425 
2426         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2427             ports, ipst)];
2428 
2429         mutex_enter(&connfp->connf_lock);
2430         for (tconnp = connfp->connf_head; tconnp != NULL;
2431             tconnp = tconnp->conn_next) {
2432 
2433                 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2434                     ipha->ipha_dst, ipha->ipha_src, ports) &&
2435                     tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2436                     (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2437 
2438                         ASSERT(tconnp != connp);
2439                         CONN_INC_REF(tconnp);
2440                         mutex_exit(&connfp->connf_lock);
2441                         return (tconnp);
2442                 }
2443         }
2444         mutex_exit(&connfp->connf_lock);
2445         return (NULL);
2446 }
2447 
2448 /*
2449  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2450  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2451  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2452  * (peer tcp in ESTABLISHED state).
2453  */
2454 conn_t *
2455 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2456     ip_stack_t *ipst)
2457 {
2458         uint32_t ports;
2459         uint16_t *pports = (uint16_t *)&ports;
2460         connf_t *connfp;
2461         conn_t  *tconnp;
2462         boolean_t zone_chk;
2463 
2464         /*
2465          * If either the source of destination address is loopback, then
2466          * both endpoints must be in the same Zone.  Otherwise, both of
2467          * the addresses are system-wide unique (tcp is in ESTABLISHED
2468          * state) and the endpoints may reside in different Zones.  We
2469          * don't do Zone check for link local address(es) because the
2470          * current Zone implementation treats each link local address as
2471          * being unique per system node, i.e. they belong to global Zone.
2472          */
2473         zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2474             IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2475 
2476         pports[0] = tcpha->tha_fport;
2477         pports[1] = tcpha->tha_lport;
2478 
2479         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2480             ports, ipst)];
2481 
2482         mutex_enter(&connfp->connf_lock);
2483         for (tconnp = connfp->connf_head; tconnp != NULL;
2484             tconnp = tconnp->conn_next) {
2485 
2486                 /* We skip conn_bound_if check here as this is loopback tcp */
2487                 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2488                     ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2489                     tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2490                     (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2491 
2492                         ASSERT(tconnp != connp);
2493                         CONN_INC_REF(tconnp);
2494                         mutex_exit(&connfp->connf_lock);
2495                         return (tconnp);
2496                 }
2497         }
2498         mutex_exit(&connfp->connf_lock);
2499         return (NULL);
2500 }
2501 
2502 /*
2503  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2504  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2505  * Only checks for connected entries i.e. no INADDR_ANY checks.
2506  */
2507 conn_t *
2508 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2509     ip_stack_t *ipst)
2510 {
2511         uint32_t ports;
2512         uint16_t *pports;
2513         connf_t *connfp;
2514         conn_t  *tconnp;
2515 
2516         pports = (uint16_t *)&ports;
2517         pports[0] = tcpha->tha_fport;
2518         pports[1] = tcpha->tha_lport;
2519 
2520         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2521             ports, ipst)];
2522 
2523         mutex_enter(&connfp->connf_lock);
2524         for (tconnp = connfp->connf_head; tconnp != NULL;
2525             tconnp = tconnp->conn_next) {
2526 
2527                 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2528                     ipha->ipha_dst, ipha->ipha_src, ports) &&
2529                     tconnp->conn_tcp->tcp_state >= min_state) {
2530 
2531                         CONN_INC_REF(tconnp);
2532                         mutex_exit(&connfp->connf_lock);
2533                         return (tconnp);
2534                 }
2535         }
2536         mutex_exit(&connfp->connf_lock);
2537         return (NULL);
2538 }
2539 
2540 /*
2541  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2542  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2543  * Only checks for connected entries i.e. no INADDR_ANY checks.
2544  * Match on ifindex in addition to addresses.
2545  */
2546 conn_t *
2547 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2548     uint_t ifindex, ip_stack_t *ipst)
2549 {
2550         tcp_t   *tcp;
2551         uint32_t ports;
2552         uint16_t *pports;
2553         connf_t *connfp;
2554         conn_t  *tconnp;
2555 
2556         pports = (uint16_t *)&ports;
2557         pports[0] = tcpha->tha_fport;
2558         pports[1] = tcpha->tha_lport;
2559 
2560         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2561             ports, ipst)];
2562 
2563         mutex_enter(&connfp->connf_lock);
2564         for (tconnp = connfp->connf_head; tconnp != NULL;
2565             tconnp = tconnp->conn_next) {
2566 
2567                 tcp = tconnp->conn_tcp;
2568                 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2569                     ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2570                     tcp->tcp_state >= min_state &&
2571                     (tconnp->conn_bound_if == 0 ||
2572                     tconnp->conn_bound_if == ifindex)) {
2573 
2574                         CONN_INC_REF(tconnp);
2575                         mutex_exit(&connfp->connf_lock);
2576                         return (tconnp);
2577                 }
2578         }
2579         mutex_exit(&connfp->connf_lock);
2580         return (NULL);
2581 }
2582 
2583 /*
2584  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2585  * a listener when changing state.
2586  */
2587 conn_t *
2588 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2589     ip_stack_t *ipst)
2590 {
2591         connf_t         *bind_connfp;
2592         conn_t          *connp;
2593         tcp_t           *tcp;
2594 
2595         /*
2596          * Avoid false matches for packets sent to an IP destination of
2597          * all zeros.
2598          */
2599         if (laddr == 0)
2600                 return (NULL);
2601 
2602         ASSERT(zoneid != ALL_ZONES);
2603 
2604         bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2605         mutex_enter(&bind_connfp->connf_lock);
2606         for (connp = bind_connfp->connf_head; connp != NULL;
2607             connp = connp->conn_next) {
2608                 tcp = connp->conn_tcp;
2609                 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2610                     IPCL_ZONE_MATCH(connp, zoneid) &&
2611                     (tcp->tcp_listener == NULL)) {
2612                         CONN_INC_REF(connp);
2613                         mutex_exit(&bind_connfp->connf_lock);
2614                         return (connp);
2615                 }
2616         }
2617         mutex_exit(&bind_connfp->connf_lock);
2618         return (NULL);
2619 }
2620 
2621 /*
2622  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2623  * a listener when changing state.
2624  */
2625 conn_t *
2626 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2627     zoneid_t zoneid, ip_stack_t *ipst)
2628 {
2629         connf_t         *bind_connfp;
2630         conn_t          *connp = NULL;
2631         tcp_t           *tcp;
2632 
2633         /*
2634          * Avoid false matches for packets sent to an IP destination of
2635          * all zeros.
2636          */
2637         if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2638                 return (NULL);
2639 
2640         ASSERT(zoneid != ALL_ZONES);
2641 
2642         bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2643         mutex_enter(&bind_connfp->connf_lock);
2644         for (connp = bind_connfp->connf_head; connp != NULL;
2645             connp = connp->conn_next) {
2646                 tcp = connp->conn_tcp;
2647                 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2648                     IPCL_ZONE_MATCH(connp, zoneid) &&
2649                     (connp->conn_bound_if == 0 ||
2650                     connp->conn_bound_if == ifindex) &&
2651                     tcp->tcp_listener == NULL) {
2652                         CONN_INC_REF(connp);
2653                         mutex_exit(&bind_connfp->connf_lock);
2654                         return (connp);
2655                 }
2656         }
2657         mutex_exit(&bind_connfp->connf_lock);
2658         return (NULL);
2659 }
2660 
2661 /*
2662  * ipcl_get_next_conn
2663  *      get the next entry in the conn global list
2664  *      and put a reference on the next_conn.
2665  *      decrement the reference on the current conn.
2666  *
2667  * This is an iterator based walker function that also provides for
2668  * some selection by the caller. It walks through the conn_hash bucket
2669  * searching for the next valid connp in the list, and selects connections
2670  * that are neither closed nor condemned. It also REFHOLDS the conn
2671  * thus ensuring that the conn exists when the caller uses the conn.
2672  */
2673 conn_t *
2674 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2675 {
2676         conn_t  *next_connp;
2677 
2678         if (connfp == NULL)
2679                 return (NULL);
2680 
2681         mutex_enter(&connfp->connf_lock);
2682 
2683         next_connp = (connp == NULL) ?
2684             connfp->connf_head : connp->conn_g_next;
2685 
2686         while (next_connp != NULL) {
2687                 mutex_enter(&next_connp->conn_lock);
2688                 if (!(next_connp->conn_flags & conn_flags) ||
2689                     (next_connp->conn_state_flags &
2690                     (CONN_CONDEMNED | CONN_INCIPIENT))) {
2691                         /*
2692                          * This conn has been condemned or
2693                          * is closing, or the flags don't match
2694                          */
2695                         mutex_exit(&next_connp->conn_lock);
2696                         next_connp = next_connp->conn_g_next;
2697                         continue;
2698                 }
2699                 CONN_INC_REF_LOCKED(next_connp);
2700                 mutex_exit(&next_connp->conn_lock);
2701                 break;
2702         }
2703 
2704         mutex_exit(&connfp->connf_lock);
2705 
2706         if (connp != NULL)
2707                 CONN_DEC_REF(connp);
2708 
2709         return (next_connp);
2710 }
2711 
#ifdef CONN_DEBUG
/*
 * Reference tracing. Each conn keeps a circular buffer of CONN_TRACE_MAX
 * entries; conn_trace_ref and conn_untrace_ref each record the current
 * call stack (up to CONN_STACK_DEPTH frames) in the slot following
 * conn_trace_last, wrapping to slot 0 at the end of the buffer.
 * Both must be called with conn_lock held, and both always return 1.
 */
int
conn_trace_ref(conn_t *connp)
{
	int		slot;
	conn_trace_t	*ctb;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/* Advance to the next slot, wrapping around. */
	slot = connp->conn_trace_last;
	if (++slot == CONN_TRACE_MAX)
		slot = 0;

	ctb = &connp->conn_trace_buf[slot];
	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
	connp->conn_trace_last = slot;

	return (1);
}

int
conn_untrace_ref(conn_t *connp)
{
	int		slot;
	conn_trace_t	*ctb;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/* Advance to the next slot, wrapping around. */
	slot = connp->conn_trace_last;
	if (++slot == CONN_TRACE_MAX)
		slot = 0;

	ctb = &connp->conn_trace_buf[slot];
	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
	connp->conn_trace_last = slot;

	return (1);
}
#endif