1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * IP PACKET CLASSIFIER
  28  *
 * The IP packet classifier provides a mapping between IP packets and
 * persistent connection state for connection-oriented protocols. It also
 * provides an interface for managing connection state.
  32  *
  33  * The connection state is kept in conn_t data structure and contains, among
  34  * other things:
  35  *
  36  *      o local/remote address and ports
  37  *      o Transport protocol
  38  *      o squeue for the connection (for TCP only)
  39  *      o reference counter
  40  *      o Connection state
  41  *      o hash table linkage
  42  *      o interface/ire information
  43  *      o credentials
  44  *      o ipsec policy
  45  *      o send and receive functions.
  46  *      o mutex lock.
  47  *
  48  * Connections use a reference counting scheme. They are freed when the
  49  * reference counter drops to zero. A reference is incremented when connection
  50  * is placed in a list or table, when incoming packet for the connection arrives
  51  * and when connection is processed via squeue (squeue processing may be
  52  * asynchronous and the reference protects the connection from being destroyed
  53  * before its processing is finished).
  54  *
 * conn_recv is used to pass up packets to the ULP.
 * For TCP, conn_recv changes over time. It is tcp_input_listener_unbound
 * initially for a listener, and changes to tcp_input_listener once the
 * listener has picked a good squeue. For other cases it is set to
 * tcp_input_data.
  59  *
  60  * conn_recvicmp is used to pass up ICMP errors to the ULP.
  61  *
  62  * Classifier uses several hash tables:
  63  *
  64  *      ipcl_conn_fanout:       contains all TCP connections in CONNECTED state
  65  *      ipcl_bind_fanout:       contains all connections in BOUND state
  66  *      ipcl_proto_fanout:      IPv4 protocol fanout
  67  *      ipcl_proto_fanout_v6:   IPv6 protocol fanout
  68  *      ipcl_udp_fanout:        contains all UDP connections
  69  *      ipcl_iptun_fanout:      contains all IP tunnel connections
  70  *      ipcl_globalhash_fanout: contains all connections
  71  *
  72  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
  73  * which need to view all existing connections.
  74  *
  75  * All tables are protected by per-bucket locks. When both per-bucket lock and
  76  * connection lock need to be held, the per-bucket lock should be acquired
  77  * first, followed by the connection lock.
  78  *
  79  * All functions doing search in one of these tables increment a reference
  80  * counter on the connection found (if any). This reference should be dropped
  81  * when the caller has finished processing the connection.
  82  *
  83  *
  84  * INTERFACES:
  85  * ===========
  86  *
  87  * Connection Lookup:
  88  * ------------------
  89  *
  90  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
  91  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
  92  *
  93  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
  94  * it can't find any associated connection. If the connection is found, its
  95  * reference counter is incremented.
  96  *
  97  *      mp:     mblock, containing packet header. The full header should fit
  98  *              into a single mblock. It should also contain at least full IP
  99  *              and TCP or UDP header.
 100  *
 101  *      protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 102  *
 103  *      hdr_len: The size of IP header. It is used to find TCP or UDP header in
 104  *               the packet.
 105  *
 106  *      ira->ira_zoneid: The zone in which the returned connection must be; the
 107  *              zoneid corresponding to the ire_zoneid on the IRE located for
 108  *              the packet's destination address.
 109  *
 110  *      ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
 111  *              IRAF_TX_SHARED_ADDR flags
 112  *
 113  *      For TCP connections, the lookup order is as follows:
 114  *              5-tuple {src, dst, protocol, local port, remote port}
 115  *                      lookup in ipcl_conn_fanout table.
 116  *              3-tuple {dst, remote port, protocol} lookup in
 117  *                      ipcl_bind_fanout table.
 118  *
 *      For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *      remote port} lookup is done on ipcl_udp_fanout. Note that
 *      these interfaces do not handle cases where a packet belongs
 *      to multiple UDP clients, which is handled in IP itself.
 123  *
 124  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 125  * determine which actual zone gets the segment.  This is used only in a
 126  * labeled environment.  The matching rules are:
 127  *
 128  *      - If it's not a multilevel port, then the label on the packet selects
 129  *        the zone.  Unlabeled packets are delivered to the global zone.
 130  *
 131  *      - If it's a multilevel port, then only the zone registered to receive
 132  *        packets on that port matches.
 133  *
 134  * Also, in a labeled environment, packet labels need to be checked.  For fully
 135  * bound TCP connections, we can assume that the packet label was checked
 136  * during connection establishment, and doesn't need to be checked on each
 137  * packet.  For others, though, we need to check for strict equality or, for
 138  * multilevel ports, membership in the range or set.  This part currently does
 139  * a tnrh lookup on each packet, but could be optimized to use cached results
 140  * if that were necessary.  (SCTP doesn't come through here, but if it did,
 141  * we would apply the same rules as TCP.)
 142  *
 143  * An implication of the above is that fully-bound TCP sockets must always use
 144  * distinct 4-tuples; they can't be discriminated by label alone.
 145  *
 146  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 147  * as there's no connection set-up handshake and no shared state.
 148  *
 149  * Labels on looped-back packets within a single zone do not need to be
 150  * checked, as all processes in the same zone have the same label.
 151  *
 152  * Finally, for unlabeled packets received by a labeled system, special rules
 153  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 154  * socket in the zone whose label matches the default label of the sender, if
 155  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 156  * receiver's label must dominate the sender's default label.
 157  *
 158  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
 159  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 160  *                                       ip_stack);
 161  *
 *      Lookup routine to find an exact match for {src, dst, local port,
 *      remote port} for TCP connections in ipcl_conn_fanout. The addresses
 *      and ports are read from the IP and TCP headers respectively.
 165  *
 166  * conn_t       *ipcl_lookup_listener_v4(lport, laddr, protocol,
 167  *                                       zoneid, ip_stack);
 168  * conn_t       *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 169  *                                       zoneid, ip_stack);
 170  *
 171  *      Lookup routine to find a listener with the tuple {lport, laddr,
 172  *      protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 173  *      parameter interface index is also compared.
 174  *
 175  * void ipcl_walk(func, arg, ip_stack)
 176  *
 177  *      Apply 'func' to every connection available. The 'func' is called as
 178  *      (*func)(connp, arg). The walk is non-atomic so connections may be
 179  *      created and destroyed during the walk. The CONN_CONDEMNED and
 180  *      CONN_INCIPIENT flags ensure that connections which are newly created
 181  *      or being destroyed are not selected by the walker.
 182  *
 183  * Table Updates
 184  * -------------
 185  *
 186  * int ipcl_conn_insert(connp);
 187  * int ipcl_conn_insert_v4(connp);
 188  * int ipcl_conn_insert_v6(connp);
 189  *
 190  *      Insert 'connp' in the ipcl_conn_fanout.
 *      Arguments :
 192  *              connp           conn_t to be inserted
 193  *
 194  *      Return value :
 195  *              0               if connp was inserted
 196  *              EADDRINUSE      if the connection with the same tuple
 197  *                              already exists.
 198  *
 199  * int ipcl_bind_insert(connp);
 200  * int ipcl_bind_insert_v4(connp);
 201  * int ipcl_bind_insert_v6(connp);
 202  *
 203  *      Insert 'connp' in ipcl_bind_fanout.
 *      Arguments :
 205  *              connp           conn_t to be inserted
 206  *
 207  *
 208  * void ipcl_hash_remove(connp);
 209  *
 210  *      Removes the 'connp' from the connection fanout table.
 211  *
 212  * Connection Creation/Destruction
 213  * -------------------------------
 214  *
 215  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 216  *
 217  *      Creates a new conn based on the type flag, inserts it into
 218  *      globalhash table.
 219  *
 220  *      type:   This flag determines the type of conn_t which needs to be
 221  *              created i.e., which kmem_cache it comes from.
 222  *              IPCL_TCPCONN    indicates a TCP connection
 223  *              IPCL_SCTPCONN   indicates a SCTP connection
 224  *              IPCL_UDPCONN    indicates a UDP conn_t.
 225  *              IPCL_RAWIPCONN  indicates a RAWIP/ICMP conn_t.
 226  *              IPCL_RTSCONN    indicates a RTS conn_t.
 227  *              IPCL_IPCCONN    indicates all other connections.
 228  *
 229  * void ipcl_conn_destroy(connp)
 230  *
 231  *      Destroys the connection state, removes it from the global
 232  *      connection hash table and frees its memory.
 233  */
 234 
 235 #include <sys/types.h>
 236 #include <sys/stream.h>
 237 #include <sys/stropts.h>
 238 #include <sys/sysmacros.h>
 239 #include <sys/strsubr.h>
 240 #include <sys/strsun.h>
 241 #define _SUN_TPI_VERSION 2
 242 #include <sys/ddi.h>
 243 #include <sys/cmn_err.h>
 244 #include <sys/debug.h>
 245 
 246 #include <sys/systm.h>
 247 #include <sys/param.h>
 248 #include <sys/kmem.h>
 249 #include <sys/isa_defs.h>
 250 #include <inet/common.h>
 251 #include <netinet/ip6.h>
 252 #include <netinet/icmp6.h>
 253 
 254 #include <inet/ip.h>
 255 #include <inet/ip_if.h>
 256 #include <inet/ip_ire.h>
 257 #include <inet/ip6.h>
 258 #include <inet/ip_ndp.h>
 259 #include <inet/ip_impl.h>
 260 #include <inet/udp_impl.h>
 261 #include <inet/sctp_ip.h>
 262 #include <inet/sctp/sctp_impl.h>
 263 #include <inet/rawip_impl.h>
 264 #include <inet/rts_impl.h>
 265 #include <inet/iptun/iptun_impl.h>
 266 
 267 #include <sys/cpuvar.h>
 268 
 269 #include <inet/ipclassifier.h>
 270 #include <inet/tcp.h>
 271 #include <inet/ipsec_impl.h>
 272 
 273 #include <sys/tsol/tnet.h>
 274 #include <sys/sockio.h>
 275 
/*
 * Old value for compatibility. Settable in /etc/system.
 * ipcl_init() consults it only when ipcl_conn_hash_size is zero.
 */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically.  Settable in /etc/system.
 *
 * When both this and tcp_conn_hash_size are zero, ipcl_init() derives the
 * requested conn fanout size from free memory: (freemem * PAGESIZE) divided
 * by ipcl_conn_hash_memfactor, capped at ipcl_conn_hash_maxsize. Whatever
 * the source of the requested size, ipcl_init() then rounds it up to a
 * prime taken from the P2Ps() table.
 */
volatile uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/*
 * bind/udp fanout table size. Copied verbatim into the per-stack
 * instance by ipcl_init().
 */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;
 297 
/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The macro expands to a brace-enclosed initializer list indexed by N.
 * The entries for N < 3 are 0, and one extra trailing 0 follows the N = 28
 * entry; ipcl_init() treats a 0 entry as "out of range".
 */

#define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,  \
                6143, 12281, 24571, 49139, 98299, 196597, 393209,       \
                786431, 1572853, 3145721, 6291449, 12582893, 25165813,  \
                50331599, 100663291, 201326557, 0}
 307 
/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
 *
 * The filler member gives the union a size of at least CACHE_ALIGN(conn_s)
 * bytes (presumably the conn size rounded up to a cache line - confirm
 * against the CACHE_ALIGN definition). The per-protocol caches created in
 * ipcl_g_init() are sized as sizeof (itc_t) plus the protocol structure.
 */
typedef union itc_s {
        conn_t  itc_conn;       /* the connection itself */
        char    itcu_filler[CACHE_ALIGN(conn_s)];       /* size padding only */
} itc_t;
 316 
/*
 * kmem caches from which conn_t structures are allocated, one per conn
 * flavor. All of them except sctp_conn_cache are created in ipcl_g_init()
 * and destroyed in ipcl_g_destroy(); sctp_conn_cache is defined elsewhere
 * and only referenced here (by ipcl_conn_create()).
 */
struct kmem_cache  *tcp_conn_cache;
struct kmem_cache  *ip_conn_cache;
extern struct kmem_cache  *sctp_conn_cache;
struct kmem_cache  *udp_conn_cache;
struct kmem_cache  *rawip_conn_cache;
struct kmem_cache  *rts_conn_cache;

/* TCP timer mblk helpers, defined outside this file. */
extern void     tcp_timermp_free(tcp_t *);
extern mblk_t   *tcp_timermp_alloc(int);

/*
 * Constructor/destructor callbacks handed to kmem_cache_create() in
 * ipcl_g_init(), one pair per cache.
 */
static int      ip_conn_constructor(void *, void *, int);
static void     ip_conn_destructor(void *, void *);

static int      tcp_conn_constructor(void *, void *, int);
static void     tcp_conn_destructor(void *, void *);

static int      udp_conn_constructor(void *, void *, int);
static void     udp_conn_destructor(void *, void *);

static int      rawip_conn_constructor(void *, void *, int);
static void     rawip_conn_destructor(void *, void *);

static int      rts_conn_constructor(void *, void *, int);
static void     rts_conn_destructor(void *, void *);
 341 
 342 /*
 343  * Global (for all stack instances) init routine
 344  */
 345 void
 346 ipcl_g_init(void)
 347 {
 348         ip_conn_cache = kmem_cache_create("ip_conn_cache",
 349             sizeof (conn_t), CACHE_ALIGN_SIZE,
 350             ip_conn_constructor, ip_conn_destructor,
 351             NULL, NULL, NULL, 0);
 352 
 353         tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
 354             sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
 355             tcp_conn_constructor, tcp_conn_destructor,
 356             tcp_conn_reclaim, NULL, NULL, 0);
 357 
 358         udp_conn_cache = kmem_cache_create("udp_conn_cache",
 359             sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
 360             udp_conn_constructor, udp_conn_destructor,
 361             NULL, NULL, NULL, 0);
 362 
 363         rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
 364             sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
 365             rawip_conn_constructor, rawip_conn_destructor,
 366             NULL, NULL, NULL, 0);
 367 
 368         rts_conn_cache = kmem_cache_create("rts_conn_cache",
 369             sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
 370             rts_conn_constructor, rts_conn_destructor,
 371             NULL, NULL, NULL, 0);
 372 }
 373 
 374 /*
 375  * ipclassifier intialization routine, sets up hash tables.
 376  */
 377 void
 378 ipcl_init(ip_stack_t *ipst)
 379 {
 380         int i;
 381         int sizes[] = P2Ps();
 382 
 383         /*
 384          * Calculate size of conn fanout table from /etc/system settings
 385          */
 386         if (ipcl_conn_hash_size != 0) {
 387                 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
 388         } else if (tcp_conn_hash_size != 0) {
 389                 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
 390         } else {
 391                 extern pgcnt_t freemem;
 392 
 393                 ipst->ips_ipcl_conn_fanout_size =
 394                     (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
 395 
 396                 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
 397                         ipst->ips_ipcl_conn_fanout_size =
 398                             ipcl_conn_hash_maxsize;
 399                 }
 400         }
 401 
 402         for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
 403                 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
 404                         break;
 405                 }
 406         }
 407         if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
 408                 /* Out of range, use the 2^16 value */
 409                 ipst->ips_ipcl_conn_fanout_size = sizes[16];
 410         }
 411 
 412         /* Take values from /etc/system */
 413         ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
 414         ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
 415         ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
 416         ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
 417 
 418         ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
 419 
 420         ipst->ips_ipcl_conn_fanout = kmem_zalloc(
 421             ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
 422 
 423         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 424                 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
 425                     MUTEX_DEFAULT, NULL);
 426         }
 427 
 428         ipst->ips_ipcl_bind_fanout = kmem_zalloc(
 429             ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
 430 
 431         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 432                 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
 433                     MUTEX_DEFAULT, NULL);
 434         }
 435 
 436         ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
 437             sizeof (connf_t), KM_SLEEP);
 438         for (i = 0; i < IPPROTO_MAX; i++) {
 439                 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
 440                     MUTEX_DEFAULT, NULL);
 441         }
 442 
 443         ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
 444             sizeof (connf_t), KM_SLEEP);
 445         for (i = 0; i < IPPROTO_MAX; i++) {
 446                 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
 447                     MUTEX_DEFAULT, NULL);
 448         }
 449 
 450         ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
 451         mutex_init(&ipst->ips_rts_clients->connf_lock,
 452             NULL, MUTEX_DEFAULT, NULL);
 453 
 454         ipst->ips_ipcl_udp_fanout = kmem_zalloc(
 455             ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
 456         for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 457                 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
 458                     MUTEX_DEFAULT, NULL);
 459         }
 460 
 461         ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
 462             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
 463         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 464                 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
 465                     MUTEX_DEFAULT, NULL);
 466         }
 467 
 468         ipst->ips_ipcl_raw_fanout = kmem_zalloc(
 469             ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
 470         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 471                 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
 472                     MUTEX_DEFAULT, NULL);
 473         }
 474 
 475         ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
 476             sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
 477         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 478                 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
 479                     NULL, MUTEX_DEFAULT, NULL);
 480         }
 481 }
 482 
 483 void
 484 ipcl_g_destroy(void)
 485 {
 486         kmem_cache_destroy(ip_conn_cache);
 487         kmem_cache_destroy(tcp_conn_cache);
 488         kmem_cache_destroy(udp_conn_cache);
 489         kmem_cache_destroy(rawip_conn_cache);
 490         kmem_cache_destroy(rts_conn_cache);
 491 }
 492 
 493 /*
 494  * All user-level and kernel use of the stack must be gone
 495  * by now.
 496  */
 497 void
 498 ipcl_destroy(ip_stack_t *ipst)
 499 {
 500         int i;
 501 
 502         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 503                 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
 504                 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
 505         }
 506         kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
 507             sizeof (connf_t));
 508         ipst->ips_ipcl_conn_fanout = NULL;
 509 
 510         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 511                 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
 512                 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
 513         }
 514         kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
 515             sizeof (connf_t));
 516         ipst->ips_ipcl_bind_fanout = NULL;
 517 
 518         for (i = 0; i < IPPROTO_MAX; i++) {
 519                 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
 520                 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
 521         }
 522         kmem_free(ipst->ips_ipcl_proto_fanout_v4,
 523             IPPROTO_MAX * sizeof (connf_t));
 524         ipst->ips_ipcl_proto_fanout_v4 = NULL;
 525 
 526         for (i = 0; i < IPPROTO_MAX; i++) {
 527                 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
 528                 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
 529         }
 530         kmem_free(ipst->ips_ipcl_proto_fanout_v6,
 531             IPPROTO_MAX * sizeof (connf_t));
 532         ipst->ips_ipcl_proto_fanout_v6 = NULL;
 533 
 534         for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 535                 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
 536                 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
 537         }
 538         kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
 539             sizeof (connf_t));
 540         ipst->ips_ipcl_udp_fanout = NULL;
 541 
 542         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 543                 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
 544                 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
 545         }
 546         kmem_free(ipst->ips_ipcl_iptun_fanout,
 547             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
 548         ipst->ips_ipcl_iptun_fanout = NULL;
 549 
 550         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 551                 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
 552                 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
 553         }
 554         kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
 555             sizeof (connf_t));
 556         ipst->ips_ipcl_raw_fanout = NULL;
 557 
 558         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 559                 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
 560                 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
 561         }
 562         kmem_free(ipst->ips_ipcl_globalhash_fanout,
 563             sizeof (connf_t) * CONN_G_HASH_SIZE);
 564         ipst->ips_ipcl_globalhash_fanout = NULL;
 565 
 566         ASSERT(ipst->ips_rts_clients->connf_head == NULL);
 567         mutex_destroy(&ipst->ips_rts_clients->connf_lock);
 568         kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
 569         ipst->ips_rts_clients = NULL;
 570 }
 571 
 572 /*
 573  * conn creation routine. initialize the conn, sets the reference
 574  * and inserts it in the global hash table.
 575  */
 576 conn_t *
 577 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
 578 {
 579         conn_t  *connp;
 580         struct kmem_cache *conn_cache;
 581 
 582         switch (type) {
 583         case IPCL_SCTPCONN:
 584                 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
 585                         return (NULL);
 586                 sctp_conn_init(connp);
 587                 netstack_hold(ns);
 588                 connp->conn_netstack = ns;
 589                 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 590                 connp->conn_ixa->ixa_conn_id = (long)connp;
 591                 ipcl_globalhash_insert(connp);
 592                 return (connp);
 593 
 594         case IPCL_TCPCONN:
 595                 conn_cache = tcp_conn_cache;
 596                 break;
 597 
 598         case IPCL_UDPCONN:
 599                 conn_cache = udp_conn_cache;
 600                 break;
 601 
 602         case IPCL_RAWIPCONN:
 603                 conn_cache = rawip_conn_cache;
 604                 break;
 605 
 606         case IPCL_RTSCONN:
 607                 conn_cache = rts_conn_cache;
 608                 break;
 609 
 610         case IPCL_IPCCONN:
 611                 conn_cache = ip_conn_cache;
 612                 break;
 613 
 614         default:
 615                 connp = NULL;
 616                 ASSERT(0);
 617         }
 618 
 619         if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
 620                 return (NULL);
 621 
 622         connp->conn_ref = 1;
 623         netstack_hold(ns);
 624         connp->conn_netstack = ns;
 625         connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 626         connp->conn_ixa->ixa_conn_id = (long)connp;
 627         ipcl_globalhash_insert(connp);
 628         return (connp);
 629 }
 630 
/*
 * Destroy a conn whose reference counts have dropped to zero: release the
 * resources hanging off it, remove it from the global hash and hand the
 * memory back to the kmem cache matching its type.
 *
 * The caller must not hold conn_lock, and both conn_ref and conn_ioctlref
 * must be zero (asserted on DEBUG kernels).
 *
 * TCP and SCTP conns take dedicated paths and return early; all other
 * types share the common tail at the end of the function. Before a conn
 * goes back to its cache, conn_flags is reduced to just its type bit
 * (presumably so that the cached object can be reused as-is - confirm
 * against the cache constructors).
 */
void
ipcl_conn_destroy(conn_t *connp)
{
        mblk_t  *mp;
        /* Captured up front; conn_netstack is cleared further down. */
        netstack_t      *ns = connp->conn_netstack;

        ASSERT(!MUTEX_HELD(&connp->conn_lock));
        ASSERT(connp->conn_ref == 0);
        ASSERT(connp->conn_ioctlref == 0);

        DTRACE_PROBE1(conn__destroy, conn_t *, connp);

        if (connp->conn_cred != NULL) {
                crfree(connp->conn_cred);
                connp->conn_cred = NULL;
                /* ixa_cred done in ipcl_conn_cleanup below */
        }

        /* Free the conn_ht_iphc buffer and reset everything describing it. */
        if (connp->conn_ht_iphc != NULL) {
                kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
                connp->conn_ht_iphc = NULL;
                connp->conn_ht_iphc_allocated = 0;
                connp->conn_ht_iphc_len = 0;
                connp->conn_ht_ulp = NULL;
                connp->conn_ht_ulp_len = 0;
        }
        ip_pkt_free(&connp->conn_xmit_ipp);

        ipcl_globalhash_remove(connp);

        /* Drop the references held on IPsec latch/policy state, if any. */
        if (connp->conn_latch != NULL) {
                IPLATCH_REFRELE(connp->conn_latch);
                connp->conn_latch = NULL;
        }
        if (connp->conn_latch_in_policy != NULL) {
                IPPOL_REFRELE(connp->conn_latch_in_policy);
                connp->conn_latch_in_policy = NULL;
        }
        if (connp->conn_latch_in_action != NULL) {
                IPACT_REFRELE(connp->conn_latch_in_action);
                connp->conn_latch_in_action = NULL;
        }
        if (connp->conn_policy != NULL) {
                IPPH_REFRELE(connp->conn_policy, ns);
                connp->conn_policy = NULL;
        }

        if (connp->conn_ipsec_opt_mp != NULL) {
                freemsg(connp->conn_ipsec_opt_mp);
                connp->conn_ipsec_opt_mp = NULL;
        }

        if (connp->conn_flags & IPCL_TCPCONN) {
                tcp_t *tcp = connp->conn_tcp;

                tcp_free(tcp);
                /*
                 * Remember the timer cache; it is restored after the
                 * tcp_t has been zeroed below.
                 */
                mp = tcp->tcp_timercache;

                tcp->tcp_tcps = NULL;

                /*
                 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
                 * the mblk.
                 */
                if (tcp->tcp_rsrv_mp != NULL) {
                        freeb(tcp->tcp_rsrv_mp);
                        tcp->tcp_rsrv_mp = NULL;
                        mutex_destroy(&tcp->tcp_rsrv_mp_lock);
                }

                ipcl_conn_cleanup(connp);
                connp->conn_flags = IPCL_TCPCONN;
                if (ns != NULL) {
                        ASSERT(tcp->tcp_tcps == NULL);
                        connp->conn_netstack = NULL;
                        connp->conn_ixa->ixa_ipst = NULL;
                        netstack_rele(ns);
                }

                bzero(tcp, sizeof (tcp_t));

                /* Re-establish the two fields that survive the bzero. */
                tcp->tcp_timercache = mp;
                tcp->tcp_connp = connp;
                kmem_cache_free(tcp_conn_cache, connp);
                return;
        }

        if (connp->conn_flags & IPCL_SCTPCONN) {
                ASSERT(ns != NULL);
                /* The rest of the SCTP teardown is left to sctp_free(). */
                sctp_free(connp);
                return;
        }

        /* Common tail for UDP, raw IP, routing socket and generic IP conns. */
        ipcl_conn_cleanup(connp);
        if (ns != NULL) {
                connp->conn_netstack = NULL;
                connp->conn_ixa->ixa_ipst = NULL;
                netstack_rele(ns);
        }

        /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
        if (connp->conn_flags & IPCL_UDPCONN) {
                connp->conn_flags = IPCL_UDPCONN;
                kmem_cache_free(udp_conn_cache, connp);
        } else if (connp->conn_flags & IPCL_RAWIPCONN) {
                connp->conn_flags = IPCL_RAWIPCONN;
                /* Raw conns go back to the cache with the protocol reset. */
                connp->conn_proto = IPPROTO_ICMP;
                connp->conn_ixa->ixa_protocol = connp->conn_proto;
                kmem_cache_free(rawip_conn_cache, connp);
        } else if (connp->conn_flags & IPCL_RTSCONN) {
                connp->conn_flags = IPCL_RTSCONN;
                kmem_cache_free(rts_conn_cache, connp);
        } else {
                connp->conn_flags = IPCL_IPCCONN;
                /*
                 * NOTE(review): this assertion directly follows the
                 * assignment above, so it can never fail; it does not
                 * verify the flags the conn arrived with.
                 */
                ASSERT(connp->conn_flags & IPCL_IPCCONN);
                ASSERT(connp->conn_priv == NULL);
                kmem_cache_free(ip_conn_cache, connp);
        }
}
 750 
 751 /*
 752  * Running in cluster mode - deregister listener information
 753  */
 754 static void
 755 ipcl_conn_unlisten(conn_t *connp)
 756 {
 757         ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
 758         ASSERT(connp->conn_lport != 0);
 759 
 760         if (cl_inet_unlisten != NULL) {
 761                 sa_family_t     addr_family;
 762                 uint8_t         *laddrp;
 763 
 764                 if (connp->conn_ipversion == IPV6_VERSION) {
 765                         addr_family = AF_INET6;
 766                         laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
 767                 } else {
 768                         addr_family = AF_INET;
 769                         laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
 770                 }
 771                 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
 772                     IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
 773         }
 774         connp->conn_flags &= ~IPCL_CL_LISTENER;
 775 }
 776 
/*
 * Unlink connp from the fanout bucket it is currently on, if any.
 *
 * The bucket is read from conn_fanout; when that is NULL the macro does
 * nothing.  Otherwise, with the bucket's connf_lock held, the conn is
 * unlinked from the doubly linked list (updating connf_head when it was the
 * first entry), its conn_fanout/conn_next/conn_prev are cleared, a cluster
 * listener registration is withdrawn via ipcl_conn_unlisten() when
 * IPCL_CL_LISTENER is set, and CONN_DEC_REF() is applied to the conn.
 *
 * The caller must not hold conn_lock (asserted).  Note that conn_fanout is
 * sampled before connf_lock is taken.  The connp argument is evaluated
 * several times, so it must be free of side effects, and the macro declares
 * a local named connfp in its own block.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 */
#define IPCL_HASH_REMOVE(connp) {                                       \
        connf_t *connfp = (connp)->conn_fanout;                              \
        ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));                      \
        if (connfp != NULL) {                                           \
                mutex_enter(&connfp->connf_lock);                        \
                if ((connp)->conn_next != NULL)                              \
                        (connp)->conn_next->conn_prev =                   \
                            (connp)->conn_prev;                              \
                if ((connp)->conn_prev != NULL)                              \
                        (connp)->conn_prev->conn_next =                   \
                            (connp)->conn_next;                              \
                else                                                    \
                        connfp->connf_head = (connp)->conn_next;  \
                (connp)->conn_fanout = NULL;                         \
                (connp)->conn_next = NULL;                           \
                (connp)->conn_prev = NULL;                           \
                (connp)->conn_flags |= IPCL_REMOVED;                 \
                if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)       \
                        ipcl_conn_unlisten((connp));                    \
                CONN_DEC_REF((connp));                                  \
                mutex_exit(&connfp->connf_lock);                 \
        }                                                               \
}
 805 
 806 void
 807 ipcl_hash_remove(conn_t *connp)
 808 {
 809         uint8_t         protocol = connp->conn_proto;
 810 
 811         IPCL_HASH_REMOVE(connp);
 812         if (protocol == IPPROTO_RSVP)
 813                 ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
 814 }
 815 
 816 /*
 817  * The whole purpose of this function is allow removal of
 818  * a conn_t from the connected hash for timewait reclaim.
 819  * This is essentially a TW reclaim fastpath where timewait
 820  * collector checks under fanout lock (so no one else can
 821  * get access to the conn_t) that refcnt is 2 i.e. one for
 822  * TCP and one for the classifier hash list. If ref count
 823  * is indeed 2, we can just remove the conn under lock and
 824  * avoid cleaning up the conn under squeue. This gives us
 825  * improved performance.
 826  */
 827 void
 828 ipcl_hash_remove_locked(conn_t *connp, connf_t  *connfp)
 829 {
 830         ASSERT(MUTEX_HELD(&connfp->connf_lock));
 831         ASSERT(MUTEX_HELD(&connp->conn_lock));
 832         ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
 833 
 834         if ((connp)->conn_next != NULL) {
 835                 (connp)->conn_next->conn_prev = (connp)->conn_prev;
 836         }
 837         if ((connp)->conn_prev != NULL) {
 838                 (connp)->conn_prev->conn_next = (connp)->conn_next;
 839         } else {
 840                 connfp->connf_head = (connp)->conn_next;
 841         }
 842         (connp)->conn_fanout = NULL;
 843         (connp)->conn_next = NULL;
 844         (connp)->conn_prev = NULL;
 845         (connp)->conn_flags |= IPCL_REMOVED;
 846         ASSERT((connp)->conn_ref == 2);
 847         (connp)->conn_ref--;
 848 }
 849 
/*
 * Link connp at the head of the list in bucket connfp and mark it
 * IPCL_CONNECTED (clearing IPCL_REMOVED).  A reference is taken on the conn
 * with CONN_INC_REF() on behalf of the list.
 *
 * The conn must not currently be on any fanout: conn_fanout, conn_next and
 * conn_prev are all asserted to be NULL.  The macro does not take
 * connfp->connf_lock itself; the users in this file acquire it before
 * expanding the macro.  Both arguments are evaluated more than once.
 */
#define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {              \
        ASSERT((connp)->conn_fanout == NULL);                                \
        ASSERT((connp)->conn_next == NULL);                          \
        ASSERT((connp)->conn_prev == NULL);                          \
        if ((connfp)->connf_head != NULL) {                          \
                (connfp)->connf_head->conn_prev = (connp);                \
                (connp)->conn_next = (connfp)->connf_head;                \
        }                                                               \
        (connp)->conn_fanout = (connfp);                             \
        (connfp)->connf_head = (connp);                                      \
        (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
            IPCL_CONNECTED;                                             \
        CONN_INC_REF(connp);                                            \
}
 864 
/*
 * Move connp onto bucket connfp as a connected entry: first remove it from
 * any fanout it is currently on (IPCL_HASH_REMOVE), then take
 * connfp->connf_lock and insert it at the head of the list
 * (IPCL_HASH_INSERT_CONNECTED_LOCKED).  The removal and the insertion happen
 * under separate lock acquisitions.
 */
#define IPCL_HASH_INSERT_CONNECTED(connfp, connp) {                     \
        IPCL_HASH_REMOVE((connp));                                      \
        mutex_enter(&(connfp)->connf_lock);                              \
        IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);               \
        mutex_exit(&(connfp)->connf_lock);                               \
}
 871 
/*
 * Move connp onto bucket connfp as a bound entry.
 *
 * The conn is first removed from any fanout it is on (IPCL_HASH_REMOVE).
 * Then, under connfp->connf_lock, the list is walked until the first entry
 * whose conn_laddr_v6 satisfies _IPCL_V4_MATCH_ANY() (defined elsewhere;
 * presumably a wildcard local address - confirm against its definition).
 * connp is linked immediately before that entry, or at the tail of the list
 * when no such entry exists, so it always precedes those entries.
 *
 * Finally the conn records its bucket, IPCL_REMOVED is cleared and
 * IPCL_BOUND is set, and a reference is taken with CONN_INC_REF().
 *
 * The macro declares locals named pconnp and nconnp; both arguments are
 * evaluated more than once.
 */
#define IPCL_HASH_INSERT_BOUND(connfp, connp) {                         \
        conn_t *pconnp = NULL, *nconnp;                                 \
        IPCL_HASH_REMOVE((connp));                                      \
        mutex_enter(&(connfp)->connf_lock);                              \
        nconnp = (connfp)->connf_head;                                       \
        while (nconnp != NULL &&                                        \
            !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {            \
                pconnp = nconnp;                                        \
                nconnp = nconnp->conn_next;                          \
        }                                                               \
        if (pconnp != NULL) {                                           \
                pconnp->conn_next = (connp);                         \
                (connp)->conn_prev = pconnp;                         \
        } else {                                                        \
                (connfp)->connf_head = (connp);                              \
        }                                                               \
        if (nconnp != NULL) {                                           \
                (connp)->conn_next = nconnp;                         \
                nconnp->conn_prev = (connp);                         \
        }                                                               \
        (connp)->conn_fanout = (connfp);                             \
        (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
            IPCL_BOUND;                                                 \
        CONN_INC_REF(connp);                                            \
        mutex_exit(&(connfp)->connf_lock);                               \
}
 898 
 899 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) {                      \
 900         conn_t **list, *prev, *next;                                    \
 901         boolean_t isv4mapped =                                          \
 902             IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);               \
 903         IPCL_HASH_REMOVE((connp));                                      \
 904         mutex_enter(&(connfp)->connf_lock);                              \
 905         list = &(connfp)->connf_head;                                    \
 906         prev = NULL;                                                    \
 907         while ((next = *list) != NULL) {                                \
 908                 if (isv4mapped &&                                       \
 909                     IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&     \
 910                     connp->conn_zoneid == next->conn_zoneid) {            \
 911                         (connp)->conn_next = next;                   \
 912                         if (prev != NULL)                               \
 913                                 prev = next->conn_prev;                      \
 914                         next->conn_prev = (connp);                   \
 915                         break;                                          \
 916                 }                                                       \
 917                 list = &next->conn_next;                         \
 918                 prev = next;                                            \
 919         }                                                               \
 920         (connp)->conn_prev = prev;                                   \
 921         *list = (connp);                                                \
 922         (connp)->conn_fanout = (connfp);                             \
 923         (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
 924             IPCL_BOUND;                                                 \
 925         CONN_INC_REF((connp));                                          \
 926         mutex_exit(&(connfp)->connf_lock);                               \
 927 }
 928 
/*
 * Non-static function wrapper around the IPCL_HASH_INSERT_WILDCARD() macro:
 * removes connp from any fanout it is on and inserts it into bucket connfp
 * as a wildcard entry.
 */
void
ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
{
        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
}
 934 
 935 /*
 936  * Because the classifier is used to classify inbound packets, the destination
 937  * address is meant to be our local tunnel address (tunnel source), and the
 938  * source the remote tunnel address (tunnel destination).
 939  *
 940  * Note that conn_proto can't be used for fanout since the upper protocol
 941  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
 942  */
 943 conn_t *
 944 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
 945 {
 946         connf_t *connfp;
 947         conn_t  *connp;
 948 
 949         /* first look for IPv4 tunnel links */
 950         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
 951         mutex_enter(&connfp->connf_lock);
 952         for (connp = connfp->connf_head; connp != NULL;
 953             connp = connp->conn_next) {
 954                 if (IPCL_IPTUN_MATCH(connp, *dst, *src))
 955                         break;
 956         }
 957         if (connp != NULL)
 958                 goto done;
 959 
 960         mutex_exit(&connfp->connf_lock);
 961 
 962         /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
 963         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
 964             INADDR_ANY)];
 965         mutex_enter(&connfp->connf_lock);
 966         for (connp = connfp->connf_head; connp != NULL;
 967             connp = connp->conn_next) {
 968                 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
 969                         break;
 970         }
 971 done:
 972         if (connp != NULL)
 973                 CONN_INC_REF(connp);
 974         mutex_exit(&connfp->connf_lock);
 975         return (connp);
 976 }
 977 
 978 conn_t *
 979 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
 980 {
 981         connf_t *connfp;
 982         conn_t  *connp;
 983 
 984         /* Look for an IPv6 tunnel link */
 985         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
 986         mutex_enter(&connfp->connf_lock);
 987         for (connp = connfp->connf_head; connp != NULL;
 988             connp = connp->conn_next) {
 989                 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
 990                         CONN_INC_REF(connp);
 991                         break;
 992                 }
 993         }
 994         mutex_exit(&connfp->connf_lock);
 995         return (connp);
 996 }
 997 
 998 /*
 999  * This function is used only for inserting SCTP raw socket now.
1000  * This may change later.
1001  *
1002  * Note that only one raw socket can be bound to a port.  The param
1003  * lport is in network byte order.
1004  */
1005 static int
1006 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1007 {
1008         connf_t *connfp;
1009         conn_t  *oconnp;
1010         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1011 
1012         connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1013 
1014         /* Check for existing raw socket already bound to the port. */
1015         mutex_enter(&connfp->connf_lock);
1016         for (oconnp = connfp->connf_head; oconnp != NULL;
1017             oconnp = oconnp->conn_next) {
1018                 if (oconnp->conn_lport == lport &&
1019                     oconnp->conn_zoneid == connp->conn_zoneid &&
1020                     oconnp->conn_family == connp->conn_family &&
1021                     ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1022                     IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1023                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1024                     IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1025                     IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1026                     &connp->conn_laddr_v6))) {
1027                         break;
1028                 }
1029         }
1030         mutex_exit(&connfp->connf_lock);
1031         if (oconnp != NULL)
1032                 return (EADDRNOTAVAIL);
1033 
1034         if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1035             IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1036                 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1037                     IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1038                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1039                 } else {
1040                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1041                 }
1042         } else {
1043                 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1044         }
1045         return (0);
1046 }
1047 
1048 static int
1049 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1050 {
1051         connf_t *connfp;
1052         conn_t  *tconnp;
1053         ipaddr_t laddr = connp->conn_laddr_v4;
1054         ipaddr_t faddr = connp->conn_faddr_v4;
1055 
1056         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1057         mutex_enter(&connfp->connf_lock);
1058         for (tconnp = connfp->connf_head; tconnp != NULL;
1059             tconnp = tconnp->conn_next) {
1060                 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1061                         /* A tunnel is already bound to these addresses. */
1062                         mutex_exit(&connfp->connf_lock);
1063                         return (EADDRINUSE);
1064                 }
1065         }
1066         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1067         mutex_exit(&connfp->connf_lock);
1068         return (0);
1069 }
1070 
1071 static int
1072 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1073 {
1074         connf_t *connfp;
1075         conn_t  *tconnp;
1076         in6_addr_t *laddr = &connp->conn_laddr_v6;
1077         in6_addr_t *faddr = &connp->conn_faddr_v6;
1078 
1079         connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1080         mutex_enter(&connfp->connf_lock);
1081         for (tconnp = connfp->connf_head; tconnp != NULL;
1082             tconnp = tconnp->conn_next) {
1083                 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1084                         /* A tunnel is already bound to these addresses. */
1085                         mutex_exit(&connfp->connf_lock);
1086                         return (EADDRINUSE);
1087                 }
1088         }
1089         IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1090         mutex_exit(&connfp->connf_lock);
1091         return (0);
1092 }
1093 
1094 /*
1095  * Check for a MAC exemption conflict on a labeled system.  Note that for
1096  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1097  * transport layer.  This check is for binding all other protocols.
1098  *
1099  * Returns true if there's a conflict.
1100  */
1101 static boolean_t
1102 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1103 {
1104         connf_t *connfp;
1105         conn_t *tconn;
1106 
1107         connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1108         mutex_enter(&connfp->connf_lock);
1109         for (tconn = connfp->connf_head; tconn != NULL;
1110             tconn = tconn->conn_next) {
1111                 /* We don't allow v4 fallback for v6 raw socket */
1112                 if (connp->conn_family != tconn->conn_family)
1113                         continue;
1114                 /* If neither is exempt, then there's no conflict */
1115                 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1116                     (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1117                         continue;
1118                 /* We are only concerned about sockets for a different zone */
1119                 if (connp->conn_zoneid == tconn->conn_zoneid)
1120                         continue;
1121                 /* If both are bound to different specific addrs, ok */
1122                 if (connp->conn_laddr_v4 != INADDR_ANY &&
1123                     tconn->conn_laddr_v4 != INADDR_ANY &&
1124                     connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1125                         continue;
1126                 /* These two conflict; fail */
1127                 break;
1128         }
1129         mutex_exit(&connfp->connf_lock);
1130         return (tconn != NULL);
1131 }
1132 
1133 static boolean_t
1134 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1135 {
1136         connf_t *connfp;
1137         conn_t *tconn;
1138 
1139         connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1140         mutex_enter(&connfp->connf_lock);
1141         for (tconn = connfp->connf_head; tconn != NULL;
1142             tconn = tconn->conn_next) {
1143                 /* We don't allow v4 fallback for v6 raw socket */
1144                 if (connp->conn_family != tconn->conn_family)
1145                         continue;
1146                 /* If neither is exempt, then there's no conflict */
1147                 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1148                     (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1149                         continue;
1150                 /* We are only concerned about sockets for a different zone */
1151                 if (connp->conn_zoneid == tconn->conn_zoneid)
1152                         continue;
1153                 /* If both are bound to different addrs, ok */
1154                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1155                     !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1156                     !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1157                     &tconn->conn_laddr_v6))
1158                         continue;
1159                 /* These two conflict; fail */
1160                 break;
1161         }
1162         mutex_exit(&connfp->connf_lock);
1163         return (tconn != NULL);
1164 }
1165 
1166 /*
1167  * (v4, v6) bind hash insertion routines
1168  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1169  */
1170 
1171 int
1172 ipcl_bind_insert(conn_t *connp)
1173 {
1174         if (connp->conn_ipversion == IPV6_VERSION)
1175                 return (ipcl_bind_insert_v6(connp));
1176         else
1177                 return (ipcl_bind_insert_v4(connp));
1178 }
1179 
1180 int
1181 ipcl_bind_insert_v4(conn_t *connp)
1182 {
1183         connf_t *connfp;
1184         int     ret = 0;
1185         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1186         uint16_t        lport = connp->conn_lport;
1187         uint8_t         protocol = connp->conn_proto;
1188 
1189         if (IPCL_IS_IPTUN(connp))
1190                 return (ipcl_iptun_hash_insert(connp, ipst));
1191 
1192         switch (protocol) {
1193         default:
1194                 if (is_system_labeled() &&
1195                     check_exempt_conflict_v4(connp, ipst))
1196                         return (EADDRINUSE);
1197                 /* FALLTHROUGH */
1198         case IPPROTO_UDP:
1199                 if (protocol == IPPROTO_UDP) {
1200                         connfp = &ipst->ips_ipcl_udp_fanout[
1201                             IPCL_UDP_HASH(lport, ipst)];
1202                 } else {
1203                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1204                 }
1205 
1206                 if (connp->conn_faddr_v4 != INADDR_ANY) {
1207                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1208                 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1209                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1210                 } else {
1211                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1212                 }
1213                 if (protocol == IPPROTO_RSVP)
1214                         ill_set_inputfn_all(ipst);
1215                 break;
1216 
1217         case IPPROTO_TCP:
1218                 /* Insert it in the Bind Hash */
1219                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1220                 connfp = &ipst->ips_ipcl_bind_fanout[
1221                     IPCL_BIND_HASH(lport, ipst)];
1222                 if (connp->conn_laddr_v4 != INADDR_ANY) {
1223                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1224                 } else {
1225                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1226                 }
1227                 if (cl_inet_listen != NULL) {
1228                         ASSERT(connp->conn_ipversion == IPV4_VERSION);
1229                         connp->conn_flags |= IPCL_CL_LISTENER;
1230                         (*cl_inet_listen)(
1231                             connp->conn_netstack->netstack_stackid,
1232                             IPPROTO_TCP, AF_INET,
1233                             (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1234                 }
1235                 break;
1236 
1237         case IPPROTO_SCTP:
1238                 ret = ipcl_sctp_hash_insert(connp, lport);
1239                 break;
1240         }
1241 
1242         return (ret);
1243 }
1244 
1245 int
1246 ipcl_bind_insert_v6(conn_t *connp)
1247 {
1248         connf_t         *connfp;
1249         int             ret = 0;
1250         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1251         uint16_t        lport = connp->conn_lport;
1252         uint8_t         protocol = connp->conn_proto;
1253 
1254         if (IPCL_IS_IPTUN(connp)) {
1255                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1256         }
1257 
1258         switch (protocol) {
1259         default:
1260                 if (is_system_labeled() &&
1261                     check_exempt_conflict_v6(connp, ipst))
1262                         return (EADDRINUSE);
1263                 /* FALLTHROUGH */
1264         case IPPROTO_UDP:
1265                 if (protocol == IPPROTO_UDP) {
1266                         connfp = &ipst->ips_ipcl_udp_fanout[
1267                             IPCL_UDP_HASH(lport, ipst)];
1268                 } else {
1269                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1270                 }
1271 
1272                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1273                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1274                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1275                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1276                 } else {
1277                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1278                 }
1279                 break;
1280 
1281         case IPPROTO_TCP:
1282                 /* Insert it in the Bind Hash */
1283                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1284                 connfp = &ipst->ips_ipcl_bind_fanout[
1285                     IPCL_BIND_HASH(lport, ipst)];
1286                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1287                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1288                 } else {
1289                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1290                 }
1291                 if (cl_inet_listen != NULL) {
1292                         sa_family_t     addr_family;
1293                         uint8_t         *laddrp;
1294 
1295                         if (connp->conn_ipversion == IPV6_VERSION) {
1296                                 addr_family = AF_INET6;
1297                                 laddrp =
1298                                     (uint8_t *)&connp->conn_bound_addr_v6;
1299                         } else {
1300                                 addr_family = AF_INET;
1301                                 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1302                         }
1303                         connp->conn_flags |= IPCL_CL_LISTENER;
1304                         (*cl_inet_listen)(
1305                             connp->conn_netstack->netstack_stackid,
1306                             IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1307                 }
1308                 break;
1309 
1310         case IPPROTO_SCTP:
1311                 ret = ipcl_sctp_hash_insert(connp, lport);
1312                 break;
1313         }
1314 
1315         return (ret);
1316 }
1317 
1318 /*
1319  * ipcl_conn_hash insertion routines.
1320  * The caller has already set conn_proto and the addresses/ports in the conn_t.
1321  */
1322 
1323 int
1324 ipcl_conn_insert(conn_t *connp)
1325 {
1326         if (connp->conn_ipversion == IPV6_VERSION)
1327                 return (ipcl_conn_insert_v6(connp));
1328         else
1329                 return (ipcl_conn_insert_v4(connp));
1330 }
1331 
/*
 * Insert an IPv4 conn into the fanout appropriate for a connect.
 *
 *      o IP tunnel conns are handed to ipcl_iptun_hash_insert().
 *      o TCP conns go into the connection fanout, after checking under the
 *        bucket lock that no conn with the same tuple and zone is already
 *        there.
 *      o SCTP conns are removed from any fanout they are on and handed to
 *        ipcl_sctp_hash_insert().
 *      o UDP conns go into the UDP fanout hashed on the local port; all
 *        remaining protocols go into the per-protocol IPv4 fanout after a
 *        MAC exemption conflict check on labeled systems.  Either way the
 *        entry is connected, bound or wildcard depending on which of the
 *        foreign and local addresses are set.
 *
 * Returns 0 on success, EADDRINUSE if the TCP tuple already exists or a MAC
 * exemption conflict is found, or the error returned by the tunnel/SCTP
 * insertion routine.
 */
int
ipcl_conn_insert_v4(conn_t *connp)
{
        connf_t         *connfp;
        conn_t          *tconnp;
        int             ret = 0;
        ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
        uint16_t        lport = connp->conn_lport;
        uint8_t         protocol = connp->conn_proto;

        if (IPCL_IS_IPTUN(connp))
                return (ipcl_iptun_hash_insert(connp, ipst));

        switch (protocol) {
        case IPPROTO_TCP:
                /*
                 * For TCP, we check whether the connection tuple already
                 * exists before allowing the connection to proceed.  We
                 * also allow indexing on the zoneid. This is to allow
                 * multiple shared stack zones to have the same tcp
                 * connection tuple. In practice this only happens for
                 * INADDR_LOOPBACK as it's the only local address which
                 * doesn't have to be unique.
                 */
                connfp = &ipst->ips_ipcl_conn_fanout[
                    IPCL_CONN_HASH(connp->conn_faddr_v4,
                    connp->conn_ports, ipst)];
                mutex_enter(&connfp->connf_lock);
                for (tconnp = connfp->connf_head; tconnp != NULL;
                    tconnp = tconnp->conn_next) {
                        if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
                            connp->conn_faddr_v4, connp->conn_laddr_v4,
                            connp->conn_ports) &&
                            IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
                                /* Already have a conn. bail out */
                                mutex_exit(&connfp->connf_lock);
                                return (EADDRINUSE);
                        }
                }
                if (connp->conn_fanout != NULL) {
                        /*
                         * Probably a XTI/TLI application trying to do a
                         * rebind. Let it happen.
                         *
                         * The bucket lock is released around the removal
                         * and then retaken (IPCL_HASH_REMOVE takes the lock
                         * of the bucket the conn is currently on).
                         * NOTE(review): the duplicate scan above is not
                         * repeated after the lock is retaken - confirm that
                         * callers serialize inserts of the same tuple.
                         */
                        mutex_exit(&connfp->connf_lock);
                        IPCL_HASH_REMOVE(connp);
                        mutex_enter(&connfp->connf_lock);
                }

                ASSERT(connp->conn_recv != NULL);
                ASSERT(connp->conn_recvicmp != NULL);

                /* Still holding connf_lock: link at the head of the bucket. */
                IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
                mutex_exit(&connfp->connf_lock);
                break;

        case IPPROTO_SCTP:
                /*
                 * The raw socket may have already been bound, remove it
                 * from the hash first.
                 */
                IPCL_HASH_REMOVE(connp);
                ret = ipcl_sctp_hash_insert(connp, lport);
                break;

        default:
                /*
                 * Check for conflicts among MAC exempt bindings.  For
                 * transports with port numbers, this is done by the upper
                 * level per-transport binding logic.  For all others, it's
                 * done here.
                 */
                if (is_system_labeled() &&
                    check_exempt_conflict_v4(connp, ipst))
                        return (EADDRINUSE);
                /* FALLTHROUGH */

        case IPPROTO_UDP:
                /*
                 * Reached for UDP directly and, via the fallthrough above,
                 * for every other protocol; only UDP uses the port-hashed
                 * UDP fanout.
                 */
                if (protocol == IPPROTO_UDP) {
                        connfp = &ipst->ips_ipcl_udp_fanout[
                            IPCL_UDP_HASH(lport, ipst)];
                } else {
                        connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
                }

                /* Pick the insertion flavour from which addresses are set. */
                if (connp->conn_faddr_v4 != INADDR_ANY) {
                        IPCL_HASH_INSERT_CONNECTED(connfp, connp);
                } else if (connp->conn_laddr_v4 != INADDR_ANY) {
                        IPCL_HASH_INSERT_BOUND(connfp, connp);
                } else {
                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
                }
                break;
        }

        return (ret);
}
1429 
/*
 * Insert connp into the classifier fanout that matches its protocol, using
 * its IPv6 addresses.
 *
 *  - IP tunnel conns are handed straight to ipcl_iptun_hash_insert_v6().
 *  - TCP conns go into the connected fanout, but only after checking that
 *    no conn with the same tuple, a compatible bound interface and a
 *    matching zone is already hashed there.
 *  - SCTP conns are removed from their current hash and passed to
 *    ipcl_sctp_hash_insert().
 *  - UDP conns use the UDP fanout bucket selected by the local port; every
 *    other protocol uses the per-protocol v6 fanout.  In both cases the
 *    connected/bound/wildcard insert is picked from whether the foreign and
 *    local addresses are specified.
 *
 * Returns 0 on success, EADDRINUSE when a conflicting conn is found (TCP
 * duplicate tuple, or check_exempt_conflict_v6() on a labeled system), or
 * the value returned by the iptun/SCTP insert routine.
 */
int
ipcl_conn_insert_v6(conn_t *connp)
{
        connf_t         *connfp;
        conn_t          *tconnp;
        int             ret = 0;
        ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
        uint16_t        lport = connp->conn_lport;
        uint8_t         protocol = connp->conn_proto;
        uint_t          ifindex = connp->conn_bound_if;

        /* Tunnel conns are hashed separately; none of the code below runs. */
        if (IPCL_IS_IPTUN(connp))
                return (ipcl_iptun_hash_insert_v6(connp, ipst));

        switch (protocol) {
        case IPPROTO_TCP:

                /*
                 * For tcp, we check whether the connection tuple already
                 * exists before allowing the connection to proceed.  We
                 * also allow indexing on the zoneid. This is to allow
                 * multiple shared stack zones to have the same tcp
                 * connection tuple. In practice this only happens for
                 * ipv6_loopback as it's the only local address which
                 * doesn't have to be unique.
                 */
                connfp = &ipst->ips_ipcl_conn_fanout[
                    IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
                    ipst)];
                mutex_enter(&connfp->connf_lock);
                for (tconnp = connfp->connf_head; tconnp != NULL;
                    tconnp = tconnp->conn_next) {
                        /* NOTE: need to match zoneid. Bug in onnv-gate */
                        if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
                            connp->conn_faddr_v6, connp->conn_laddr_v6,
                            connp->conn_ports) &&
                            (tconnp->conn_bound_if == 0 ||
                            tconnp->conn_bound_if == ifindex) &&
                            IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
                                /* Already have a conn. bail out */
                                mutex_exit(&connfp->connf_lock);
                                return (EADDRINUSE);
                        }
                }
                if (connp->conn_fanout != NULL) {
                        /*
                         * Probably a XTI/TLI application trying to do a
                         * rebind. Let it happen.
                         *
                         * The bucket lock is dropped around the remove and
                         * retaken afterwards; the duplicate check above is
                         * not repeated after the lock is reacquired.
                         * NOTE(review): presumably IPCL_HASH_REMOVE takes
                         * the lock of the fanout connp is currently on --
                         * confirm against the macro definition.
                         */
                        mutex_exit(&connfp->connf_lock);
                        IPCL_HASH_REMOVE(connp);
                        mutex_enter(&connfp->connf_lock);
                }
                /* The _LOCKED variant is used as connf_lock is held here. */
                IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
                mutex_exit(&connfp->connf_lock);
                break;

        case IPPROTO_SCTP:
                /* SCTP: leave the current hash, then insert by local port. */
                IPCL_HASH_REMOVE(connp);
                ret = ipcl_sctp_hash_insert(connp, lport);
                break;

        default:
                /*
                 * Protocols other than TCP/SCTP/UDP: on a labeled system,
                 * refuse the insert if check_exempt_conflict_v6() reports a
                 * conflict, then share the insert logic with UDP below.
                 */
                if (is_system_labeled() &&
                    check_exempt_conflict_v6(connp, ipst))
                        return (EADDRINUSE);
                /* FALLTHROUGH */
        case IPPROTO_UDP:
                /*
                 * UDP is hashed on the local port; everything else reaching
                 * this point is indexed directly by protocol number.
                 */
                if (protocol == IPPROTO_UDP) {
                        connfp = &ipst->ips_ipcl_udp_fanout[
                            IPCL_UDP_HASH(lport, ipst)];
                } else {
                        connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
                }

                /*
                 * Foreign address set -> connected insert; else local
                 * address set -> bound insert; else wildcard insert.
                 */
                if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
                        IPCL_HASH_INSERT_CONNECTED(connfp, connp);
                } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
                        IPCL_HASH_INSERT_BOUND(connfp, connp);
                } else {
                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
                }
                break;
        }

        return (ret);
}
1517 
/*
 * v4 packet classifying function.  Looks up the fanout tables to find the
 * conn the packet belongs to.  Returns the conn with a reference held
 * (CONN_INC_REF), or NULL when nothing matches.
 *
 *      mp        packet; b_rptr is read as the start of the IPv4 header
 *      protocol  transport protocol used to pick the lookup strategy
 *      hdr_len   added to the IPv4 header address to locate the ports
 *      ira       receive attributes; supplies the zoneid and the IRAF_*
 *                flags consulted below
 *      ipst      IP stack instance owning the fanout tables
 *
 * TCP is looked up first in the connected fanout and then, if that fails,
 * in the bind fanout.  UDP uses the UDP fanout only.  IPPROTO_ENCAP and
 * IPPROTO_IPV6 are passed to ipcl_iptun_classify_v4().  Any other protocol
 * yields NULL.
 *
 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
 * Lookup" comment block are applied.  Labels are also checked as described
 * above.  If the packet is from the inside (looped back), and is from the same
 * zone, then label checks are omitted.
 */
conn_t *
ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
    ip_recv_attr_t *ira, ip_stack_t *ipst)
{
        ipha_t  *ipha;
        connf_t *connfp, *bind_connfp;
        uint16_t lport;
        uint16_t fport;
        uint32_t ports;
        conn_t  *connp;
        uint16_t  *up;
        zoneid_t        zoneid = ira->ira_zoneid;

        ipha = (ipha_t *)mp->b_rptr;
        /*
         * up addresses the port pair: up[0] is used as the foreign port and
         * up[1] as the local port below.  The same pointer is used for UDP.
         */
        up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);

        switch (protocol) {
        case IPPROTO_TCP:
                /* Both 16-bit ports are read as a single 32-bit value. */
                ports = *(uint32_t *)up;
                connfp =
                    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
                    ports, ipst)];
                mutex_enter(&connfp->connf_lock);
                /*
                 * A conn is accepted when the tuple matches and either the
                 * zone matches, the conn is all-zones, or the conn is not
                 * in CONN_MAC_DEFAULT mode and the packet carries both the
                 * MAC-exemptable and shared-address flags.
                 */
                for (connp = connfp->connf_head; connp != NULL;
                    connp = connp->conn_next) {
                        if (IPCL_CONN_MATCH(connp, protocol,
                            ipha->ipha_src, ipha->ipha_dst, ports) &&
                            (connp->conn_zoneid == zoneid ||
                            connp->conn_allzones ||
                            ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
                            (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
                            (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
                                break;
                }

                if (connp != NULL) {
                        /*
                         * We have a fully-bound TCP connection.
                         *
                         * For labeled systems, there's no need to check the
                         * label here.  It's known to be good as we checked
                         * before allowing the connection to become bound.
                         *
                         * The reference is taken before the bucket lock is
                         * released.
                         */
                        CONN_INC_REF(connp);
                        mutex_exit(&connfp->connf_lock);
                        return (connp);
                }

                /*
                 * No connected match.  Release the conn fanout bucket and
                 * search the bind fanout bucket selected by the local port.
                 */
                mutex_exit(&connfp->connf_lock);
                lport = up[1];
                bind_connfp =
                    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
                mutex_enter(&bind_connfp->connf_lock);
                for (connp = bind_connfp->connf_head; connp != NULL;
                    connp = connp->conn_next) {
                        if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
                            lport) &&
                            (connp->conn_zoneid == zoneid ||
                            connp->conn_allzones ||
                            ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
                            (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
                            (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
                                break;
                }

                /*
                 * If the matching connection is SLP on a private address, then
                 * the label on the packet must match the local zone's label.
                 * Otherwise, it must be in the label range defined by tnrh.
                 * This is ensured by tsol_receive_local.
                 *
                 * Note that we don't check tsol_receive_local for
                 * the connected case.
                 */
                if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
                    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
                    ira, connp)) {
                        DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
                            char *, "connp(1) could not receive mp(2)",
                            conn_t *, connp, mblk_t *, mp);
                        /* Label check failed: treat as no match. */
                        connp = NULL;
                }

                if (connp != NULL) {
                        /* Have a listener at least */
                        CONN_INC_REF(connp);
                        mutex_exit(&bind_connfp->connf_lock);
                        return (connp);
                }

                mutex_exit(&bind_connfp->connf_lock);
                break;

        case IPPROTO_UDP:
                lport = up[1];
                fport = up[0];
                connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
                mutex_enter(&connfp->connf_lock);
                /*
                 * NOTE(review): unlike the TCP lookups above and the UDP
                 * lookup in ipcl_classify_v6(), the MAC-exempt clause here
                 * does not also require IRAF_TX_SHARED_ADDR -- confirm this
                 * difference is intended.
                 */
                for (connp = connfp->connf_head; connp != NULL;
                    connp = connp->conn_next) {
                        if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
                            fport, ipha->ipha_src) &&
                            (connp->conn_zoneid == zoneid ||
                            connp->conn_allzones ||
                            ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
                            (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
                                break;
                }

                /* On a labeled packet the match must pass the label check. */
                if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
                    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
                    ira, connp)) {
                        DTRACE_PROBE3(tx__ip__log__info__classify__udp,
                            char *, "connp(1) could not receive mp(2)",
                            conn_t *, connp, mblk_t *, mp);
                        connp = NULL;
                }

                if (connp != NULL) {
                        CONN_INC_REF(connp);
                        mutex_exit(&connfp->connf_lock);
                        return (connp);
                }

                /*
                 * We shouldn't come here for multicast/broadcast packets
                 */
                mutex_exit(&connfp->connf_lock);

                break;

        case IPPROTO_ENCAP:
        case IPPROTO_IPV6:
                /* Tunnelled payloads are classified on addresses only. */
                return (ipcl_iptun_classify_v4(&ipha->ipha_src,
                    &ipha->ipha_dst, ipst));
        }

        /* Unhandled protocol, or no conn matched. */
        return (NULL);
}
1667 
/*
 * v6 packet classifying function; the IPv6 counterpart of
 * ipcl_classify_v4().  Returns the matching conn with a reference held
 * (CONN_INC_REF), or NULL when nothing matches.
 *
 *      mp        packet; b_rptr is read as the start of the IPv6 header
 *      protocol  transport protocol used to pick the lookup strategy
 *      hdr_len   offset from b_rptr to the transport header
 *      ira       receive attributes; supplies the zoneid and the IRAF_*
 *                flags consulted below
 *      ipst      IP stack instance owning the fanout tables
 *
 * TCP is looked up first in the connected fanout and then in the bind
 * fanout.  UDP uses the UDP fanout only.  IPPROTO_ENCAP and IPPROTO_IPV6
 * are passed to ipcl_iptun_classify_v6().  Any other protocol yields NULL.
 */
conn_t *
ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
    ip_recv_attr_t *ira, ip_stack_t *ipst)
{
        ip6_t           *ip6h;
        connf_t         *connfp, *bind_connfp;
        uint16_t        lport;
        uint16_t        fport;
        tcpha_t         *tcpha;
        uint32_t        ports;
        conn_t          *connp;
        uint16_t        *up;
        zoneid_t        zoneid = ira->ira_zoneid;

        ip6h = (ip6_t *)mp->b_rptr;

        switch (protocol) {
        case IPPROTO_TCP:
                tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
                /*
                 * up starts at tha_lport; the 32-bit read below picks up
                 * that field together with the 16 bits that follow it, and
                 * up[1] is used later as the local port for the bind lookup.
                 */
                up = &tcpha->tha_lport;
                ports = *(uint32_t *)up;

                connfp =
                    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
                    ports, ipst)];
                mutex_enter(&connfp->connf_lock);
                /*
                 * A conn is accepted when the tuple matches and either the
                 * zone matches, the conn is all-zones, or the conn is not
                 * in CONN_MAC_DEFAULT mode and the packet carries both the
                 * MAC-exemptable and shared-address flags.
                 */
                for (connp = connfp->connf_head; connp != NULL;
                    connp = connp->conn_next) {
                        if (IPCL_CONN_MATCH_V6(connp, protocol,
                            ip6h->ip6_src, ip6h->ip6_dst, ports) &&
                            (connp->conn_zoneid == zoneid ||
                            connp->conn_allzones ||
                            ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
                            (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
                            (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
                                break;
                }

                if (connp != NULL) {
                        /*
                         * We have a fully-bound TCP connection.
                         *
                         * For labeled systems, there's no need to check the
                         * label here.  It's known to be good as we checked
                         * before allowing the connection to become bound.
                         *
                         * The reference is taken before the bucket lock is
                         * released.
                         */
                        CONN_INC_REF(connp);
                        mutex_exit(&connfp->connf_lock);
                        return (connp);
                }

                /*
                 * No connected match.  Release the conn fanout bucket and
                 * search the bind fanout bucket selected by the local port.
                 */
                mutex_exit(&connfp->connf_lock);

                lport = up[1];
                bind_connfp =
                    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
                mutex_enter(&bind_connfp->connf_lock);
                for (connp = bind_connfp->connf_head; connp != NULL;
                    connp = connp->conn_next) {
                        if (IPCL_BIND_MATCH_V6(connp, protocol,
                            ip6h->ip6_dst, lport) &&
                            (connp->conn_zoneid == zoneid ||
                            connp->conn_allzones ||
                            ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
                            (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
                            (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
                                break;
                }

                /*
                 * On a labeled packet a bound match must also pass
                 * tsol_receive_local(); otherwise it is discarded.
                 */
                if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
                    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
                    ira, connp)) {
                        DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
                            char *, "connp(1) could not receive mp(2)",
                            conn_t *, connp, mblk_t *, mp);
                        connp = NULL;
                }

                if (connp != NULL) {
                        /* Have a listener at least */
                        CONN_INC_REF(connp);
                        mutex_exit(&bind_connfp->connf_lock);
                        return (connp);
                }

                mutex_exit(&bind_connfp->connf_lock);
                break;

        case IPPROTO_UDP:
                /* up[0] is used as the foreign port, up[1] as the local. */
                up = (uint16_t *)&mp->b_rptr[hdr_len];
                lport = up[1];
                fport = up[0];
                connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
                mutex_enter(&connfp->connf_lock);
                for (connp = connfp->connf_head; connp != NULL;
                    connp = connp->conn_next) {
                        if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
                            fport, ip6h->ip6_src) &&
                            (connp->conn_zoneid == zoneid ||
                            connp->conn_allzones ||
                            ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
                            (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
                            (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
                                break;
                }

                /* On a labeled packet the match must pass the label check. */
                if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
                    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
                    ira, connp)) {
                        DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
                            char *, "connp(1) could not receive mp(2)",
                            conn_t *, connp, mblk_t *, mp);
                        connp = NULL;
                }

                if (connp != NULL) {
                        CONN_INC_REF(connp);
                        mutex_exit(&connfp->connf_lock);
                        return (connp);
                }

                /*
                 * We shouldn't come here for multicast/broadcast packets
                 */
                mutex_exit(&connfp->connf_lock);
                break;
        case IPPROTO_ENCAP:
        case IPPROTO_IPV6:
                /* Tunnelled payloads are classified on addresses only. */
                return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
                    &ip6h->ip6_dst, ipst));
        }

        /* Unhandled protocol, or no conn matched. */
        return (NULL);
}
1802 
1803 /*
1804  * wrapper around ipcl_classify_(v4,v6) routines.
1805  */
1806 conn_t *
1807 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1808 {
1809         if (ira->ira_flags & IRAF_IS_IPV4) {
1810                 return (ipcl_classify_v4(mp, ira->ira_protocol,
1811                     ira->ira_ip_hdr_length, ira, ipst));
1812         } else {
1813                 return (ipcl_classify_v6(mp, ira->ira_protocol,
1814                     ira->ira_ip_hdr_length, ira, ipst));
1815         }
1816 }
1817 
/*
 * Only used to classify SCTP RAW sockets
 *
 * Looks for a conn in the raw fanout in two passes and returns it with a
 * reference held (CONN_INC_REF), or NULL if neither pass finds one.
 *
 *      mp        packet, passed to tsol_receive_local() for label checks
 *      protocol  protocol number handed to the match macros
 *      ports     port pair; the second 16-bit half is used as the local port
 *      ipha      IPv4 header, dereferenced only when IRAF_IS_IPV4 is set
 *      ip6h      IPv6 header, dereferenced only when IRAF_IS_IPV4 is clear
 *      ira       receive attributes; supplies the zoneid and IRAF_* flags
 *      ipst      IP stack instance owning the raw fanout
 *
 * Pass 1 searches the bucket selected by the local port.  Conns with a
 * specified foreign address are compared on the full tuple, the rest on the
 * local address and port; the zone / MAC-exempt test and, for labeled
 * packets, tsol_receive_local() must also pass.
 *
 * Pass 2 searches the bucket for port 0, using IPCL_ZONE_MATCH and
 * IPCL_RAW_MATCH[_V6]; no tsol_receive_local() check is made on that path.
 */
conn_t *
ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
    ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
{
        connf_t         *connfp;
        conn_t          *connp;
        in_port_t       lport;
        int             ipversion;
        const void      *dst;
        zoneid_t        zoneid = ira->ira_zoneid;

        /* Local port is the second 16-bit half of ports, as stored. */
        lport = ((uint16_t *)&ports)[1];
        if (ira->ira_flags & IRAF_IS_IPV4) {
                dst = (const void *)&ipha->ipha_dst;
                ipversion = IPV4_VERSION;
        } else {
                dst = (const void *)&ip6h->ip6_dst;
                ipversion = IPV6_VERSION;
        }

        /* Pass 1: bucket hashed on the host-order local port. */
        connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
        mutex_enter(&connfp->connf_lock);
        for (connp = connfp->connf_head; connp != NULL;
            connp = connp->conn_next) {
                /* We don't allow v4 fallback for v6 raw socket. */
                if (ipversion != connp->conn_ipversion)
                        continue;
                if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
                    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
                        /* Foreign address is set: compare the full tuple. */
                        if (ipversion == IPV4_VERSION) {
                                if (!IPCL_CONN_MATCH(connp, protocol,
                                    ipha->ipha_src, ipha->ipha_dst, ports))
                                        continue;
                        } else {
                                if (!IPCL_CONN_MATCH_V6(connp, protocol,
                                    ip6h->ip6_src, ip6h->ip6_dst, ports))
                                        continue;
                        }
                } else {
                        /* No foreign address: compare local addr and port. */
                        if (ipversion == IPV4_VERSION) {
                                if (!IPCL_BIND_MATCH(connp, protocol,
                                    ipha->ipha_dst, lport))
                                        continue;
                        } else {
                                if (!IPCL_BIND_MATCH_V6(connp, protocol,
                                    ip6h->ip6_dst, lport))
                                        continue;
                        }
                }

                /*
                 * Address match; accept only if the zone matches, the conn
                 * is all-zones, or the MAC-exempt conditions hold.
                 */
                if (connp->conn_zoneid == zoneid ||
                    connp->conn_allzones ||
                    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
                    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
                    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
                        break;
        }

        /* On a labeled packet the match must pass the label check. */
        if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
            !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
                DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
                    char *, "connp(1) could not receive mp(2)",
                    conn_t *, connp, mblk_t *, mp);
                connp = NULL;
        }

        /* Jumps to found with the pass-1 bucket lock still held. */
        if (connp != NULL)
                goto found;
        mutex_exit(&connfp->connf_lock);

        /* Try to look for a wildcard SCTP RAW socket match. */
        connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
        mutex_enter(&connfp->connf_lock);
        for (connp = connfp->connf_head; connp != NULL;
            connp = connp->conn_next) {
                /* We don't allow v4 fallback for v6 raw socket. */
                if (ipversion != connp->conn_ipversion)
                        continue;
                if (!IPCL_ZONE_MATCH(connp, zoneid))
                        continue;

                if (ipversion == IPV4_VERSION) {
                        if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
                                break;
                } else {
                        if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
                                break;
                        }
                }
        }

        /* Jumps to found with the pass-2 bucket lock still held. */
        if (connp != NULL)
                goto found;

        mutex_exit(&connfp->connf_lock);
        return (NULL);

found:
        /*
         * connfp is whichever bucket produced the match and its lock is
         * held; take the reference first, then release the lock.
         */
        ASSERT(connp != NULL);
        CONN_INC_REF(connp);
        mutex_exit(&connfp->connf_lock);
        return (connp);
}
1924 
1925 /* ARGSUSED */
1926 static int
1927 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1928 {
1929         itc_t   *itc = (itc_t *)buf;
1930         conn_t  *connp = &itc->itc_conn;
1931         tcp_t   *tcp = (tcp_t *)&itc[1];
1932 
1933         bzero(connp, sizeof (conn_t));
1934         bzero(tcp, sizeof (tcp_t));
1935 
1936         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1937         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1938         cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1939         tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1940         if (tcp->tcp_timercache == NULL)
1941                 return (ENOMEM);
1942         connp->conn_tcp = tcp;
1943         connp->conn_flags = IPCL_TCPCONN;
1944         connp->conn_proto = IPPROTO_TCP;
1945         tcp->tcp_connp = connp;
1946         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1947 
1948         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1949         if (connp->conn_ixa == NULL) {
1950                 tcp_timermp_free(tcp);
1951                 return (ENOMEM);
1952         }
1953         connp->conn_ixa->ixa_refcnt = 1;
1954         connp->conn_ixa->ixa_protocol = connp->conn_proto;
1955         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1956         return (0);
1957 }
1958 
1959 /* ARGSUSED */
1960 static void
1961 tcp_conn_destructor(void *buf, void *cdrarg)
1962 {
1963         itc_t   *itc = (itc_t *)buf;
1964         conn_t  *connp = &itc->itc_conn;
1965         tcp_t   *tcp = (tcp_t *)&itc[1];
1966 
1967         ASSERT(connp->conn_flags & IPCL_TCPCONN);
1968         ASSERT(tcp->tcp_connp == connp);
1969         ASSERT(connp->conn_tcp == tcp);
1970         tcp_timermp_free(tcp);
1971         mutex_destroy(&connp->conn_lock);
1972         cv_destroy(&connp->conn_cv);
1973         cv_destroy(&connp->conn_sq_cv);
1974         rw_destroy(&connp->conn_ilg_lock);
1975 
1976         /* Can be NULL if constructor failed */
1977         if (connp->conn_ixa != NULL) {
1978                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1979                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
1980                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
1981                 ixa_refrele(connp->conn_ixa);
1982         }
1983 }
1984 
1985 /* ARGSUSED */
1986 static int
1987 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1988 {
1989         itc_t   *itc = (itc_t *)buf;
1990         conn_t  *connp = &itc->itc_conn;
1991 
1992         bzero(connp, sizeof (conn_t));
1993         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1994         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1995         connp->conn_flags = IPCL_IPCCONN;
1996         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1997 
1998         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1999         if (connp->conn_ixa == NULL)
2000                 return (ENOMEM);
2001         connp->conn_ixa->ixa_refcnt = 1;
2002         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2003         return (0);
2004 }
2005 
2006 /* ARGSUSED */
2007 static void
2008 ip_conn_destructor(void *buf, void *cdrarg)
2009 {
2010         itc_t   *itc = (itc_t *)buf;
2011         conn_t  *connp = &itc->itc_conn;
2012 
2013         ASSERT(connp->conn_flags & IPCL_IPCCONN);
2014         ASSERT(connp->conn_priv == NULL);
2015         mutex_destroy(&connp->conn_lock);
2016         cv_destroy(&connp->conn_cv);
2017         rw_destroy(&connp->conn_ilg_lock);
2018 
2019         /* Can be NULL if constructor failed */
2020         if (connp->conn_ixa != NULL) {
2021                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2022                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2023                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2024                 ixa_refrele(connp->conn_ixa);
2025         }
2026 }
2027 
2028 /* ARGSUSED */
2029 static int
2030 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2031 {
2032         itc_t   *itc = (itc_t *)buf;
2033         conn_t  *connp = &itc->itc_conn;
2034         udp_t   *udp = (udp_t *)&itc[1];
2035 
2036         bzero(connp, sizeof (conn_t));
2037         bzero(udp, sizeof (udp_t));
2038 
2039         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2040         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2041         connp->conn_udp = udp;
2042         connp->conn_flags = IPCL_UDPCONN;
2043         connp->conn_proto = IPPROTO_UDP;
2044         udp->udp_connp = connp;
2045         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2046         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2047         if (connp->conn_ixa == NULL)
2048                 return (ENOMEM);
2049         connp->conn_ixa->ixa_refcnt = 1;
2050         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2051         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2052         return (0);
2053 }
2054 
2055 /* ARGSUSED */
2056 static void
2057 udp_conn_destructor(void *buf, void *cdrarg)
2058 {
2059         itc_t   *itc = (itc_t *)buf;
2060         conn_t  *connp = &itc->itc_conn;
2061         udp_t   *udp = (udp_t *)&itc[1];
2062 
2063         ASSERT(connp->conn_flags & IPCL_UDPCONN);
2064         ASSERT(udp->udp_connp == connp);
2065         ASSERT(connp->conn_udp == udp);
2066         mutex_destroy(&connp->conn_lock);
2067         cv_destroy(&connp->conn_cv);
2068         rw_destroy(&connp->conn_ilg_lock);
2069 
2070         /* Can be NULL if constructor failed */
2071         if (connp->conn_ixa != NULL) {
2072                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2073                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2074                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2075                 ixa_refrele(connp->conn_ixa);
2076         }
2077 }
2078 
2079 /* ARGSUSED */
2080 static int
2081 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2082 {
2083         itc_t   *itc = (itc_t *)buf;
2084         conn_t  *connp = &itc->itc_conn;
2085         icmp_t  *icmp = (icmp_t *)&itc[1];
2086 
2087         bzero(connp, sizeof (conn_t));
2088         bzero(icmp, sizeof (icmp_t));
2089 
2090         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2091         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2092         connp->conn_icmp = icmp;
2093         connp->conn_flags = IPCL_RAWIPCONN;
2094         connp->conn_proto = IPPROTO_ICMP;
2095         icmp->icmp_connp = connp;
2096         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2097         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2098         if (connp->conn_ixa == NULL)
2099                 return (ENOMEM);
2100         connp->conn_ixa->ixa_refcnt = 1;
2101         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2102         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2103         return (0);
2104 }
2105 
2106 /* ARGSUSED */
2107 static void
2108 rawip_conn_destructor(void *buf, void *cdrarg)
2109 {
2110         itc_t   *itc = (itc_t *)buf;
2111         conn_t  *connp = &itc->itc_conn;
2112         icmp_t  *icmp = (icmp_t *)&itc[1];
2113 
2114         ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2115         ASSERT(icmp->icmp_connp == connp);
2116         ASSERT(connp->conn_icmp == icmp);
2117         mutex_destroy(&connp->conn_lock);
2118         cv_destroy(&connp->conn_cv);
2119         rw_destroy(&connp->conn_ilg_lock);
2120 
2121         /* Can be NULL if constructor failed */
2122         if (connp->conn_ixa != NULL) {
2123                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2124                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2125                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2126                 ixa_refrele(connp->conn_ixa);
2127         }
2128 }
2129 
2130 /* ARGSUSED */
2131 static int
2132 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2133 {
2134         itc_t   *itc = (itc_t *)buf;
2135         conn_t  *connp = &itc->itc_conn;
2136         rts_t   *rts = (rts_t *)&itc[1];
2137 
2138         bzero(connp, sizeof (conn_t));
2139         bzero(rts, sizeof (rts_t));
2140 
2141         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2142         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2143         connp->conn_rts = rts;
2144         connp->conn_flags = IPCL_RTSCONN;
2145         rts->rts_connp = connp;
2146         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2147         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2148         if (connp->conn_ixa == NULL)
2149                 return (ENOMEM);
2150         connp->conn_ixa->ixa_refcnt = 1;
2151         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2152         return (0);
2153 }
2154 
2155 /* ARGSUSED */
2156 static void
2157 rts_conn_destructor(void *buf, void *cdrarg)
2158 {
2159         itc_t   *itc = (itc_t *)buf;
2160         conn_t  *connp = &itc->itc_conn;
2161         rts_t   *rts = (rts_t *)&itc[1];
2162 
2163         ASSERT(connp->conn_flags & IPCL_RTSCONN);
2164         ASSERT(rts->rts_connp == connp);
2165         ASSERT(connp->conn_rts == rts);
2166         mutex_destroy(&connp->conn_lock);
2167         cv_destroy(&connp->conn_cv);
2168         rw_destroy(&connp->conn_ilg_lock);
2169 
2170         /* Can be NULL if constructor failed */
2171         if (connp->conn_ixa != NULL) {
2172                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2173                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2174                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2175                 ixa_refrele(connp->conn_ixa);
2176         }
2177 }
2178 
2179 /*
2180  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2181  * in the conn_t.
2182  *
2183  * Below we list all the pointers in the conn_t as a documentation aid.
2184  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2185  * If you add any pointers to the conn_t please add an ASSERT here
2186  * and #ifdef it out if it can't be actually asserted to be NULL.
2187  * In any case, we bzero most of the conn_t at the end of the function.
2188  */
2189 void
2190 ipcl_conn_cleanup(conn_t *connp)
2191 {
2192         ip_xmit_attr_t  *ixa;
2193 
2194         ASSERT(connp->conn_latch == NULL);
2195         ASSERT(connp->conn_latch_in_policy == NULL);
2196         ASSERT(connp->conn_latch_in_action == NULL);
2197 #ifdef notdef
2198         ASSERT(connp->conn_rq == NULL);
2199         ASSERT(connp->conn_wq == NULL);
2200 #endif
2201         ASSERT(connp->conn_cred == NULL);
2202         ASSERT(connp->conn_g_fanout == NULL);
2203         ASSERT(connp->conn_g_next == NULL);
2204         ASSERT(connp->conn_g_prev == NULL);
2205         ASSERT(connp->conn_policy == NULL);
2206         ASSERT(connp->conn_fanout == NULL);
2207         ASSERT(connp->conn_next == NULL);
2208         ASSERT(connp->conn_prev == NULL);
2209         ASSERT(connp->conn_oper_pending_ill == NULL);
2210         ASSERT(connp->conn_ilg == NULL);
2211         ASSERT(connp->conn_drain_next == NULL);
2212         ASSERT(connp->conn_drain_prev == NULL);
2213 #ifdef notdef
2214         /* conn_idl is not cleared when removed from idl list */
2215         ASSERT(connp->conn_idl == NULL);
2216 #endif
2217         ASSERT(connp->conn_ipsec_opt_mp == NULL);
2218 #ifdef notdef
2219         /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2220         ASSERT(connp->conn_netstack == NULL);
2221 #endif
2222 
2223         ASSERT(connp->conn_helper_info == NULL);
2224         ASSERT(connp->conn_ixa != NULL);
2225         ixa = connp->conn_ixa;
2226         ASSERT(ixa->ixa_refcnt == 1);
2227         /* Need to preserve ixa_protocol */
2228         ixa_cleanup(ixa);
2229         ixa->ixa_flags = 0;
2230 
2231         /* Clear out the conn_t fields that are not preserved */
2232         bzero(&connp->conn_start_clr,
2233             sizeof (conn_t) -
2234             ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2235 }
2236 
2237 /*
2238  * All conns are inserted in a global multi-list for the benefit of
2239  * walkers. The walk is guaranteed to walk all open conns at the time
2240  * of the start of the walk exactly once. This property is needed to
2241  * achieve some cleanups during unplumb of interfaces. This is achieved
2242  * as follows.
2243  *
2244  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2245  * call the insert and delete functions below at creation and deletion
2246  * time respectively. The conn never moves or changes its position in this
2247  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2248  * won't increase due to walkers, once the conn deletion has started. Note
2249  * that we can't remove the conn from the global list and then wait for
2250  * the refcnt to drop to zero, since walkers would then see a truncated
2251  * list. CONN_INCIPIENT ensures that walkers don't start looking at
2252  * conns until ip_open is ready to make them globally visible.
2253  * The global round robin multi-list locks are held only to get the
2254  * next member/insertion/deletion and contention should be negligible
2255  * if the multi-list is much greater than the number of cpus.
2256  */
2257 void
2258 ipcl_globalhash_insert(conn_t *connp)
2259 {
2260         int     index;
2261         struct connf_s  *connfp;
2262         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
2263 
2264         /*
2265          * No need for atomic here. Approximate even distribution
2266          * in the global lists is sufficient.
2267          */
2268         ipst->ips_conn_g_index++;
2269         index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2270 
2271         connp->conn_g_prev = NULL;
2272         /*
2273          * Mark as INCIPIENT, so that walkers will ignore this
2274          * for now, till ip_open is ready to make it visible globally.
2275          */
2276         connp->conn_state_flags |= CONN_INCIPIENT;
2277 
2278         connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2279         /* Insert at the head of the list */
2280         mutex_enter(&connfp->connf_lock);
2281         connp->conn_g_next = connfp->connf_head;
2282         if (connp->conn_g_next != NULL)
2283                 connp->conn_g_next->conn_g_prev = connp;
2284         connfp->connf_head = connp;
2285 
2286         /* The fanout bucket this conn points to */
2287         connp->conn_g_fanout = connfp;
2288 
2289         mutex_exit(&connfp->connf_lock);
2290 }
2291 
2292 void
2293 ipcl_globalhash_remove(conn_t *connp)
2294 {
2295         struct connf_s  *connfp;
2296 
2297         /*
2298          * We were never inserted in the global multi list.
2299          * IPCL_NONE variety is never inserted in the global multilist
2300          * since it is presumed to not need any cleanup and is transient.
2301          */
2302         if (connp->conn_g_fanout == NULL)
2303                 return;
2304 
2305         connfp = connp->conn_g_fanout;
2306         mutex_enter(&connfp->connf_lock);
2307         if (connp->conn_g_prev != NULL)
2308                 connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2309         else
2310                 connfp->connf_head = connp->conn_g_next;
2311         if (connp->conn_g_next != NULL)
2312                 connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2313         mutex_exit(&connfp->connf_lock);
2314 
2315         /* Better to stumble on a null pointer than to corrupt memory */
2316         connp->conn_g_next = NULL;
2317         connp->conn_g_prev = NULL;
2318         connp->conn_g_fanout = NULL;
2319 }
2320 
2321 /*
2322  * Walk the list of all conn_t's in the system, calling the function provided
2323  * With the specified argument for each.
2324  * Applies to both IPv4 and IPv6.
2325  *
2326  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2327  * conn_oper_pending_ill). To guard against stale pointers
2328  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2329  * unplumbed or removed. New conn_t's that are created while we are walking
2330  * may be missed by this walk, because they are not necessarily inserted
2331  * at the tail of the list. They are new conn_t's and thus don't have any
2332  * stale pointers. The CONN_CLOSING flag ensures that no new reference
2333  * is created to the struct that is going away.
2334  */
2335 void
2336 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2337 {
2338         int     i;
2339         conn_t  *connp;
2340         conn_t  *prev_connp;
2341 
2342         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2343                 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2344                 prev_connp = NULL;
2345                 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2346                 while (connp != NULL) {
2347                         mutex_enter(&connp->conn_lock);
2348                         if (connp->conn_state_flags &
2349                             (CONN_CONDEMNED | CONN_INCIPIENT)) {
2350                                 mutex_exit(&connp->conn_lock);
2351                                 connp = connp->conn_g_next;
2352                                 continue;
2353                         }
2354                         CONN_INC_REF_LOCKED(connp);
2355                         mutex_exit(&connp->conn_lock);
2356                         mutex_exit(
2357                             &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2358                         (*func)(connp, arg);
2359                         if (prev_connp != NULL)
2360                                 CONN_DEC_REF(prev_connp);
2361                         mutex_enter(
2362                             &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2363                         prev_connp = connp;
2364                         connp = connp->conn_g_next;
2365                 }
2366                 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2367                 if (prev_connp != NULL)
2368                         CONN_DEC_REF(prev_connp);
2369         }
2370 }
2371 
2372 /*
2373  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2374  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2375  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2376  * (peer tcp in ESTABLISHED state).
2377  */
2378 conn_t *
2379 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2380     ip_stack_t *ipst)
2381 {
2382         uint32_t ports;
2383         uint16_t *pports = (uint16_t *)&ports;
2384         connf_t *connfp;
2385         conn_t  *tconnp;
2386         boolean_t zone_chk;
2387 
2388         /*
2389          * If either the source of destination address is loopback, then
2390          * both endpoints must be in the same Zone.  Otherwise, both of
2391          * the addresses are system-wide unique (tcp is in ESTABLISHED
2392          * state) and the endpoints may reside in different Zones.
2393          */
2394         zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2395             ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2396 
2397         pports[0] = tcpha->tha_fport;
2398         pports[1] = tcpha->tha_lport;
2399 
2400         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2401             ports, ipst)];
2402 
2403         mutex_enter(&connfp->connf_lock);
2404         for (tconnp = connfp->connf_head; tconnp != NULL;
2405             tconnp = tconnp->conn_next) {
2406 
2407                 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2408                     ipha->ipha_dst, ipha->ipha_src, ports) &&
2409                     tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2410                     (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2411 
2412                         ASSERT(tconnp != connp);
2413                         CONN_INC_REF(tconnp);
2414                         mutex_exit(&connfp->connf_lock);
2415                         return (tconnp);
2416                 }
2417         }
2418         mutex_exit(&connfp->connf_lock);
2419         return (NULL);
2420 }
2421 
2422 /*
2423  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2424  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2425  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2426  * (peer tcp in ESTABLISHED state).
2427  */
2428 conn_t *
2429 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2430     ip_stack_t *ipst)
2431 {
2432         uint32_t ports;
2433         uint16_t *pports = (uint16_t *)&ports;
2434         connf_t *connfp;
2435         conn_t  *tconnp;
2436         boolean_t zone_chk;
2437 
2438         /*
2439          * If either the source of destination address is loopback, then
2440          * both endpoints must be in the same Zone.  Otherwise, both of
2441          * the addresses are system-wide unique (tcp is in ESTABLISHED
2442          * state) and the endpoints may reside in different Zones.  We
2443          * don't do Zone check for link local address(es) because the
2444          * current Zone implementation treats each link local address as
2445          * being unique per system node, i.e. they belong to global Zone.
2446          */
2447         zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2448             IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2449 
2450         pports[0] = tcpha->tha_fport;
2451         pports[1] = tcpha->tha_lport;
2452 
2453         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2454             ports, ipst)];
2455 
2456         mutex_enter(&connfp->connf_lock);
2457         for (tconnp = connfp->connf_head; tconnp != NULL;
2458             tconnp = tconnp->conn_next) {
2459 
2460                 /* We skip conn_bound_if check here as this is loopback tcp */
2461                 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2462                     ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2463                     tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2464                     (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2465 
2466                         ASSERT(tconnp != connp);
2467                         CONN_INC_REF(tconnp);
2468                         mutex_exit(&connfp->connf_lock);
2469                         return (tconnp);
2470                 }
2471         }
2472         mutex_exit(&connfp->connf_lock);
2473         return (NULL);
2474 }
2475 
2476 /*
2477  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2478  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2479  * Only checks for connected entries i.e. no INADDR_ANY checks.
2480  */
2481 conn_t *
2482 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2483     ip_stack_t *ipst)
2484 {
2485         uint32_t ports;
2486         uint16_t *pports;
2487         connf_t *connfp;
2488         conn_t  *tconnp;
2489 
2490         pports = (uint16_t *)&ports;
2491         pports[0] = tcpha->tha_fport;
2492         pports[1] = tcpha->tha_lport;
2493 
2494         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2495             ports, ipst)];
2496 
2497         mutex_enter(&connfp->connf_lock);
2498         for (tconnp = connfp->connf_head; tconnp != NULL;
2499             tconnp = tconnp->conn_next) {
2500 
2501                 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2502                     ipha->ipha_dst, ipha->ipha_src, ports) &&
2503                     tconnp->conn_tcp->tcp_state >= min_state) {
2504 
2505                         CONN_INC_REF(tconnp);
2506                         mutex_exit(&connfp->connf_lock);
2507                         return (tconnp);
2508                 }
2509         }
2510         mutex_exit(&connfp->connf_lock);
2511         return (NULL);
2512 }
2513 
2514 /*
2515  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2516  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2517  * Only checks for connected entries i.e. no INADDR_ANY checks.
2518  * Match on ifindex in addition to addresses.
2519  */
2520 conn_t *
2521 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2522     uint_t ifindex, ip_stack_t *ipst)
2523 {
2524         tcp_t   *tcp;
2525         uint32_t ports;
2526         uint16_t *pports;
2527         connf_t *connfp;
2528         conn_t  *tconnp;
2529 
2530         pports = (uint16_t *)&ports;
2531         pports[0] = tcpha->tha_fport;
2532         pports[1] = tcpha->tha_lport;
2533 
2534         connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2535             ports, ipst)];
2536 
2537         mutex_enter(&connfp->connf_lock);
2538         for (tconnp = connfp->connf_head; tconnp != NULL;
2539             tconnp = tconnp->conn_next) {
2540 
2541                 tcp = tconnp->conn_tcp;
2542                 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2543                     ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2544                     tcp->tcp_state >= min_state &&
2545                     (tconnp->conn_bound_if == 0 ||
2546                     tconnp->conn_bound_if == ifindex)) {
2547 
2548                         CONN_INC_REF(tconnp);
2549                         mutex_exit(&connfp->connf_lock);
2550                         return (tconnp);
2551                 }
2552         }
2553         mutex_exit(&connfp->connf_lock);
2554         return (NULL);
2555 }
2556 
2557 /*
2558  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2559  * a listener when changing state.
2560  */
2561 conn_t *
2562 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2563     ip_stack_t *ipst)
2564 {
2565         connf_t         *bind_connfp;
2566         conn_t          *connp;
2567         tcp_t           *tcp;
2568 
2569         /*
2570          * Avoid false matches for packets sent to an IP destination of
2571          * all zeros.
2572          */
2573         if (laddr == 0)
2574                 return (NULL);
2575 
2576         ASSERT(zoneid != ALL_ZONES);
2577 
2578         bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2579         mutex_enter(&bind_connfp->connf_lock);
2580         for (connp = bind_connfp->connf_head; connp != NULL;
2581             connp = connp->conn_next) {
2582                 tcp = connp->conn_tcp;
2583                 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2584                     IPCL_ZONE_MATCH(connp, zoneid) &&
2585                     (tcp->tcp_listener == NULL)) {
2586                         CONN_INC_REF(connp);
2587                         mutex_exit(&bind_connfp->connf_lock);
2588                         return (connp);
2589                 }
2590         }
2591         mutex_exit(&bind_connfp->connf_lock);
2592         return (NULL);
2593 }
2594 
2595 /*
2596  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2597  * a listener when changing state.
2598  */
2599 conn_t *
2600 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2601     zoneid_t zoneid, ip_stack_t *ipst)
2602 {
2603         connf_t         *bind_connfp;
2604         conn_t          *connp = NULL;
2605         tcp_t           *tcp;
2606 
2607         /*
2608          * Avoid false matches for packets sent to an IP destination of
2609          * all zeros.
2610          */
2611         if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2612                 return (NULL);
2613 
2614         ASSERT(zoneid != ALL_ZONES);
2615 
2616         bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2617         mutex_enter(&bind_connfp->connf_lock);
2618         for (connp = bind_connfp->connf_head; connp != NULL;
2619             connp = connp->conn_next) {
2620                 tcp = connp->conn_tcp;
2621                 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2622                     IPCL_ZONE_MATCH(connp, zoneid) &&
2623                     (connp->conn_bound_if == 0 ||
2624                     connp->conn_bound_if == ifindex) &&
2625                     tcp->tcp_listener == NULL) {
2626                         CONN_INC_REF(connp);
2627                         mutex_exit(&bind_connfp->connf_lock);
2628                         return (connp);
2629                 }
2630         }
2631         mutex_exit(&bind_connfp->connf_lock);
2632         return (NULL);
2633 }
2634 
2635 /*
2636  * ipcl_get_next_conn
2637  *      get the next entry in the conn global list
2638  *      and put a reference on the next_conn.
2639  *      decrement the reference on the current conn.
2640  *
2641  * This is an iterator based walker function that also provides for
2642  * some selection by the caller. It walks through the conn_hash bucket
2643  * searching for the next valid connp in the list, and selects connections
2644  * that are neither closed nor condemned. It also REFHOLDS the conn
2645  * thus ensuring that the conn exists when the caller uses the conn.
2646  */
2647 conn_t *
2648 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2649 {
2650         conn_t  *next_connp;
2651 
2652         if (connfp == NULL)
2653                 return (NULL);
2654 
2655         mutex_enter(&connfp->connf_lock);
2656 
2657         next_connp = (connp == NULL) ?
2658             connfp->connf_head : connp->conn_g_next;
2659 
2660         while (next_connp != NULL) {
2661                 mutex_enter(&next_connp->conn_lock);
2662                 if (!(next_connp->conn_flags & conn_flags) ||
2663                     (next_connp->conn_state_flags &
2664                     (CONN_CONDEMNED | CONN_INCIPIENT))) {
2665                         /*
2666                          * This conn has been condemned or
2667                          * is closing, or the flags don't match
2668                          */
2669                         mutex_exit(&next_connp->conn_lock);
2670                         next_connp = next_connp->conn_g_next;
2671                         continue;
2672                 }
2673                 CONN_INC_REF_LOCKED(next_connp);
2674                 mutex_exit(&next_connp->conn_lock);
2675                 break;
2676         }
2677 
2678         mutex_exit(&connfp->connf_lock);
2679 
2680         if (connp != NULL)
2681                 CONN_DEC_REF(connp);
2682 
2683         return (next_connp);
2684 }
2685 
2686 #ifdef CONN_DEBUG
2687 /*
2688  * Trace of the last NBUF refhold/refrele
2689  */
2690 int
2691 conn_trace_ref(conn_t *connp)
2692 {
2693         int     last;
2694         conn_trace_t    *ctb;
2695 
2696         ASSERT(MUTEX_HELD(&connp->conn_lock));
2697         last = connp->conn_trace_last;
2698         last++;
2699         if (last == CONN_TRACE_MAX)
2700                 last = 0;
2701 
2702         ctb = &connp->conn_trace_buf[last];
2703         ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2704         connp->conn_trace_last = last;
2705         return (1);
2706 }
2707 
2708 int
2709 conn_untrace_ref(conn_t *connp)
2710 {
2711         int     last;
2712         conn_trace_t    *ctb;
2713 
2714         ASSERT(MUTEX_HELD(&connp->conn_lock));
2715         last = connp->conn_trace_last;
2716         last++;
2717         if (last == CONN_TRACE_MAX)
2718                 last = 0;
2719 
2720         ctb = &connp->conn_trace_buf[last];
2721         ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2722         connp->conn_trace_last = last;
2723         return (1);
2724 }
2725 #endif