Print this page
Reduce lint
OS-5007 support SO_ATTACH_FILTER on ICMP sockets
Reviewed by: Cody Mello <melloc@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4018 lxbrand support TCP SO_REUSEPORT
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/ip/ipclassifier.c
+++ new/usr/src/uts/common/inet/ip/ipclassifier.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright 2016 Joyent, Inc.
23 24 */
24 25
25 26 /*
26 27 * IP PACKET CLASSIFIER
27 28 *
28 29 * The IP packet classifier provides mapping between IP packets and persistent
29 30 * connection state for connection-oriented protocols. It also provides
30 31 * interface for managing connection states.
31 32 *
32 33 * The connection state is kept in conn_t data structure and contains, among
33 34 * other things:
34 35 *
35 36 * o local/remote address and ports
36 37 * o Transport protocol
37 38 * o squeue for the connection (for TCP only)
38 39 * o reference counter
39 40 * o Connection state
40 41 * o hash table linkage
41 42 * o interface/ire information
42 43 * o credentials
43 44 * o ipsec policy
44 45 * o send and receive functions.
45 46 * o mutex lock.
46 47 *
47 48 * Connections use a reference counting scheme. They are freed when the
48 49 * reference counter drops to zero. A reference is incremented when connection
49 50 * is placed in a list or table, when incoming packet for the connection arrives
50 51 * and when connection is processed via squeue (squeue processing may be
51 52 * asynchronous and the reference protects the connection from being destroyed
52 53 * before its processing is finished).
53 54 *
54 55 * conn_recv is used to pass up packets to the ULP.
55 56 * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
56 57 * a listener, and changes to tcp_input_listener as the listener has picked a
57 58 * good squeue. For other cases it is set to tcp_input_data.
58 59 *
59 60 * conn_recvicmp is used to pass up ICMP errors to the ULP.
60 61 *
61 62 * Classifier uses several hash tables:
62 63 *
63 64 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state
64 65 * ipcl_bind_fanout: contains all connections in BOUND state
65 66 * ipcl_proto_fanout: IPv4 protocol fanout
66 67 * ipcl_proto_fanout_v6: IPv6 protocol fanout
67 68 * ipcl_udp_fanout: contains all UDP connections
68 69 * ipcl_iptun_fanout: contains all IP tunnel connections
69 70 * ipcl_globalhash_fanout: contains all connections
70 71 *
71 72 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
72 73 * which need to view all existing connections.
73 74 *
74 75 * All tables are protected by per-bucket locks. When both per-bucket lock and
75 76 * connection lock need to be held, the per-bucket lock should be acquired
76 77 * first, followed by the connection lock.
77 78 *
78 79 * All functions doing search in one of these tables increment a reference
79 80 * counter on the connection found (if any). This reference should be dropped
80 81 * when the caller has finished processing the connection.
81 82 *
82 83 *
83 84 * INTERFACES:
84 85 * ===========
85 86 *
86 87 * Connection Lookup:
87 88 * ------------------
88 89 *
89 90 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
90 91 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
91 92 *
92 93 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
93 94 * it can't find any associated connection. If the connection is found, its
94 95 * reference counter is incremented.
95 96 *
96 97 * mp: mblock, containing packet header. The full header should fit
97 98 * into a single mblock. It should also contain at least full IP
98 99 * and TCP or UDP header.
99 100 *
100 101 * protocol: Either IPPROTO_TCP or IPPROTO_UDP.
101 102 *
102 103 * hdr_len: The size of IP header. It is used to find TCP or UDP header in
103 104 * the packet.
104 105 *
105 106 * ira->ira_zoneid: The zone in which the returned connection must be; the
106 107 * zoneid corresponding to the ire_zoneid on the IRE located for
107 108 * the packet's destination address.
108 109 *
109 110 * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
110 111 * IRAF_TX_SHARED_ADDR flags
111 112 *
112 113 * For TCP connections, the lookup order is as follows:
113 114 * 5-tuple {src, dst, protocol, local port, remote port}
114 115 * lookup in ipcl_conn_fanout table.
115 116 * 3-tuple {dst, remote port, protocol} lookup in
116 117 * ipcl_bind_fanout table.
117 118 *
118 119 * For UDP connections, a 5-tuple {src, dst, protocol, local port,
119 120 * remote port} lookup is done on ipcl_udp_fanout. Note that,
120 121 * these interfaces do not handle cases where a packets belongs
121 122 * to multiple UDP clients, which is handled in IP itself.
122 123 *
123 124 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
124 125 * determine which actual zone gets the segment. This is used only in a
125 126 * labeled environment. The matching rules are:
126 127 *
127 128 * - If it's not a multilevel port, then the label on the packet selects
128 129 * the zone. Unlabeled packets are delivered to the global zone.
129 130 *
130 131 * - If it's a multilevel port, then only the zone registered to receive
131 132 * packets on that port matches.
132 133 *
133 134 * Also, in a labeled environment, packet labels need to be checked. For fully
134 135 * bound TCP connections, we can assume that the packet label was checked
135 136 * during connection establishment, and doesn't need to be checked on each
136 137 * packet. For others, though, we need to check for strict equality or, for
137 138 * multilevel ports, membership in the range or set. This part currently does
138 139 * a tnrh lookup on each packet, but could be optimized to use cached results
139 140 * if that were necessary. (SCTP doesn't come through here, but if it did,
140 141 * we would apply the same rules as TCP.)
141 142 *
142 143 * An implication of the above is that fully-bound TCP sockets must always use
143 144 * distinct 4-tuples; they can't be discriminated by label alone.
144 145 *
145 146 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
146 147 * as there's no connection set-up handshake and no shared state.
147 148 *
148 149 * Labels on looped-back packets within a single zone do not need to be
149 150 * checked, as all processes in the same zone have the same label.
150 151 *
151 152 * Finally, for unlabeled packets received by a labeled system, special rules
152 153 * apply. We consider only the MLP if there is one. Otherwise, we prefer a
153 154 * socket in the zone whose label matches the default label of the sender, if
154 155 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
155 156 * receiver's label must dominate the sender's default label.
156 157 *
157 158 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
158 159 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
159 160 * ip_stack);
160 161 *
161 162 * Lookup routine to find an exact match for {src, dst, local port,
162 163 * remote port) for TCP connections in ipcl_conn_fanout. The address and
163 164 * ports are read from the IP and TCP header respectively.
164 165 *
165 166 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
166 167 * zoneid, ip_stack);
167 168 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
168 169 * zoneid, ip_stack);
169 170 *
170 171 * Lookup routine to find a listener with the tuple {lport, laddr,
171 172 * protocol} in the ipcl_bind_fanout table. For IPv6, an additional
172 173 * parameter interface index is also compared.
173 174 *
174 175 * void ipcl_walk(func, arg, ip_stack)
175 176 *
176 177 * Apply 'func' to every connection available. The 'func' is called as
177 178 * (*func)(connp, arg). The walk is non-atomic so connections may be
178 179 * created and destroyed during the walk. The CONN_CONDEMNED and
179 180 * CONN_INCIPIENT flags ensure that connections which are newly created
180 181 * or being destroyed are not selected by the walker.
181 182 *
182 183 * Table Updates
183 184 * -------------
184 185 *
185 186 * int ipcl_conn_insert(connp);
186 187 * int ipcl_conn_insert_v4(connp);
187 188 * int ipcl_conn_insert_v6(connp);
188 189 *
189 190 * Insert 'connp' in the ipcl_conn_fanout.
190 191 * Arguments :
191 192 * connp conn_t to be inserted
192 193 *
193 194 * Return value :
194 195 * 0 if connp was inserted
195 196 * EADDRINUSE if the connection with the same tuple
196 197 * already exists.
197 198 *
198 199 * int ipcl_bind_insert(connp);
199 200 * int ipcl_bind_insert_v4(connp);
200 201 * int ipcl_bind_insert_v6(connp);
201 202 *
202 203 * Insert 'connp' in ipcl_bind_fanout.
203 204 * Arguments :
204 205 * connp conn_t to be inserted
205 206 *
206 207 *
207 208 * void ipcl_hash_remove(connp);
208 209 *
209 210 * Removes the 'connp' from the connection fanout table.
210 211 *
211 212 * Connection Creation/Destruction
212 213 * -------------------------------
213 214 *
214 215 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
215 216 *
216 217 * Creates a new conn based on the type flag, inserts it into
217 218 * globalhash table.
218 219 *
219 220 * type: This flag determines the type of conn_t which needs to be
220 221 * created i.e., which kmem_cache it comes from.
221 222 * IPCL_TCPCONN indicates a TCP connection
222 223 * IPCL_SCTPCONN indicates a SCTP connection
223 224 * IPCL_UDPCONN indicates a UDP conn_t.
224 225 * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t.
225 226 * IPCL_RTSCONN indicates a RTS conn_t.
226 227 * IPCL_IPCCONN indicates all other connections.
227 228 *
228 229 * void ipcl_conn_destroy(connp)
229 230 *
230 231 * Destroys the connection state, removes it from the global
231 232 * connection hash table and frees its memory.
232 233 */
233 234
234 235 #include <sys/types.h>
235 236 #include <sys/stream.h>
236 237 #include <sys/stropts.h>
237 238 #include <sys/sysmacros.h>
238 239 #include <sys/strsubr.h>
239 240 #include <sys/strsun.h>
240 241 #define _SUN_TPI_VERSION 2
241 242 #include <sys/ddi.h>
242 243 #include <sys/cmn_err.h>
243 244 #include <sys/debug.h>
244 245
245 246 #include <sys/systm.h>
246 247 #include <sys/param.h>
247 248 #include <sys/kmem.h>
248 249 #include <sys/isa_defs.h>
249 250 #include <inet/common.h>
250 251 #include <netinet/ip6.h>
251 252 #include <netinet/icmp6.h>
252 253
253 254 #include <inet/ip.h>
254 255 #include <inet/ip_if.h>
255 256 #include <inet/ip_ire.h>
256 257 #include <inet/ip6.h>
257 258 #include <inet/ip_ndp.h>
258 259 #include <inet/ip_impl.h>
259 260 #include <inet/udp_impl.h>
260 261 #include <inet/sctp_ip.h>
261 262 #include <inet/sctp/sctp_impl.h>
262 263 #include <inet/rawip_impl.h>
263 264 #include <inet/rts_impl.h>
264 265 #include <inet/iptun/iptun_impl.h>
265 266
266 267 #include <sys/cpuvar.h>
267 268
268 269 #include <inet/ipclassifier.h>
269 270 #include <inet/tcp.h>
270 271 #include <inet/ipsec_impl.h>
271 272
272 273 #include <sys/tsol/tnet.h>
273 274 #include <sys/sockio.h>
274 275
275 276 /* Old value for compatibility. Setable in /etc/system */
276 277 uint_t tcp_conn_hash_size = 0;
277 278
278 279 /* New value. Zero means choose automatically. Setable in /etc/system */
279 280 uint_t ipcl_conn_hash_size = 0;
280 281 uint_t ipcl_conn_hash_memfactor = 8192;
281 282 uint_t ipcl_conn_hash_maxsize = 82500;
282 283
283 284 /* bind/udp fanout table size */
284 285 uint_t ipcl_bind_fanout_size = 512;
285 286 uint_t ipcl_udp_fanout_size = 16384;
286 287
287 288 /* Raw socket fanout size. Must be a power of 2. */
288 289 uint_t ipcl_raw_fanout_size = 256;
289 290
290 291 /*
291 292 * The IPCL_IPTUN_HASH() function works best with a prime table size. We
292 293 * expect that most large deployments would have hundreds of tunnels, and
293 294 * thousands in the extreme case.
294 295 */
295 296 uint_t ipcl_iptun_fanout_size = 6143;
296 297
297 298 /*
298 299 * Power of 2^N Primes useful for hashing for N of 0-28,
299 300 * these primes are the nearest prime <= 2^N - 2^(N-2).
300 301 */
301 302
302 303 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \
303 304 6143, 12281, 24571, 49139, 98299, 196597, 393209, \
304 305 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \
305 306 50331599, 100663291, 201326557, 0}
306 307
307 308 /*
308 309 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
309 310 * are aligned on cache lines.
310 311 */
311 312 typedef union itc_s {
312 313 conn_t itc_conn;
313 314 char itcu_filler[CACHE_ALIGN(conn_s)];
314 315 } itc_t;
315 316
316 317 struct kmem_cache *tcp_conn_cache;
317 318 struct kmem_cache *ip_conn_cache;
318 319 extern struct kmem_cache *sctp_conn_cache;
319 320 struct kmem_cache *udp_conn_cache;
320 321 struct kmem_cache *rawip_conn_cache;
321 322 struct kmem_cache *rts_conn_cache;
322 323
323 324 extern void tcp_timermp_free(tcp_t *);
324 325 extern mblk_t *tcp_timermp_alloc(int);
325 326
326 327 static int ip_conn_constructor(void *, void *, int);
327 328 static void ip_conn_destructor(void *, void *);
328 329
329 330 static int tcp_conn_constructor(void *, void *, int);
330 331 static void tcp_conn_destructor(void *, void *);
331 332
332 333 static int udp_conn_constructor(void *, void *, int);
333 334 static void udp_conn_destructor(void *, void *);
334 335
335 336 static int rawip_conn_constructor(void *, void *, int);
336 337 static void rawip_conn_destructor(void *, void *);
337 338
338 339 static int rts_conn_constructor(void *, void *, int);
339 340 static void rts_conn_destructor(void *, void *);
340 341
341 342 /*
342 343 * Global (for all stack instances) init routine
343 344 */
344 345 void
345 346 ipcl_g_init(void)
346 347 {
347 348 ip_conn_cache = kmem_cache_create("ip_conn_cache",
348 349 sizeof (conn_t), CACHE_ALIGN_SIZE,
349 350 ip_conn_constructor, ip_conn_destructor,
350 351 NULL, NULL, NULL, 0);
351 352
352 353 tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
353 354 sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
354 355 tcp_conn_constructor, tcp_conn_destructor,
355 356 tcp_conn_reclaim, NULL, NULL, 0);
356 357
357 358 udp_conn_cache = kmem_cache_create("udp_conn_cache",
358 359 sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
359 360 udp_conn_constructor, udp_conn_destructor,
360 361 NULL, NULL, NULL, 0);
361 362
362 363 rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
363 364 sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
364 365 rawip_conn_constructor, rawip_conn_destructor,
365 366 NULL, NULL, NULL, 0);
366 367
367 368 rts_conn_cache = kmem_cache_create("rts_conn_cache",
368 369 sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
369 370 rts_conn_constructor, rts_conn_destructor,
370 371 NULL, NULL, NULL, 0);
371 372 }
372 373
373 374 /*
374 375 * ipclassifier intialization routine, sets up hash tables.
375 376 */
376 377 void
377 378 ipcl_init(ip_stack_t *ipst)
378 379 {
379 380 int i;
380 381 int sizes[] = P2Ps();
381 382
382 383 /*
383 384 * Calculate size of conn fanout table from /etc/system settings
384 385 */
385 386 if (ipcl_conn_hash_size != 0) {
386 387 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
387 388 } else if (tcp_conn_hash_size != 0) {
388 389 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
389 390 } else {
390 391 extern pgcnt_t freemem;
391 392
392 393 ipst->ips_ipcl_conn_fanout_size =
393 394 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
394 395
395 396 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
396 397 ipst->ips_ipcl_conn_fanout_size =
397 398 ipcl_conn_hash_maxsize;
398 399 }
399 400 }
400 401
401 402 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
402 403 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
403 404 break;
404 405 }
405 406 }
406 407 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
407 408 /* Out of range, use the 2^16 value */
408 409 ipst->ips_ipcl_conn_fanout_size = sizes[16];
409 410 }
410 411
411 412 /* Take values from /etc/system */
412 413 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
413 414 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
414 415 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
415 416 ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
416 417
417 418 ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
418 419
419 420 ipst->ips_ipcl_conn_fanout = kmem_zalloc(
420 421 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
421 422
422 423 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
423 424 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
424 425 MUTEX_DEFAULT, NULL);
425 426 }
426 427
427 428 ipst->ips_ipcl_bind_fanout = kmem_zalloc(
428 429 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
429 430
430 431 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
431 432 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
432 433 MUTEX_DEFAULT, NULL);
433 434 }
434 435
435 436 ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
436 437 sizeof (connf_t), KM_SLEEP);
437 438 for (i = 0; i < IPPROTO_MAX; i++) {
438 439 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
439 440 MUTEX_DEFAULT, NULL);
440 441 }
441 442
442 443 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
443 444 sizeof (connf_t), KM_SLEEP);
444 445 for (i = 0; i < IPPROTO_MAX; i++) {
445 446 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
446 447 MUTEX_DEFAULT, NULL);
447 448 }
448 449
449 450 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
450 451 mutex_init(&ipst->ips_rts_clients->connf_lock,
451 452 NULL, MUTEX_DEFAULT, NULL);
452 453
453 454 ipst->ips_ipcl_udp_fanout = kmem_zalloc(
454 455 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
455 456 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
456 457 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
457 458 MUTEX_DEFAULT, NULL);
458 459 }
459 460
460 461 ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
461 462 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
462 463 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
463 464 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
464 465 MUTEX_DEFAULT, NULL);
465 466 }
466 467
467 468 ipst->ips_ipcl_raw_fanout = kmem_zalloc(
468 469 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
469 470 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
470 471 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
471 472 MUTEX_DEFAULT, NULL);
472 473 }
473 474
474 475 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
475 476 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
476 477 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
477 478 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
478 479 NULL, MUTEX_DEFAULT, NULL);
479 480 }
480 481 }
481 482
482 483 void
483 484 ipcl_g_destroy(void)
484 485 {
485 486 kmem_cache_destroy(ip_conn_cache);
486 487 kmem_cache_destroy(tcp_conn_cache);
487 488 kmem_cache_destroy(udp_conn_cache);
488 489 kmem_cache_destroy(rawip_conn_cache);
489 490 kmem_cache_destroy(rts_conn_cache);
490 491 }
491 492
492 493 /*
493 494 * All user-level and kernel use of the stack must be gone
494 495 * by now.
495 496 */
496 497 void
497 498 ipcl_destroy(ip_stack_t *ipst)
498 499 {
499 500 int i;
500 501
501 502 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
502 503 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
503 504 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
504 505 }
505 506 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
506 507 sizeof (connf_t));
507 508 ipst->ips_ipcl_conn_fanout = NULL;
508 509
509 510 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
510 511 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
511 512 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
512 513 }
513 514 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
514 515 sizeof (connf_t));
515 516 ipst->ips_ipcl_bind_fanout = NULL;
516 517
517 518 for (i = 0; i < IPPROTO_MAX; i++) {
518 519 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
519 520 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
520 521 }
521 522 kmem_free(ipst->ips_ipcl_proto_fanout_v4,
522 523 IPPROTO_MAX * sizeof (connf_t));
523 524 ipst->ips_ipcl_proto_fanout_v4 = NULL;
524 525
525 526 for (i = 0; i < IPPROTO_MAX; i++) {
526 527 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
527 528 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
528 529 }
529 530 kmem_free(ipst->ips_ipcl_proto_fanout_v6,
530 531 IPPROTO_MAX * sizeof (connf_t));
531 532 ipst->ips_ipcl_proto_fanout_v6 = NULL;
532 533
533 534 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
534 535 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
535 536 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
536 537 }
537 538 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
538 539 sizeof (connf_t));
539 540 ipst->ips_ipcl_udp_fanout = NULL;
540 541
541 542 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
542 543 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
543 544 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
544 545 }
545 546 kmem_free(ipst->ips_ipcl_iptun_fanout,
546 547 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
547 548 ipst->ips_ipcl_iptun_fanout = NULL;
548 549
549 550 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
550 551 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
551 552 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
552 553 }
553 554 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
554 555 sizeof (connf_t));
555 556 ipst->ips_ipcl_raw_fanout = NULL;
556 557
557 558 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
558 559 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
559 560 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
560 561 }
561 562 kmem_free(ipst->ips_ipcl_globalhash_fanout,
562 563 sizeof (connf_t) * CONN_G_HASH_SIZE);
563 564 ipst->ips_ipcl_globalhash_fanout = NULL;
564 565
565 566 ASSERT(ipst->ips_rts_clients->connf_head == NULL);
566 567 mutex_destroy(&ipst->ips_rts_clients->connf_lock);
567 568 kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
568 569 ipst->ips_rts_clients = NULL;
569 570 }
570 571
571 572 /*
572 573 * conn creation routine. initialize the conn, sets the reference
573 574 * and inserts it in the global hash table.
574 575 */
575 576 conn_t *
576 577 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
577 578 {
578 579 conn_t *connp;
579 580 struct kmem_cache *conn_cache;
580 581
581 582 switch (type) {
582 583 case IPCL_SCTPCONN:
583 584 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
584 585 return (NULL);
585 586 sctp_conn_init(connp);
586 587 netstack_hold(ns);
587 588 connp->conn_netstack = ns;
588 589 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
589 590 connp->conn_ixa->ixa_conn_id = (long)connp;
590 591 ipcl_globalhash_insert(connp);
591 592 return (connp);
592 593
593 594 case IPCL_TCPCONN:
594 595 conn_cache = tcp_conn_cache;
595 596 break;
596 597
597 598 case IPCL_UDPCONN:
598 599 conn_cache = udp_conn_cache;
599 600 break;
600 601
601 602 case IPCL_RAWIPCONN:
602 603 conn_cache = rawip_conn_cache;
603 604 break;
604 605
605 606 case IPCL_RTSCONN:
606 607 conn_cache = rts_conn_cache;
607 608 break;
608 609
609 610 case IPCL_IPCCONN:
610 611 conn_cache = ip_conn_cache;
611 612 break;
612 613
613 614 default:
614 615 connp = NULL;
615 616 ASSERT(0);
616 617 }
617 618
618 619 if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
619 620 return (NULL);
620 621
621 622 connp->conn_ref = 1;
622 623 netstack_hold(ns);
623 624 connp->conn_netstack = ns;
624 625 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
625 626 connp->conn_ixa->ixa_conn_id = (long)connp;
626 627 ipcl_globalhash_insert(connp);
627 628 return (connp);
628 629 }
629 630
/*
 * Free a conn_t whose reference count has dropped to zero.  Releases
 * credentials, cached header buffers, IPsec state and the netstack hold,
 * removes the conn from the global hash, and returns it to the kmem cache
 * selected by conn_flags.  SCTP conns are handed to sctp_free() instead.
 * The caller must not hold conn_lock and must guarantee conn_ref == 0.
 */
void
ipcl_conn_destroy(conn_t *connp)
{
	mblk_t	*mp;
	netstack_t	*ns = connp->conn_netstack;

	ASSERT(!MUTEX_HELD(&connp->conn_lock));
	ASSERT(connp->conn_ref == 0);
	ASSERT(connp->conn_ioctlref == 0);

	DTRACE_PROBE1(conn__destroy, conn_t *, connp);

	if (connp->conn_cred != NULL) {
		crfree(connp->conn_cred);
		connp->conn_cred = NULL;
		/* ixa_cred done in ipcl_conn_cleanup below */
	}

	/* Release any cached transmit header template. */
	if (connp->conn_ht_iphc != NULL) {
		kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
		connp->conn_ht_iphc = NULL;
		connp->conn_ht_iphc_allocated = 0;
		connp->conn_ht_iphc_len = 0;
		connp->conn_ht_ulp = NULL;
		connp->conn_ht_ulp_len = 0;
	}
	ip_pkt_free(&connp->conn_xmit_ipp);

	ipcl_globalhash_remove(connp);

	/* Drop latched IPsec policy/action state, if any. */
	if (connp->conn_latch != NULL) {
		IPLATCH_REFRELE(connp->conn_latch);
		connp->conn_latch = NULL;
	}
	if (connp->conn_latch_in_policy != NULL) {
		IPPOL_REFRELE(connp->conn_latch_in_policy);
		connp->conn_latch_in_policy = NULL;
	}
	if (connp->conn_latch_in_action != NULL) {
		IPACT_REFRELE(connp->conn_latch_in_action);
		connp->conn_latch_in_action = NULL;
	}
	if (connp->conn_policy != NULL) {
		IPPH_REFRELE(connp->conn_policy, ns);
		connp->conn_policy = NULL;
	}

	if (connp->conn_ipsec_opt_mp != NULL) {
		freemsg(connp->conn_ipsec_opt_mp);
		connp->conn_ipsec_opt_mp = NULL;
	}

	if (connp->conn_flags & IPCL_TCPCONN) {
		tcp_t	*tcp = connp->conn_tcp;

		tcp_free(tcp);
		/*
		 * Save the timer mblk across the bzero of the tcp_t below
		 * so it can be reused by the next tcp carved out of this
		 * cache buffer.
		 */
		mp = tcp->tcp_timercache;

		tcp->tcp_tcps = NULL;

		/*
		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
		 * the mblk.
		 */
		if (tcp->tcp_rsrv_mp != NULL) {
			freeb(tcp->tcp_rsrv_mp);
			tcp->tcp_rsrv_mp = NULL;
			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
		}

		ipcl_conn_cleanup(connp);
		connp->conn_flags = IPCL_TCPCONN;
		if (ns != NULL) {
			ASSERT(tcp->tcp_tcps == NULL);
			connp->conn_netstack = NULL;
			connp->conn_ixa->ixa_ipst = NULL;
			netstack_rele(ns);
		}

		bzero(tcp, sizeof (tcp_t));

		/* Restore the fields that must survive in the cached buffer. */
		tcp->tcp_timercache = mp;
		tcp->tcp_connp = connp;
		kmem_cache_free(tcp_conn_cache, connp);
		return;
	}

	if (connp->conn_flags & IPCL_SCTPCONN) {
		ASSERT(ns != NULL);
		sctp_free(connp);
		return;
	}

	ipcl_conn_cleanup(connp);
	if (ns != NULL) {
		connp->conn_netstack = NULL;
		connp->conn_ixa->ixa_ipst = NULL;
		netstack_rele(ns);
	}

	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
	if (connp->conn_flags & IPCL_UDPCONN) {
		connp->conn_flags = IPCL_UDPCONN;
		kmem_cache_free(udp_conn_cache, connp);
	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
		connp->conn_flags = IPCL_RAWIPCONN;
		connp->conn_proto = IPPROTO_ICMP;
		connp->conn_ixa->ixa_protocol = connp->conn_proto;
		kmem_cache_free(rawip_conn_cache, connp);
	} else if (connp->conn_flags & IPCL_RTSCONN) {
		connp->conn_flags = IPCL_RTSCONN;
		kmem_cache_free(rts_conn_cache, connp);
	} else {
		connp->conn_flags = IPCL_IPCCONN;
		ASSERT(connp->conn_flags & IPCL_IPCCONN);
		ASSERT(connp->conn_priv == NULL);
		kmem_cache_free(ip_conn_cache, connp);
	}
}
749 750
750 751 /*
751 752 * Running in cluster mode - deregister listener information
752 753 */
753 754 static void
754 755 ipcl_conn_unlisten(conn_t *connp)
755 756 {
756 757 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
757 758 ASSERT(connp->conn_lport != 0);
758 759
759 760 if (cl_inet_unlisten != NULL) {
760 761 sa_family_t addr_family;
761 762 uint8_t *laddrp;
762 763
763 764 if (connp->conn_ipversion == IPV6_VERSION) {
764 765 addr_family = AF_INET6;
765 766 laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
766 767 } else {
767 768 addr_family = AF_INET;
768 769 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
769 770 }
770 771 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
771 772 IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
772 773 }
773 774 connp->conn_flags &= ~IPCL_CL_LISTENER;
774 775 }
775 776
/*
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 *
 * Unlinks (connp) from its fanout bucket under connf_lock, drops the
 * reference the table held on it, and deregisters cluster listener state
 * if IPCL_CL_LISTENER is set.  A no-op when the conn is in no table
 * (conn_fanout == NULL).  Caller must NOT hold conn_lock.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
804 805
805 806 void
806 807 ipcl_hash_remove(conn_t *connp)
807 808 {
808 809 uint8_t protocol = connp->conn_proto;
809 810
810 811 IPCL_HASH_REMOVE(connp);
811 812 if (protocol == IPPROTO_RSVP)
812 813 ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
813 814 }
814 815
815 816 /*
816 817 * The whole purpose of this function is allow removal of
817 818 * a conn_t from the connected hash for timewait reclaim.
818 819 * This is essentially a TW reclaim fastpath where timewait
819 820 * collector checks under fanout lock (so no one else can
820 821 * get access to the conn_t) that refcnt is 2 i.e. one for
821 822 * TCP and one for the classifier hash list. If ref count
822 823 * is indeed 2, we can just remove the conn under lock and
823 824 * avoid cleaning up the conn under squeue. This gives us
824 825 * improved performance.
825 826 */
826 827 void
827 828 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp)
828 829 {
829 830 ASSERT(MUTEX_HELD(&connfp->connf_lock));
830 831 ASSERT(MUTEX_HELD(&connp->conn_lock));
831 832 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
832 833
833 834 if ((connp)->conn_next != NULL) {
834 835 (connp)->conn_next->conn_prev = (connp)->conn_prev;
835 836 }
836 837 if ((connp)->conn_prev != NULL) {
837 838 (connp)->conn_prev->conn_next = (connp)->conn_next;
838 839 } else {
839 840 connfp->connf_head = (connp)->conn_next;
840 841 }
841 842 (connp)->conn_fanout = NULL;
842 843 (connp)->conn_next = NULL;
843 844 (connp)->conn_prev = NULL;
844 845 (connp)->conn_flags |= IPCL_REMOVED;
845 846 ASSERT((connp)->conn_ref == 2);
846 847 (connp)->conn_ref--;
847 848 }
848 849
849 850 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \
850 851 ASSERT((connp)->conn_fanout == NULL); \
851 852 ASSERT((connp)->conn_next == NULL); \
852 853 ASSERT((connp)->conn_prev == NULL); \
853 854 if ((connfp)->connf_head != NULL) { \
854 855 (connfp)->connf_head->conn_prev = (connp); \
855 856 (connp)->conn_next = (connfp)->connf_head; \
856 857 } \
857 858 (connp)->conn_fanout = (connfp); \
858 859 (connfp)->connf_head = (connp); \
859 860 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
860 861 IPCL_CONNECTED; \
|
↓ open down ↓ |
828 lines elided |
↑ open up ↑ |
861 862 CONN_INC_REF(connp); \
862 863 }
863 864
864 865 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \
865 866 IPCL_HASH_REMOVE((connp)); \
866 867 mutex_enter(&(connfp)->connf_lock); \
867 868 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \
868 869 mutex_exit(&(connfp)->connf_lock); \
869 870 }
870 871
871 -#define IPCL_HASH_INSERT_BOUND(connfp, connp) { \
872 - conn_t *pconnp = NULL, *nconnp; \
873 - IPCL_HASH_REMOVE((connp)); \
874 - mutex_enter(&(connfp)->connf_lock); \
875 - nconnp = (connfp)->connf_head; \
876 - while (nconnp != NULL && \
877 - !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) { \
878 - pconnp = nconnp; \
879 - nconnp = nconnp->conn_next; \
880 - } \
881 - if (pconnp != NULL) { \
882 - pconnp->conn_next = (connp); \
883 - (connp)->conn_prev = pconnp; \
884 - } else { \
885 - (connfp)->connf_head = (connp); \
886 - } \
887 - if (nconnp != NULL) { \
888 - (connp)->conn_next = nconnp; \
889 - nconnp->conn_prev = (connp); \
890 - } \
891 - (connp)->conn_fanout = (connfp); \
892 - (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
893 - IPCL_BOUND; \
894 - CONN_INC_REF(connp); \
895 - mutex_exit(&(connfp)->connf_lock); \
896 -}
872 +/*
873 + * When inserting bound or wildcard entries into the hash, ordering rules are
874 + * used to facilitate timely and correct lookups. The order is as follows:
875 + * 1. Entries bound to a specific address
876 + * 2. Entries bound to INADDR_ANY
877 + * 3. Entries bound to ADDR_UNSPECIFIED
878 + * Entries in a category which share conn_lport (such as those using
879 + * SO_REUSEPORT) will be ordered such that the newest inserted is first.
880 + */
897 881
898 -#define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \
899 - conn_t **list, *prev, *next; \
900 - boolean_t isv4mapped = \
901 - IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6); \
902 - IPCL_HASH_REMOVE((connp)); \
903 - mutex_enter(&(connfp)->connf_lock); \
904 - list = &(connfp)->connf_head; \
905 - prev = NULL; \
906 - while ((next = *list) != NULL) { \
907 - if (isv4mapped && \
908 - IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) && \
909 - connp->conn_zoneid == next->conn_zoneid) { \
910 - (connp)->conn_next = next; \
911 - if (prev != NULL) \
912 - prev = next->conn_prev; \
913 - next->conn_prev = (connp); \
914 - break; \
915 - } \
916 - list = &next->conn_next; \
917 - prev = next; \
918 - } \
919 - (connp)->conn_prev = prev; \
920 - *list = (connp); \
921 - (connp)->conn_fanout = (connfp); \
922 - (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
923 - IPCL_BOUND; \
924 - CONN_INC_REF((connp)); \
925 - mutex_exit(&(connfp)->connf_lock); \
882 +void
883 +ipcl_hash_insert_bound(connf_t *connfp, conn_t *connp)
884 +{
885 + conn_t *pconnp, *nconnp;
886 +
887 + IPCL_HASH_REMOVE(connp);
888 + mutex_enter(&connfp->connf_lock);
889 + nconnp = connfp->connf_head;
890 + pconnp = NULL;
891 + while (nconnp != NULL) {
892 + /*
893 + * Walk though entries associated with the fanout until one is
894 + * found which fulfills any of these conditions:
895 + * 1. Listen address of ADDR_ANY/ADDR_UNSPECIFIED
896 + * 2. Listen port the same as connp
897 + */
898 + if (_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6) ||
899 + connp->conn_lport == nconnp->conn_lport)
900 + break;
901 + pconnp = nconnp;
902 + nconnp = nconnp->conn_next;
903 + }
904 + if (pconnp != NULL) {
905 + pconnp->conn_next = connp;
906 + connp->conn_prev = pconnp;
907 + } else {
908 + connfp->connf_head = connp;
909 + }
910 + if (nconnp != NULL) {
911 + connp->conn_next = nconnp;
912 + nconnp->conn_prev = connp;
913 + }
914 + connp->conn_fanout = connfp;
915 + connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
916 + CONN_INC_REF(connp);
917 + mutex_exit(&connfp->connf_lock);
926 918 }
927 919
928 920 void
929 921 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
930 922 {
931 - IPCL_HASH_INSERT_WILDCARD(connfp, connp);
923 + conn_t *pconnp = NULL, *nconnp;
924 + boolean_t isv4mapped = IN6_IS_ADDR_V4MAPPED(&connp->conn_laddr_v6);
925 +
926 + IPCL_HASH_REMOVE(connp);
927 + mutex_enter(&connfp->connf_lock);
928 + nconnp = connfp->connf_head;
929 + pconnp = NULL;
930 + while (nconnp != NULL) {
931 + if (IN6_IS_ADDR_V4MAPPED_ANY(&nconnp->conn_laddr_v6) &&
932 + isv4mapped && connp->conn_lport == nconnp->conn_lport)
933 + break;
934 + if (IN6_IS_ADDR_UNSPECIFIED(&nconnp->conn_laddr_v6) &&
935 + (isv4mapped ||
936 + connp->conn_lport == nconnp->conn_lport))
937 + break;
938 +
939 + pconnp = nconnp;
940 + nconnp = nconnp->conn_next;
941 + }
942 + if (pconnp != NULL) {
943 + pconnp->conn_next = connp;
944 + connp->conn_prev = pconnp;
945 + } else {
946 + connfp->connf_head = connp;
947 + }
948 + if (nconnp != NULL) {
949 + connp->conn_next = nconnp;
950 + nconnp->conn_prev = connp;
951 + }
952 + connp->conn_fanout = connfp;
953 + connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
954 + CONN_INC_REF(connp);
955 + mutex_exit(&connfp->connf_lock);
932 956 }
933 957
934 958 /*
935 959 * Because the classifier is used to classify inbound packets, the destination
936 960 * address is meant to be our local tunnel address (tunnel source), and the
937 961 * source the remote tunnel address (tunnel destination).
938 962 *
939 963 * Note that conn_proto can't be used for fanout since the upper protocol
940 964 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
941 965 */
942 966 conn_t *
943 967 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
944 968 {
945 969 connf_t *connfp;
946 970 conn_t *connp;
947 971
948 972 /* first look for IPv4 tunnel links */
949 973 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
950 974 mutex_enter(&connfp->connf_lock);
951 975 for (connp = connfp->connf_head; connp != NULL;
952 976 connp = connp->conn_next) {
953 977 if (IPCL_IPTUN_MATCH(connp, *dst, *src))
954 978 break;
955 979 }
956 980 if (connp != NULL)
957 981 goto done;
958 982
959 983 mutex_exit(&connfp->connf_lock);
960 984
961 985 /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
962 986 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
963 987 INADDR_ANY)];
964 988 mutex_enter(&connfp->connf_lock);
965 989 for (connp = connfp->connf_head; connp != NULL;
966 990 connp = connp->conn_next) {
967 991 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
968 992 break;
969 993 }
970 994 done:
971 995 if (connp != NULL)
972 996 CONN_INC_REF(connp);
973 997 mutex_exit(&connfp->connf_lock);
974 998 return (connp);
975 999 }
976 1000
977 1001 conn_t *
978 1002 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
979 1003 {
980 1004 connf_t *connfp;
981 1005 conn_t *connp;
982 1006
983 1007 /* Look for an IPv6 tunnel link */
984 1008 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
985 1009 mutex_enter(&connfp->connf_lock);
986 1010 for (connp = connfp->connf_head; connp != NULL;
987 1011 connp = connp->conn_next) {
988 1012 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
989 1013 CONN_INC_REF(connp);
990 1014 break;
991 1015 }
992 1016 }
993 1017 mutex_exit(&connfp->connf_lock);
994 1018 return (connp);
995 1019 }
996 1020
997 1021 /*
998 1022 * This function is used only for inserting SCTP raw socket now.
999 1023 * This may change later.
1000 1024 *
1001 1025 * Note that only one raw socket can be bound to a port. The param
1002 1026 * lport is in network byte order.
1003 1027 */
1004 1028 static int
1005 1029 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1006 1030 {
1007 1031 connf_t *connfp;
1008 1032 conn_t *oconnp;
1009 1033 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1010 1034
1011 1035 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1012 1036
1013 1037 /* Check for existing raw socket already bound to the port. */
1014 1038 mutex_enter(&connfp->connf_lock);
1015 1039 for (oconnp = connfp->connf_head; oconnp != NULL;
1016 1040 oconnp = oconnp->conn_next) {
1017 1041 if (oconnp->conn_lport == lport &&
1018 1042 oconnp->conn_zoneid == connp->conn_zoneid &&
1019 1043 oconnp->conn_family == connp->conn_family &&
1020 1044 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1021 1045 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1022 1046 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1023 1047 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1024 1048 IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1025 1049 &connp->conn_laddr_v6))) {
1026 1050 break;
|
↓ open down ↓ |
85 lines elided |
↑ open up ↑ |
1027 1051 }
1028 1052 }
1029 1053 mutex_exit(&connfp->connf_lock);
1030 1054 if (oconnp != NULL)
1031 1055 return (EADDRNOTAVAIL);
1032 1056
1033 1057 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1034 1058 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1035 1059 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1036 1060 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1037 - IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1061 + ipcl_hash_insert_wildcard(connfp, connp);
1038 1062 } else {
1039 - IPCL_HASH_INSERT_BOUND(connfp, connp);
1063 + ipcl_hash_insert_bound(connfp, connp);
1040 1064 }
1041 1065 } else {
1042 1066 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1043 1067 }
1044 1068 return (0);
1045 1069 }
1046 1070
1047 1071 static int
1048 1072 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1049 1073 {
1050 1074 connf_t *connfp;
1051 1075 conn_t *tconnp;
1052 1076 ipaddr_t laddr = connp->conn_laddr_v4;
1053 1077 ipaddr_t faddr = connp->conn_faddr_v4;
1054 1078
1055 1079 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1056 1080 mutex_enter(&connfp->connf_lock);
1057 1081 for (tconnp = connfp->connf_head; tconnp != NULL;
1058 1082 tconnp = tconnp->conn_next) {
1059 1083 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1060 1084 /* A tunnel is already bound to these addresses. */
1061 1085 mutex_exit(&connfp->connf_lock);
1062 1086 return (EADDRINUSE);
1063 1087 }
1064 1088 }
1065 1089 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1066 1090 mutex_exit(&connfp->connf_lock);
1067 1091 return (0);
1068 1092 }
1069 1093
1070 1094 static int
1071 1095 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1072 1096 {
1073 1097 connf_t *connfp;
1074 1098 conn_t *tconnp;
1075 1099 in6_addr_t *laddr = &connp->conn_laddr_v6;
1076 1100 in6_addr_t *faddr = &connp->conn_faddr_v6;
1077 1101
1078 1102 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1079 1103 mutex_enter(&connfp->connf_lock);
1080 1104 for (tconnp = connfp->connf_head; tconnp != NULL;
1081 1105 tconnp = tconnp->conn_next) {
1082 1106 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1083 1107 /* A tunnel is already bound to these addresses. */
1084 1108 mutex_exit(&connfp->connf_lock);
1085 1109 return (EADDRINUSE);
1086 1110 }
1087 1111 }
1088 1112 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1089 1113 mutex_exit(&connfp->connf_lock);
1090 1114 return (0);
1091 1115 }
1092 1116
1093 1117 /*
1094 1118 * Check for a MAC exemption conflict on a labeled system. Note that for
1095 1119 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1096 1120 * transport layer. This check is for binding all other protocols.
1097 1121 *
1098 1122 * Returns true if there's a conflict.
1099 1123 */
1100 1124 static boolean_t
1101 1125 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1102 1126 {
1103 1127 connf_t *connfp;
1104 1128 conn_t *tconn;
1105 1129
1106 1130 connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1107 1131 mutex_enter(&connfp->connf_lock);
1108 1132 for (tconn = connfp->connf_head; tconn != NULL;
1109 1133 tconn = tconn->conn_next) {
1110 1134 /* We don't allow v4 fallback for v6 raw socket */
1111 1135 if (connp->conn_family != tconn->conn_family)
1112 1136 continue;
1113 1137 /* If neither is exempt, then there's no conflict */
1114 1138 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1115 1139 (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1116 1140 continue;
1117 1141 /* We are only concerned about sockets for a different zone */
1118 1142 if (connp->conn_zoneid == tconn->conn_zoneid)
1119 1143 continue;
1120 1144 /* If both are bound to different specific addrs, ok */
1121 1145 if (connp->conn_laddr_v4 != INADDR_ANY &&
1122 1146 tconn->conn_laddr_v4 != INADDR_ANY &&
1123 1147 connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1124 1148 continue;
1125 1149 /* These two conflict; fail */
1126 1150 break;
1127 1151 }
1128 1152 mutex_exit(&connfp->connf_lock);
1129 1153 return (tconn != NULL);
1130 1154 }
1131 1155
1132 1156 static boolean_t
1133 1157 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1134 1158 {
1135 1159 connf_t *connfp;
1136 1160 conn_t *tconn;
1137 1161
1138 1162 connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1139 1163 mutex_enter(&connfp->connf_lock);
1140 1164 for (tconn = connfp->connf_head; tconn != NULL;
1141 1165 tconn = tconn->conn_next) {
1142 1166 /* We don't allow v4 fallback for v6 raw socket */
1143 1167 if (connp->conn_family != tconn->conn_family)
1144 1168 continue;
1145 1169 /* If neither is exempt, then there's no conflict */
1146 1170 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1147 1171 (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1148 1172 continue;
1149 1173 /* We are only concerned about sockets for a different zone */
1150 1174 if (connp->conn_zoneid == tconn->conn_zoneid)
1151 1175 continue;
1152 1176 /* If both are bound to different addrs, ok */
1153 1177 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1154 1178 !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1155 1179 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1156 1180 &tconn->conn_laddr_v6))
1157 1181 continue;
1158 1182 /* These two conflict; fail */
1159 1183 break;
1160 1184 }
1161 1185 mutex_exit(&connfp->connf_lock);
1162 1186 return (tconn != NULL);
1163 1187 }
1164 1188
1165 1189 /*
1166 1190 * (v4, v6) bind hash insertion routines
1167 1191 * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1168 1192 */
1169 1193
1170 1194 int
1171 1195 ipcl_bind_insert(conn_t *connp)
1172 1196 {
1173 1197 if (connp->conn_ipversion == IPV6_VERSION)
1174 1198 return (ipcl_bind_insert_v6(connp));
1175 1199 else
1176 1200 return (ipcl_bind_insert_v4(connp));
1177 1201 }
1178 1202
1179 1203 int
1180 1204 ipcl_bind_insert_v4(conn_t *connp)
1181 1205 {
1182 1206 connf_t *connfp;
1183 1207 int ret = 0;
1184 1208 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1185 1209 uint16_t lport = connp->conn_lport;
1186 1210 uint8_t protocol = connp->conn_proto;
1187 1211
1188 1212 if (IPCL_IS_IPTUN(connp))
1189 1213 return (ipcl_iptun_hash_insert(connp, ipst));
1190 1214
1191 1215 switch (protocol) {
1192 1216 default:
1193 1217 if (is_system_labeled() &&
1194 1218 check_exempt_conflict_v4(connp, ipst))
1195 1219 return (EADDRINUSE);
1196 1220 /* FALLTHROUGH */
1197 1221 case IPPROTO_UDP:
|
↓ open down ↓ |
148 lines elided |
↑ open up ↑ |
1198 1222 if (protocol == IPPROTO_UDP) {
1199 1223 connfp = &ipst->ips_ipcl_udp_fanout[
1200 1224 IPCL_UDP_HASH(lport, ipst)];
1201 1225 } else {
1202 1226 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1203 1227 }
1204 1228
1205 1229 if (connp->conn_faddr_v4 != INADDR_ANY) {
1206 1230 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1207 1231 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1208 - IPCL_HASH_INSERT_BOUND(connfp, connp);
1232 + ipcl_hash_insert_bound(connfp, connp);
1209 1233 } else {
1210 - IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1234 + ipcl_hash_insert_wildcard(connfp, connp);
1211 1235 }
1212 1236 if (protocol == IPPROTO_RSVP)
1213 1237 ill_set_inputfn_all(ipst);
1214 1238 break;
1215 1239
1216 1240 case IPPROTO_TCP:
1217 1241 /* Insert it in the Bind Hash */
1218 1242 ASSERT(connp->conn_zoneid != ALL_ZONES);
1219 1243 connfp = &ipst->ips_ipcl_bind_fanout[
1220 1244 IPCL_BIND_HASH(lport, ipst)];
1221 1245 if (connp->conn_laddr_v4 != INADDR_ANY) {
1222 - IPCL_HASH_INSERT_BOUND(connfp, connp);
1246 + ipcl_hash_insert_bound(connfp, connp);
1223 1247 } else {
1224 - IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1248 + ipcl_hash_insert_wildcard(connfp, connp);
1225 1249 }
1226 1250 if (cl_inet_listen != NULL) {
1227 1251 ASSERT(connp->conn_ipversion == IPV4_VERSION);
1228 1252 connp->conn_flags |= IPCL_CL_LISTENER;
1229 1253 (*cl_inet_listen)(
1230 1254 connp->conn_netstack->netstack_stackid,
1231 1255 IPPROTO_TCP, AF_INET,
1232 1256 (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1233 1257 }
1234 1258 break;
1235 1259
1236 1260 case IPPROTO_SCTP:
1237 1261 ret = ipcl_sctp_hash_insert(connp, lport);
1238 1262 break;
1239 1263 }
1240 1264
1241 1265 return (ret);
1242 1266 }
1243 1267
1244 1268 int
1245 1269 ipcl_bind_insert_v6(conn_t *connp)
1246 1270 {
1247 1271 connf_t *connfp;
1248 1272 int ret = 0;
1249 1273 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1250 1274 uint16_t lport = connp->conn_lport;
1251 1275 uint8_t protocol = connp->conn_proto;
1252 1276
1253 1277 if (IPCL_IS_IPTUN(connp)) {
1254 1278 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1255 1279 }
1256 1280
1257 1281 switch (protocol) {
1258 1282 default:
1259 1283 if (is_system_labeled() &&
1260 1284 check_exempt_conflict_v6(connp, ipst))
1261 1285 return (EADDRINUSE);
1262 1286 /* FALLTHROUGH */
1263 1287 case IPPROTO_UDP:
|
↓ open down ↓ |
29 lines elided |
↑ open up ↑ |
1264 1288 if (protocol == IPPROTO_UDP) {
1265 1289 connfp = &ipst->ips_ipcl_udp_fanout[
1266 1290 IPCL_UDP_HASH(lport, ipst)];
1267 1291 } else {
1268 1292 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1269 1293 }
1270 1294
1271 1295 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1272 1296 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1273 1297 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1274 - IPCL_HASH_INSERT_BOUND(connfp, connp);
1298 + ipcl_hash_insert_bound(connfp, connp);
1275 1299 } else {
1276 - IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1300 + ipcl_hash_insert_wildcard(connfp, connp);
1277 1301 }
1278 1302 break;
1279 1303
1280 1304 case IPPROTO_TCP:
1281 1305 /* Insert it in the Bind Hash */
1282 1306 ASSERT(connp->conn_zoneid != ALL_ZONES);
1283 1307 connfp = &ipst->ips_ipcl_bind_fanout[
1284 1308 IPCL_BIND_HASH(lport, ipst)];
1285 1309 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1286 - IPCL_HASH_INSERT_BOUND(connfp, connp);
1310 + ipcl_hash_insert_bound(connfp, connp);
1287 1311 } else {
1288 - IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1312 + ipcl_hash_insert_wildcard(connfp, connp);
1289 1313 }
1290 1314 if (cl_inet_listen != NULL) {
1291 1315 sa_family_t addr_family;
1292 1316 uint8_t *laddrp;
1293 1317
1294 1318 if (connp->conn_ipversion == IPV6_VERSION) {
1295 1319 addr_family = AF_INET6;
1296 1320 laddrp =
1297 1321 (uint8_t *)&connp->conn_bound_addr_v6;
1298 1322 } else {
1299 1323 addr_family = AF_INET;
1300 1324 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1301 1325 }
1302 1326 connp->conn_flags |= IPCL_CL_LISTENER;
1303 1327 (*cl_inet_listen)(
1304 1328 connp->conn_netstack->netstack_stackid,
1305 1329 IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1306 1330 }
1307 1331 break;
1308 1332
1309 1333 case IPPROTO_SCTP:
1310 1334 ret = ipcl_sctp_hash_insert(connp, lport);
1311 1335 break;
1312 1336 }
1313 1337
1314 1338 return (ret);
1315 1339 }
1316 1340
1317 1341 /*
1318 1342 * ipcl_conn_hash insertion routines.
1319 1343 * The caller has already set conn_proto and the addresses/ports in the conn_t.
1320 1344 */
1321 1345
1322 1346 int
1323 1347 ipcl_conn_insert(conn_t *connp)
1324 1348 {
1325 1349 if (connp->conn_ipversion == IPV6_VERSION)
1326 1350 return (ipcl_conn_insert_v6(connp));
1327 1351 else
1328 1352 return (ipcl_conn_insert_v4(connp));
1329 1353 }
1330 1354
1331 1355 int
1332 1356 ipcl_conn_insert_v4(conn_t *connp)
1333 1357 {
1334 1358 connf_t *connfp;
1335 1359 conn_t *tconnp;
1336 1360 int ret = 0;
1337 1361 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1338 1362 uint16_t lport = connp->conn_lport;
1339 1363 uint8_t protocol = connp->conn_proto;
1340 1364
1341 1365 if (IPCL_IS_IPTUN(connp))
1342 1366 return (ipcl_iptun_hash_insert(connp, ipst));
1343 1367
1344 1368 switch (protocol) {
1345 1369 case IPPROTO_TCP:
1346 1370 /*
1347 1371 * For TCP, we check whether the connection tuple already
1348 1372 * exists before allowing the connection to proceed. We
1349 1373 * also allow indexing on the zoneid. This is to allow
1350 1374 * multiple shared stack zones to have the same tcp
1351 1375 * connection tuple. In practice this only happens for
1352 1376 * INADDR_LOOPBACK as it's the only local address which
1353 1377 * doesn't have to be unique.
1354 1378 */
1355 1379 connfp = &ipst->ips_ipcl_conn_fanout[
1356 1380 IPCL_CONN_HASH(connp->conn_faddr_v4,
1357 1381 connp->conn_ports, ipst)];
1358 1382 mutex_enter(&connfp->connf_lock);
1359 1383 for (tconnp = connfp->connf_head; tconnp != NULL;
1360 1384 tconnp = tconnp->conn_next) {
1361 1385 if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1362 1386 connp->conn_faddr_v4, connp->conn_laddr_v4,
1363 1387 connp->conn_ports) &&
1364 1388 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1365 1389 /* Already have a conn. bail out */
1366 1390 mutex_exit(&connfp->connf_lock);
1367 1391 return (EADDRINUSE);
1368 1392 }
1369 1393 }
1370 1394 if (connp->conn_fanout != NULL) {
1371 1395 /*
1372 1396 * Probably a XTI/TLI application trying to do a
1373 1397 * rebind. Let it happen.
1374 1398 */
1375 1399 mutex_exit(&connfp->connf_lock);
1376 1400 IPCL_HASH_REMOVE(connp);
1377 1401 mutex_enter(&connfp->connf_lock);
1378 1402 }
1379 1403
1380 1404 ASSERT(connp->conn_recv != NULL);
1381 1405 ASSERT(connp->conn_recvicmp != NULL);
1382 1406
1383 1407 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1384 1408 mutex_exit(&connfp->connf_lock);
1385 1409 break;
1386 1410
1387 1411 case IPPROTO_SCTP:
1388 1412 /*
1389 1413 * The raw socket may have already been bound, remove it
1390 1414 * from the hash first.
1391 1415 */
1392 1416 IPCL_HASH_REMOVE(connp);
1393 1417 ret = ipcl_sctp_hash_insert(connp, lport);
1394 1418 break;
1395 1419
1396 1420 default:
1397 1421 /*
1398 1422 * Check for conflicts among MAC exempt bindings. For
1399 1423 * transports with port numbers, this is done by the upper
1400 1424 * level per-transport binding logic. For all others, it's
1401 1425 * done here.
1402 1426 */
1403 1427 if (is_system_labeled() &&
1404 1428 check_exempt_conflict_v4(connp, ipst))
1405 1429 return (EADDRINUSE);
1406 1430 /* FALLTHROUGH */
1407 1431
1408 1432 case IPPROTO_UDP:
|
↓ open down ↓ |
110 lines elided |
↑ open up ↑ |
1409 1433 if (protocol == IPPROTO_UDP) {
1410 1434 connfp = &ipst->ips_ipcl_udp_fanout[
1411 1435 IPCL_UDP_HASH(lport, ipst)];
1412 1436 } else {
1413 1437 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1414 1438 }
1415 1439
1416 1440 if (connp->conn_faddr_v4 != INADDR_ANY) {
1417 1441 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1418 1442 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1419 - IPCL_HASH_INSERT_BOUND(connfp, connp);
1443 + ipcl_hash_insert_bound(connfp, connp);
1420 1444 } else {
1421 - IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1445 + ipcl_hash_insert_wildcard(connfp, connp);
1422 1446 }
1423 1447 break;
1424 1448 }
1425 1449
1426 1450 return (ret);
1427 1451 }
1428 1452
1429 1453 int
1430 1454 ipcl_conn_insert_v6(conn_t *connp)
1431 1455 {
1432 1456 connf_t *connfp;
1433 1457 conn_t *tconnp;
1434 1458 int ret = 0;
1435 1459 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1436 1460 uint16_t lport = connp->conn_lport;
1437 1461 uint8_t protocol = connp->conn_proto;
1438 1462 uint_t ifindex = connp->conn_bound_if;
1439 1463
1440 1464 if (IPCL_IS_IPTUN(connp))
1441 1465 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1442 1466
1443 1467 switch (protocol) {
1444 1468 case IPPROTO_TCP:
1445 1469
1446 1470 /*
1447 1471 * For tcp, we check whether the connection tuple already
1448 1472 * exists before allowing the connection to proceed. We
1449 1473 * also allow indexing on the zoneid. This is to allow
1450 1474 * multiple shared stack zones to have the same tcp
1451 1475 * connection tuple. In practice this only happens for
1452 1476 * ipv6_loopback as it's the only local address which
1453 1477 * doesn't have to be unique.
1454 1478 */
1455 1479 connfp = &ipst->ips_ipcl_conn_fanout[
1456 1480 IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1457 1481 ipst)];
1458 1482 mutex_enter(&connfp->connf_lock);
1459 1483 for (tconnp = connfp->connf_head; tconnp != NULL;
1460 1484 tconnp = tconnp->conn_next) {
1461 1485 /* NOTE: need to match zoneid. Bug in onnv-gate */
1462 1486 if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1463 1487 connp->conn_faddr_v6, connp->conn_laddr_v6,
1464 1488 connp->conn_ports) &&
1465 1489 (tconnp->conn_bound_if == 0 ||
1466 1490 tconnp->conn_bound_if == ifindex) &&
1467 1491 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1468 1492 /* Already have a conn. bail out */
1469 1493 mutex_exit(&connfp->connf_lock);
1470 1494 return (EADDRINUSE);
1471 1495 }
1472 1496 }
1473 1497 if (connp->conn_fanout != NULL) {
1474 1498 /*
1475 1499 * Probably a XTI/TLI application trying to do a
1476 1500 * rebind. Let it happen.
1477 1501 */
1478 1502 mutex_exit(&connfp->connf_lock);
1479 1503 IPCL_HASH_REMOVE(connp);
1480 1504 mutex_enter(&connfp->connf_lock);
1481 1505 }
1482 1506 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1483 1507 mutex_exit(&connfp->connf_lock);
1484 1508 break;
1485 1509
1486 1510 case IPPROTO_SCTP:
1487 1511 IPCL_HASH_REMOVE(connp);
1488 1512 ret = ipcl_sctp_hash_insert(connp, lport);
1489 1513 break;
1490 1514
1491 1515 default:
1492 1516 if (is_system_labeled() &&
1493 1517 check_exempt_conflict_v6(connp, ipst))
1494 1518 return (EADDRINUSE);
1495 1519 /* FALLTHROUGH */
1496 1520 case IPPROTO_UDP:
|
↓ open down ↓ |
65 lines elided |
↑ open up ↑ |
1497 1521 if (protocol == IPPROTO_UDP) {
1498 1522 connfp = &ipst->ips_ipcl_udp_fanout[
1499 1523 IPCL_UDP_HASH(lport, ipst)];
1500 1524 } else {
1501 1525 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1502 1526 }
1503 1527
1504 1528 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1505 1529 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1506 1530 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1507 - IPCL_HASH_INSERT_BOUND(connfp, connp);
1531 + ipcl_hash_insert_bound(connfp, connp);
1508 1532 } else {
1509 - IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1533 + ipcl_hash_insert_wildcard(connfp, connp);
1510 1534 }
1511 1535 break;
1512 1536 }
1513 1537
1514 1538 return (ret);
1515 1539 }
1516 1540
1517 1541 /*
1518 1542 * v4 packet classifying function. looks up the fanout table to
1519 1543 * find the conn, the packet belongs to. returns the conn with
1520 1544 * the reference held, null otherwise.
1521 1545 *
1522 1546 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1523 1547 * Lookup" comment block are applied. Labels are also checked as described
1524 1548 * above. If the packet is from the inside (looped back), and is from the same
1525 1549 * zone, then label checks are omitted.
1526 1550 */
1527 1551 conn_t *
1528 1552 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1529 1553 ip_recv_attr_t *ira, ip_stack_t *ipst)
1530 1554 {
1531 1555 ipha_t *ipha;
1532 1556 connf_t *connfp, *bind_connfp;
1533 1557 uint16_t lport;
1534 1558 uint16_t fport;
1535 1559 uint32_t ports;
1536 1560 conn_t *connp;
1537 1561 uint16_t *up;
1538 1562 zoneid_t zoneid = ira->ira_zoneid;
1539 1563
1540 1564 ipha = (ipha_t *)mp->b_rptr;
1541 1565 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1542 1566
1543 1567 switch (protocol) {
1544 1568 case IPPROTO_TCP:
1545 1569 ports = *(uint32_t *)up;
1546 1570 connfp =
1547 1571 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1548 1572 ports, ipst)];
1549 1573 mutex_enter(&connfp->connf_lock);
1550 1574 for (connp = connfp->connf_head; connp != NULL;
1551 1575 connp = connp->conn_next) {
1552 1576 if (IPCL_CONN_MATCH(connp, protocol,
1553 1577 ipha->ipha_src, ipha->ipha_dst, ports) &&
1554 1578 (connp->conn_zoneid == zoneid ||
1555 1579 connp->conn_allzones ||
1556 1580 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1557 1581 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1558 1582 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1559 1583 break;
1560 1584 }
1561 1585
1562 1586 if (connp != NULL) {
1563 1587 /*
1564 1588 * We have a fully-bound TCP connection.
1565 1589 *
1566 1590 * For labeled systems, there's no need to check the
1567 1591 * label here. It's known to be good as we checked
1568 1592 * before allowing the connection to become bound.
1569 1593 */
1570 1594 CONN_INC_REF(connp);
1571 1595 mutex_exit(&connfp->connf_lock);
1572 1596 return (connp);
1573 1597 }
1574 1598
1575 1599 mutex_exit(&connfp->connf_lock);
1576 1600 lport = up[1];
1577 1601 bind_connfp =
1578 1602 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1579 1603 mutex_enter(&bind_connfp->connf_lock);
1580 1604 for (connp = bind_connfp->connf_head; connp != NULL;
1581 1605 connp = connp->conn_next) {
1582 1606 if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1583 1607 lport) &&
1584 1608 (connp->conn_zoneid == zoneid ||
1585 1609 connp->conn_allzones ||
1586 1610 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1587 1611 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1588 1612 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1589 1613 break;
1590 1614 }
1591 1615
1592 1616 /*
1593 1617 * If the matching connection is SLP on a private address, then
1594 1618 * the label on the packet must match the local zone's label.
1595 1619 * Otherwise, it must be in the label range defined by tnrh.
1596 1620 * This is ensured by tsol_receive_local.
1597 1621 *
1598 1622 * Note that we don't check tsol_receive_local for
1599 1623 * the connected case.
1600 1624 */
1601 1625 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1602 1626 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1603 1627 ira, connp)) {
1604 1628 DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1605 1629 char *, "connp(1) could not receive mp(2)",
1606 1630 conn_t *, connp, mblk_t *, mp);
1607 1631 connp = NULL;
1608 1632 }
1609 1633
1610 1634 if (connp != NULL) {
1611 1635 /* Have a listener at least */
1612 1636 CONN_INC_REF(connp);
1613 1637 mutex_exit(&bind_connfp->connf_lock);
1614 1638 return (connp);
1615 1639 }
1616 1640
1617 1641 mutex_exit(&bind_connfp->connf_lock);
1618 1642 break;
1619 1643
1620 1644 case IPPROTO_UDP:
1621 1645 lport = up[1];
1622 1646 fport = up[0];
1623 1647 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1624 1648 mutex_enter(&connfp->connf_lock);
1625 1649 for (connp = connfp->connf_head; connp != NULL;
1626 1650 connp = connp->conn_next) {
1627 1651 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1628 1652 fport, ipha->ipha_src) &&
1629 1653 (connp->conn_zoneid == zoneid ||
1630 1654 connp->conn_allzones ||
1631 1655 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1632 1656 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1633 1657 break;
1634 1658 }
1635 1659
1636 1660 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1637 1661 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1638 1662 ira, connp)) {
1639 1663 DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1640 1664 char *, "connp(1) could not receive mp(2)",
1641 1665 conn_t *, connp, mblk_t *, mp);
1642 1666 connp = NULL;
1643 1667 }
1644 1668
1645 1669 if (connp != NULL) {
1646 1670 CONN_INC_REF(connp);
1647 1671 mutex_exit(&connfp->connf_lock);
1648 1672 return (connp);
1649 1673 }
1650 1674
1651 1675 /*
1652 1676 * We shouldn't come here for multicast/broadcast packets
1653 1677 */
1654 1678 mutex_exit(&connfp->connf_lock);
1655 1679
1656 1680 break;
1657 1681
1658 1682 case IPPROTO_ENCAP:
1659 1683 case IPPROTO_IPV6:
1660 1684 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1661 1685 &ipha->ipha_dst, ipst));
1662 1686 }
1663 1687
1664 1688 return (NULL);
1665 1689 }
1666 1690
/*
 * Classify an inbound IPv6 packet to a conn_t.  For TCP we first search
 * the conn fanout for a fully-connected match and then fall back to a
 * listener in the bind fanout; for UDP we search the udp fanout; ENCAP
 * and IPv6-in-IPv6 are handed to the tunnel classifier.  A candidate
 * must also satisfy the zone (or all-zones / MAC-exempt shared-address)
 * check, and on labeled systems tsol_receive_local().  On success the
 * conn_t is returned with a reference held (caller does CONN_DEC_REF);
 * otherwise NULL.
 */
1667 1691 conn_t *
1668 1692 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1669 1693     ip_recv_attr_t *ira, ip_stack_t *ipst)
1670 1694 {
1671 1695 	ip6_t		*ip6h;
1672 1696 	connf_t		*connfp, *bind_connfp;
1673 1697 	uint16_t	lport;
1674 1698 	uint16_t	fport;
1675 1699 	tcpha_t		*tcpha;
1676 1700 	uint32_t	ports;
1677 1701 	conn_t		*connp;
1678 1702 	uint16_t	*up;
1679 1703 	zoneid_t	zoneid = ira->ira_zoneid;
1680 1704
1681 1705 	ip6h = (ip6_t *)mp->b_rptr;
1682 1706
1683 1707 	switch (protocol) {
1684 1708 	case IPPROTO_TCP:
1685 1709 		tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1686 1710 		up = &tcpha->tha_lport;
1687 1711 		ports = *(uint32_t *)up;
1688 1712
1689 1713 		connfp =
1690 1714 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1691 1715 		    ports, ipst)];
1692 1716 		mutex_enter(&connfp->connf_lock);
1693 1717 		for (connp = connfp->connf_head; connp != NULL;
1694 1718 		    connp = connp->conn_next) {
1695 1719 			if (IPCL_CONN_MATCH_V6(connp, protocol,
1696 1720 			    ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1697 1721 			    (connp->conn_zoneid == zoneid ||
1698 1722 			    connp->conn_allzones ||
1699 1723 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1700 1724 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1701 1725 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1702 1726 				break;
1703 1727 		}
1704 1728
1705 1729 		if (connp != NULL) {
1706 1730 			/*
1707 1731 			 * We have a fully-bound TCP connection.
1708 1732 			 *
1709 1733 			 * For labeled systems, there's no need to check the
1710 1734 			 * label here.  It's known to be good as we checked
1711 1735 			 * before allowing the connection to become bound.
1712 1736 			 */
1713 1737 			CONN_INC_REF(connp);
1714 1738 			mutex_exit(&connfp->connf_lock);
1715 1739 			return (connp);
1716 1740 		}
1717 1741
1718 1742 		mutex_exit(&connfp->connf_lock);
1719 1743
1720 1744 		lport = up[1];
1721 1745 		bind_connfp =
1722 1746 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1723 1747 		mutex_enter(&bind_connfp->connf_lock);
1724 1748 		for (connp = bind_connfp->connf_head; connp != NULL;
1725 1749 		    connp = connp->conn_next) {
1726 1750 			if (IPCL_BIND_MATCH_V6(connp, protocol,
1727 1751 			    ip6h->ip6_dst, lport) &&
1728 1752 			    (connp->conn_zoneid == zoneid ||
1729 1753 			    connp->conn_allzones ||
1730 1754 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1731 1755 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1732 1756 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1733 1757 				break;
1734 1758 		}
1735 1759
1736 1760 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1737 1761 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1738 1762 		    ira, connp)) {
1739 1763 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1740 1764 			    char *, "connp(1) could not receive mp(2)",
1741 1765 			    conn_t *, connp, mblk_t *, mp);
1742 1766 			connp = NULL;
1743 1767 		}
1744 1768
1745 1769 		if (connp != NULL) {
1746 1770 			/* Have a listener at least */
1747 1771 			CONN_INC_REF(connp);
1748 1772 			mutex_exit(&bind_connfp->connf_lock);
1749 1773 			return (connp);
1750 1774 		}
1751 1775
1752 1776 		mutex_exit(&bind_connfp->connf_lock);
1753 1777 		break;
1754 1778
1755 1779 	case IPPROTO_UDP:
1756 1780 		up = (uint16_t *)&mp->b_rptr[hdr_len];
1757 1781 		lport = up[1];
1758 1782 		fport = up[0];
1759 1783 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1760 1784 		mutex_enter(&connfp->connf_lock);
1761 1785 		for (connp = connfp->connf_head; connp != NULL;
1762 1786 		    connp = connp->conn_next) {
1763 1787 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1764 1788 			    fport, ip6h->ip6_src) &&
1765 1789 			    (connp->conn_zoneid == zoneid ||
1766 1790 			    connp->conn_allzones ||
1767 1791 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1768 1792 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1769 1793 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1770 1794 				break;
1771 1795 		}
1772 1796
1773 1797 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1774 1798 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1775 1799 		    ira, connp)) {
1776 1800 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1777 1801 			    char *, "connp(1) could not receive mp(2)",
1778 1802 			    conn_t *, connp, mblk_t *, mp);
1779 1803 			connp = NULL;
1780 1804 		}
1781 1805
1782 1806 		if (connp != NULL) {
1783 1807 			CONN_INC_REF(connp);
1784 1808 			mutex_exit(&connfp->connf_lock);
1785 1809 			return (connp);
1786 1810 		}
1787 1811
1788 1812 		/*
1789 1813 		 * We shouldn't come here for multicast/broadcast packets
1790 1814 		 */
1791 1815 		mutex_exit(&connfp->connf_lock);
1792 1816 		break;
1793 1817 	case IPPROTO_ENCAP:
1794 1818 	case IPPROTO_IPV6:
1795 1819 		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1796 1820 		    &ip6h->ip6_dst, ipst));
1797 1821 	}
1798 1822
1799 1823 	return (NULL);
1800 1824 }
1801 1825
1802 1826 /*
1803 1827 * wrapper around ipcl_classify_(v4,v6) routines.
1804 1828 */
1805 1829 conn_t *
1806 1830 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1807 1831 {
1808 1832 if (ira->ira_flags & IRAF_IS_IPV4) {
1809 1833 return (ipcl_classify_v4(mp, ira->ira_protocol,
1810 1834 ira->ira_ip_hdr_length, ira, ipst));
1811 1835 } else {
1812 1836 return (ipcl_classify_v6(mp, ira->ira_protocol,
1813 1837 ira->ira_ip_hdr_length, ira, ipst));
1814 1838 }
1815 1839 }
1816 1840
1817 1841 /*
1818 1842 * Only used to classify SCTP RAW sockets
1819 1843 */
/*
 * Classify a packet to an SCTP RAW socket conn_t.  Pass 1 searches the
 * raw-fanout bucket for the packet's local port, preferring a connected
 * (full 5-tuple) match and accepting a bound match otherwise; pass 2
 * falls back to the wildcard (port 0) bucket.  IPv4 fallback is never
 * allowed for v6 raw sockets, and on labeled systems the match must
 * pass tsol_receive_local().  Returns the conn_t with a reference held,
 * or NULL.
 */
1820 1844 conn_t *
1821 1845 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1822 1846     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1823 1847 {
1824 1848 	connf_t		*connfp;
1825 1849 	conn_t		*connp;
1826 1850 	in_port_t	lport;
1827 1851 	int		ipversion;
1828 1852 	const void	*dst;
1829 1853 	zoneid_t	zoneid = ira->ira_zoneid;
1830 1854
1831 1855 	lport = ((uint16_t *)&ports)[1];
1832 1856 	if (ira->ira_flags & IRAF_IS_IPV4) {
1833 1857 		dst = (const void *)&ipha->ipha_dst;
1834 1858 		ipversion = IPV4_VERSION;
1835 1859 	} else {
1836 1860 		dst = (const void *)&ip6h->ip6_dst;
1837 1861 		ipversion = IPV6_VERSION;
1838 1862 	}
1839 1863
	/* Pass 1: search the bucket for the packet's local port. */
1840 1864 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1841 1865 	mutex_enter(&connfp->connf_lock);
1842 1866 	for (connp = connfp->connf_head; connp != NULL;
1843 1867 	    connp = connp->conn_next) {
1844 1868 		/* We don't allow v4 fallback for v6 raw socket. */
1845 1869 		if (ipversion != connp->conn_ipversion)
1846 1870 			continue;
1847 1871 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1848 1872 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1849 1873 			if (ipversion == IPV4_VERSION) {
1850 1874 				if (!IPCL_CONN_MATCH(connp, protocol,
1851 1875 				    ipha->ipha_src, ipha->ipha_dst, ports))
1852 1876 					continue;
1853 1877 			} else {
1854 1878 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
1855 1879 				    ip6h->ip6_src, ip6h->ip6_dst, ports))
1856 1880 					continue;
1857 1881 			}
1858 1882 		} else {
1859 1883 			if (ipversion == IPV4_VERSION) {
1860 1884 				if (!IPCL_BIND_MATCH(connp, protocol,
1861 1885 				    ipha->ipha_dst, lport))
1862 1886 					continue;
1863 1887 			} else {
1864 1888 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
1865 1889 				    ip6h->ip6_dst, lport))
1866 1890 					continue;
1867 1891 			}
1868 1892 		}
1869 1893
1870 1894 		if (connp->conn_zoneid == zoneid ||
1871 1895 		    connp->conn_allzones ||
1872 1896 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1873 1897 		    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1874 1898 		    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1875 1899 			break;
1876 1900 	}
1877 1901
1878 1902 	if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1879 1903 	    !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1880 1904 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1881 1905 		    char *, "connp(1) could not receive mp(2)",
1882 1906 		    conn_t *, connp, mblk_t *, mp);
1883 1907 		connp = NULL;
1884 1908 	}
1885 1909
1886 1910 	if (connp != NULL)
1887 1911 		goto found;
1888 1912 	mutex_exit(&connfp->connf_lock);
1889 1913
1890 1914 	/* Try to look for a wildcard SCTP RAW socket match. */
1891 1915 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1892 1916 	mutex_enter(&connfp->connf_lock);
1893 1917 	for (connp = connfp->connf_head; connp != NULL;
1894 1918 	    connp = connp->conn_next) {
1895 1919 		/* We don't allow v4 fallback for v6 raw socket. */
1896 1920 		if (ipversion != connp->conn_ipversion)
1897 1921 			continue;
1898 1922 		if (!IPCL_ZONE_MATCH(connp, zoneid))
1899 1923 			continue;
1900 1924
1901 1925 		if (ipversion == IPV4_VERSION) {
1902 1926 			if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1903 1927 				break;
1904 1928 		} else {
1905 1929 			if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1906 1930 				break;
1907 1931 			}
1908 1932 		}
1909 1933 	}
1910 1934
1911 1935 	if (connp != NULL)
1912 1936 		goto found;
1913 1937
1914 1938 	mutex_exit(&connfp->connf_lock);
1915 1939 	return (NULL);
1916 1940
	/* Reached with connfp's lock still held by the matching pass. */
1917 1941 found:
1918 1942 	ASSERT(connp != NULL);
1919 1943 	CONN_INC_REF(connp);
1920 1944 	mutex_exit(&connfp->connf_lock);
1921 1945 	return (connp);
1922 1946 }
1923 1947
1924 1948 /* ARGSUSED */
1925 1949 static int
1926 1950 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1927 1951 {
1928 1952 itc_t *itc = (itc_t *)buf;
1929 1953 conn_t *connp = &itc->itc_conn;
1930 1954 tcp_t *tcp = (tcp_t *)&itc[1];
1931 1955
1932 1956 bzero(connp, sizeof (conn_t));
1933 1957 bzero(tcp, sizeof (tcp_t));
1934 1958
1935 1959 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1936 1960 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1937 1961 cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1938 1962 tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1939 1963 if (tcp->tcp_timercache == NULL)
1940 1964 return (ENOMEM);
1941 1965 connp->conn_tcp = tcp;
1942 1966 connp->conn_flags = IPCL_TCPCONN;
1943 1967 connp->conn_proto = IPPROTO_TCP;
1944 1968 tcp->tcp_connp = connp;
1945 1969 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1946 1970
1947 1971 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1948 1972 if (connp->conn_ixa == NULL) {
1949 1973 tcp_timermp_free(tcp);
1950 1974 return (ENOMEM);
1951 1975 }
1952 1976 connp->conn_ixa->ixa_refcnt = 1;
1953 1977 connp->conn_ixa->ixa_protocol = connp->conn_proto;
1954 1978 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1955 1979 return (0);
1956 1980 }
1957 1981
1958 1982 /* ARGSUSED */
1959 1983 static void
1960 1984 tcp_conn_destructor(void *buf, void *cdrarg)
1961 1985 {
1962 1986 itc_t *itc = (itc_t *)buf;
1963 1987 conn_t *connp = &itc->itc_conn;
1964 1988 tcp_t *tcp = (tcp_t *)&itc[1];
1965 1989
1966 1990 ASSERT(connp->conn_flags & IPCL_TCPCONN);
1967 1991 ASSERT(tcp->tcp_connp == connp);
1968 1992 ASSERT(connp->conn_tcp == tcp);
1969 1993 tcp_timermp_free(tcp);
1970 1994 mutex_destroy(&connp->conn_lock);
1971 1995 cv_destroy(&connp->conn_cv);
1972 1996 cv_destroy(&connp->conn_sq_cv);
1973 1997 rw_destroy(&connp->conn_ilg_lock);
1974 1998
1975 1999 /* Can be NULL if constructor failed */
1976 2000 if (connp->conn_ixa != NULL) {
1977 2001 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1978 2002 ASSERT(connp->conn_ixa->ixa_ire == NULL);
1979 2003 ASSERT(connp->conn_ixa->ixa_nce == NULL);
1980 2004 ixa_refrele(connp->conn_ixa);
1981 2005 }
1982 2006 }
1983 2007
1984 2008 /* ARGSUSED */
1985 2009 static int
1986 2010 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1987 2011 {
1988 2012 itc_t *itc = (itc_t *)buf;
1989 2013 conn_t *connp = &itc->itc_conn;
1990 2014
1991 2015 bzero(connp, sizeof (conn_t));
1992 2016 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1993 2017 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1994 2018 connp->conn_flags = IPCL_IPCCONN;
1995 2019 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1996 2020
1997 2021 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1998 2022 if (connp->conn_ixa == NULL)
1999 2023 return (ENOMEM);
2000 2024 connp->conn_ixa->ixa_refcnt = 1;
2001 2025 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2002 2026 return (0);
2003 2027 }
2004 2028
2005 2029 /* ARGSUSED */
2006 2030 static void
2007 2031 ip_conn_destructor(void *buf, void *cdrarg)
2008 2032 {
2009 2033 itc_t *itc = (itc_t *)buf;
2010 2034 conn_t *connp = &itc->itc_conn;
2011 2035
2012 2036 ASSERT(connp->conn_flags & IPCL_IPCCONN);
2013 2037 ASSERT(connp->conn_priv == NULL);
2014 2038 mutex_destroy(&connp->conn_lock);
2015 2039 cv_destroy(&connp->conn_cv);
2016 2040 rw_destroy(&connp->conn_ilg_lock);
2017 2041
2018 2042 /* Can be NULL if constructor failed */
2019 2043 if (connp->conn_ixa != NULL) {
2020 2044 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2021 2045 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2022 2046 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2023 2047 ixa_refrele(connp->conn_ixa);
2024 2048 }
2025 2049 }
2026 2050
2027 2051 /* ARGSUSED */
2028 2052 static int
2029 2053 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2030 2054 {
2031 2055 itc_t *itc = (itc_t *)buf;
2032 2056 conn_t *connp = &itc->itc_conn;
2033 2057 udp_t *udp = (udp_t *)&itc[1];
2034 2058
2035 2059 bzero(connp, sizeof (conn_t));
2036 2060 bzero(udp, sizeof (udp_t));
2037 2061
2038 2062 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2039 2063 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2040 2064 connp->conn_udp = udp;
2041 2065 connp->conn_flags = IPCL_UDPCONN;
2042 2066 connp->conn_proto = IPPROTO_UDP;
2043 2067 udp->udp_connp = connp;
2044 2068 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2045 2069 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2046 2070 if (connp->conn_ixa == NULL)
2047 2071 return (ENOMEM);
2048 2072 connp->conn_ixa->ixa_refcnt = 1;
2049 2073 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2050 2074 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2051 2075 return (0);
2052 2076 }
2053 2077
2054 2078 /* ARGSUSED */
2055 2079 static void
2056 2080 udp_conn_destructor(void *buf, void *cdrarg)
2057 2081 {
2058 2082 itc_t *itc = (itc_t *)buf;
2059 2083 conn_t *connp = &itc->itc_conn;
2060 2084 udp_t *udp = (udp_t *)&itc[1];
2061 2085
2062 2086 ASSERT(connp->conn_flags & IPCL_UDPCONN);
2063 2087 ASSERT(udp->udp_connp == connp);
2064 2088 ASSERT(connp->conn_udp == udp);
2065 2089 mutex_destroy(&connp->conn_lock);
2066 2090 cv_destroy(&connp->conn_cv);
2067 2091 rw_destroy(&connp->conn_ilg_lock);
2068 2092
2069 2093 /* Can be NULL if constructor failed */
2070 2094 if (connp->conn_ixa != NULL) {
2071 2095 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2072 2096 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2073 2097 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2074 2098 ixa_refrele(connp->conn_ixa);
2075 2099 }
2076 2100 }
2077 2101
2078 2102 /* ARGSUSED */
2079 2103 static int
2080 2104 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2081 2105 {
2082 2106 itc_t *itc = (itc_t *)buf;
2083 2107 conn_t *connp = &itc->itc_conn;
2084 2108 icmp_t *icmp = (icmp_t *)&itc[1];
|
↓ open down ↓ |
565 lines elided |
↑ open up ↑ |
2085 2109
2086 2110 bzero(connp, sizeof (conn_t));
2087 2111 bzero(icmp, sizeof (icmp_t));
2088 2112
2089 2113 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2090 2114 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2091 2115 connp->conn_icmp = icmp;
2092 2116 connp->conn_flags = IPCL_RAWIPCONN;
2093 2117 connp->conn_proto = IPPROTO_ICMP;
2094 2118 icmp->icmp_connp = connp;
2119 + rw_init(&icmp->icmp_bpf_lock, NULL, RW_DEFAULT, NULL);
2095 2120 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2096 2121 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2097 2122 if (connp->conn_ixa == NULL)
2098 2123 return (ENOMEM);
2099 2124 connp->conn_ixa->ixa_refcnt = 1;
2100 2125 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2101 2126 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2102 2127 return (0);
2103 2128 }
2104 2129
2105 2130 /* ARGSUSED */
2106 2131 static void
2107 2132 rawip_conn_destructor(void *buf, void *cdrarg)
2108 2133 {
|
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
2109 2134 itc_t *itc = (itc_t *)buf;
2110 2135 conn_t *connp = &itc->itc_conn;
2111 2136 icmp_t *icmp = (icmp_t *)&itc[1];
2112 2137
2113 2138 ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2114 2139 ASSERT(icmp->icmp_connp == connp);
2115 2140 ASSERT(connp->conn_icmp == icmp);
2116 2141 mutex_destroy(&connp->conn_lock);
2117 2142 cv_destroy(&connp->conn_cv);
2118 2143 rw_destroy(&connp->conn_ilg_lock);
2144 + rw_destroy(&icmp->icmp_bpf_lock);
2119 2145
2120 2146 /* Can be NULL if constructor failed */
2121 2147 if (connp->conn_ixa != NULL) {
2122 2148 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2123 2149 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2124 2150 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2125 2151 ixa_refrele(connp->conn_ixa);
2126 2152 }
2127 2153 }
2128 2154
2129 2155 /* ARGSUSED */
2130 2156 static int
2131 2157 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2132 2158 {
2133 2159 itc_t *itc = (itc_t *)buf;
2134 2160 conn_t *connp = &itc->itc_conn;
2135 2161 rts_t *rts = (rts_t *)&itc[1];
2136 2162
2137 2163 bzero(connp, sizeof (conn_t));
2138 2164 bzero(rts, sizeof (rts_t));
2139 2165
2140 2166 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2141 2167 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2142 2168 connp->conn_rts = rts;
2143 2169 connp->conn_flags = IPCL_RTSCONN;
2144 2170 rts->rts_connp = connp;
2145 2171 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2146 2172 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2147 2173 if (connp->conn_ixa == NULL)
2148 2174 return (ENOMEM);
2149 2175 connp->conn_ixa->ixa_refcnt = 1;
2150 2176 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2151 2177 return (0);
2152 2178 }
2153 2179
2154 2180 /* ARGSUSED */
2155 2181 static void
2156 2182 rts_conn_destructor(void *buf, void *cdrarg)
2157 2183 {
2158 2184 itc_t *itc = (itc_t *)buf;
2159 2185 conn_t *connp = &itc->itc_conn;
2160 2186 rts_t *rts = (rts_t *)&itc[1];
2161 2187
2162 2188 ASSERT(connp->conn_flags & IPCL_RTSCONN);
2163 2189 ASSERT(rts->rts_connp == connp);
2164 2190 ASSERT(connp->conn_rts == rts);
2165 2191 mutex_destroy(&connp->conn_lock);
2166 2192 cv_destroy(&connp->conn_cv);
2167 2193 rw_destroy(&connp->conn_ilg_lock);
2168 2194
2169 2195 /* Can be NULL if constructor failed */
2170 2196 if (connp->conn_ixa != NULL) {
2171 2197 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2172 2198 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2173 2199 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2174 2200 ixa_refrele(connp->conn_ixa);
2175 2201 }
2176 2202 }
2177 2203
2178 2204 /*
2179 2205 * Called as part of ipcl_conn_destroy to assert and clear any pointers
2180 2206 * in the conn_t.
2181 2207 *
2182 2208 * Below we list all the pointers in the conn_t as a documentation aid.
2183 2209 * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2184 2210 * If you add any pointers to the conn_t please add an ASSERT here
2185 2211 * and #ifdef it out if it can't be actually asserted to be NULL.
2186 2212 * In any case, we bzero most of the conn_t at the end of the function.
2187 2213 */
2188 2214 void
2189 2215 ipcl_conn_cleanup(conn_t *connp)
2190 2216 {
2191 2217 	ip_xmit_attr_t	*ixa;
2192 2218
	/* Every reference-holding pointer must have been dropped by now. */
2193 2219 	ASSERT(connp->conn_latch == NULL);
2194 2220 	ASSERT(connp->conn_latch_in_policy == NULL);
2195 2221 	ASSERT(connp->conn_latch_in_action == NULL);
2196 2222 #ifdef notdef
2197 2223 	ASSERT(connp->conn_rq == NULL);
2198 2224 	ASSERT(connp->conn_wq == NULL);
2199 2225 #endif
2200 2226 	ASSERT(connp->conn_cred == NULL);
2201 2227 	ASSERT(connp->conn_g_fanout == NULL);
2202 2228 	ASSERT(connp->conn_g_next == NULL);
2203 2229 	ASSERT(connp->conn_g_prev == NULL);
2204 2230 	ASSERT(connp->conn_policy == NULL);
2205 2231 	ASSERT(connp->conn_fanout == NULL);
2206 2232 	ASSERT(connp->conn_next == NULL);
2207 2233 	ASSERT(connp->conn_prev == NULL);
2208 2234 	ASSERT(connp->conn_oper_pending_ill == NULL);
2209 2235 	ASSERT(connp->conn_ilg == NULL);
2210 2236 	ASSERT(connp->conn_drain_next == NULL);
2211 2237 	ASSERT(connp->conn_drain_prev == NULL);
2212 2238 #ifdef notdef
2213 2239 	/* conn_idl is not cleared when removed from idl list */
2214 2240 	ASSERT(connp->conn_idl == NULL);
2215 2241 #endif
2216 2242 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
2217 2243 #ifdef notdef
2218 2244 	/* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2219 2245 	ASSERT(connp->conn_netstack == NULL);
2220 2246 #endif
2221 2247
2222 2248 	ASSERT(connp->conn_helper_info == NULL);
2223 2249 	ASSERT(connp->conn_ixa != NULL);
2224 2250 	ixa = connp->conn_ixa;
2225 2251 	ASSERT(ixa->ixa_refcnt == 1);
2226 2252 	/* Need to preserve ixa_protocol */
2227 2253 	ixa_cleanup(ixa);
2228 2254 	ixa->ixa_flags = 0;
2229 2255
	/*
	 * Clear out the conn_t fields that are not preserved across reuse;
	 * conn_start_clr marks the first non-preserved field.
	 */
2230 2256 	/* Clear out the conn_t fields that are not preserved */
2231 2257 	bzero(&connp->conn_start_clr,
2232 2258 	    sizeof (conn_t) -
2233 2259 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2234 2260 }
2235 2261
2236 2262 /*
2237 2263 * All conns are inserted in a global multi-list for the benefit of
2238 2264 * walkers. The walk is guaranteed to walk all open conns at the time
2239 2265 * of the start of the walk exactly once. This property is needed to
2240 2266 * achieve some cleanups during unplumb of interfaces. This is achieved
2241 2267 * as follows.
2242 2268 *
2243 2269 * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2244 2270 * call the insert and delete functions below at creation and deletion
2245 2271 * time respectively. The conn never moves or changes its position in this
2246 2272 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2247 2273 * won't increase due to walkers, once the conn deletion has started. Note
2248 2274 * that we can't remove the conn from the global list and then wait for
2249 2275 * the refcnt to drop to zero, since walkers would then see a truncated
2250 2276 * list. CONN_INCIPIENT ensures that walkers don't start looking at
2251 2277 * conns until ip_open is ready to make them globally visible.
2252 2278 * The global round robin multi-list locks are held only to get the
2253 2279 * next member/insertion/deletion and contention should be negligible
2254 2280 * if the multi-list is much greater than the number of cpus.
2255 2281 */
/*
 * Insert a newly created conn at the head of one of the global
 * multi-list buckets, chosen round-robin via ips_conn_g_index.  The
 * conn is marked CONN_INCIPIENT so walkers skip it until ip_open
 * makes it globally visible.
 */
2256 2282 void
2257 2283 ipcl_globalhash_insert(conn_t *connp)
2258 2284 {
2259 2285 	int	index;
2260 2286 	struct connf_s	*connfp;
2261 2287 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
2262 2288
2263 2289 	/*
2264 2290 	 * No need for atomic here. Approximate even distribution
2265 2291 	 * in the global lists is sufficient.
2266 2292 	 */
2267 2293 	ipst->ips_conn_g_index++;
2268 2294 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2269 2295
2270 2296 	connp->conn_g_prev = NULL;
2271 2297 	/*
2272 2298 	 * Mark as INCIPIENT, so that walkers will ignore this
2273 2299 	 * for now, till ip_open is ready to make it visible globally.
2274 2300 	 */
2275 2301 	connp->conn_state_flags |= CONN_INCIPIENT;
2276 2302
2277 2303 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2278 2304 	/* Insert at the head of the list */
2279 2305 	mutex_enter(&connfp->connf_lock);
2280 2306 	connp->conn_g_next = connfp->connf_head;
2281 2307 	if (connp->conn_g_next != NULL)
2282 2308 		connp->conn_g_next->conn_g_prev = connp;
2283 2309 	connfp->connf_head = connp;
2284 2310
2285 2311 	/* The fanout bucket this conn points to */
2286 2312 	connp->conn_g_fanout = connfp;
2287 2313
2288 2314 	mutex_exit(&connfp->connf_lock);
2289 2315 }
2290 2316
/*
 * Undo ipcl_globalhash_insert().  Safe to call on a conn that was
 * never inserted (conn_g_fanout is NULL in that case).
 */
2291 2317 void
2292 2318 ipcl_globalhash_remove(conn_t *connp)
2293 2319 {
2294 2320 	struct connf_s	*connfp;
2295 2321
2296 2322 	/*
2297 2323 	 * We were never inserted in the global multi list.
2298 2324 	 * IPCL_NONE variety is never inserted in the global multilist
2299 2325 	 * since it is presumed to not need any cleanup and is transient.
2300 2326 	 */
2301 2327 	if (connp->conn_g_fanout == NULL)
2302 2328 		return;
2303 2329
2304 2330 	connfp = connp->conn_g_fanout;
2305 2331 	mutex_enter(&connfp->connf_lock);
2306 2332 	if (connp->conn_g_prev != NULL)
2307 2333 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2308 2334 	else
2309 2335 		connfp->connf_head = connp->conn_g_next;
2310 2336 	if (connp->conn_g_next != NULL)
2311 2337 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2312 2338 	mutex_exit(&connfp->connf_lock);
2313 2339
2314 2340 	/* Better to stumble on a null pointer than to corrupt memory */
2315 2341 	connp->conn_g_next = NULL;
2316 2342 	connp->conn_g_prev = NULL;
2317 2343 	connp->conn_g_fanout = NULL;
2318 2344 }
2319 2345
2320 2346 /*
2321 2347 * Walk the list of all conn_t's in the system, calling the function provided
2322 2348 * with the specified argument for each.
2323 2349 * Applies to both IPv4 and IPv6.
2324 2350 *
2325 2351 * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2326 2352 * conn_oper_pending_ill). To guard against stale pointers
2327 2353 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2328 2354 * unplumbed or removed. New conn_t's that are created while we are walking
2329 2355 * may be missed by this walk, because they are not necessarily inserted
2330 2356 * at the tail of the list. They are new conn_t's and thus don't have any
2331 2357 * stale pointers. The CONN_CLOSING flag ensures that no new reference
2332 2358 * is created to the struct that is going away.
2333 2359 */
2334 2360 void
2335 2361 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2336 2362 {
2337 2363 	int	i;
2338 2364 	conn_t	*connp;
2339 2365 	conn_t	*prev_connp;
2340 2366
2341 2367 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2342 2368 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2343 2369 		prev_connp = NULL;
2344 2370 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2345 2371 		while (connp != NULL) {
2346 2372 			mutex_enter(&connp->conn_lock);
	/* Skip conns that are being torn down or not yet visible. */
2347 2373 			if (connp->conn_state_flags &
2348 2374 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
2349 2375 				mutex_exit(&connp->conn_lock);
2350 2376 				connp = connp->conn_g_next;
2351 2377 				continue;
2352 2378 			}
	/*
	 * Hold a reference so this conn stays linked while we drop
	 * the bucket lock to call func; the previous conn's reference
	 * is only released once we are safely past it.
	 */
2353 2379 			CONN_INC_REF_LOCKED(connp);
2354 2380 			mutex_exit(&connp->conn_lock);
2355 2381 			mutex_exit(
2356 2382 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2357 2383 			(*func)(connp, arg);
2358 2384 			if (prev_connp != NULL)
2359 2385 				CONN_DEC_REF(prev_connp);
2360 2386 			mutex_enter(
2361 2387 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2362 2388 			prev_connp = connp;
2363 2389 			connp = connp->conn_g_next;
2364 2390 		}
2365 2391 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2366 2392 		if (prev_connp != NULL)
2367 2393 			CONN_DEC_REF(prev_connp);
2368 2394 	}
2369 2395 }
2370 2396
2371 2397 /*
2372 2398 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2373 2399 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2374 2400 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2375 2401 * (peer tcp in ESTABLISHED state).
2376 2402 */
2377 2403 conn_t *
2378 2404 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2379 2405     ip_stack_t *ipst)
2380 2406 {
2381 2407 	uint32_t ports;
2382 2408 	uint16_t *pports = (uint16_t *)&ports;
2383 2409 	connf_t	*connfp;
2384 2410 	conn_t	*tconnp;
2385 2411 	boolean_t zone_chk;
2386 2412
2387 2413 	/*
2388 2414 	 * If either the source or destination address is loopback, then
2389 2415 	 * both endpoints must be in the same Zone.  Otherwise, both of
2390 2416 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
2391 2417 	 * state) and the endpoints may reside in different Zones.
2392 2418 	 */
2393 2419 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2394 2420 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2395 2421
	/* Build the lookup key in the reverse direction of the packet. */
2396 2422 	pports[0] = tcpha->tha_fport;
2397 2423 	pports[1] = tcpha->tha_lport;
2398 2424
2399 2425 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2400 2426 	    ports, ipst)];
2401 2427
2402 2428 	mutex_enter(&connfp->connf_lock);
2403 2429 	for (tconnp = connfp->connf_head; tconnp != NULL;
2404 2430 	    tconnp = tconnp->conn_next) {
2405 2431
2406 2432 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2407 2433 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2408 2434 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2409 2435 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2410 2436
2411 2437 			ASSERT(tconnp != connp);
2412 2438 			CONN_INC_REF(tconnp);
2413 2439 			mutex_exit(&connfp->connf_lock);
2414 2440 			return (tconnp);
2415 2441 		}
2416 2442 	}
2417 2443 	mutex_exit(&connfp->connf_lock);
2418 2444 	return (NULL);
2419 2445 }
2420 2446
2421 2447 /*
2422 2448 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2423 2449 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2424 2450 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2425 2451 * (peer tcp in ESTABLISHED state).
2426 2452 */
2427 2453 conn_t *
2428 2454 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2429 2455     ip_stack_t *ipst)
2430 2456 {
2431 2457 	uint32_t ports;
2432 2458 	uint16_t *pports = (uint16_t *)&ports;
2433 2459 	connf_t	*connfp;
2434 2460 	conn_t	*tconnp;
2435 2461 	boolean_t zone_chk;
2436 2462
2437 2463 	/*
2438 2464 	 * If either the source or destination address is loopback, then
2439 2465 	 * both endpoints must be in the same Zone.  Otherwise, both of
2440 2466 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
2441 2467 	 * state) and the endpoints may reside in different Zones.  We
2442 2468 	 * don't do Zone check for link local address(es) because the
2443 2469 	 * current Zone implementation treats each link local address as
2444 2470 	 * being unique per system node, i.e. they belong to global Zone.
2445 2471 	 */
2446 2472 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2447 2473 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2448 2474
	/* Build the lookup key in the reverse direction of the packet. */
2449 2475 	pports[0] = tcpha->tha_fport;
2450 2476 	pports[1] = tcpha->tha_lport;
2451 2477
2452 2478 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2453 2479 	    ports, ipst)];
2454 2480
2455 2481 	mutex_enter(&connfp->connf_lock);
2456 2482 	for (tconnp = connfp->connf_head; tconnp != NULL;
2457 2483 	    tconnp = tconnp->conn_next) {
2458 2484
2459 2485 		/* We skip conn_bound_if check here as this is loopback tcp */
2460 2486 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2461 2487 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2462 2488 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2463 2489 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2464 2490
2465 2491 			ASSERT(tconnp != connp);
2466 2492 			CONN_INC_REF(tconnp);
2467 2493 			mutex_exit(&connfp->connf_lock);
2468 2494 			return (tconnp);
2469 2495 		}
2470 2496 	}
2471 2497 	mutex_exit(&connfp->connf_lock);
2472 2498 	return (NULL);
2473 2499 }
2474 2500
2475 2501 /*
2476 2502 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2477 2503 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2478 2504 * Only checks for connected entries i.e. no INADDR_ANY checks.
2479 2505 */
2480 2506 conn_t *
2481 2507 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2482 2508 ip_stack_t *ipst)
2483 2509 {
2484 2510 uint32_t ports;
2485 2511 uint16_t *pports;
2486 2512 connf_t *connfp;
2487 2513 conn_t *tconnp;
2488 2514
2489 2515 pports = (uint16_t *)&ports;
2490 2516 pports[0] = tcpha->tha_fport;
2491 2517 pports[1] = tcpha->tha_lport;
2492 2518
2493 2519 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2494 2520 ports, ipst)];
2495 2521
2496 2522 mutex_enter(&connfp->connf_lock);
2497 2523 for (tconnp = connfp->connf_head; tconnp != NULL;
2498 2524 tconnp = tconnp->conn_next) {
2499 2525
2500 2526 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2501 2527 ipha->ipha_dst, ipha->ipha_src, ports) &&
2502 2528 tconnp->conn_tcp->tcp_state >= min_state) {
2503 2529
2504 2530 CONN_INC_REF(tconnp);
2505 2531 mutex_exit(&connfp->connf_lock);
2506 2532 return (tconnp);
2507 2533 }
2508 2534 }
2509 2535 mutex_exit(&connfp->connf_lock);
2510 2536 return (NULL);
2511 2537 }
2512 2538
2513 2539 /*
2514 2540 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2515 2541 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2516 2542 * Only checks for connected entries i.e. no INADDR_ANY checks.
2517 2543 * Match on ifindex in addition to addresses.
2518 2544 */
2519 2545 conn_t *
2520 2546 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2521 2547 uint_t ifindex, ip_stack_t *ipst)
2522 2548 {
2523 2549 tcp_t *tcp;
2524 2550 uint32_t ports;
2525 2551 uint16_t *pports;
2526 2552 connf_t *connfp;
2527 2553 conn_t *tconnp;
2528 2554
2529 2555 pports = (uint16_t *)&ports;
2530 2556 pports[0] = tcpha->tha_fport;
2531 2557 pports[1] = tcpha->tha_lport;
2532 2558
2533 2559 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2534 2560 ports, ipst)];
2535 2561
2536 2562 mutex_enter(&connfp->connf_lock);
2537 2563 for (tconnp = connfp->connf_head; tconnp != NULL;
2538 2564 tconnp = tconnp->conn_next) {
2539 2565
2540 2566 tcp = tconnp->conn_tcp;
2541 2567 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2542 2568 ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2543 2569 tcp->tcp_state >= min_state &&
2544 2570 (tconnp->conn_bound_if == 0 ||
2545 2571 tconnp->conn_bound_if == ifindex)) {
2546 2572
2547 2573 CONN_INC_REF(tconnp);
2548 2574 mutex_exit(&connfp->connf_lock);
2549 2575 return (tconnp);
2550 2576 }
2551 2577 }
2552 2578 mutex_exit(&connfp->connf_lock);
2553 2579 return (NULL);
2554 2580 }
2555 2581
2556 2582 /*
2557 2583 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2558 2584 * a listener when changing state.
2559 2585 */
2560 2586 conn_t *
2561 2587 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2562 2588 ip_stack_t *ipst)
2563 2589 {
2564 2590 connf_t *bind_connfp;
2565 2591 conn_t *connp;
2566 2592 tcp_t *tcp;
2567 2593
2568 2594 /*
2569 2595 * Avoid false matches for packets sent to an IP destination of
2570 2596 * all zeros.
2571 2597 */
2572 2598 if (laddr == 0)
2573 2599 return (NULL);
2574 2600
2575 2601 ASSERT(zoneid != ALL_ZONES);
2576 2602
2577 2603 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2578 2604 mutex_enter(&bind_connfp->connf_lock);
2579 2605 for (connp = bind_connfp->connf_head; connp != NULL;
2580 2606 connp = connp->conn_next) {
2581 2607 tcp = connp->conn_tcp;
2582 2608 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2583 2609 IPCL_ZONE_MATCH(connp, zoneid) &&
2584 2610 (tcp->tcp_listener == NULL)) {
2585 2611 CONN_INC_REF(connp);
2586 2612 mutex_exit(&bind_connfp->connf_lock);
2587 2613 return (connp);
2588 2614 }
2589 2615 }
2590 2616 mutex_exit(&bind_connfp->connf_lock);
2591 2617 return (NULL);
2592 2618 }
2593 2619
2594 2620 /*
2595 2621 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2596 2622 * a listener when changing state.
2597 2623 */
2598 2624 conn_t *
2599 2625 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2600 2626 zoneid_t zoneid, ip_stack_t *ipst)
2601 2627 {
2602 2628 connf_t *bind_connfp;
2603 2629 conn_t *connp = NULL;
2604 2630 tcp_t *tcp;
2605 2631
2606 2632 /*
2607 2633 * Avoid false matches for packets sent to an IP destination of
2608 2634 * all zeros.
2609 2635 */
2610 2636 if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2611 2637 return (NULL);
2612 2638
2613 2639 ASSERT(zoneid != ALL_ZONES);
2614 2640
2615 2641 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2616 2642 mutex_enter(&bind_connfp->connf_lock);
2617 2643 for (connp = bind_connfp->connf_head; connp != NULL;
2618 2644 connp = connp->conn_next) {
2619 2645 tcp = connp->conn_tcp;
2620 2646 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2621 2647 IPCL_ZONE_MATCH(connp, zoneid) &&
2622 2648 (connp->conn_bound_if == 0 ||
2623 2649 connp->conn_bound_if == ifindex) &&
2624 2650 tcp->tcp_listener == NULL) {
2625 2651 CONN_INC_REF(connp);
2626 2652 mutex_exit(&bind_connfp->connf_lock);
2627 2653 return (connp);
2628 2654 }
2629 2655 }
2630 2656 mutex_exit(&bind_connfp->connf_lock);
2631 2657 return (NULL);
2632 2658 }
2633 2659
2634 2660 /*
2635 2661 * ipcl_get_next_conn
2636 2662 * get the next entry in the conn global list
2637 2663 * and put a reference on the next_conn.
2638 2664 * decrement the reference on the current conn.
2639 2665 *
2640 2666 * This is an iterator based walker function that also provides for
2641 2667 * some selection by the caller. It walks through the conn_hash bucket
2642 2668 * searching for the next valid connp in the list, and selects connections
2643 2669 * that are neither closed nor condemned. It also REFHOLDS the conn
2644 2670 * thus ensuring that the conn exists when the caller uses the conn.
2645 2671 */
2646 2672 conn_t *
2647 2673 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2648 2674 {
2649 2675 conn_t *next_connp;
2650 2676
2651 2677 if (connfp == NULL)
2652 2678 return (NULL);
2653 2679
2654 2680 mutex_enter(&connfp->connf_lock);
2655 2681
2656 2682 next_connp = (connp == NULL) ?
2657 2683 connfp->connf_head : connp->conn_g_next;
2658 2684
2659 2685 while (next_connp != NULL) {
2660 2686 mutex_enter(&next_connp->conn_lock);
2661 2687 if (!(next_connp->conn_flags & conn_flags) ||
2662 2688 (next_connp->conn_state_flags &
2663 2689 (CONN_CONDEMNED | CONN_INCIPIENT))) {
2664 2690 /*
2665 2691 * This conn has been condemned or
2666 2692 * is closing, or the flags don't match
2667 2693 */
2668 2694 mutex_exit(&next_connp->conn_lock);
2669 2695 next_connp = next_connp->conn_g_next;
2670 2696 continue;
2671 2697 }
2672 2698 CONN_INC_REF_LOCKED(next_connp);
2673 2699 mutex_exit(&next_connp->conn_lock);
2674 2700 break;
2675 2701 }
2676 2702
2677 2703 mutex_exit(&connfp->connf_lock);
2678 2704
2679 2705 if (connp != NULL)
2680 2706 CONN_DEC_REF(connp);
2681 2707
2682 2708 return (next_connp);
2683 2709 }
2684 2710
2685 2711 #ifdef CONN_DEBUG
2686 2712 /*
2687 2713 * Trace of the last NBUF refhold/refrele
2688 2714 */
2689 2715 int
2690 2716 conn_trace_ref(conn_t *connp)
2691 2717 {
2692 2718 int last;
2693 2719 conn_trace_t *ctb;
2694 2720
2695 2721 ASSERT(MUTEX_HELD(&connp->conn_lock));
2696 2722 last = connp->conn_trace_last;
2697 2723 last++;
2698 2724 if (last == CONN_TRACE_MAX)
2699 2725 last = 0;
2700 2726
2701 2727 ctb = &connp->conn_trace_buf[last];
2702 2728 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2703 2729 connp->conn_trace_last = last;
2704 2730 return (1);
2705 2731 }
2706 2732
2707 2733 int
2708 2734 conn_untrace_ref(conn_t *connp)
2709 2735 {
2710 2736 int last;
2711 2737 conn_trace_t *ctb;
2712 2738
2713 2739 ASSERT(MUTEX_HELD(&connp->conn_lock));
2714 2740 last = connp->conn_trace_last;
2715 2741 last++;
2716 2742 if (last == CONN_TRACE_MAX)
2717 2743 last = 0;
2718 2744
2719 2745 ctb = &connp->conn_trace_buf[last];
2720 2746 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2721 2747 connp->conn_trace_last = last;
2722 2748 return (1);
2723 2749 }
2724 2750 #endif
|
↓ open down ↓ |
596 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX