Print this page
8900 deadlock between netstack teardown and kstat read
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/os/netstack.c
+++ new/usr/src/uts/common/os/netstack.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 - * Copyright (c) 2016, Joyent, Inc. All rights reserved.
25 + * Copyright (c) 2017, Joyent, Inc. All rights reserved.
26 26 */
27 27
28 28 #include <sys/param.h>
29 29 #include <sys/sysmacros.h>
30 30 #include <sys/vm.h>
31 31 #include <sys/proc.h>
32 32 #include <sys/tuneable.h>
33 33 #include <sys/systm.h>
34 34 #include <sys/cmn_err.h>
35 35 #include <sys/debug.h>
36 36 #include <sys/sdt.h>
37 37 #include <sys/mutex.h>
38 38 #include <sys/bitmap.h>
39 39 #include <sys/atomic.h>
40 +#include <sys/sunddi.h>
40 41 #include <sys/kobj.h>
41 42 #include <sys/disp.h>
42 43 #include <vm/seg_kmem.h>
43 44 #include <sys/zone.h>
44 45 #include <sys/netstack.h>
45 46
46 47 /*
47 48 * What we use so that the zones framework can tell us about new zones,
48 49 * which we use to create new stacks.
49 50 */
50 51 static zone_key_t netstack_zone_key;
51 52
52 53 static int netstack_initialized = 0;
53 54
54 55 /*
55 56 * Track the registered netstacks.
56 57 * The global lock protects
57 58 * - ns_reg
58 59 * - the list starting at netstack_head and following the netstack_next
59 60 * pointers.
60 61 */
61 62 static kmutex_t netstack_g_lock;
62 63
63 64 /*
 64 65  * Registry of netstacks with their create/shutdown/destroy functions.
65 66 */
66 67 static struct netstack_registry ns_reg[NS_MAX];
67 68
68 69 /*
69 70 * Global list of existing stacks. We use this when a new zone with
70 71 * an exclusive IP instance is created.
71 72 *
72 73 * Note that in some cases a netstack_t needs to stay around after the zone
73 74 * has gone away. This is because there might be outstanding references
74 75 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
75 76 * structure and all the foo_stack_t's hanging off of it will be cleaned up
76 77 * when the last reference to it is dropped.
77 78 * However, the same zone might be rebooted. That is handled using the
78 79 * assumption that the zones framework picks a new zoneid each time a zone
79 80 * is (re)booted. We assert for that condition in netstack_zone_create().
80 81 * Thus the old netstack_t can take its time for things to time out.
81 82 */
82 83 static netstack_t *netstack_head;
83 84
84 85 /*
85 86 * To support kstat_create_netstack() using kstat_zone_add we need
86 87 * to track both
87 88 * - all zoneids that use the global/shared stack
88 89 * - all kstats that have been added for the shared stack
89 90 */
90 91 struct shared_zone_list {
91 92 struct shared_zone_list *sz_next;
92 93 zoneid_t sz_zoneid;
93 94 };
94 95
95 96 struct shared_kstat_list {
96 97 struct shared_kstat_list *sk_next;
97 98 kstat_t *sk_kstat;
98 99 };
99 100
100 101 static kmutex_t netstack_shared_lock; /* protects the following two */
101 102 static struct shared_zone_list *netstack_shared_zones;
102 103 static struct shared_kstat_list *netstack_shared_kstats;
103 104
104 105 static void *netstack_zone_create(zoneid_t zoneid);
105 106 static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
106 107 static void netstack_zone_destroy(zoneid_t zoneid, void *arg);
107 108
108 109 static void netstack_shared_zone_add(zoneid_t zoneid);
109 110 static void netstack_shared_zone_remove(zoneid_t zoneid);
110 111 static void netstack_shared_kstat_add(kstat_t *ks);
111 112 static void netstack_shared_kstat_remove(kstat_t *ks);
112 113
113 114 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
114 115
|
↓ open down ↓ |
65 lines elided |
↑ open up ↑ |
115 116 static void apply_all_netstacks(int, applyfn_t *);
116 117 static void apply_all_modules(netstack_t *, applyfn_t *);
117 118 static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
118 119 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
119 120 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
120 121 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
121 122 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
122 123 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
123 124 kmutex_t *);
124 125
126 +static ksema_t netstack_reap_limiter;
127 +/*
128 + * Hard-coded constant, but since this is not tunable in real-time, it seems
129 + * making it an /etc/system tunable is better than nothing.
130 + */
131 +uint_t netstack_outstanding_reaps = 1024;
132 +
125 133 void
126 134 netstack_init(void)
127 135 {
128 136 mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
129 137 mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
130 138
139 + sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
140 + SEMA_DRIVER, NULL);
141 +
131 142 netstack_initialized = 1;
132 143
133 144 /*
134 145 * We want to be informed each time a zone is created or
135 146 * destroyed in the kernel, so we can maintain the
136 147 * stack instance information.
137 148 */
138 149 zone_key_create(&netstack_zone_key, netstack_zone_create,
139 150 netstack_zone_shutdown, netstack_zone_destroy);
140 151 }
141 152
142 153 /*
143 154 * Register a new module with the framework.
144 155 * This registers interest in changes to the set of netstacks.
145 156 * The createfn and destroyfn are required, but the shutdownfn can be
146 157 * NULL.
147 158 * Note that due to the current zsd implementation, when the create
148 159 * function is called the zone isn't fully present, thus functions
149 160 * like zone_find_by_* will fail, hence the create function can not
150 161 * use many zones kernel functions including zcmn_err().
151 162 */
152 163 void
153 164 netstack_register(int moduleid,
154 165 void *(*module_create)(netstackid_t, netstack_t *),
155 166 void (*module_shutdown)(netstackid_t, void *),
156 167 void (*module_destroy)(netstackid_t, void *))
157 168 {
158 169 netstack_t *ns;
159 170
160 171 ASSERT(netstack_initialized);
161 172 ASSERT(moduleid >= 0 && moduleid < NS_MAX);
162 173 ASSERT(module_create != NULL);
163 174
164 175 /*
165 176 * Make instances created after this point in time run the create
166 177 * callback.
167 178 */
168 179 mutex_enter(&netstack_g_lock);
169 180 ASSERT(ns_reg[moduleid].nr_create == NULL);
170 181 ASSERT(ns_reg[moduleid].nr_flags == 0);
171 182 ns_reg[moduleid].nr_create = module_create;
172 183 ns_reg[moduleid].nr_shutdown = module_shutdown;
173 184 ns_reg[moduleid].nr_destroy = module_destroy;
174 185 ns_reg[moduleid].nr_flags = NRF_REGISTERED;
175 186
176 187 /*
177 188 * Determine the set of stacks that exist before we drop the lock.
178 189 * Set NSS_CREATE_NEEDED for each of those.
179 190 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
180 191 * set, but check NSF_CLOSING to be sure.
181 192 */
182 193 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
183 194 nm_state_t *nms = &ns->netstack_m_state[moduleid];
184 195
185 196 mutex_enter(&ns->netstack_lock);
186 197 if (!(ns->netstack_flags & NSF_CLOSING) &&
187 198 (nms->nms_flags & NSS_CREATE_ALL) == 0) {
188 199 nms->nms_flags |= NSS_CREATE_NEEDED;
189 200 DTRACE_PROBE2(netstack__create__needed,
190 201 netstack_t *, ns, int, moduleid);
191 202 }
192 203 mutex_exit(&ns->netstack_lock);
193 204 }
194 205 mutex_exit(&netstack_g_lock);
195 206
196 207 /*
197 208 * At this point in time a new instance can be created or an instance
198 209 * can be destroyed, or some other module can register or unregister.
199 210 * Make sure we either run all the create functions for this moduleid
200 211 * or we wait for any other creators for this moduleid.
201 212 */
202 213 apply_all_netstacks(moduleid, netstack_apply_create);
203 214 }
204 215
205 216 void
206 217 netstack_unregister(int moduleid)
207 218 {
208 219 netstack_t *ns;
209 220
210 221 ASSERT(moduleid >= 0 && moduleid < NS_MAX);
211 222
212 223 ASSERT(ns_reg[moduleid].nr_create != NULL);
213 224 ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
214 225
215 226 mutex_enter(&netstack_g_lock);
216 227 /*
217 228 * Determine the set of stacks that exist before we drop the lock.
218 229 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
219 230 * That ensures that when we return all the callbacks for existing
220 231 * instances have completed. And since we set NRF_DYING no new
221 232 * instances can use this module.
222 233 */
223 234 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
224 235 boolean_t created = B_FALSE;
225 236 nm_state_t *nms = &ns->netstack_m_state[moduleid];
226 237
227 238 mutex_enter(&ns->netstack_lock);
228 239
229 240 /*
230 241 * We need to be careful here. We could actually have a netstack
231 242 * being created as we speak waiting for us to let go of this
232 243 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
233 244 * have gotten to the point of completing it yet. If
234 245 * NSS_CREATE_NEEDED, we can safely just remove it here and
235 246 * never create the module. However, if NSS_CREATE_INPROGRESS is
236 247 * set, we need to still flag this module for shutdown and
237 248 * deletion, just as though it had reached NSS_CREATE_COMPLETED.
238 249 *
239 250 * It is safe to do that because of two different guarantees
240 251 * that exist in the system. The first is that before we do a
241 252 * create, shutdown, or destroy, we ensure that nothing else is
242 253 * in progress in the system for this netstack and wait for it
243 254 * to complete. Secondly, because the zone is being created, we
 244 255  * know that the following call to apply_all_netstacks will block
245 256 * on the zone finishing its initialization.
246 257 */
247 258 if (nms->nms_flags & NSS_CREATE_NEEDED)
248 259 nms->nms_flags &= ~NSS_CREATE_NEEDED;
249 260
250 261 if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
251 262 nms->nms_flags & NSS_CREATE_COMPLETED)
252 263 created = B_TRUE;
253 264
254 265 if (ns_reg[moduleid].nr_shutdown != NULL && created &&
255 266 (nms->nms_flags & NSS_CREATE_COMPLETED) &&
256 267 (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
257 268 nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
258 269 DTRACE_PROBE2(netstack__shutdown__needed,
259 270 netstack_t *, ns, int, moduleid);
260 271 }
261 272 if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
262 273 ns_reg[moduleid].nr_destroy != NULL && created &&
263 274 (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
264 275 nms->nms_flags |= NSS_DESTROY_NEEDED;
265 276 DTRACE_PROBE2(netstack__destroy__needed,
266 277 netstack_t *, ns, int, moduleid);
267 278 }
268 279 mutex_exit(&ns->netstack_lock);
269 280 }
270 281 /*
271 282 * Prevent any new netstack from calling the registered create
272 283 * function, while keeping the function pointers in place until the
273 284 * shutdown and destroy callbacks are complete.
274 285 */
275 286 ns_reg[moduleid].nr_flags |= NRF_DYING;
276 287 mutex_exit(&netstack_g_lock);
277 288
278 289 apply_all_netstacks(moduleid, netstack_apply_shutdown);
279 290 apply_all_netstacks(moduleid, netstack_apply_destroy);
280 291
281 292 /*
282 293 * Clear the nms_flags so that we can handle this module
283 294 * being loaded again.
284 295 * Also remove the registered functions.
285 296 */
286 297 mutex_enter(&netstack_g_lock);
287 298 ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
288 299 ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
289 300 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
290 301 nm_state_t *nms = &ns->netstack_m_state[moduleid];
291 302
292 303 mutex_enter(&ns->netstack_lock);
293 304 if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
294 305 nms->nms_flags = 0;
295 306 DTRACE_PROBE2(netstack__destroy__done,
296 307 netstack_t *, ns, int, moduleid);
297 308 }
298 309 mutex_exit(&ns->netstack_lock);
299 310 }
300 311
301 312 ns_reg[moduleid].nr_create = NULL;
302 313 ns_reg[moduleid].nr_shutdown = NULL;
303 314 ns_reg[moduleid].nr_destroy = NULL;
304 315 ns_reg[moduleid].nr_flags = 0;
305 316 mutex_exit(&netstack_g_lock);
306 317 }
307 318
308 319 /*
309 320 * Lookup and/or allocate a netstack for this zone.
310 321 */
311 322 static void *
312 323 netstack_zone_create(zoneid_t zoneid)
313 324 {
314 325 netstackid_t stackid;
315 326 netstack_t *ns;
316 327 netstack_t **nsp;
317 328 zone_t *zone;
318 329 int i;
319 330
320 331 ASSERT(netstack_initialized);
321 332
322 333 zone = zone_find_by_id_nolock(zoneid);
323 334 ASSERT(zone != NULL);
324 335
325 336 if (zone->zone_flags & ZF_NET_EXCL) {
326 337 stackid = zoneid;
327 338 } else {
328 339 /* Look for the stack instance for the global */
329 340 stackid = GLOBAL_NETSTACKID;
330 341 }
331 342
332 343 /* Allocate even if it isn't needed; simplifies locking */
333 344 ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
334 345
335 346 /* Look if there is a matching stack instance */
336 347 mutex_enter(&netstack_g_lock);
337 348 for (nsp = &netstack_head; *nsp != NULL;
338 349 nsp = &((*nsp)->netstack_next)) {
339 350 if ((*nsp)->netstack_stackid == stackid) {
340 351 /*
341 352 * Should never find a pre-existing exclusive stack
342 353 */
343 354 VERIFY(stackid == GLOBAL_NETSTACKID);
344 355 kmem_free(ns, sizeof (netstack_t));
345 356 ns = *nsp;
346 357 mutex_enter(&ns->netstack_lock);
347 358 ns->netstack_numzones++;
348 359 mutex_exit(&ns->netstack_lock);
349 360 mutex_exit(&netstack_g_lock);
350 361 DTRACE_PROBE1(netstack__inc__numzones,
351 362 netstack_t *, ns);
352 363 /* Record that we have a new shared stack zone */
353 364 netstack_shared_zone_add(zoneid);
354 365 zone->zone_netstack = ns;
355 366 return (ns);
356 367 }
357 368 }
358 369 /* Not found */
359 370 mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
360 371 cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
361 372 ns->netstack_stackid = zoneid;
362 373 ns->netstack_numzones = 1;
363 374 ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
364 375 ns->netstack_flags = NSF_UNINIT;
365 376 *nsp = ns;
366 377 zone->zone_netstack = ns;
367 378
368 379 mutex_enter(&ns->netstack_lock);
369 380 /*
370 381 * Mark this netstack as having a CREATE running so
371 382 * any netstack_register/netstack_unregister waits for
372 383 * the existing create callbacks to complete in moduleid order
373 384 */
374 385 ns->netstack_flags |= NSF_ZONE_CREATE;
375 386
376 387 /*
377 388 * Determine the set of module create functions that need to be
378 389 * called before we drop the lock.
379 390 * Set NSS_CREATE_NEEDED for each of those.
380 391 * Skip any with NRF_DYING set, since those are in the process of
381 392 * going away, by checking for flags being exactly NRF_REGISTERED.
382 393 */
383 394 for (i = 0; i < NS_MAX; i++) {
384 395 nm_state_t *nms = &ns->netstack_m_state[i];
385 396
386 397 cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
387 398
388 399 if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
389 400 (nms->nms_flags & NSS_CREATE_ALL) == 0) {
390 401 nms->nms_flags |= NSS_CREATE_NEEDED;
391 402 DTRACE_PROBE2(netstack__create__needed,
392 403 netstack_t *, ns, int, i);
393 404 }
394 405 }
395 406 mutex_exit(&ns->netstack_lock);
396 407 mutex_exit(&netstack_g_lock);
397 408
398 409 apply_all_modules(ns, netstack_apply_create);
399 410
400 411 /* Tell any waiting netstack_register/netstack_unregister to proceed */
401 412 mutex_enter(&ns->netstack_lock);
402 413 ns->netstack_flags &= ~NSF_UNINIT;
403 414 ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
404 415 ns->netstack_flags &= ~NSF_ZONE_CREATE;
405 416 cv_broadcast(&ns->netstack_cv);
406 417 mutex_exit(&ns->netstack_lock);
407 418
408 419 return (ns);
409 420 }
410 421
411 422 /* ARGSUSED */
412 423 static void
413 424 netstack_zone_shutdown(zoneid_t zoneid, void *arg)
414 425 {
415 426 netstack_t *ns = (netstack_t *)arg;
416 427 int i;
417 428
418 429 ASSERT(arg != NULL);
419 430
420 431 mutex_enter(&ns->netstack_lock);
421 432 ASSERT(ns->netstack_numzones > 0);
422 433 if (ns->netstack_numzones != 1) {
423 434 /* Stack instance being used by other zone */
424 435 mutex_exit(&ns->netstack_lock);
425 436 ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
426 437 return;
427 438 }
428 439 mutex_exit(&ns->netstack_lock);
429 440
430 441 mutex_enter(&netstack_g_lock);
431 442 mutex_enter(&ns->netstack_lock);
432 443 /*
433 444 * Mark this netstack as having a SHUTDOWN running so
434 445 * any netstack_register/netstack_unregister waits for
435 446 * the existing create callbacks to complete in moduleid order
436 447 */
437 448 ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
438 449 ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
439 450
440 451 /*
441 452 * Determine the set of stacks that exist before we drop the lock.
442 453 * Set NSS_SHUTDOWN_NEEDED for each of those.
443 454 */
444 455 for (i = 0; i < NS_MAX; i++) {
445 456 nm_state_t *nms = &ns->netstack_m_state[i];
446 457
447 458 if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
448 459 ns_reg[i].nr_shutdown != NULL &&
449 460 (nms->nms_flags & NSS_CREATE_COMPLETED) &&
450 461 (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
451 462 nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
452 463 DTRACE_PROBE2(netstack__shutdown__needed,
453 464 netstack_t *, ns, int, i);
454 465 }
455 466 }
456 467 mutex_exit(&ns->netstack_lock);
457 468 mutex_exit(&netstack_g_lock);
458 469
459 470 /*
460 471 * Call the shutdown function for all registered modules for this
461 472 * netstack.
462 473 */
463 474 apply_all_modules_reverse(ns, netstack_apply_shutdown);
464 475
465 476 /* Tell any waiting netstack_register/netstack_unregister to proceed */
466 477 mutex_enter(&ns->netstack_lock);
467 478 ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
468 479 ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
469 480 cv_broadcast(&ns->netstack_cv);
470 481 mutex_exit(&ns->netstack_lock);
471 482 }
472 483
473 484 /*
474 485 * Common routine to release a zone.
475 486 * If this was the last zone using the stack instance then prepare to
476 487 * have the refcnt dropping to zero free the zone.
477 488 */
478 489 /* ARGSUSED */
479 490 static void
480 491 netstack_zone_destroy(zoneid_t zoneid, void *arg)
481 492 {
482 493 netstack_t *ns = (netstack_t *)arg;
483 494
484 495 ASSERT(arg != NULL);
485 496
486 497 mutex_enter(&ns->netstack_lock);
487 498 ASSERT(ns->netstack_numzones > 0);
488 499 ns->netstack_numzones--;
489 500 if (ns->netstack_numzones != 0) {
490 501 /* Stack instance being used by other zone */
491 502 mutex_exit(&ns->netstack_lock);
492 503 ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
 493 504 		/* Record that a shared stack zone has gone away */
494 505 netstack_shared_zone_remove(zoneid);
495 506 return;
496 507 }
497 508 /*
498 509 * Set CLOSING so that netstack_find_by will not find it.
499 510 */
500 511 ns->netstack_flags |= NSF_CLOSING;
501 512 mutex_exit(&ns->netstack_lock);
502 513 DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
503 514 /* No other thread can call zone_destroy for this stack */
504 515
505 516 /*
506 517 * Decrease refcnt to account for the one in netstack_zone_init()
507 518 */
508 519 netstack_rele(ns);
509 520 }
510 521
511 522 /*
512 523 * Called when the reference count drops to zero.
513 524 * Call the destroy functions for each registered module.
514 525 */
515 526 static void
516 527 netstack_stack_inactive(netstack_t *ns)
517 528 {
518 529 int i;
519 530
520 531 mutex_enter(&netstack_g_lock);
521 532 mutex_enter(&ns->netstack_lock);
522 533 /*
523 534 * Mark this netstack as having a DESTROY running so
524 535 * any netstack_register/netstack_unregister waits for
525 536 * the existing destroy callbacks to complete in reverse moduleid order
526 537 */
527 538 ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
528 539 ns->netstack_flags |= NSF_ZONE_DESTROY;
529 540 /*
530 541 * If the shutdown callback wasn't called earlier (e.g., if this is
531 542 * a netstack shared between multiple zones), then we schedule it now.
532 543 *
533 544 * Determine the set of stacks that exist before we drop the lock.
534 545 * Set NSS_DESTROY_NEEDED for each of those. That
535 546 * ensures that when we return all the callbacks for existing
536 547 * instances have completed.
537 548 */
538 549 for (i = 0; i < NS_MAX; i++) {
539 550 nm_state_t *nms = &ns->netstack_m_state[i];
540 551
541 552 if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
542 553 ns_reg[i].nr_shutdown != NULL &&
543 554 (nms->nms_flags & NSS_CREATE_COMPLETED) &&
544 555 (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
545 556 nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
546 557 DTRACE_PROBE2(netstack__shutdown__needed,
547 558 netstack_t *, ns, int, i);
548 559 }
549 560
550 561 if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
551 562 ns_reg[i].nr_destroy != NULL &&
552 563 (nms->nms_flags & NSS_CREATE_COMPLETED) &&
553 564 (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
554 565 nms->nms_flags |= NSS_DESTROY_NEEDED;
555 566 DTRACE_PROBE2(netstack__destroy__needed,
556 567 netstack_t *, ns, int, i);
557 568 }
558 569 }
559 570 mutex_exit(&ns->netstack_lock);
560 571 mutex_exit(&netstack_g_lock);
561 572
562 573 /*
563 574 * Call the shutdown and destroy functions for all registered modules
564 575 * for this netstack.
565 576 *
566 577 * Since there are some ordering dependencies between the modules we
567 578 * tear them down in the reverse order of what was used to create them.
568 579 *
569 580 * Since a netstack_t is never reused (when a zone is rebooted it gets
570 581 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
571 582 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
572 583 * That is different than in the netstack_unregister() case.
573 584 */
574 585 apply_all_modules_reverse(ns, netstack_apply_shutdown);
575 586 apply_all_modules_reverse(ns, netstack_apply_destroy);
576 587
577 588 /* Tell any waiting netstack_register/netstack_unregister to proceed */
578 589 mutex_enter(&ns->netstack_lock);
579 590 ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
580 591 ns->netstack_flags &= ~NSF_ZONE_DESTROY;
581 592 cv_broadcast(&ns->netstack_cv);
582 593 mutex_exit(&ns->netstack_lock);
583 594 }
584 595
585 596 /*
586 597 * Apply a function to all netstacks for a particular moduleid.
587 598 *
588 599 * If there is any zone activity (due to a zone being created, shutdown,
589 600 * or destroyed) we wait for that to complete before we proceed. This ensures
590 601 * that the moduleids are processed in order when a zone is created or
591 602 * destroyed.
592 603 *
593 604 * The applyfn has to drop netstack_g_lock if it does some work.
594 605 * In that case we don't follow netstack_next,
595 606 * even if it is possible to do so without any hazards. This is
596 607 * because we want the design to allow for the list of netstacks threaded
597 608 * by netstack_next to change in any arbitrary way during the time the
598 609 * lock was dropped.
599 610 *
600 611 * It is safe to restart the loop at netstack_head since the applyfn
601 612 * changes netstack_m_state as it processes things, so a subsequent
602 613 * pass through will have no effect in applyfn, hence the loop will terminate
603 614 * in at worst O(N^2).
604 615 */
605 616 static void
606 617 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
607 618 {
608 619 netstack_t *ns;
609 620
610 621 mutex_enter(&netstack_g_lock);
611 622 ns = netstack_head;
612 623 while (ns != NULL) {
613 624 if (wait_for_zone_creator(ns, &netstack_g_lock)) {
614 625 /* Lock dropped - restart at head */
615 626 ns = netstack_head;
616 627 } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
617 628 /* Lock dropped - restart at head */
618 629 ns = netstack_head;
619 630 } else {
620 631 ns = ns->netstack_next;
621 632 }
622 633 }
623 634 mutex_exit(&netstack_g_lock);
624 635 }
625 636
626 637 /*
627 638 * Apply a function to all moduleids for a particular netstack.
628 639 *
629 640 * Since the netstack linkage doesn't matter in this case we can
630 641 * ignore whether the function drops the lock.
631 642 */
632 643 static void
633 644 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
634 645 {
635 646 int i;
636 647
637 648 mutex_enter(&netstack_g_lock);
638 649 for (i = 0; i < NS_MAX; i++) {
639 650 /*
640 651 * We don't care whether the lock was dropped
641 652 * since we are not iterating over netstack_head.
642 653 */
643 654 (void) (applyfn)(&netstack_g_lock, ns, i);
644 655 }
645 656 mutex_exit(&netstack_g_lock);
646 657 }
647 658
648 659 /* Like the above but in reverse moduleid order */
649 660 static void
650 661 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
651 662 {
652 663 int i;
653 664
654 665 mutex_enter(&netstack_g_lock);
655 666 for (i = NS_MAX-1; i >= 0; i--) {
656 667 /*
657 668 * We don't care whether the lock was dropped
658 669 * since we are not iterating over netstack_head.
659 670 */
660 671 (void) (applyfn)(&netstack_g_lock, ns, i);
661 672 }
662 673 mutex_exit(&netstack_g_lock);
663 674 }
664 675
665 676 /*
666 677 * Call the create function for the ns and moduleid if CREATE_NEEDED
667 678 * is set.
668 679 * If some other thread gets here first and sets *_INPROGRESS, then
669 680 * we wait for that thread to complete so that we can ensure that
670 681 * all the callbacks are done when we've looped over all netstacks/moduleids.
671 682 *
672 683 * When we call the create function, we temporarily drop the netstack_lock
673 684 * held by the caller, and return true to tell the caller it needs to
 674 685  * re-evaluate the state.
675 686 */
676 687 static boolean_t
677 688 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
678 689 {
679 690 void *result;
680 691 netstackid_t stackid;
681 692 nm_state_t *nms = &ns->netstack_m_state[moduleid];
682 693 boolean_t dropped = B_FALSE;
683 694
684 695 ASSERT(MUTEX_HELD(lockp));
685 696 mutex_enter(&ns->netstack_lock);
686 697
687 698 if (wait_for_nms_inprogress(ns, nms, lockp))
688 699 dropped = B_TRUE;
689 700
690 701 if (nms->nms_flags & NSS_CREATE_NEEDED) {
691 702 nms->nms_flags &= ~NSS_CREATE_NEEDED;
692 703 nms->nms_flags |= NSS_CREATE_INPROGRESS;
693 704 DTRACE_PROBE2(netstack__create__inprogress,
694 705 netstack_t *, ns, int, moduleid);
695 706 mutex_exit(&ns->netstack_lock);
696 707 mutex_exit(lockp);
697 708 dropped = B_TRUE;
698 709
699 710 ASSERT(ns_reg[moduleid].nr_create != NULL);
700 711 stackid = ns->netstack_stackid;
701 712 DTRACE_PROBE2(netstack__create__start,
702 713 netstackid_t, stackid,
703 714 netstack_t *, ns);
704 715 result = (ns_reg[moduleid].nr_create)(stackid, ns);
705 716 DTRACE_PROBE2(netstack__create__end,
706 717 void *, result, netstack_t *, ns);
707 718
708 719 ASSERT(result != NULL);
709 720 mutex_enter(lockp);
710 721 mutex_enter(&ns->netstack_lock);
711 722 ns->netstack_modules[moduleid] = result;
712 723 nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
713 724 nms->nms_flags |= NSS_CREATE_COMPLETED;
714 725 cv_broadcast(&nms->nms_cv);
715 726 DTRACE_PROBE2(netstack__create__completed,
716 727 netstack_t *, ns, int, moduleid);
717 728 mutex_exit(&ns->netstack_lock);
718 729 return (dropped);
719 730 } else {
720 731 mutex_exit(&ns->netstack_lock);
721 732 return (dropped);
722 733 }
723 734 }
724 735
725 736 /*
726 737 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
727 738 * is set.
728 739 * If some other thread gets here first and sets *_INPROGRESS, then
729 740 * we wait for that thread to complete so that we can ensure that
730 741 * all the callbacks are done when we've looped over all netstacks/moduleids.
731 742 *
732 743 * When we call the shutdown function, we temporarily drop the netstack_lock
733 744 * held by the caller, and return true to tell the caller it needs to
 734 745  * re-evaluate the state.
735 746 */
736 747 static boolean_t
737 748 netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
738 749 {
739 750 netstackid_t stackid;
740 751 void * netstack_module;
741 752 nm_state_t *nms = &ns->netstack_m_state[moduleid];
742 753 boolean_t dropped = B_FALSE;
743 754
744 755 ASSERT(MUTEX_HELD(lockp));
745 756 mutex_enter(&ns->netstack_lock);
746 757
747 758 if (wait_for_nms_inprogress(ns, nms, lockp))
748 759 dropped = B_TRUE;
749 760
750 761 if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
751 762 nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
752 763 nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
753 764 DTRACE_PROBE2(netstack__shutdown__inprogress,
754 765 netstack_t *, ns, int, moduleid);
755 766 mutex_exit(&ns->netstack_lock);
756 767 mutex_exit(lockp);
757 768 dropped = B_TRUE;
758 769
759 770 ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
760 771 stackid = ns->netstack_stackid;
761 772 netstack_module = ns->netstack_modules[moduleid];
762 773 DTRACE_PROBE2(netstack__shutdown__start,
763 774 netstackid_t, stackid,
764 775 void *, netstack_module);
765 776 (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
766 777 DTRACE_PROBE1(netstack__shutdown__end,
767 778 netstack_t *, ns);
768 779
769 780 mutex_enter(lockp);
770 781 mutex_enter(&ns->netstack_lock);
771 782 nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
772 783 nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
773 784 cv_broadcast(&nms->nms_cv);
774 785 DTRACE_PROBE2(netstack__shutdown__completed,
775 786 netstack_t *, ns, int, moduleid);
776 787 mutex_exit(&ns->netstack_lock);
777 788 return (dropped);
778 789 } else {
779 790 mutex_exit(&ns->netstack_lock);
780 791 return (dropped);
781 792 }
782 793 }
783 794
784 795 /*
785 796 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
786 797 * is set.
787 798 * If some other thread gets here first and sets *_INPROGRESS, then
788 799 * we wait for that thread to complete so that we can ensure that
789 800 * all the callbacks are done when we've looped over all netstacks/moduleids.
790 801 *
791 802 * When we call the destroy function, we temporarily drop the netstack_lock
792 803 * held by the caller, and return true to tell the caller it needs to
 793 804  * re-evaluate the state.
794 805 */
795 806 static boolean_t
796 807 netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
797 808 {
798 809 netstackid_t stackid;
799 810 void * netstack_module;
800 811 nm_state_t *nms = &ns->netstack_m_state[moduleid];
801 812 boolean_t dropped = B_FALSE;
802 813
803 814 ASSERT(MUTEX_HELD(lockp));
804 815 mutex_enter(&ns->netstack_lock);
805 816
806 817 if (wait_for_nms_inprogress(ns, nms, lockp))
807 818 dropped = B_TRUE;
808 819
809 820 if (nms->nms_flags & NSS_DESTROY_NEEDED) {
810 821 nms->nms_flags &= ~NSS_DESTROY_NEEDED;
811 822 nms->nms_flags |= NSS_DESTROY_INPROGRESS;
812 823 DTRACE_PROBE2(netstack__destroy__inprogress,
813 824 netstack_t *, ns, int, moduleid);
814 825 mutex_exit(&ns->netstack_lock);
815 826 mutex_exit(lockp);
816 827 dropped = B_TRUE;
817 828
818 829 ASSERT(ns_reg[moduleid].nr_destroy != NULL);
819 830 stackid = ns->netstack_stackid;
820 831 netstack_module = ns->netstack_modules[moduleid];
821 832 DTRACE_PROBE2(netstack__destroy__start,
822 833 netstackid_t, stackid,
823 834 void *, netstack_module);
824 835 (ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
825 836 DTRACE_PROBE1(netstack__destroy__end,
826 837 netstack_t *, ns);
827 838
828 839 mutex_enter(lockp);
829 840 mutex_enter(&ns->netstack_lock);
830 841 ns->netstack_modules[moduleid] = NULL;
831 842 nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
832 843 nms->nms_flags |= NSS_DESTROY_COMPLETED;
833 844 cv_broadcast(&nms->nms_cv);
834 845 DTRACE_PROBE2(netstack__destroy__completed,
835 846 netstack_t *, ns, int, moduleid);
836 847 mutex_exit(&ns->netstack_lock);
837 848 return (dropped);
838 849 } else {
839 850 mutex_exit(&ns->netstack_lock);
840 851 return (dropped);
841 852 }
842 853 }
843 854
844 855 /*
845 856 * If somebody is creating the netstack (due to a new zone being created)
846 857 * then we wait for them to complete. This ensures that any additional
847 858 * netstack_register() doesn't cause the create functions to run out of
848 859 * order.
849 860 * Note that we do not need such a global wait in the case of the shutdown
850 861 * and destroy callbacks, since in that case it is sufficient for both
851 862 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
852 863 * Returns true if lockp was temporarily dropped while waiting.
853 864 */
854 865 static boolean_t
855 866 wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
856 867 {
857 868 boolean_t dropped = B_FALSE;
858 869
859 870 mutex_enter(&ns->netstack_lock);
860 871 while (ns->netstack_flags & NSF_ZONE_CREATE) {
861 872 DTRACE_PROBE1(netstack__wait__zone__inprogress,
862 873 netstack_t *, ns);
863 874 if (lockp != NULL) {
864 875 dropped = B_TRUE;
865 876 mutex_exit(lockp);
866 877 }
867 878 cv_wait(&ns->netstack_cv, &ns->netstack_lock);
868 879 if (lockp != NULL) {
869 880 /* First drop netstack_lock to preserve order */
870 881 mutex_exit(&ns->netstack_lock);
871 882 mutex_enter(lockp);
872 883 mutex_enter(&ns->netstack_lock);
873 884 }
874 885 }
875 886 mutex_exit(&ns->netstack_lock);
876 887 return (dropped);
877 888 }
878 889
879 890 /*
880 891 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
881 892 * combination.
882 893 * Returns true if lockp was temporarily dropped while waiting.
883 894 */
884 895 static boolean_t
885 896 wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
886 897 {
887 898 boolean_t dropped = B_FALSE;
888 899
889 900 while (nms->nms_flags & NSS_ALL_INPROGRESS) {
890 901 DTRACE_PROBE2(netstack__wait__nms__inprogress,
891 902 netstack_t *, ns, nm_state_t *, nms);
892 903 if (lockp != NULL) {
893 904 dropped = B_TRUE;
894 905 mutex_exit(lockp);
895 906 }
896 907 cv_wait(&nms->nms_cv, &ns->netstack_lock);
897 908 if (lockp != NULL) {
898 909 /* First drop netstack_lock to preserve order */
899 910 mutex_exit(&ns->netstack_lock);
900 911 mutex_enter(lockp);
901 912 mutex_enter(&ns->netstack_lock);
902 913 }
903 914 }
904 915 return (dropped);
905 916 }
906 917
907 918 /*
908 919 * Get the stack instance used in caller's zone.
909 920 * Increases the reference count, caller must do a netstack_rele.
910 921 * It can't be called after zone_destroy() has started.
911 922 */
912 923 netstack_t *
913 924 netstack_get_current(void)
914 925 {
915 926 netstack_t *ns;
916 927
917 928 ns = curproc->p_zone->zone_netstack;
918 929 ASSERT(ns != NULL);
919 930 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
920 931 return (NULL);
921 932
922 933 netstack_hold(ns);
923 934
924 935 return (ns);
925 936 }
926 937
927 938 /*
928 939 * Find a stack instance given the cred.
929 940 * This is used by the modules to potentially allow for a future when
930 941 * something other than the zoneid is used to determine the stack.
931 942 */
932 943 netstack_t *
933 944 netstack_find_by_cred(const cred_t *cr)
934 945 {
935 946 zoneid_t zoneid = crgetzoneid(cr);
936 947
937 948 /* Handle the case when cr_zone is NULL */
938 949 if (zoneid == (zoneid_t)-1)
939 950 zoneid = GLOBAL_ZONEID;
940 951
941 952 /* For performance ... */
942 953 if (curproc->p_zone->zone_id == zoneid)
943 954 return (netstack_get_current());
944 955 else
945 956 return (netstack_find_by_zoneid(zoneid));
946 957 }
947 958
948 959 /*
949 960 * Find a stack instance given the zoneid.
950 961 * Increases the reference count if found; caller must do a
951 962 * netstack_rele().
952 963 *
953 964 * If there is no exact match then assume the shared stack instance
954 965 * matches.
955 966 *
956 967 * Skip the uninitialized ones.
957 968 */
958 969 netstack_t *
959 970 netstack_find_by_zoneid(zoneid_t zoneid)
960 971 {
961 972 netstack_t *ns;
962 973 zone_t *zone;
963 974
964 975 zone = zone_find_by_id(zoneid);
965 976
966 977 if (zone == NULL)
967 978 return (NULL);
968 979
969 980 ns = zone->zone_netstack;
970 981 ASSERT(ns != NULL);
971 982 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
972 983 ns = NULL;
973 984 else
974 985 netstack_hold(ns);
975 986
976 987 zone_rele(zone);
977 988 return (ns);
978 989 }
979 990
980 991 /*
981 992 * Find a stack instance given the zoneid. Can only be called from
982 993 * the create callback. See the comments in zone_find_by_id_nolock why
983 994 * that limitation exists.
984 995 *
985 996 * Increases the reference count if found; caller must do a
986 997 * netstack_rele().
987 998 *
988 999 * If there is no exact match then assume the shared stack instance
989 1000 * matches.
990 1001 *
991 1002 * Skip the uninitialized ones.
992 1003 */
993 1004 netstack_t *
994 1005 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
995 1006 {
996 1007 netstack_t *ns;
997 1008 zone_t *zone;
998 1009
999 1010 zone = zone_find_by_id_nolock(zoneid);
1000 1011
1001 1012 if (zone == NULL)
1002 1013 return (NULL);
1003 1014
1004 1015 ns = zone->zone_netstack;
1005 1016 ASSERT(ns != NULL);
1006 1017
1007 1018 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
1008 1019 ns = NULL;
1009 1020 else
1010 1021 netstack_hold(ns);
1011 1022
1012 1023 /* zone_find_by_id_nolock does not have a hold on the zone */
1013 1024 return (ns);
1014 1025 }
1015 1026
1016 1027 /*
1017 1028 * Find a stack instance given the stackid with an exact match.
1018 1029 * Increases the reference count if found; caller must do a
1019 1030 * netstack_rele().
1020 1031 *
1021 1032 * Skip the uninitialized ones.
1022 1033 */
1023 1034 netstack_t *
1024 1035 netstack_find_by_stackid(netstackid_t stackid)
1025 1036 {
1026 1037 netstack_t *ns;
1027 1038
1028 1039 mutex_enter(&netstack_g_lock);
1029 1040 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1030 1041 mutex_enter(&ns->netstack_lock);
1031 1042 if (ns->netstack_stackid == stackid &&
1032 1043 !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1033 1044 mutex_exit(&ns->netstack_lock);
1034 1045 netstack_hold(ns);
1035 1046 mutex_exit(&netstack_g_lock);
1036 1047 return (ns);
1037 1048 }
1038 1049 mutex_exit(&ns->netstack_lock);
1039 1050 }
1040 1051 mutex_exit(&netstack_g_lock);
1041 1052 return (NULL);
1042 1053 }
1043 1054
1044 1055 boolean_t
1045 1056 netstack_inuse_by_stackid(netstackid_t stackid)
1046 1057 {
1047 1058 netstack_t *ns;
1048 1059 boolean_t rval = B_FALSE;
1049 1060
1050 1061 mutex_enter(&netstack_g_lock);
1051 1062
1052 1063 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1053 1064 if (ns->netstack_stackid == stackid) {
|
↓ open down ↓ |
913 lines elided |
↑ open up ↑ |
1054 1065 rval = B_TRUE;
1055 1066 break;
1056 1067 }
1057 1068 }
1058 1069
1059 1070 mutex_exit(&netstack_g_lock);
1060 1071
1061 1072 return (rval);
1062 1073 }
1063 1074
1075 +
1076 +static void
1077 +netstack_reap(void *arg)
1078 +{
1079 + netstack_t **nsp, *ns = (netstack_t *)arg;
1080 + boolean_t found;
1081 + int i;
1082 +
1083 + /*
1084 + * Time to call the destroy functions and free up
1085 + * the structure
1086 + */
1087 + netstack_stack_inactive(ns);
1088 +
1089 + /* Make sure nothing increased the references */
1090 + ASSERT(ns->netstack_refcnt == 0);
1091 + ASSERT(ns->netstack_numzones == 0);
1092 +
1093 + /* Finally remove from list of netstacks */
1094 + mutex_enter(&netstack_g_lock);
1095 + found = B_FALSE;
1096 + for (nsp = &netstack_head; *nsp != NULL;
1097 + nsp = &(*nsp)->netstack_next) {
1098 + if (*nsp == ns) {
1099 + *nsp = ns->netstack_next;
1100 + ns->netstack_next = NULL;
1101 + found = B_TRUE;
1102 + break;
1103 + }
1104 + }
1105 + ASSERT(found);
1106 + mutex_exit(&netstack_g_lock);
1107 +
1108 + /* Make sure nothing increased the references */
1109 + ASSERT(ns->netstack_refcnt == 0);
1110 + ASSERT(ns->netstack_numzones == 0);
1111 +
1112 + ASSERT(ns->netstack_flags & NSF_CLOSING);
1113 +
1114 + for (i = 0; i < NS_MAX; i++) {
1115 + nm_state_t *nms = &ns->netstack_m_state[i];
1116 +
1117 + cv_destroy(&nms->nms_cv);
1118 + }
1119 + mutex_destroy(&ns->netstack_lock);
1120 + cv_destroy(&ns->netstack_cv);
1121 + kmem_free(ns, sizeof (*ns));
1122 + /* Allow another reap to be scheduled. */
1123 + sema_v(&netstack_reap_limiter);
1124 +}
1125 +
1064 1126 void
1065 1127 netstack_rele(netstack_t *ns)
1066 1128 {
1067 - netstack_t **nsp;
1068 - boolean_t found;
1069 1129 int refcnt, numzones;
1070 - int i;
1071 1130
1072 1131 mutex_enter(&ns->netstack_lock);
1073 1132 ASSERT(ns->netstack_refcnt > 0);
1074 1133 ns->netstack_refcnt--;
1075 1134 /*
1076 1135 * As we drop the lock additional netstack_rele()s can come in
1077 1136 * and decrement the refcnt to zero and free the netstack_t.
1078 1137 * Store pointers in local variables and if we were not the last
1079 1138 * then don't reference the netstack_t after that.
1080 1139 */
1081 1140 refcnt = ns->netstack_refcnt;
1082 1141 numzones = ns->netstack_numzones;
1083 1142 DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1084 1143 mutex_exit(&ns->netstack_lock);
1085 1144
1086 1145 if (refcnt == 0 && numzones == 0) {
1087 1146 /*
1088 - * Time to call the destroy functions and free up
1089 - * the structure
1147 + * Because there are possibilities of re-entrancy in various
1148 + * netstack structures by callers, which might cause a lock up
1149 + * due to odd reference models, or other factors, we choose to
1150 + * schedule the actual deletion of this netstack as a deferred
1151 + * task on the system taskq. This way, any such reference
1152 + * models won't trip over themselves.
1153 + *
1154 + * Assume we aren't in a high-priority interrupt context, so
1155 + * we can use KM_SLEEP and semaphores.
1090 1156 */
1091 - netstack_stack_inactive(ns);
1157 + if (sema_tryp(&netstack_reap_limiter) == 0) {
1158 + /*
1159 + * Indicate we're slamming against a limit.
1160 + */
1161 + hrtime_t measurement = gethrtime();
1092 1162
1093 - /* Make sure nothing increased the references */
1094 - ASSERT(ns->netstack_refcnt == 0);
1095 - ASSERT(ns->netstack_numzones == 0);
1096 -
1097 - /* Finally remove from list of netstacks */
1098 - mutex_enter(&netstack_g_lock);
1099 - found = B_FALSE;
1100 - for (nsp = &netstack_head; *nsp != NULL;
1101 - nsp = &(*nsp)->netstack_next) {
1102 - if (*nsp == ns) {
1103 - *nsp = ns->netstack_next;
1104 - ns->netstack_next = NULL;
1105 - found = B_TRUE;
1106 - break;
1107 - }
1163 + sema_p(&netstack_reap_limiter);
1164 + /* Capture delay in ns. */
1165 + DTRACE_PROBE1(netstack__reap__rate__limited,
1166 + hrtime_t, gethrtime() - measurement);
1108 1167 }
1109 - ASSERT(found);
1110 - mutex_exit(&netstack_g_lock);
1111 1168
1112 - /* Make sure nothing increased the references */
1113 - ASSERT(ns->netstack_refcnt == 0);
1114 - ASSERT(ns->netstack_numzones == 0);
1115 -
1116 - ASSERT(ns->netstack_flags & NSF_CLOSING);
1117 -
1118 - for (i = 0; i < NS_MAX; i++) {
1119 - nm_state_t *nms = &ns->netstack_m_state[i];
1120 -
1121 - cv_destroy(&nms->nms_cv);
1122 - }
1123 - mutex_destroy(&ns->netstack_lock);
1124 - cv_destroy(&ns->netstack_cv);
1125 - kmem_free(ns, sizeof (*ns));
1169 + /* TQ_SLEEP should prevent taskq_dispatch() from failing. */
1170 + (void) taskq_dispatch(system_taskq, netstack_reap, ns,
1171 + TQ_SLEEP);
1126 1172 }
1127 1173 }
1128 1174
1129 1175 void
1130 1176 netstack_hold(netstack_t *ns)
1131 1177 {
1132 1178 mutex_enter(&ns->netstack_lock);
1133 1179 ns->netstack_refcnt++;
1134 1180 ASSERT(ns->netstack_refcnt > 0);
1135 1181 mutex_exit(&ns->netstack_lock);
1136 1182 DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1137 1183 }
1138 1184
1139 1185 /*
1140 1186 * To support kstat_create_netstack() using kstat_zone_add we need
1141 1187 * to track both
1142 1188 * - all zoneids that use the global/shared stack
1143 1189 * - all kstats that have been added for the shared stack
1144 1190 */
1145 1191 kstat_t *
1146 1192 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1147 1193 char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1148 1194 netstackid_t ks_netstackid)
1149 1195 {
1150 1196 kstat_t *ks;
1151 1197
1152 1198 if (ks_netstackid == GLOBAL_NETSTACKID) {
1153 1199 ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1154 1200 ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1155 1201 if (ks != NULL)
1156 1202 netstack_shared_kstat_add(ks);
1157 1203 return (ks);
1158 1204 } else {
1159 1205 zoneid_t zoneid = ks_netstackid;
1160 1206
1161 1207 return (kstat_create_zone(ks_module, ks_instance, ks_name,
1162 1208 ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1163 1209 }
1164 1210 }
1165 1211
1166 1212 void
1167 1213 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1168 1214 {
1169 1215 if (ks_netstackid == GLOBAL_NETSTACKID) {
1170 1216 netstack_shared_kstat_remove(ks);
1171 1217 }
1172 1218 kstat_delete(ks);
1173 1219 }
1174 1220
1175 1221 static void
1176 1222 netstack_shared_zone_add(zoneid_t zoneid)
1177 1223 {
1178 1224 struct shared_zone_list *sz;
1179 1225 struct shared_kstat_list *sk;
1180 1226
1181 1227 sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1182 1228 sz->sz_zoneid = zoneid;
1183 1229
1184 1230 /* Insert in list */
1185 1231 mutex_enter(&netstack_shared_lock);
1186 1232 sz->sz_next = netstack_shared_zones;
1187 1233 netstack_shared_zones = sz;
1188 1234
1189 1235 /*
1190 1236 * Perform kstat_zone_add for each existing shared stack kstat.
1191 1237 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1192 1238 */
1193 1239 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1194 1240 kstat_zone_add(sk->sk_kstat, zoneid);
1195 1241 }
1196 1242 mutex_exit(&netstack_shared_lock);
1197 1243 }
1198 1244
1199 1245 static void
1200 1246 netstack_shared_zone_remove(zoneid_t zoneid)
1201 1247 {
1202 1248 struct shared_zone_list **szp, *sz;
1203 1249 struct shared_kstat_list *sk;
1204 1250
1205 1251 /* Find in list */
1206 1252 mutex_enter(&netstack_shared_lock);
1207 1253 sz = NULL;
1208 1254 for (szp = &netstack_shared_zones; *szp != NULL;
1209 1255 szp = &((*szp)->sz_next)) {
1210 1256 if ((*szp)->sz_zoneid == zoneid) {
1211 1257 sz = *szp;
1212 1258 break;
1213 1259 }
1214 1260 }
1215 1261 /* We must find it */
1216 1262 ASSERT(sz != NULL);
1217 1263 *szp = sz->sz_next;
1218 1264 sz->sz_next = NULL;
1219 1265
1220 1266 /*
1221 1267 * Perform kstat_zone_remove for each existing shared stack kstat.
1222 1268 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1223 1269 */
1224 1270 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1225 1271 kstat_zone_remove(sk->sk_kstat, zoneid);
1226 1272 }
1227 1273 mutex_exit(&netstack_shared_lock);
1228 1274
1229 1275 kmem_free(sz, sizeof (*sz));
1230 1276 }
1231 1277
1232 1278 static void
1233 1279 netstack_shared_kstat_add(kstat_t *ks)
1234 1280 {
1235 1281 struct shared_zone_list *sz;
1236 1282 struct shared_kstat_list *sk;
1237 1283
1238 1284 sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1239 1285 sk->sk_kstat = ks;
1240 1286
1241 1287 /* Insert in list */
1242 1288 mutex_enter(&netstack_shared_lock);
1243 1289 sk->sk_next = netstack_shared_kstats;
1244 1290 netstack_shared_kstats = sk;
1245 1291
1246 1292 /*
1247 1293 * Perform kstat_zone_add for each existing shared stack zone.
1248 1294 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1249 1295 */
1250 1296 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1251 1297 kstat_zone_add(ks, sz->sz_zoneid);
1252 1298 }
1253 1299 mutex_exit(&netstack_shared_lock);
1254 1300 }
1255 1301
1256 1302 static void
1257 1303 netstack_shared_kstat_remove(kstat_t *ks)
1258 1304 {
1259 1305 struct shared_zone_list *sz;
1260 1306 struct shared_kstat_list **skp, *sk;
1261 1307
1262 1308 /* Find in list */
1263 1309 mutex_enter(&netstack_shared_lock);
1264 1310 sk = NULL;
1265 1311 for (skp = &netstack_shared_kstats; *skp != NULL;
1266 1312 skp = &((*skp)->sk_next)) {
1267 1313 if ((*skp)->sk_kstat == ks) {
1268 1314 sk = *skp;
1269 1315 break;
1270 1316 }
1271 1317 }
1272 1318 /* Must find it */
1273 1319 ASSERT(sk != NULL);
1274 1320 *skp = sk->sk_next;
1275 1321 sk->sk_next = NULL;
1276 1322
1277 1323 /*
1278 1324 * Perform kstat_zone_remove for each existing shared stack kstat.
1279 1325 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1280 1326 */
1281 1327 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1282 1328 kstat_zone_remove(ks, sz->sz_zoneid);
1283 1329 }
1284 1330 mutex_exit(&netstack_shared_lock);
1285 1331 kmem_free(sk, sizeof (*sk));
1286 1332 }
1287 1333
1288 1334 /*
1289 1335 * If a zoneid is part of the shared zone, return true
1290 1336 */
1291 1337 static boolean_t
1292 1338 netstack_find_shared_zoneid(zoneid_t zoneid)
1293 1339 {
1294 1340 struct shared_zone_list *sz;
1295 1341
1296 1342 mutex_enter(&netstack_shared_lock);
1297 1343 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1298 1344 if (sz->sz_zoneid == zoneid) {
1299 1345 mutex_exit(&netstack_shared_lock);
1300 1346 return (B_TRUE);
1301 1347 }
1302 1348 }
1303 1349 mutex_exit(&netstack_shared_lock);
1304 1350 return (B_FALSE);
1305 1351 }
1306 1352
1307 1353 /*
1308 1354 * Hide the fact that zoneids and netstackids are allocated from
1309 1355 * the same space in the current implementation.
1310 1356 * We currently do not check that the stackid/zoneids are valid, since there
1311 1357 * is no need for that. But this should only be done for ids that are
1312 1358 * valid.
1313 1359 */
1314 1360 zoneid_t
1315 1361 netstackid_to_zoneid(netstackid_t stackid)
1316 1362 {
1317 1363 return (stackid);
1318 1364 }
1319 1365
1320 1366 netstackid_t
1321 1367 zoneid_to_netstackid(zoneid_t zoneid)
1322 1368 {
1323 1369 if (netstack_find_shared_zoneid(zoneid))
1324 1370 return (GLOBAL_ZONEID);
1325 1371 else
1326 1372 return (zoneid);
1327 1373 }
1328 1374
1329 1375 zoneid_t
1330 1376 netstack_get_zoneid(netstack_t *ns)
1331 1377 {
1332 1378 return (netstackid_to_zoneid(ns->netstack_stackid));
1333 1379 }
1334 1380
1335 1381 /*
1336 1382 * Simplistic support for walking all the handles.
1337 1383 * Example usage:
1338 1384 * netstack_handle_t nh;
1339 1385 * netstack_t *ns;
1340 1386 *
1341 1387 * netstack_next_init(&nh);
1342 1388 * while ((ns = netstack_next(&nh)) != NULL) {
1343 1389 * do something;
1344 1390 * netstack_rele(ns);
1345 1391 * }
1346 1392 * netstack_next_fini(&nh);
1347 1393 */
1348 1394 void
1349 1395 netstack_next_init(netstack_handle_t *handle)
1350 1396 {
1351 1397 *handle = 0;
1352 1398 }
1353 1399
1354 1400 /* ARGSUSED */
1355 1401 void
1356 1402 netstack_next_fini(netstack_handle_t *handle)
1357 1403 {
1358 1404 }
1359 1405
1360 1406 netstack_t *
1361 1407 netstack_next(netstack_handle_t *handle)
1362 1408 {
1363 1409 netstack_t *ns;
1364 1410 int i, end;
1365 1411
1366 1412 end = *handle;
1367 1413 /* Walk skipping *handle number of instances */
1368 1414
1369 1415 /* Look if there is a matching stack instance */
1370 1416 mutex_enter(&netstack_g_lock);
1371 1417 ns = netstack_head;
1372 1418 for (i = 0; i < end; i++) {
1373 1419 if (ns == NULL)
1374 1420 break;
1375 1421 ns = ns->netstack_next;
1376 1422 }
1377 1423 /* skip those that aren't really here */
1378 1424 while (ns != NULL) {
1379 1425 mutex_enter(&ns->netstack_lock);
1380 1426 if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
1381 1427 mutex_exit(&ns->netstack_lock);
1382 1428 break;
1383 1429 }
1384 1430 mutex_exit(&ns->netstack_lock);
1385 1431 end++;
1386 1432 ns = ns->netstack_next;
1387 1433 }
1388 1434 if (ns != NULL) {
1389 1435 *handle = end + 1;
1390 1436 netstack_hold(ns);
1391 1437 }
1392 1438 mutex_exit(&netstack_g_lock);
1393 1439 return (ns);
1394 1440 }
|
↓ open down ↓ |
259 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX