1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
26 */
27
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <sys/vm.h>
31 #include <sys/proc.h>
32 #include <sys/tuneable.h>
33 #include <sys/systm.h>
34 #include <sys/cmn_err.h>
35 #include <sys/debug.h>
36 #include <sys/sdt.h>
37 #include <sys/mutex.h>
38 #include <sys/bitmap.h>
39 #include <sys/atomic.h>
40 #include <sys/sunddi.h>
41 #include <sys/kobj.h>
42 #include <sys/disp.h>
43 #include <vm/seg_kmem.h>
44 #include <sys/zone.h>
45 #include <sys/netstack.h>
46
47 /*
48 * The zone key we register with the zones framework so that it can tell
49 * us about new zones, which we use to create new stacks.
50 */
51 static zone_key_t netstack_zone_key;
52
53 static int netstack_initialized = 0;
54
55 /*
56 * Track the registered netstacks.
57 * The global lock protects
58 * - ns_reg
59 * - the list starting at netstack_head and following the netstack_next
60 * pointers.
61 */
62 static kmutex_t netstack_g_lock;
63
64 /*
65 * Registry of netstacks with their create/shutdown/destroy functions.
66 */
67 static struct netstack_registry ns_reg[NS_MAX];
68
69 /*
70 * Global list of existing stacks. We use this when a new zone with
71 * an exclusive IP instance is created.
72 *
73 * Note that in some cases a netstack_t needs to stay around after the zone
74 * has gone away. This is because there might be outstanding references
75 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
76 * structure and all the foo_stack_t's hanging off of it will be cleaned up
77 * when the last reference to it is dropped.
78 * However, the same zone might be rebooted. That is handled using the
79 * assumption that the zones framework picks a new zoneid each time a zone
80 * is (re)booted. We assert for that condition in netstack_zone_create().
81 * Thus the old netstack_t can take its time for things to time out.
82 */
83 static netstack_t *netstack_head;
84
85 /*
86 * To support kstat_create_netstack() using kstat_zone_add we need
87 * to track both
88 * - all zoneids that use the global/shared stack
89 * - all kstats that have been added for the shared stack
90 */
91 struct shared_zone_list {
92 struct shared_zone_list *sz_next;
93 zoneid_t sz_zoneid;
94 };
95
96 struct shared_kstat_list {
97 struct shared_kstat_list *sk_next;
98 kstat_t *sk_kstat;
99 };
100
101 static kmutex_t netstack_shared_lock; /* protects the following two */
102 static struct shared_zone_list *netstack_shared_zones;
103 static struct shared_kstat_list *netstack_shared_kstats;
104
105 static void *netstack_zone_create(zoneid_t zoneid);
106 static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
107 static void netstack_zone_destroy(zoneid_t zoneid, void *arg);
108
109 static void netstack_shared_zone_add(zoneid_t zoneid);
110 static void netstack_shared_zone_remove(zoneid_t zoneid);
111 static void netstack_shared_kstat_add(kstat_t *ks);
112 static void netstack_shared_kstat_remove(kstat_t *ks);
113
114 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
115
116 static void apply_all_netstacks(int, applyfn_t *);
117 static void apply_all_modules(netstack_t *, applyfn_t *);
118 static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
119 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
120 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
121 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
122 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
123 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
124 kmutex_t *);
125
126 static void netstack_reap_work(netstack_t *, boolean_t);
127 ksema_t netstack_reap_limiter;
128
129 void
130 netstack_init(void)
131 {
132 mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
133 mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
134
135 /* XXX KEBE SAYS hard-coded constant needs to be fixed. */
136 sema_init(&netstack_reap_limiter, 1024, NULL, SEMA_DRIVER, NULL);
137
138 netstack_initialized = 1;
139
140 /*
141 * We want to be informed each time a zone is created or
142 * destroyed in the kernel, so we can maintain the
143 * stack instance information.
144 */
145 zone_key_create(&netstack_zone_key, netstack_zone_create,
146 netstack_zone_shutdown, netstack_zone_destroy);
147 }
148
149 /*
150 * Register a new module with the framework.
151 * This registers interest in changes to the set of netstacks.
152 * The createfn and destroyfn are required, but the shutdownfn can be
153 * NULL.
154 * Note that due to the current zsd implementation, the zone isn't fully
155 * present when the create function is called, so functions like
156 * zone_find_by_* will fail; hence the create function can not use many
157 * zones kernel functions, including zcmn_err().
158 */
159 void
160 netstack_register(int moduleid,
161 void *(*module_create)(netstackid_t, netstack_t *),
162 void (*module_shutdown)(netstackid_t, void *),
163 void (*module_destroy)(netstackid_t, void *))
164 {
165 netstack_t *ns;
166
167 ASSERT(netstack_initialized);
168 ASSERT(moduleid >= 0 && moduleid < NS_MAX);
169 ASSERT(module_create != NULL);
170
171 /*
172 * Make instances created after this point in time run the create
173 * callback.
174 */
175 mutex_enter(&netstack_g_lock);
176 ASSERT(ns_reg[moduleid].nr_create == NULL);
177 ASSERT(ns_reg[moduleid].nr_flags == 0);
178 ns_reg[moduleid].nr_create = module_create;
179 ns_reg[moduleid].nr_shutdown = module_shutdown;
180 ns_reg[moduleid].nr_destroy = module_destroy;
181 ns_reg[moduleid].nr_flags = NRF_REGISTERED;
182
183 /*
184 * Determine the set of stacks that exist before we drop the lock.
185 * Set NSS_CREATE_NEEDED for each of those.
186 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
187 * set, but check NSF_CLOSING to be sure.
188 */
189 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
190 nm_state_t *nms = &ns->netstack_m_state[moduleid];
191
192 mutex_enter(&ns->netstack_lock);
193 if (!(ns->netstack_flags & NSF_CLOSING) &&
194 (nms->nms_flags & NSS_CREATE_ALL) == 0) {
195 nms->nms_flags |= NSS_CREATE_NEEDED;
196 DTRACE_PROBE2(netstack__create__needed,
197 netstack_t *, ns, int, moduleid);
198 }
199 mutex_exit(&ns->netstack_lock);
200 }
201 mutex_exit(&netstack_g_lock);
202
203 /*
204 * At this point in time a new instance can be created or an instance
205 * can be destroyed, or some other module can register or unregister.
206 * Make sure we either run all the create functions for this moduleid
207 * or we wait for any other creators for this moduleid.
208 */
209 apply_all_netstacks(moduleid, netstack_apply_create);
210 }
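/*
 * Example usage (a minimal sketch, not taken from any real module): a
 * hypothetical module "foo" with moduleid NS_FOO would typically register
 * from its _init() routine, supplying per-stack create/shutdown/destroy
 * callbacks.  The create callback must return a non-NULL per-stack pointer,
 * which the framework stores in ns->netstack_modules[NS_FOO]:
 *
 *	static void *
 *	foo_stack_create(netstackid_t stackid, netstack_t *ns)
 *	{
 *		foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);
 *
 *		fs->fs_netstack = ns;
 *		return (fs);
 *	}
 *
 *	static void
 *	foo_stack_destroy(netstackid_t stackid, void *arg)
 *	{
 *		kmem_free(arg, sizeof (foo_stack_t));
 *	}
 *
 *	int
 *	_init(void)
 *	{
 *		netstack_register(NS_FOO, foo_stack_create, NULL,
 *		    foo_stack_destroy);
 *		return (mod_install(&modlinkage));
 *	}
 *
 * NS_FOO, foo_stack_t and the functions above are illustrative only; the
 * real moduleids come from <sys/netstack.h>.
 */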
211
212 void
213 netstack_unregister(int moduleid)
214 {
215 netstack_t *ns;
216
217 ASSERT(moduleid >= 0 && moduleid < NS_MAX);
218
219 ASSERT(ns_reg[moduleid].nr_create != NULL);
220 ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
221
222 mutex_enter(&netstack_g_lock);
223 /*
224 * Determine the set of stacks that exist before we drop the lock.
225 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
226 * That ensures that when we return all the callbacks for existing
227 * instances have completed. And since we set NRF_DYING no new
228 * instances can use this module.
229 */
230 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
231 boolean_t created = B_FALSE;
232 nm_state_t *nms = &ns->netstack_m_state[moduleid];
233
234 mutex_enter(&ns->netstack_lock);
235
236 /*
237 * We need to be careful here. We could actually have a netstack
238 * being created as we speak waiting for us to let go of this
239 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
240 * have gotten to the point of completing it yet. If
241 * NSS_CREATE_NEEDED, we can safely just remove it here and
242 * never create the module. However, if NSS_CREATE_INPROGRESS is
243 * set, we need to still flag this module for shutdown and
244 * deletion, just as though it had reached NSS_CREATE_COMPLETED.
245 *
246 * It is safe to do that because of two different guarantees
247 * that exist in the system. The first is that before we do a
248 * create, shutdown, or destroy, we ensure that nothing else is
249 * in progress in the system for this netstack and wait for it
250 * to complete. Secondly, because the zone is being created, we
251 * know that the following call to apply_all_netstacks will block
252 * on the zone finishing its initialization.
253 */
254 if (nms->nms_flags & NSS_CREATE_NEEDED)
255 nms->nms_flags &= ~NSS_CREATE_NEEDED;
256
257 if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
258 nms->nms_flags & NSS_CREATE_COMPLETED)
259 created = B_TRUE;
260
261 if (ns_reg[moduleid].nr_shutdown != NULL && created &&
262 (nms->nms_flags & NSS_CREATE_COMPLETED) &&
263 (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
264 nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
265 DTRACE_PROBE2(netstack__shutdown__needed,
266 netstack_t *, ns, int, moduleid);
267 }
268 if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
269 ns_reg[moduleid].nr_destroy != NULL && created &&
270 (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
271 nms->nms_flags |= NSS_DESTROY_NEEDED;
272 DTRACE_PROBE2(netstack__destroy__needed,
273 netstack_t *, ns, int, moduleid);
274 }
275 mutex_exit(&ns->netstack_lock);
276 }
277 /*
278 * Prevent any new netstack from calling the registered create
279 * function, while keeping the function pointers in place until the
280 * shutdown and destroy callbacks are complete.
281 */
282 ns_reg[moduleid].nr_flags |= NRF_DYING;
283 mutex_exit(&netstack_g_lock);
284
285 apply_all_netstacks(moduleid, netstack_apply_shutdown);
286 apply_all_netstacks(moduleid, netstack_apply_destroy);
287
288 /*
289 * Clear the nms_flags so that we can handle this module
290 * being loaded again.
291 * Also remove the registered functions.
292 */
293 mutex_enter(&netstack_g_lock);
294 ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
295 ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
296 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
297 nm_state_t *nms = &ns->netstack_m_state[moduleid];
298
299 mutex_enter(&ns->netstack_lock);
300 if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
301 nms->nms_flags = 0;
302 DTRACE_PROBE2(netstack__destroy__done,
303 netstack_t *, ns, int, moduleid);
304 }
305 mutex_exit(&ns->netstack_lock);
306 }
307
308 ns_reg[moduleid].nr_create = NULL;
309 ns_reg[moduleid].nr_shutdown = NULL;
310 ns_reg[moduleid].nr_destroy = NULL;
311 ns_reg[moduleid].nr_flags = 0;
312 mutex_exit(&netstack_g_lock);
313 }
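/*
 * Teardown sketch (hypothetical, continuing the NS_FOO example above): a
 * loadable module calls netstack_unregister() once it is sure it can
 * unload, so that the shutdown and destroy callbacks run for every
 * existing stack instance, e.g. from _fini() after mod_remove() succeeds:
 *
 *	int
 *	_fini(void)
 *	{
 *		int error;
 *
 *		if ((error = mod_remove(&modlinkage)) == 0)
 *			netstack_unregister(NS_FOO);
 *		return (error);
 *	}
 */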
314
315 /*
316 * Lookup and/or allocate a netstack for this zone.
317 */
318 static void *
319 netstack_zone_create(zoneid_t zoneid)
320 {
321 netstackid_t stackid;
322 netstack_t *ns;
323 netstack_t **nsp;
324 zone_t *zone;
325 int i;
326
327 ASSERT(netstack_initialized);
328
329 zone = zone_find_by_id_nolock(zoneid);
330 ASSERT(zone != NULL);
331
332 if (zone->zone_flags & ZF_NET_EXCL) {
333 stackid = zoneid;
334 } else {
335 /* Use the global (shared) stack instance */
336 stackid = GLOBAL_NETSTACKID;
337 }
338
339 /* Allocate even if it isn't needed; simplifies locking */
340 ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
341
342 /* Check whether there is a matching stack instance */
343 mutex_enter(&netstack_g_lock);
344 for (nsp = &netstack_head; *nsp != NULL;
345 nsp = &((*nsp)->netstack_next)) {
346 if ((*nsp)->netstack_stackid == stackid) {
347 /*
348 * Should never find a pre-existing exclusive stack
349 */
350 VERIFY(stackid == GLOBAL_NETSTACKID);
351 kmem_free(ns, sizeof (netstack_t));
352 ns = *nsp;
353 mutex_enter(&ns->netstack_lock);
354 ns->netstack_numzones++;
355 mutex_exit(&ns->netstack_lock);
356 mutex_exit(&netstack_g_lock);
357 DTRACE_PROBE1(netstack__inc__numzones,
358 netstack_t *, ns);
359 /* Record that we have a new shared stack zone */
360 netstack_shared_zone_add(zoneid);
361 zone->zone_netstack = ns;
362 return (ns);
363 }
364 }
365 /* Not found */
366 mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
367 cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
368 ns->netstack_stackid = zoneid;
369 ns->netstack_numzones = 1;
370 ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
371 ns->netstack_flags = NSF_UNINIT;
372 *nsp = ns;
373 zone->zone_netstack = ns;
374
375 mutex_enter(&ns->netstack_lock);
376 /*
377 * Mark this netstack as having a CREATE running so
378 * any netstack_register/netstack_unregister waits for
379 * the existing create callbacks to complete in moduleid order
380 */
381 ns->netstack_flags |= NSF_ZONE_CREATE;
382
383 /*
384 * Determine the set of module create functions that need to be
385 * called before we drop the lock.
386 * Set NSS_CREATE_NEEDED for each of those.
387 * Skip any with NRF_DYING set, since those are in the process of
388 * going away, by checking for flags being exactly NRF_REGISTERED.
389 */
390 for (i = 0; i < NS_MAX; i++) {
391 nm_state_t *nms = &ns->netstack_m_state[i];
392
393 cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
394
395 if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
396 (nms->nms_flags & NSS_CREATE_ALL) == 0) {
397 nms->nms_flags |= NSS_CREATE_NEEDED;
398 DTRACE_PROBE2(netstack__create__needed,
399 netstack_t *, ns, int, i);
400 }
401 }
402 mutex_exit(&ns->netstack_lock);
403 mutex_exit(&netstack_g_lock);
404
405 apply_all_modules(ns, netstack_apply_create);
406
407 /* Tell any waiting netstack_register/netstack_unregister to proceed */
408 mutex_enter(&ns->netstack_lock);
409 ns->netstack_flags &= ~NSF_UNINIT;
410 ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
411 ns->netstack_flags &= ~NSF_ZONE_CREATE;
412 cv_broadcast(&ns->netstack_cv);
413 mutex_exit(&ns->netstack_lock);
414
415 return (ns);
416 }
417
418 /* ARGSUSED */
419 static void
420 netstack_zone_shutdown(zoneid_t zoneid, void *arg)
421 {
422 netstack_t *ns = (netstack_t *)arg;
423 int i;
424
425 ASSERT(arg != NULL);
426
427 mutex_enter(&ns->netstack_lock);
428 ASSERT(ns->netstack_numzones > 0);
429 if (ns->netstack_numzones != 1) {
430 /* Stack instance being used by other zone */
431 mutex_exit(&ns->netstack_lock);
432 ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
433 return;
434 }
435 mutex_exit(&ns->netstack_lock);
436
437 mutex_enter(&netstack_g_lock);
438 mutex_enter(&ns->netstack_lock);
439 /*
440 * Mark this netstack as having a SHUTDOWN running so
441 * any netstack_register/netstack_unregister waits for
442 * the existing shutdown callbacks to complete in moduleid order.
443 */
444 ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
445 ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
446
447 /*
448 * Determine the set of module shutdown functions that need to be called
449 * before we drop the lock. Set NSS_SHUTDOWN_NEEDED for each of those.
450 */
451 for (i = 0; i < NS_MAX; i++) {
452 nm_state_t *nms = &ns->netstack_m_state[i];
453
454 if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
455 ns_reg[i].nr_shutdown != NULL &&
456 (nms->nms_flags & NSS_CREATE_COMPLETED) &&
457 (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
458 nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
459 DTRACE_PROBE2(netstack__shutdown__needed,
460 netstack_t *, ns, int, i);
461 }
462 }
463 mutex_exit(&ns->netstack_lock);
464 mutex_exit(&netstack_g_lock);
465
466 /*
467 * Call the shutdown function for all registered modules for this
468 * netstack.
469 */
470 apply_all_modules_reverse(ns, netstack_apply_shutdown);
471
472 /* Tell any waiting netstack_register/netstack_unregister to proceed */
473 mutex_enter(&ns->netstack_lock);
474 ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
475 ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
476 cv_broadcast(&ns->netstack_cv);
477 mutex_exit(&ns->netstack_lock);
478 }
479
480 /*
481 * Common routine to release a zone.
482 * If this was the last zone using the stack instance then prepare to
483 * have the refcnt dropping to zero free the zone.
484 */
485 /* ARGSUSED */
486 static void
487 netstack_zone_destroy(zoneid_t zoneid, void *arg)
488 {
489 netstack_t *ns = (netstack_t *)arg;
490
491 ASSERT(arg != NULL);
492
493 mutex_enter(&ns->netstack_lock);
494 ASSERT(ns->netstack_numzones > 0);
495 ns->netstack_numzones--;
496 if (ns->netstack_numzones != 0) {
497 /* Stack instance being used by other zone */
498 mutex_exit(&ns->netstack_lock);
499 ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
500 /* Record that a shared stack zone has gone away */
501 netstack_shared_zone_remove(zoneid);
502 return;
503 }
504 /*
505 * Set CLOSING so that netstack_find_by_*() will not find it.
506 */
507 ns->netstack_flags |= NSF_CLOSING;
508 mutex_exit(&ns->netstack_lock);
509 DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
510 /* No other thread can call zone_destroy for this stack */
511
512 /*
513 * Decrease refcnt to account for the one set in netstack_zone_create()
514 */
515 netstack_rele(ns);
516 }
517
518 /*
519 * Called when the reference count drops to zero.
520 * Call the destroy functions for each registered module.
521 */
522 static void
523 netstack_stack_inactive(netstack_t *ns)
524 {
525 int i;
526
527 mutex_enter(&netstack_g_lock);
528 mutex_enter(&ns->netstack_lock);
529 /*
530 * Mark this netstack as having a DESTROY running so
531 * any netstack_register/netstack_unregister waits for
532 * the existing destroy callbacks to complete in reverse moduleid order
533 */
534 ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
535 ns->netstack_flags |= NSF_ZONE_DESTROY;
536 /*
537 * If the shutdown callback wasn't called earlier (e.g., if this is
538 * a netstack shared between multiple zones), then we schedule it now.
539 *
540 * Determine the set of modules whose shutdown and destroy callbacks
541 * still need to run before we drop the lock. Set NSS_DESTROY_NEEDED for
542 * each of those. That ensures that when we return all the callbacks for
543 * existing instances have completed.
544 */
545 for (i = 0; i < NS_MAX; i++) {
546 nm_state_t *nms = &ns->netstack_m_state[i];
547
548 if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
549 ns_reg[i].nr_shutdown != NULL &&
550 (nms->nms_flags & NSS_CREATE_COMPLETED) &&
551 (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
552 nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
553 DTRACE_PROBE2(netstack__shutdown__needed,
554 netstack_t *, ns, int, i);
555 }
556
557 if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
558 ns_reg[i].nr_destroy != NULL &&
559 (nms->nms_flags & NSS_CREATE_COMPLETED) &&
560 (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
561 nms->nms_flags |= NSS_DESTROY_NEEDED;
562 DTRACE_PROBE2(netstack__destroy__needed,
563 netstack_t *, ns, int, i);
564 }
565 }
566 mutex_exit(&ns->netstack_lock);
567 mutex_exit(&netstack_g_lock);
568
569 /*
570 * Call the shutdown and destroy functions for all registered modules
571 * for this netstack.
572 *
573 * Since there are some ordering dependencies between the modules we
574 * tear them down in the reverse order of what was used to create them.
575 *
576 * Since a netstack_t is never reused (when a zone is rebooted it gets
577 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
578 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
579 * That is different than in the netstack_unregister() case.
580 */
581 apply_all_modules_reverse(ns, netstack_apply_shutdown);
582 apply_all_modules_reverse(ns, netstack_apply_destroy);
583
584 /* Tell any waiting netstack_register/netstack_unregister to proceed */
585 mutex_enter(&ns->netstack_lock);
586 ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
587 ns->netstack_flags &= ~NSF_ZONE_DESTROY;
588 cv_broadcast(&ns->netstack_cv);
589 mutex_exit(&ns->netstack_lock);
590 }
591
592 /*
593 * Apply a function to all netstacks for a particular moduleid.
594 *
595 * If there is any zone activity (due to a zone being created, shutdown,
596 * or destroyed) we wait for that to complete before we proceed. This ensures
597 * that the moduleids are processed in order when a zone is created or
598 * destroyed.
599 *
600 * The applyfn has to drop netstack_g_lock if it does some work.
601 * In that case we don't follow netstack_next,
602 * even if it is possible to do so without any hazards. This is
603 * because we want the design to allow for the list of netstacks threaded
604 * by netstack_next to change in any arbitrary way during the time the
605 * lock was dropped.
606 *
607 * It is safe to restart the loop at netstack_head since the applyfn
608 * changes netstack_m_state as it processes things, so a subsequent
609 * pass through will have no effect in applyfn, hence the loop will terminate
610 * in at worst O(N^2).
611 */
612 static void
613 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
614 {
615 netstack_t *ns;
616
617 mutex_enter(&netstack_g_lock);
618 ns = netstack_head;
619 while (ns != NULL) {
620 if (wait_for_zone_creator(ns, &netstack_g_lock)) {
621 /* Lock dropped - restart at head */
622 ns = netstack_head;
623 } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
624 /* Lock dropped - restart at head */
625 ns = netstack_head;
626 } else {
627 ns = ns->netstack_next;
628 }
629 }
630 mutex_exit(&netstack_g_lock);
631 }
632
633 /*
634 * Apply a function to all moduleids for a particular netstack.
635 *
636 * Since the netstack linkage doesn't matter in this case we can
637 * ignore whether the function drops the lock.
638 */
639 static void
640 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
641 {
642 int i;
643
644 mutex_enter(&netstack_g_lock);
645 for (i = 0; i < NS_MAX; i++) {
646 /*
647 * We don't care whether the lock was dropped
648 * since we are not iterating over netstack_head.
649 */
650 (void) (applyfn)(&netstack_g_lock, ns, i);
651 }
652 mutex_exit(&netstack_g_lock);
653 }
654
655 /* Like the above but in reverse moduleid order */
656 static void
657 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
658 {
659 int i;
660
661 mutex_enter(&netstack_g_lock);
662 for (i = NS_MAX-1; i >= 0; i--) {
663 /*
664 * We don't care whether the lock was dropped
665 * since we are not iterating over netstack_head.
666 */
667 (void) (applyfn)(&netstack_g_lock, ns, i);
668 }
669 mutex_exit(&netstack_g_lock);
670 }
671
672 /*
673 * Call the create function for the ns and moduleid if CREATE_NEEDED
674 * is set.
675 * If some other thread gets here first and sets *_INPROGRESS, then
676 * we wait for that thread to complete so that we can ensure that
677 * all the callbacks are done when we've looped over all netstacks/moduleids.
678 *
679 * When we call the create function, we temporarily drop both the caller's
680 * lock and the netstack_lock, and return true to tell the caller it needs
681 * to re-evaluate the state.
682 */
683 static boolean_t
684 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
685 {
686 void *result;
687 netstackid_t stackid;
688 nm_state_t *nms = &ns->netstack_m_state[moduleid];
689 boolean_t dropped = B_FALSE;
690
691 ASSERT(MUTEX_HELD(lockp));
692 mutex_enter(&ns->netstack_lock);
693
694 if (wait_for_nms_inprogress(ns, nms, lockp))
695 dropped = B_TRUE;
696
697 if (nms->nms_flags & NSS_CREATE_NEEDED) {
698 nms->nms_flags &= ~NSS_CREATE_NEEDED;
699 nms->nms_flags |= NSS_CREATE_INPROGRESS;
700 DTRACE_PROBE2(netstack__create__inprogress,
701 netstack_t *, ns, int, moduleid);
702 mutex_exit(&ns->netstack_lock);
703 mutex_exit(lockp);
704 dropped = B_TRUE;
705
706 ASSERT(ns_reg[moduleid].nr_create != NULL);
707 stackid = ns->netstack_stackid;
708 DTRACE_PROBE2(netstack__create__start,
709 netstackid_t, stackid,
710 netstack_t *, ns);
711 result = (ns_reg[moduleid].nr_create)(stackid, ns);
712 DTRACE_PROBE2(netstack__create__end,
713 void *, result, netstack_t *, ns);
714
715 ASSERT(result != NULL);
716 mutex_enter(lockp);
717 mutex_enter(&ns->netstack_lock);
718 ns->netstack_modules[moduleid] = result;
719 nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
720 nms->nms_flags |= NSS_CREATE_COMPLETED;
721 cv_broadcast(&nms->nms_cv);
722 DTRACE_PROBE2(netstack__create__completed,
723 netstack_t *, ns, int, moduleid);
724 mutex_exit(&ns->netstack_lock);
725 return (dropped);
726 } else {
727 mutex_exit(&ns->netstack_lock);
728 return (dropped);
729 }
730 }
731
732 /*
733 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
734 * is set.
735 * If some other thread gets here first and sets *_INPROGRESS, then
736 * we wait for that thread to complete so that we can ensure that
737 * all the callbacks are done when we've looped over all netstacks/moduleids.
738 *
739 * When we call the shutdown function, we temporarily drop both the caller's
740 * lock and the netstack_lock, and return true to tell the caller it needs
741 * to re-evaluate the state.
742 */
743 static boolean_t
744 netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
745 {
746 netstackid_t stackid;
747 void *netstack_module;
748 nm_state_t *nms = &ns->netstack_m_state[moduleid];
749 boolean_t dropped = B_FALSE;
750
751 ASSERT(MUTEX_HELD(lockp));
752 mutex_enter(&ns->netstack_lock);
753
754 if (wait_for_nms_inprogress(ns, nms, lockp))
755 dropped = B_TRUE;
756
757 if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
758 nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
759 nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
760 DTRACE_PROBE2(netstack__shutdown__inprogress,
761 netstack_t *, ns, int, moduleid);
762 mutex_exit(&ns->netstack_lock);
763 mutex_exit(lockp);
764 dropped = B_TRUE;
765
766 ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
767 stackid = ns->netstack_stackid;
768 netstack_module = ns->netstack_modules[moduleid];
769 DTRACE_PROBE2(netstack__shutdown__start,
770 netstackid_t, stackid,
771 void *, netstack_module);
772 (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
773 DTRACE_PROBE1(netstack__shutdown__end,
774 netstack_t *, ns);
775
776 mutex_enter(lockp);
777 mutex_enter(&ns->netstack_lock);
778 nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
779 nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
780 cv_broadcast(&nms->nms_cv);
781 DTRACE_PROBE2(netstack__shutdown__completed,
782 netstack_t *, ns, int, moduleid);
783 mutex_exit(&ns->netstack_lock);
784 return (dropped);
785 } else {
786 mutex_exit(&ns->netstack_lock);
787 return (dropped);
788 }
789 }
790
791 /*
792 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
793 * is set.
794 * If some other thread gets here first and sets *_INPROGRESS, then
795 * we wait for that thread to complete so that we can ensure that
796 * all the callbacks are done when we've looped over all netstacks/moduleids.
797 *
798 * When we call the destroy function, we temporarily drop both the caller's
799 * lock and the netstack_lock, and return true to tell the caller it needs
800 * to re-evaluate the state.
801 */
802 static boolean_t
803 netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
804 {
805 netstackid_t stackid;
806 void *netstack_module;
807 nm_state_t *nms = &ns->netstack_m_state[moduleid];
808 boolean_t dropped = B_FALSE;
809
810 ASSERT(MUTEX_HELD(lockp));
811 mutex_enter(&ns->netstack_lock);
812
813 if (wait_for_nms_inprogress(ns, nms, lockp))
814 dropped = B_TRUE;
815
816 if (nms->nms_flags & NSS_DESTROY_NEEDED) {
817 nms->nms_flags &= ~NSS_DESTROY_NEEDED;
818 nms->nms_flags |= NSS_DESTROY_INPROGRESS;
819 DTRACE_PROBE2(netstack__destroy__inprogress,
820 netstack_t *, ns, int, moduleid);
821 mutex_exit(&ns->netstack_lock);
822 mutex_exit(lockp);
823 dropped = B_TRUE;
824
825 ASSERT(ns_reg[moduleid].nr_destroy != NULL);
826 stackid = ns->netstack_stackid;
827 netstack_module = ns->netstack_modules[moduleid];
828 DTRACE_PROBE2(netstack__destroy__start,
829 netstackid_t, stackid,
830 void *, netstack_module);
831 (ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
832 DTRACE_PROBE1(netstack__destroy__end,
833 netstack_t *, ns);
834
835 mutex_enter(lockp);
836 mutex_enter(&ns->netstack_lock);
837 ns->netstack_modules[moduleid] = NULL;
838 nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
839 nms->nms_flags |= NSS_DESTROY_COMPLETED;
840 cv_broadcast(&nms->nms_cv);
841 DTRACE_PROBE2(netstack__destroy__completed,
842 netstack_t *, ns, int, moduleid);
843 mutex_exit(&ns->netstack_lock);
844 return (dropped);
845 } else {
846 mutex_exit(&ns->netstack_lock);
847 return (dropped);
848 }
849 }
850
851 /*
852 * If somebody is creating the netstack (due to a new zone being created)
853 * then we wait for them to complete. This ensures that any additional
854 * netstack_register() doesn't cause the create functions to run out of
855 * order.
856 * Note that we do not need such a global wait in the case of the shutdown
857 * and destroy callbacks, since in that case it is sufficient for both
858 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
859 * Returns true if lockp was temporarily dropped while waiting.
860 */
861 static boolean_t
862 wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
863 {
864 boolean_t dropped = B_FALSE;
865
866 mutex_enter(&ns->netstack_lock);
867 while (ns->netstack_flags & NSF_ZONE_CREATE) {
868 DTRACE_PROBE1(netstack__wait__zone__inprogress,
869 netstack_t *, ns);
870 if (lockp != NULL) {
871 dropped = B_TRUE;
872 mutex_exit(lockp);
873 }
874 cv_wait(&ns->netstack_cv, &ns->netstack_lock);
875 if (lockp != NULL) {
876 /* First drop netstack_lock to preserve order */
877 mutex_exit(&ns->netstack_lock);
878 mutex_enter(lockp);
879 mutex_enter(&ns->netstack_lock);
880 }
881 }
882 mutex_exit(&ns->netstack_lock);
883 return (dropped);
884 }
885
886 /*
887 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
888 * combination.
889 * Returns true if lockp was temporarily dropped while waiting.
890 */
891 static boolean_t
892 wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
893 {
894 boolean_t dropped = B_FALSE;
895
896 while (nms->nms_flags & NSS_ALL_INPROGRESS) {
897 DTRACE_PROBE2(netstack__wait__nms__inprogress,
898 netstack_t *, ns, nm_state_t *, nms);
899 if (lockp != NULL) {
900 dropped = B_TRUE;
901 mutex_exit(lockp);
902 }
903 cv_wait(&nms->nms_cv, &ns->netstack_lock);
904 if (lockp != NULL) {
905 /* First drop netstack_lock to preserve order */
906 mutex_exit(&ns->netstack_lock);
907 mutex_enter(lockp);
908 mutex_enter(&ns->netstack_lock);
909 }
910 }
911 return (dropped);
912 }
913
914 /*
915 * Get the stack instance used in the caller's zone.
916 * Increases the reference count, caller must do a netstack_rele.
917 * It can't be called after zone_destroy() has started.
918 */
919 netstack_t *
920 netstack_get_current(void)
921 {
922 netstack_t *ns;
923
924 ns = curproc->p_zone->zone_netstack;
925 ASSERT(ns != NULL);
926 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
927 return (NULL);
928
929 netstack_hold(ns);
930
931 return (ns);
932 }
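/*
 * Typical lookup pattern (a sketch; NS_FOO and foo_stack_t are the
 * hypothetical module state from the examples above): obtain a referenced
 * netstack_t, fetch the module's per-stack data, and drop the reference
 * when done.  Note that netstack_get_current() can return NULL when the
 * stack is uninitialized or closing, so callers must check for that:
 *
 *	netstack_t	*ns;
 *	foo_stack_t	*fs;
 *
 *	if ((ns = netstack_get_current()) == NULL)
 *		return (ENXIO);
 *	fs = (foo_stack_t *)ns->netstack_modules[NS_FOO];
 *	... use fs ...
 *	netstack_rele(ns);
 */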
933
934 /*
935 * Find a stack instance given the cred.
936 * This is used by the modules to potentially allow for a future when
937 * something other than the zoneid is used to determine the stack.
938 */
939 netstack_t *
940 netstack_find_by_cred(const cred_t *cr)
941 {
942 zoneid_t zoneid = crgetzoneid(cr);
943
944 /* Handle the case when cr_zone is NULL */
945 if (zoneid == (zoneid_t)-1)
946 zoneid = GLOBAL_ZONEID;
947
948 /* For performance ... */
949 if (curproc->p_zone->zone_id == zoneid)
950 return (netstack_get_current());
951 else
952 return (netstack_find_by_zoneid(zoneid));
953 }
954
955 /*
956 * Find a stack instance given the zoneid.
957 * Increases the reference count if found; caller must do a
958 * netstack_rele().
959 *
960 * If there is no exact match then assume the shared stack instance
961 * matches.
962 *
963 * Skip the uninitialized ones.
964 */
965 netstack_t *
966 netstack_find_by_zoneid(zoneid_t zoneid)
967 {
968 netstack_t *ns;
969 zone_t *zone;
970
971 zone = zone_find_by_id(zoneid);
972
973 if (zone == NULL)
974 return (NULL);
975
976 ns = zone->zone_netstack;
977 ASSERT(ns != NULL);
978 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
979 ns = NULL;
980 else
981 netstack_hold(ns);
982
983 zone_rele(zone);
984 return (ns);
985 }
986
987 /*
988 * Find a stack instance given the zoneid. Can only be called from
989 * the create callback. See the comments in zone_find_by_id_nolock() for why
990 * that limitation exists.
991 *
992 * Increases the reference count if found; caller must do a
993 * netstack_rele().
994 *
995 * If there is no exact match then assume the shared stack instance
996 * matches.
997 *
998 * Skip the uninitialized ones.
999 */
1000 netstack_t *
1001 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
1002 {
1003 netstack_t *ns;
1004 zone_t *zone;
1005
1006 zone = zone_find_by_id_nolock(zoneid);
1007
1008 if (zone == NULL)
1009 return (NULL);
1010
1011 ns = zone->zone_netstack;
1012 ASSERT(ns != NULL);
1013
1014 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
1015 ns = NULL;
1016 else
1017 netstack_hold(ns);
1018
1019 /* zone_find_by_id_nolock does not have a hold on the zone */
1020 return (ns);
1021 }
1022
1023 /*
1024 * Find a stack instance given the stackid, requiring an exact match.
1025 * Increases the reference count if found; caller must do a
1026 * netstack_rele().
1027 *
1028 * Skip the uninitialized ones.
1029 */
1030 netstack_t *
1031 netstack_find_by_stackid(netstackid_t stackid)
1032 {
1033 netstack_t *ns;
1034
1035 mutex_enter(&netstack_g_lock);
1036 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1037 mutex_enter(&ns->netstack_lock);
1038 if (ns->netstack_stackid == stackid &&
1039 !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1040 mutex_exit(&ns->netstack_lock);
1041 netstack_hold(ns);
1042 mutex_exit(&netstack_g_lock);
1043 return (ns);
1044 }
1045 mutex_exit(&ns->netstack_lock);
1046 }
1047 mutex_exit(&netstack_g_lock);
1048 return (NULL);
1049 }
1050
1051 boolean_t
1052 netstack_inuse_by_stackid(netstackid_t stackid)
1053 {
1054 netstack_t *ns;
1055 boolean_t rval = B_FALSE;
1056
1057 mutex_enter(&netstack_g_lock);
1058
1059 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1060 if (ns->netstack_stackid == stackid) {
1061 rval = B_TRUE;
1062 break;
1063 }
1064 }
1065
1066 mutex_exit(&netstack_g_lock);
1067
1068 return (rval);
1069 }
1070
1071
1072 static void
1073 netstack_reap(void *arg)
1074 {
1075 /* Indicate we took a semaphore to get here. */
1076 netstack_reap_work((netstack_t *)arg, B_TRUE);
1077 }
1078
1079 static void
1080 netstack_reap_intr(void *arg)
1081 {
1082 /* Indicate we did NOT TAKE a semaphore to get here. */
1083 netstack_reap_work((netstack_t *)arg, B_FALSE);
1084 }
1085
1086 static void
1087 netstack_reap_work(netstack_t *ns, boolean_t semaphore_signal)
1088 {
1089 netstack_t **nsp;
1090 boolean_t found;
1091 int i;
1092
1093 /*
1094 * Time to call the destroy functions and free up
1095 * the structure
1096 */
1097 netstack_stack_inactive(ns);
1098
1099 /* Make sure nothing increased the references */
1100 ASSERT(ns->netstack_refcnt == 0);
1101 ASSERT(ns->netstack_numzones == 0);
1102
1103 /* Finally remove from list of netstacks */
1104 mutex_enter(&netstack_g_lock);
1105 found = B_FALSE;
1106 for (nsp = &netstack_head; *nsp != NULL;
1107 nsp = &(*nsp)->netstack_next) {
1108 if (*nsp == ns) {
1109 *nsp = ns->netstack_next;
1110 ns->netstack_next = NULL;
1111 found = B_TRUE;
1112 break;
1113 }
1114 }
1115 ASSERT(found);
1116 mutex_exit(&netstack_g_lock);
1117
1118 /* Make sure nothing increased the references */
1119 ASSERT(ns->netstack_refcnt == 0);
1120 ASSERT(ns->netstack_numzones == 0);
1121
1122 ASSERT(ns->netstack_flags & NSF_CLOSING);
1123
1124 for (i = 0; i < NS_MAX; i++) {
1125 nm_state_t *nms = &ns->netstack_m_state[i];
1126
1127 cv_destroy(&nms->nms_cv);
1128 }
1129 mutex_destroy(&ns->netstack_lock);
1130 cv_destroy(&ns->netstack_cv);
1131 kmem_free(ns, sizeof (*ns));
1132 /* Allow another reap to be scheduled. */
1133 if (semaphore_signal)
1134 sema_v(&netstack_reap_limiter);
1135 }
1136
1137 void
1138 netstack_rele(netstack_t *ns)
1139 {
1140 int refcnt, numzones;
1141
1142 mutex_enter(&ns->netstack_lock);
1143 ASSERT(ns->netstack_refcnt > 0);
1144 ns->netstack_refcnt--;
1145 /*
1146 * As we drop the lock additional netstack_rele()s can come in
1147 * and decrement the refcnt to zero and free the netstack_t.
1148 * Store pointers in local variables and if we were not the last
1149 * then don't reference the netstack_t after that.
1150 */
1151 refcnt = ns->netstack_refcnt;
1152 numzones = ns->netstack_numzones;
1153 DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1154 mutex_exit(&ns->netstack_lock);
1155
1156 if (refcnt == 0 && numzones == 0) {
1157 boolean_t is_not_intr = !servicing_interrupt();
1158
1159 /*
1160 * Because there are possibilities of kstats being held by
1161 * callers, which would then be immediately freed, but held up
1162 * due to kstat's odd reference model recording the thread, we
1163 * choose to schedule the actual deletion of this netstack as
1164 * a deferred task on the system taskq. This way, any
1165 * store-the-thread-pointer semantics won't trip over
1166 * themselves.
1167 *
1168 * On the off chance this is called in interrupt context, we
1169 * cannot use the semaphore to enforce rate-limiting.
1170 */
1171 if (is_not_intr && sema_tryp(&netstack_reap_limiter) == 0) {
1172 /*
1173 * XXX KEBE SAYS indicate we're slamming against
1174 * a limit.
1175 */
1176 hrtime_t measurement = gethrtime();
1177
1178 sema_p(&netstack_reap_limiter);
1179 /* Capture the delay in nanoseconds. */
1180 DTRACE_PROBE1(netstack__reap__rate__limited,
1181 hrtime_t *, gethrtime() - measurement);
1182 }
1183
1184 if (taskq_dispatch(system_taskq,
1185 is_not_intr ? netstack_reap : netstack_reap_intr, ns,
1186 TQ_NOSLEEP) == NULL) {
1187 /*
1188 * Well shoot, why can't we taskq_dispatch?
1189 * Take our chances with a direct call.
1190 */
1191 DTRACE_PROBE1(netstack__reap__taskq__fail,
1192 netstack_t *, ns);
1193 netstack_reap_work(ns, is_not_intr);
1194 }
1195 }
1196 }
1197
1198 void
1199 netstack_hold(netstack_t *ns)
1200 {
1201 mutex_enter(&ns->netstack_lock);
1202 ns->netstack_refcnt++;
1203 ASSERT(ns->netstack_refcnt > 0);
1204 mutex_exit(&ns->netstack_lock);
1205 DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1206 }
1207
1208 /*
1209 * To support kstat_create_netstack() using kstat_zone_add we need
1210 * to track both
1211 * - all zoneids that use the global/shared stack
1212 * - all kstats that have been added for the shared stack
1213 */
1214 kstat_t *
1215 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1216 char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1217 netstackid_t ks_netstackid)
1218 {
1219 kstat_t *ks;
1220
1221 if (ks_netstackid == GLOBAL_NETSTACKID) {
1222 ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1223 ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1224 if (ks != NULL)
1225 netstack_shared_kstat_add(ks);
1226 return (ks);
1227 } else {
1228 zoneid_t zoneid = ks_netstackid;
1229
1230 return (kstat_create_zone(ks_module, ks_instance, ks_name,
1231 ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1232 }
1233 }
1234
1235 void
1236 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1237 {
1238 if (ks_netstackid == GLOBAL_NETSTACKID) {
1239 netstack_shared_kstat_remove(ks);
1240 }
1241 kstat_delete(ks);
1242 }
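/*
 * Example (a sketch with hypothetical names): a module creating a per-stack
 * kstat passes the netstackid from its create callback, so that a kstat
 * belonging to the shared stack becomes visible in every zone that uses
 * the shared stack; deletion is symmetric:
 *
 *	fs->fs_ksp = kstat_create_netstack("foo", 0, "foostats", "net",
 *	    KSTAT_TYPE_NAMED, FOO_NSTATS, 0, stackid);
 *	...
 *	kstat_delete_netstack(fs->fs_ksp, stackid);
 *
 * "foo", fs_ksp and FOO_NSTATS are illustrative only.
 */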
1243
1244 static void
1245 netstack_shared_zone_add(zoneid_t zoneid)
1246 {
1247 struct shared_zone_list *sz;
1248 struct shared_kstat_list *sk;
1249
1250 sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1251 sz->sz_zoneid = zoneid;
1252
1253 /* Insert in list */
1254 mutex_enter(&netstack_shared_lock);
1255 sz->sz_next = netstack_shared_zones;
1256 netstack_shared_zones = sz;
1257
1258 /*
1259 * Perform kstat_zone_add for each existing shared stack kstat.
1260 * Note: Holds netstack_shared_lock across kstat_zone_add.
1261 */
1262 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1263 kstat_zone_add(sk->sk_kstat, zoneid);
1264 }
1265 mutex_exit(&netstack_shared_lock);
1266 }
1267
1268 static void
1269 netstack_shared_zone_remove(zoneid_t zoneid)
1270 {
1271 struct shared_zone_list **szp, *sz;
1272 struct shared_kstat_list *sk;
1273
1274 /* Find in list */
1275 mutex_enter(&netstack_shared_lock);
1276 sz = NULL;
1277 for (szp = &netstack_shared_zones; *szp != NULL;
1278 szp = &((*szp)->sz_next)) {
1279 if ((*szp)->sz_zoneid == zoneid) {
1280 sz = *szp;
1281 break;
1282 }
1283 }
1284 /* We must find it */
1285 ASSERT(sz != NULL);
1286 *szp = sz->sz_next;
1287 sz->sz_next = NULL;
1288
1289 /*
1290 * Perform kstat_zone_remove for each existing shared stack kstat.
1291 * Note: Holds netstack_shared_lock across kstat_zone_remove.
1292 */
1293 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1294 kstat_zone_remove(sk->sk_kstat, zoneid);
1295 }
1296 mutex_exit(&netstack_shared_lock);
1297
1298 kmem_free(sz, sizeof (*sz));
1299 }
1300
1301 static void
1302 netstack_shared_kstat_add(kstat_t *ks)
1303 {
1304 struct shared_zone_list *sz;
1305 struct shared_kstat_list *sk;
1306
1307 sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1308 sk->sk_kstat = ks;
1309
1310 /* Insert in list */
1311 mutex_enter(&netstack_shared_lock);
1312 sk->sk_next = netstack_shared_kstats;
1313 netstack_shared_kstats = sk;
1314
1315 /*
1316 * Perform kstat_zone_add for each existing shared stack zone.
1317 * Note: Holds netstack_shared_lock across kstat_zone_add.
1318 */
1319 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1320 kstat_zone_add(ks, sz->sz_zoneid);
1321 }
1322 mutex_exit(&netstack_shared_lock);
1323 }
1324
1325 static void
1326 netstack_shared_kstat_remove(kstat_t *ks)
1327 {
1328 struct shared_zone_list *sz;
1329 struct shared_kstat_list **skp, *sk;
1330
1331 /* Find in list */
1332 mutex_enter(&netstack_shared_lock);
1333 sk = NULL;
1334 for (skp = &netstack_shared_kstats; *skp != NULL;
1335 skp = &((*skp)->sk_next)) {
1336 if ((*skp)->sk_kstat == ks) {
1337 sk = *skp;
1338 break;
1339 }
1340 }
1341 /* Must find it */
1342 ASSERT(sk != NULL);
1343 *skp = sk->sk_next;
1344 sk->sk_next = NULL;
1345
1346 /*
1347 * Perform kstat_zone_remove for each existing shared stack zone.
1348 * Note: Holds netstack_shared_lock across kstat_zone_remove.
1349 */
1350 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1351 kstat_zone_remove(ks, sz->sz_zoneid);
1352 }
1353 mutex_exit(&netstack_shared_lock);
1354 kmem_free(sk, sizeof (*sk));
1355 }
1356
1357 /*
1358 * Return true if the zoneid is one of the zones using the global/shared stack.
1359 */
1360 static boolean_t
1361 netstack_find_shared_zoneid(zoneid_t zoneid)
1362 {
1363 struct shared_zone_list *sz;
1364
1365 mutex_enter(&netstack_shared_lock);
1366 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1367 if (sz->sz_zoneid == zoneid) {
1368 mutex_exit(&netstack_shared_lock);
1369 return (B_TRUE);
1370 }
1371 }
1372 mutex_exit(&netstack_shared_lock);
1373 return (B_FALSE);
1374 }
1375
1376 /*
1377 * Hide the fact that zoneids and netstackids are allocated from
1378 * the same space in the current implementation.
1379 * We currently do not check that the stackid/zoneids are valid, since there
1380 * is no need for that. But this should only be done for ids that are
1381 * valid.
1382 */
1383 zoneid_t
1384 netstackid_to_zoneid(netstackid_t stackid)
1385 {
1386 return (stackid);
1387 }
1388
1389 netstackid_t
1390 zoneid_to_netstackid(zoneid_t zoneid)
1391 {
1392 if (netstack_find_shared_zoneid(zoneid))
1393 return (GLOBAL_NETSTACKID);
1394 else
1395 return (zoneid);
1396 }
1397
1398 zoneid_t
1399 netstack_get_zoneid(netstack_t *ns)
1400 {
1401 return (netstackid_to_zoneid(ns->netstack_stackid));
1402 }
1403
1404 /*
1405 * Simplistic support for walking all the handles.
1406 * Example usage:
1407 * netstack_handle_t nh;
1408 * netstack_t *ns;
1409 *
1410 * netstack_next_init(&nh);
1411 * while ((ns = netstack_next(&nh)) != NULL) {
1412 * do something;
1413 * netstack_rele(ns);
1414 * }
1415 * netstack_next_fini(&nh);
1416 */
1417 void
1418 netstack_next_init(netstack_handle_t *handle)
1419 {
1420 *handle = 0;
1421 }
1422
1423 /* ARGSUSED */
1424 void
1425 netstack_next_fini(netstack_handle_t *handle)
1426 {
1427 }
1428
1429 netstack_t *
1430 netstack_next(netstack_handle_t *handle)
1431 {
1432 netstack_t *ns;
1433 int i, end;
1434
1435 end = *handle;
1436 /* Walk skipping *handle number of instances */
1437
1438 /* Check whether there is a matching stack instance */
1439 mutex_enter(&netstack_g_lock);
1440 ns = netstack_head;
1441 for (i = 0; i < end; i++) {
1442 if (ns == NULL)
1443 break;
1444 ns = ns->netstack_next;
1445 }
1446 /* skip those that aren't really here */
1447 while (ns != NULL) {
1448 mutex_enter(&ns->netstack_lock);
1449 if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
1450 mutex_exit(&ns->netstack_lock);
1451 break;
1452 }
1453 mutex_exit(&ns->netstack_lock);
1454 end++;
1455 ns = ns->netstack_next;
1456 }
1457 if (ns != NULL) {
1458 *handle = end + 1;
1459 netstack_hold(ns);
1460 }
1461 mutex_exit(&netstack_g_lock);
1462 return (ns);
1463 }