1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
26 */
27
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <sys/vm.h>
31 #include <sys/proc.h>
32 #include <sys/tuneable.h>
33 #include <sys/systm.h>
34 #include <sys/cmn_err.h>
35 #include <sys/debug.h>
36 #include <sys/sdt.h>
37 #include <sys/mutex.h>
38 #include <sys/bitmap.h>
39 #include <sys/atomic.h>
40 #include <sys/sunddi.h>
41 #include <sys/kobj.h>
42 #include <sys/disp.h>
43 #include <vm/seg_kmem.h>
44 #include <sys/zone.h>
45 #include <sys/netstack.h>
46
47 /*
48 * What we use so that the zones framework can tell us about new zones,
49 * which we use to create new stacks.
50 */
51 static zone_key_t netstack_zone_key;
52
53 static int netstack_initialized = 0;
54
55 /*
56 * Track the registered netstacks.
57 * The global lock protects
58 * - ns_reg
59 * - the list starting at netstack_head and following the netstack_next
60 * pointers.
61 */
62 static kmutex_t netstack_g_lock;
63
64 /*
65 * Registry of netstacks with their create/shutdown/destory functions.
66 */
67 static struct netstack_registry ns_reg[NS_MAX];
68
69 /*
70 * Global list of existing stacks. We use this when a new zone with
71 * an exclusive IP instance is created.
72 *
73 * Note that in some cases a netstack_t needs to stay around after the zone
74 * has gone away. This is because there might be outstanding references
75 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
76 * structure and all the foo_stack_t's hanging off of it will be cleaned up
77 * when the last reference to it is dropped.
78 * However, the same zone might be rebooted. That is handled using the
79 * assumption that the zones framework picks a new zoneid each time a zone
80 * is (re)booted. We assert for that condition in netstack_zone_create().
81 * Thus the old netstack_t can take its time for things to time out.
82 */
83 static netstack_t *netstack_head;
84
85 /*
86 * To support kstat_create_netstack() using kstat_zone_add we need
87 * to track both
88 * - all zoneids that use the global/shared stack
89 * - all kstats that have been added for the shared stack
90 */
91 struct shared_zone_list {
92 struct shared_zone_list *sz_next;
93 zoneid_t sz_zoneid;
94 };
95
96 struct shared_kstat_list {
97 struct shared_kstat_list *sk_next;
98 kstat_t *sk_kstat;
99 };
100
101 static kmutex_t netstack_shared_lock; /* protects the following two */
102 static struct shared_zone_list *netstack_shared_zones;
103 static struct shared_kstat_list *netstack_shared_kstats;
104
105 static void *netstack_zone_create(zoneid_t zoneid);
106 static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
107 static void netstack_zone_destroy(zoneid_t zoneid, void *arg);
108
109 static void netstack_shared_zone_add(zoneid_t zoneid);
110 static void netstack_shared_zone_remove(zoneid_t zoneid);
111 static void netstack_shared_kstat_add(kstat_t *ks);
112 static void netstack_shared_kstat_remove(kstat_t *ks);
113
114 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
115
116 static void apply_all_netstacks(int, applyfn_t *);
117 static void apply_all_modules(netstack_t *, applyfn_t *);
118 static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
119 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
120 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
121 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
122 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
123 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
124 kmutex_t *);
125
126 static ksema_t netstack_reap_limiter;
127 /*
128 * Hard-coded constant, but since this is not tunable in real-time, it seems
129 * making it an /etc/system tunable is better than nothing.
130 */
131 uint_t netstack_outstanding_reaps = 1024;
132
133 void
134 netstack_init(void)
135 {
136 mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
137 mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
138
139 sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
140 SEMA_DRIVER, NULL);
141
142 netstack_initialized = 1;
143
144 /*
145 * We want to be informed each time a zone is created or
146 * destroyed in the kernel, so we can maintain the
147 * stack instance information.
148 */
149 zone_key_create(&netstack_zone_key, netstack_zone_create,
150 netstack_zone_shutdown, netstack_zone_destroy);
151 }
152
/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present, thus functions
 * like zone_find_by_* will fail, hence the create function can not
 * use many zones kernel functions including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.  The slot must be free (no double-register).
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		/* nms_flags is protected by the per-stack lock. */
		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
215
/*
 * Unregister a module: run the shutdown and destroy callbacks on every
 * existing stack instance, then release the registry slot so the module
 * can be loaded again later.
 */
void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here. We could actually have a netstack
		 * being created as we speak waiting for us to let go of this
		 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
		 * have gotten to the point of completing it yet. If
		 * NSS_CREATE_NEEDED, we can safely just remove it here and
		 * never create the module. However, if NSS_CREATE_INPROGRESS is
		 * set, we need to still flag this module for shutdown and
		 * deletion, just as though it had reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do a
		 * create, shutdown, or destroy, we ensure that nothing else is
		 * in progress in the system for this netstack and wait for it
		 * to complete. Secondly, because the zone is being created, we
		 * know that the following call to apply_all_netstacks will block
		 * on the zone finishing its initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		/* Only schedule a shutdown if the module was ever created. */
		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	/* Shutdown everywhere before destroying anywhere. */
	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
318
/*
 * Lookup and/or allocate a netstack for this zone.
 * ZSD create callback: returns the zone's netstack_t, either a freshly
 * created exclusive instance or the pre-existing shared (global) one.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t *zone;
	int i;

	ASSERT(netstack_initialized);

	/* In the ZSD create callback the zone can't be looked up normally. */
	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Look for the stack instance for the global */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			VERIFY(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	/*
	 * NOTE(review): zoneid is used here rather than stackid; for an
	 * exclusive stack they are identical, and presumably only the
	 * global zone can reach this point for the shared stack so that
	 * zoneid == GLOBAL_NETSTACKID there too -- confirm.
	 */
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;	/* append to the tail of the global list */
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/* Run all the flagged create callbacks in moduleid order. */
	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}
421
/*
 * ZSD shutdown callback: run the registered shutdown functions for this
 * zone's stack instance, but only when this is the last zone using it.
 */
/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		/* Only the shared stack can have multiple zones. */
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing shutdown callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
483
/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then prepare to
 * have the refcnt dropping to zero free the zone.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		/* Only the shared stack can have multiple zones. */
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one taken in
	 * netstack_zone_create(); the final rele tears the stack down.
	 */
	netstack_rele(ns);
}
521
/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		/* Shutdown first, if it never ran for this module ... */
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		/* ... then destroy. */
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
595
596 /*
597 * Apply a function to all netstacks for a particular moduleid.
598 *
599 * If there is any zone activity (due to a zone being created, shutdown,
600 * or destroyed) we wait for that to complete before we proceed. This ensures
601 * that the moduleids are processed in order when a zone is created or
602 * destroyed.
603 *
604 * The applyfn has to drop netstack_g_lock if it does some work.
605 * In that case we don't follow netstack_next,
606 * even if it is possible to do so without any hazards. This is
607 * because we want the design to allow for the list of netstacks threaded
608 * by netstack_next to change in any arbitrary way during the time the
609 * lock was dropped.
610 *
611 * It is safe to restart the loop at netstack_head since the applyfn
612 * changes netstack_m_state as it processes things, so a subsequent
613 * pass through will have no effect in applyfn, hence the loop will terminate
614 * in at worst O(N^2).
615 */
616 static void
617 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
618 {
619 netstack_t *ns;
620
621 mutex_enter(&netstack_g_lock);
622 ns = netstack_head;
623 while (ns != NULL) {
624 if (wait_for_zone_creator(ns, &netstack_g_lock)) {
625 /* Lock dropped - restart at head */
626 ns = netstack_head;
627 } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
628 /* Lock dropped - restart at head */
629 ns = netstack_head;
630 } else {
631 ns = ns->netstack_next;
632 }
633 }
634 mutex_exit(&netstack_g_lock);
635 }
636
637 /*
638 * Apply a function to all moduleids for a particular netstack.
639 *
640 * Since the netstack linkage doesn't matter in this case we can
641 * ignore whether the function drops the lock.
642 */
643 static void
644 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
645 {
646 int i;
647
648 mutex_enter(&netstack_g_lock);
649 for (i = 0; i < NS_MAX; i++) {
650 /*
651 * We don't care whether the lock was dropped
652 * since we are not iterating over netstack_head.
653 */
654 (void) (applyfn)(&netstack_g_lock, ns, i);
655 }
656 mutex_exit(&netstack_g_lock);
657 }
658
659 /* Like the above but in reverse moduleid order */
660 static void
661 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
662 {
663 int i;
664
665 mutex_enter(&netstack_g_lock);
666 for (i = NS_MAX-1; i >= 0; i--) {
667 /*
668 * We don't care whether the lock was dropped
669 * since we are not iterating over netstack_head.
670 */
671 (void) (applyfn)(&netstack_g_lock, ns, i);
672 }
673 mutex_exit(&netstack_g_lock);
674 }
675
/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the create function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evalute the state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	/* Let any concurrent create/shutdown/destroy finish first. */
	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_CREATE_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS under netstack_lock. */
		nms->nms_flags &= ~NSS_CREATE_NEEDED;
		nms->nms_flags |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		/* Drop both locks before calling out to the module. */
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		/* Create callbacks must not fail. */
		ASSERT(result != NULL);
		/* Re-acquire in the same order: caller's lock first. */
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
		nms->nms_flags |= NSS_CREATE_COMPLETED;
		/* Wake any wait_for_nms_inprogress() waiters. */
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
735
/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the shutdown function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evalute the state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void * netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	/* Let any concurrent create/shutdown/destroy finish first. */
	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS under netstack_lock. */
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		/* Drop both locks before calling out to the module. */
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		/* Re-acquire in the same order: caller's lock first. */
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		/* Wake any wait_for_nms_inprogress() waiters. */
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
794
/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the destroy function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evalute the state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void * netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	/* Let any concurrent create/shutdown/destroy finish first. */
	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS under netstack_lock. */
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		/* Drop both locks before calling out to the module. */
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		/* Re-acquire in the same order: caller's lock first. */
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		/* The module's per-stack state is gone now. */
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		/* Wake any wait_for_nms_inprogress() waiters. */
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
854
/*
 * If somebody is creating the netstack (due to a new zone being created)
 * then we wait for them to complete. This ensures that any additional
 * netstack_register() doesn't cause the create functions to run out of
 * order.
 * Note that we do not need such a global wait in the case of the shutdown
 * and destroy callbacks, since in that case it is sufficient for both
 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		/* Can't sleep holding the caller's lock (lock order). */
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		/* cv_wait atomically drops and re-takes netstack_lock. */
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}
889
/*
 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 * combination.
 * Caller holds ns->netstack_lock (and lockp, when non-NULL) on entry;
 * both are held again on return.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		/* Can't sleep holding the caller's lock (lock order). */
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		/* cv_wait atomically drops and re-takes netstack_lock. */
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}
917
918 /*
919 * Get the stack instance used in caller's zone.
920 * Increases the reference count, caller must do a netstack_rele.
921 * It can't be called after zone_destroy() has started.
922 */
923 netstack_t *
924 netstack_get_current(void)
925 {
926 netstack_t *ns;
927
928 ns = curproc->p_zone->zone_netstack;
929 ASSERT(ns != NULL);
930 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
931 return (NULL);
932
933 netstack_hold(ns);
934
935 return (ns);
936 }
937
938 /*
939 * Find a stack instance given the cred.
940 * This is used by the modules to potentially allow for a future when
941 * something other than the zoneid is used to determine the stack.
942 */
943 netstack_t *
944 netstack_find_by_cred(const cred_t *cr)
945 {
946 zoneid_t zoneid = crgetzoneid(cr);
947
948 /* Handle the case when cr_zone is NULL */
949 if (zoneid == (zoneid_t)-1)
950 zoneid = GLOBAL_ZONEID;
951
952 /* For performance ... */
953 if (curproc->p_zone->zone_id == zoneid)
954 return (netstack_get_current());
955 else
956 return (netstack_find_by_zoneid(zoneid));
957 }
958
959 /*
960 * Find a stack instance given the zoneid.
961 * Increases the reference count if found; caller must do a
962 * netstack_rele().
963 *
964 * If there is no exact match then assume the shared stack instance
965 * matches.
966 *
967 * Skip the unitialized ones.
968 */
969 netstack_t *
970 netstack_find_by_zoneid(zoneid_t zoneid)
971 {
972 netstack_t *ns;
973 zone_t *zone;
974
975 zone = zone_find_by_id(zoneid);
976
977 if (zone == NULL)
978 return (NULL);
979
980 ns = zone->zone_netstack;
981 ASSERT(ns != NULL);
982 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
983 ns = NULL;
984 else
985 netstack_hold(ns);
986
987 zone_rele(zone);
988 return (ns);
989 }
990
991 /*
992 * Find a stack instance given the zoneid. Can only be called from
993 * the create callback. See the comments in zone_find_by_id_nolock why
994 * that limitation exists.
995 *
996 * Increases the reference count if found; caller must do a
997 * netstack_rele().
998 *
999 * If there is no exact match then assume the shared stack instance
1000 * matches.
1001 *
1002 * Skip the unitialized ones.
1003 */
1004 netstack_t *
1005 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
1006 {
1007 netstack_t *ns;
1008 zone_t *zone;
1009
1010 zone = zone_find_by_id_nolock(zoneid);
1011
1012 if (zone == NULL)
1013 return (NULL);
1014
1015 ns = zone->zone_netstack;
1016 ASSERT(ns != NULL);
1017
1018 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
1019 ns = NULL;
1020 else
1021 netstack_hold(ns);
1022
1023 /* zone_find_by_id_nolock does not have a hold on the zone */
1024 return (ns);
1025 }
1026
1027 /*
1028 * Find a stack instance given the stackid with exact match?
1029 * Increases the reference count if found; caller must do a
1030 * netstack_rele().
1031 *
1032 * Skip the unitialized ones.
1033 */
1034 netstack_t *
1035 netstack_find_by_stackid(netstackid_t stackid)
1036 {
1037 netstack_t *ns;
1038
1039 mutex_enter(&netstack_g_lock);
1040 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1041 mutex_enter(&ns->netstack_lock);
1042 if (ns->netstack_stackid == stackid &&
1043 !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1044 mutex_exit(&ns->netstack_lock);
1045 netstack_hold(ns);
1046 mutex_exit(&netstack_g_lock);
1047 return (ns);
1048 }
1049 mutex_exit(&ns->netstack_lock);
1050 }
1051 mutex_exit(&netstack_g_lock);
1052 return (NULL);
1053 }
1054
1055 boolean_t
1056 netstack_inuse_by_stackid(netstackid_t stackid)
1057 {
1058 netstack_t *ns;
1059 boolean_t rval = B_FALSE;
1060
1061 mutex_enter(&netstack_g_lock);
1062
1063 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1064 if (ns->netstack_stackid == stackid) {
1065 rval = B_TRUE;
1066 break;
1067 }
1068 }
1069
1070 mutex_exit(&netstack_g_lock);
1071
1072 return (rval);
1073 }
1074
1075
/*
 * Taskq callback that performs the final teardown of a netstack_t once
 * its last reference and last zone are gone (dispatched from
 * netstack_rele()).  Runs the registered destroy callbacks, unlinks the
 * netstack from the global list, and frees the structure.
 */
static void
netstack_reap(void *arg)
{
	netstack_t **nsp, *ns = (netstack_t *)arg;
	boolean_t found;
	int i;

	/*
	 * Time to call the destroy functions and free up
	 * the structure
	 */
	netstack_stack_inactive(ns);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	/* Finally remove from list of netstacks */
	mutex_enter(&netstack_g_lock);
	found = B_FALSE;
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &(*nsp)->netstack_next) {
		if (*nsp == ns) {
			*nsp = ns->netstack_next;
			ns->netstack_next = NULL;
			found = B_TRUE;
			break;
		}
	}
	ASSERT(found);
	mutex_exit(&netstack_g_lock);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	ASSERT(ns->netstack_flags & NSF_CLOSING);

	/* Tear down the per-module synchronization state. */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_destroy(&nms->nms_cv);
	}
	mutex_destroy(&ns->netstack_lock);
	cv_destroy(&ns->netstack_cv);
	kmem_free(ns, sizeof (*ns));
	/* Allow another reap to be scheduled. */
	sema_v(&netstack_reap_limiter);
}
1125
/*
 * Drop a reference on the netstack.  When both the reference count and
 * the number of zones using the stack reach zero, schedule the actual
 * teardown (netstack_reap) on the system taskq.  Reaps are rate-limited
 * by netstack_reap_limiter.
 */
void
netstack_rele(netstack_t *ns)
{
	int refcnt, numzones;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Because there are possibilities of re-entrancy in various
		 * netstack structures by callers, which might cause a lock up
		 * due to odd reference models, or other factors, we choose to
		 * schedule the actual deletion of this netstack as a deferred
		 * task on the system taskq. This way, any such reference
		 * models won't trip over themselves.
		 *
		 * Assume we aren't in a high-priority interrupt context, so
		 * we can use KM_SLEEP and semaphores.
		 */
		if (sema_tryp(&netstack_reap_limiter) == 0) {
			/*
			 * Indicate we're slamming against a limit.
			 */
			hrtime_t measurement = gethrtime();

			/* Block until an outstanding reap completes. */
			sema_p(&netstack_reap_limiter);
			/* Capture delay in ns. */
			DTRACE_PROBE1(netstack__reap__rate__limited,
			    hrtime_t, gethrtime() - measurement);
		}

		/* TQ_SLEEP should prevent taskq_dispatch() from failing. */
		(void) taskq_dispatch(system_taskq, netstack_reap, ns,
		    TQ_SLEEP);
	}
}
1174
/*
 * Take an additional reference on the netstack; the caller must already
 * hold one (or otherwise know the stack cannot go away) and must drop
 * it with netstack_rele().
 */
void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);	/* catch wraparound */
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
1184
1185 /*
1186 * To support kstat_create_netstack() using kstat_zone_add we need
1187 * to track both
1188 * - all zoneids that use the global/shared stack
1189 * - all kstats that have been added for the shared stack
1190 */
1191 kstat_t *
1192 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1193 char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1194 netstackid_t ks_netstackid)
1195 {
1196 kstat_t *ks;
1197
1198 if (ks_netstackid == GLOBAL_NETSTACKID) {
1199 ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1200 ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1201 if (ks != NULL)
1202 netstack_shared_kstat_add(ks);
1203 return (ks);
1204 } else {
1205 zoneid_t zoneid = ks_netstackid;
1206
1207 return (kstat_create_zone(ks_module, ks_instance, ks_name,
1208 ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1209 }
1210 }
1211
1212 void
1213 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1214 {
1215 if (ks_netstackid == GLOBAL_NETSTACKID) {
1216 netstack_shared_kstat_remove(ks);
1217 }
1218 kstat_delete(ks);
1219 }
1220
1221 static void
1222 netstack_shared_zone_add(zoneid_t zoneid)
1223 {
1224 struct shared_zone_list *sz;
1225 struct shared_kstat_list *sk;
1226
1227 sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1228 sz->sz_zoneid = zoneid;
1229
1230 /* Insert in list */
1231 mutex_enter(&netstack_shared_lock);
1232 sz->sz_next = netstack_shared_zones;
1233 netstack_shared_zones = sz;
1234
1235 /*
1236 * Perform kstat_zone_add for each existing shared stack kstat.
1237 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1238 */
1239 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1240 kstat_zone_add(sk->sk_kstat, zoneid);
1241 }
1242 mutex_exit(&netstack_shared_lock);
1243 }
1244
1245 static void
1246 netstack_shared_zone_remove(zoneid_t zoneid)
1247 {
1248 struct shared_zone_list **szp, *sz;
1249 struct shared_kstat_list *sk;
1250
1251 /* Find in list */
1252 mutex_enter(&netstack_shared_lock);
1253 sz = NULL;
1254 for (szp = &netstack_shared_zones; *szp != NULL;
1255 szp = &((*szp)->sz_next)) {
1256 if ((*szp)->sz_zoneid == zoneid) {
1257 sz = *szp;
1258 break;
1259 }
1260 }
1261 /* We must find it */
1262 ASSERT(sz != NULL);
1263 *szp = sz->sz_next;
1264 sz->sz_next = NULL;
1265
1266 /*
1267 * Perform kstat_zone_remove for each existing shared stack kstat.
1268 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1269 */
1270 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1271 kstat_zone_remove(sk->sk_kstat, zoneid);
1272 }
1273 mutex_exit(&netstack_shared_lock);
1274
1275 kmem_free(sz, sizeof (*sz));
1276 }
1277
1278 static void
1279 netstack_shared_kstat_add(kstat_t *ks)
1280 {
1281 struct shared_zone_list *sz;
1282 struct shared_kstat_list *sk;
1283
1284 sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1285 sk->sk_kstat = ks;
1286
1287 /* Insert in list */
1288 mutex_enter(&netstack_shared_lock);
1289 sk->sk_next = netstack_shared_kstats;
1290 netstack_shared_kstats = sk;
1291
1292 /*
1293 * Perform kstat_zone_add for each existing shared stack zone.
1294 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1295 */
1296 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1297 kstat_zone_add(ks, sz->sz_zoneid);
1298 }
1299 mutex_exit(&netstack_shared_lock);
1300 }
1301
1302 static void
1303 netstack_shared_kstat_remove(kstat_t *ks)
1304 {
1305 struct shared_zone_list *sz;
1306 struct shared_kstat_list **skp, *sk;
1307
1308 /* Find in list */
1309 mutex_enter(&netstack_shared_lock);
1310 sk = NULL;
1311 for (skp = &netstack_shared_kstats; *skp != NULL;
1312 skp = &((*skp)->sk_next)) {
1313 if ((*skp)->sk_kstat == ks) {
1314 sk = *skp;
1315 break;
1316 }
1317 }
1318 /* Must find it */
1319 ASSERT(sk != NULL);
1320 *skp = sk->sk_next;
1321 sk->sk_next = NULL;
1322
1323 /*
1324 * Perform kstat_zone_remove for each existing shared stack kstat.
1325 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1326 */
1327 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1328 kstat_zone_remove(ks, sz->sz_zoneid);
1329 }
1330 mutex_exit(&netstack_shared_lock);
1331 kmem_free(sk, sizeof (*sk));
1332 }
1333
1334 /*
1335 * If a zoneid is part of the shared zone, return true
1336 */
1337 static boolean_t
1338 netstack_find_shared_zoneid(zoneid_t zoneid)
1339 {
1340 struct shared_zone_list *sz;
1341
1342 mutex_enter(&netstack_shared_lock);
1343 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1344 if (sz->sz_zoneid == zoneid) {
1345 mutex_exit(&netstack_shared_lock);
1346 return (B_TRUE);
1347 }
1348 }
1349 mutex_exit(&netstack_shared_lock);
1350 return (B_FALSE);
1351 }
1352
1353 /*
1354 * Hide the fact that zoneids and netstackids are allocated from
1355 * the same space in the current implementation.
1356 * We currently do not check that the stackid/zoneids are valid, since there
1357 * is no need for that. But this should only be done for ids that are
1358 * valid.
1359 */
1360 zoneid_t
1361 netstackid_to_zoneid(netstackid_t stackid)
1362 {
1363 return (stackid);
1364 }
1365
1366 netstackid_t
1367 zoneid_to_netstackid(zoneid_t zoneid)
1368 {
1369 if (netstack_find_shared_zoneid(zoneid))
1370 return (GLOBAL_ZONEID);
1371 else
1372 return (zoneid);
1373 }
1374
/*
 * Return the zoneid corresponding to the given netstack's stackid.
 */
zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
	return (netstackid_to_zoneid(ns->netstack_stackid));
}
1380
1381 /*
1382 * Simplistic support for walking all the handles.
1383 * Example usage:
1384 * netstack_handle_t nh;
1385 * netstack_t *ns;
1386 *
1387 * netstack_next_init(&nh);
1388 * while ((ns = netstack_next(&nh)) != NULL) {
1389 * do something;
1390 * netstack_rele(ns);
1391 * }
1392 * netstack_next_fini(&nh);
1393 */
/*
 * Initialize a walk handle; the handle is simply a count of how many
 * list entries have already been visited.
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}
1399
/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
	/* Nothing to release; the handle holds no resources. */
}
1405
1406 netstack_t *
1407 netstack_next(netstack_handle_t *handle)
1408 {
1409 netstack_t *ns;
1410 int i, end;
1411
1412 end = *handle;
1413 /* Walk skipping *handle number of instances */
1414
1415 /* Look if there is a matching stack instance */
1416 mutex_enter(&netstack_g_lock);
1417 ns = netstack_head;
1418 for (i = 0; i < end; i++) {
1419 if (ns == NULL)
1420 break;
1421 ns = ns->netstack_next;
1422 }
1423 /* skip those with that aren't really here */
1424 while (ns != NULL) {
1425 mutex_enter(&ns->netstack_lock);
1426 if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
1427 mutex_exit(&ns->netstack_lock);
1428 break;
1429 }
1430 mutex_exit(&ns->netstack_lock);
1431 end++;
1432 ns = ns->netstack_next;
1433 }
1434 if (ns != NULL) {
1435 *handle = end + 1;
1436 netstack_hold(ns);
1437 }
1438 mutex_exit(&netstack_g_lock);
1439 return (ns);
1440 }