1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
26 */
27
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <sys/vm.h>
31 #include <sys/proc.h>
32 #include <sys/tuneable.h>
33 #include <sys/systm.h>
34 #include <sys/cmn_err.h>
35 #include <sys/debug.h>
36 #include <sys/sdt.h>
37 #include <sys/mutex.h>
38 #include <sys/bitmap.h>
39 #include <sys/atomic.h>
40 #include <sys/sunddi.h>
41 #include <sys/kobj.h>
42 #include <sys/disp.h>
43 #include <vm/seg_kmem.h>
44 #include <sys/zone.h>
45 #include <sys/netstack.h>
46
47 /*
48 * What we use so that the zones framework can tell us about new zones,
49 * which we use to create new stacks.
50 */
51 static zone_key_t netstack_zone_key;
52
53 static int netstack_initialized = 0;
54
55 /*
56 * Track the registered netstacks.
57 * The global lock protects
58 * - ns_reg
59 * - the list starting at netstack_head and following the netstack_next
60 * pointers.
61 */
62 static kmutex_t netstack_g_lock;
63
64 /*
 * Registry of netstacks with their create/shutdown/destroy functions.
66 */
67 static struct netstack_registry ns_reg[NS_MAX];
68
69 /*
70 * Global list of existing stacks. We use this when a new zone with
71 * an exclusive IP instance is created.
72 *
73 * Note that in some cases a netstack_t needs to stay around after the zone
74 * has gone away. This is because there might be outstanding references
75 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
76 * structure and all the foo_stack_t's hanging off of it will be cleaned up
77 * when the last reference to it is dropped.
78 * However, the same zone might be rebooted. That is handled using the
79 * assumption that the zones framework picks a new zoneid each time a zone
80 * is (re)booted. We assert for that condition in netstack_zone_create().
81 * Thus the old netstack_t can take its time for things to time out.
82 */
83 static netstack_t *netstack_head;
84
85 /*
86 * To support kstat_create_netstack() using kstat_zone_add we need
87 * to track both
88 * - all zoneids that use the global/shared stack
89 * - all kstats that have been added for the shared stack
90 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;	/* next entry; NULL terminates */
	zoneid_t sz_zoneid;			/* a zone using the shared stack */
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;	/* next entry; NULL terminates */
	kstat_t *sk_kstat;		/* kstat created for the shared stack */
};
100
101 static kmutex_t netstack_shared_lock; /* protects the following two */
102 static struct shared_zone_list *netstack_shared_zones;
103 static struct shared_kstat_list *netstack_shared_kstats;
104
105 static void *netstack_zone_create(zoneid_t zoneid);
106 static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
107 static void netstack_zone_destroy(zoneid_t zoneid, void *arg);
108
109 static void netstack_shared_zone_add(zoneid_t zoneid);
110 static void netstack_shared_zone_remove(zoneid_t zoneid);
111 static void netstack_shared_kstat_add(kstat_t *ks);
112 static void netstack_shared_kstat_remove(kstat_t *ks);
113
114 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
115
116 static void apply_all_netstacks(int, applyfn_t *);
117 static void apply_all_modules(netstack_t *, applyfn_t *);
118 static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
119 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
120 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
121 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
122 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
123 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
124 kmutex_t *);
125
126 static void netstack_hold_locked(netstack_t *);
127 static void netstack_reap_work(netstack_t *, boolean_t);
128 ksema_t netstack_reap_limiter;
129
130 void
131 netstack_init(void)
132 {
133 mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
134 mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
135
136 /* XXX KEBE SAYS hard-coded constant needs to be fixed. */
137 sema_init(&netstack_reap_limiter, 1024, NULL, SEMA_DRIVER, NULL);
138
139 netstack_initialized = 1;
140
141 /*
142 * We want to be informed each time a zone is created or
143 * destroyed in the kernel, so we can maintain the
144 * stack instance information.
145 */
146 zone_key_create(&netstack_zone_key, netstack_zone_create,
147 netstack_zone_shutdown, netstack_zone_destroy);
148 }
149
150 /*
151 * Register a new module with the framework.
152 * This registers interest in changes to the set of netstacks.
153 * The createfn and destroyfn are required, but the shutdownfn can be
154 * NULL.
155 * Note that due to the current zsd implementation, when the create
156 * function is called the zone isn't fully present, thus functions
157 * like zone_find_by_* will fail, hence the create function can not
158 * use many zones kernel functions including zcmn_err().
159 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	/* A moduleid slot may only be registered once at a time. */
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
212
/*
 * Undo a netstack_register(): run the shutdown and destroy callbacks for
 * every existing netstack, then clear the registry slot so the module can
 * be loaded again later.
 */
void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here. We could actually have a netstack
		 * being created as we speak waiting for us to let go of this
		 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
		 * have gotten to the point of completing it yet. If
		 * NSS_CREATE_NEEDED, we can safely just remove it here and
		 * never create the module. However, if NSS_CREATE_INPROGRESS is
		 * set, we need to still flag this module for shutdown and
		 * deletion, just as though it had reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do a
		 * create, shutdown, or destroy, we ensure that nothing else is
		 * in progress in the system for this netstack and wait for it
		 * to complete. Secondly, because the zone is being created, we
		 * know that the following call to apply_all_netstack will block
		 * on the zone finishing its initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		/* Only schedule a shutdown if a create ran (or will run). */
		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
315
316 /*
317 * Lookup and/or allocate a netstack for this zone.
318 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t *zone;
	int i;

	ASSERT(netstack_initialized);

	/* Zone isn't fully set up yet, so use the nolock lookup. */
	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		/* Exclusive IP instance: stack id is the zone id */
		stackid = zoneid;
	} else {
		/* Look for the stack instance for the global */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			VERIFY(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	/*
	 * NOTE(review): this stores zoneid rather than stackid; they appear
	 * to be equal here (exclusive stacks use the zoneid as stackid, and
	 * the shared stack is presumably first created by the global zone,
	 * whose zoneid == GLOBAL_NETSTACKID) -- confirm; assigning "stackid"
	 * would state the intent more clearly.
	 */
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/* Run the scheduled create callbacks, locks dropped. */
	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}
418
419 /* ARGSUSED */
/*
 * Zone shutdown callback: run the registered shutdown functions for this
 * stack instance, but only when this is the last zone using the instance.
 */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing shutdown callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
480
481 /*
482 * Common routine to release a zone.
483 * If this was the last zone using the stack instance then prepare to
484 * have the refcnt dropping to zero free the zone.
485 */
486 /* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one taken in
	 * netstack_zone_create()
	 */
	netstack_rele(ns);
}
518
519 /*
520 * Called when the reference count drops to zero.
521 * Call the destroy functions for each registered module.
522 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		/* Schedule a late shutdown if one never ran. */
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
592
593 /*
594 * Apply a function to all netstacks for a particular moduleid.
595 *
596 * If there is any zone activity (due to a zone being created, shutdown,
597 * or destroyed) we wait for that to complete before we proceed. This ensures
598 * that the moduleids are processed in order when a zone is created or
599 * destroyed.
600 *
601 * The applyfn has to drop netstack_g_lock if it does some work.
602 * In that case we don't follow netstack_next,
603 * even if it is possible to do so without any hazards. This is
604 * because we want the design to allow for the list of netstacks threaded
605 * by netstack_next to change in any arbitrary way during the time the
606 * lock was dropped.
607 *
608 * It is safe to restart the loop at netstack_head since the applyfn
609 * changes netstack_m_state as it processes things, so a subsequent
610 * pass through will have no effect in applyfn, hence the loop will terminate
611 * in at worst O(N^2).
612 */
613 static void
614 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
615 {
616 netstack_t *ns;
617
618 mutex_enter(&netstack_g_lock);
619 ns = netstack_head;
620 while (ns != NULL) {
621 if (wait_for_zone_creator(ns, &netstack_g_lock)) {
622 /* Lock dropped - restart at head */
623 ns = netstack_head;
624 } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
625 /* Lock dropped - restart at head */
626 ns = netstack_head;
627 } else {
628 ns = ns->netstack_next;
629 }
630 }
631 mutex_exit(&netstack_g_lock);
632 }
633
634 /*
635 * Apply a function to all moduleids for a particular netstack.
636 *
637 * Since the netstack linkage doesn't matter in this case we can
638 * ignore whether the function drops the lock.
639 */
640 static void
641 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
642 {
643 int i;
644
645 mutex_enter(&netstack_g_lock);
646 for (i = 0; i < NS_MAX; i++) {
647 /*
648 * We don't care whether the lock was dropped
649 * since we are not iterating over netstack_head.
650 */
651 (void) (applyfn)(&netstack_g_lock, ns, i);
652 }
653 mutex_exit(&netstack_g_lock);
654 }
655
656 /* Like the above but in reverse moduleid order */
657 static void
658 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
659 {
660 int i;
661
662 mutex_enter(&netstack_g_lock);
663 for (i = NS_MAX-1; i >= 0; i--) {
664 /*
665 * We don't care whether the lock was dropped
666 * since we are not iterating over netstack_head.
667 */
668 (void) (applyfn)(&netstack_g_lock, ns, i);
669 }
670 mutex_exit(&netstack_g_lock);
671 }
672
673 /*
674 * Call the create function for the ns and moduleid if CREATE_NEEDED
675 * is set.
676 * If some other thread gets here first and sets *_INPROGRESS, then
677 * we wait for that thread to complete so that we can ensure that
678 * all the callbacks are done when we've looped over all netstacks/moduleids.
679 *
680 * When we call the create function, we temporarily drop the netstack_lock
681 * held by the caller, and return true to tell the caller it needs to
682 * re-evalute the state.
683 */
684 static boolean_t
685 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
686 {
687 void *result;
688 netstackid_t stackid;
689 nm_state_t *nms = &ns->netstack_m_state[moduleid];
690 boolean_t dropped = B_FALSE;
691
692 ASSERT(MUTEX_HELD(lockp));
693 mutex_enter(&ns->netstack_lock);
694
695 if (wait_for_nms_inprogress(ns, nms, lockp))
696 dropped = B_TRUE;
697
698 if (nms->nms_flags & NSS_CREATE_NEEDED) {
699 nms->nms_flags &= ~NSS_CREATE_NEEDED;
700 nms->nms_flags |= NSS_CREATE_INPROGRESS;
701 DTRACE_PROBE2(netstack__create__inprogress,
702 netstack_t *, ns, int, moduleid);
703 mutex_exit(&ns->netstack_lock);
704 mutex_exit(lockp);
705 dropped = B_TRUE;
706
707 ASSERT(ns_reg[moduleid].nr_create != NULL);
708 stackid = ns->netstack_stackid;
709 DTRACE_PROBE2(netstack__create__start,
710 netstackid_t, stackid,
711 netstack_t *, ns);
712 result = (ns_reg[moduleid].nr_create)(stackid, ns);
713 DTRACE_PROBE2(netstack__create__end,
714 void *, result, netstack_t *, ns);
715
716 ASSERT(result != NULL);
717 mutex_enter(lockp);
718 mutex_enter(&ns->netstack_lock);
719 ns->netstack_modules[moduleid] = result;
720 nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
721 nms->nms_flags |= NSS_CREATE_COMPLETED;
722 cv_broadcast(&nms->nms_cv);
723 DTRACE_PROBE2(netstack__create__completed,
724 netstack_t *, ns, int, moduleid);
725 mutex_exit(&ns->netstack_lock);
726 return (dropped);
727 } else {
728 mutex_exit(&ns->netstack_lock);
729 return (dropped);
730 }
731 }
732
733 /*
734 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
735 * is set.
736 * If some other thread gets here first and sets *_INPROGRESS, then
737 * we wait for that thread to complete so that we can ensure that
738 * all the callbacks are done when we've looped over all netstacks/moduleids.
739 *
740 * When we call the shutdown function, we temporarily drop the netstack_lock
741 * held by the caller, and return true to tell the caller it needs to
742 * re-evalute the state.
743 */
744 static boolean_t
745 netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
746 {
747 netstackid_t stackid;
748 void * netstack_module;
749 nm_state_t *nms = &ns->netstack_m_state[moduleid];
750 boolean_t dropped = B_FALSE;
751
752 ASSERT(MUTEX_HELD(lockp));
753 mutex_enter(&ns->netstack_lock);
754
755 if (wait_for_nms_inprogress(ns, nms, lockp))
756 dropped = B_TRUE;
757
758 if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
759 nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
760 nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
761 DTRACE_PROBE2(netstack__shutdown__inprogress,
762 netstack_t *, ns, int, moduleid);
763 mutex_exit(&ns->netstack_lock);
764 mutex_exit(lockp);
765 dropped = B_TRUE;
766
767 ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
768 stackid = ns->netstack_stackid;
769 netstack_module = ns->netstack_modules[moduleid];
770 DTRACE_PROBE2(netstack__shutdown__start,
771 netstackid_t, stackid,
772 void *, netstack_module);
773 (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
774 DTRACE_PROBE1(netstack__shutdown__end,
775 netstack_t *, ns);
776
777 mutex_enter(lockp);
778 mutex_enter(&ns->netstack_lock);
779 nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
780 nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
781 cv_broadcast(&nms->nms_cv);
782 DTRACE_PROBE2(netstack__shutdown__completed,
783 netstack_t *, ns, int, moduleid);
784 mutex_exit(&ns->netstack_lock);
785 return (dropped);
786 } else {
787 mutex_exit(&ns->netstack_lock);
788 return (dropped);
789 }
790 }
791
792 /*
793 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
794 * is set.
795 * If some other thread gets here first and sets *_INPROGRESS, then
796 * we wait for that thread to complete so that we can ensure that
797 * all the callbacks are done when we've looped over all netstacks/moduleids.
798 *
799 * When we call the destroy function, we temporarily drop the netstack_lock
800 * held by the caller, and return true to tell the caller it needs to
801 * re-evalute the state.
802 */
803 static boolean_t
804 netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
805 {
806 netstackid_t stackid;
807 void * netstack_module;
808 nm_state_t *nms = &ns->netstack_m_state[moduleid];
809 boolean_t dropped = B_FALSE;
810
811 ASSERT(MUTEX_HELD(lockp));
812 mutex_enter(&ns->netstack_lock);
813
814 if (wait_for_nms_inprogress(ns, nms, lockp))
815 dropped = B_TRUE;
816
817 if (nms->nms_flags & NSS_DESTROY_NEEDED) {
818 nms->nms_flags &= ~NSS_DESTROY_NEEDED;
819 nms->nms_flags |= NSS_DESTROY_INPROGRESS;
820 DTRACE_PROBE2(netstack__destroy__inprogress,
821 netstack_t *, ns, int, moduleid);
822 mutex_exit(&ns->netstack_lock);
823 mutex_exit(lockp);
824 dropped = B_TRUE;
825
826 ASSERT(ns_reg[moduleid].nr_destroy != NULL);
827 stackid = ns->netstack_stackid;
828 netstack_module = ns->netstack_modules[moduleid];
829 DTRACE_PROBE2(netstack__destroy__start,
830 netstackid_t, stackid,
831 void *, netstack_module);
832 (ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
833 DTRACE_PROBE1(netstack__destroy__end,
834 netstack_t *, ns);
835
836 mutex_enter(lockp);
837 mutex_enter(&ns->netstack_lock);
838 ns->netstack_modules[moduleid] = NULL;
839 nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
840 nms->nms_flags |= NSS_DESTROY_COMPLETED;
841 cv_broadcast(&nms->nms_cv);
842 DTRACE_PROBE2(netstack__destroy__completed,
843 netstack_t *, ns, int, moduleid);
844 mutex_exit(&ns->netstack_lock);
845 return (dropped);
846 } else {
847 mutex_exit(&ns->netstack_lock);
848 return (dropped);
849 }
850 }
851
852 /*
853 * If somebody is creating the netstack (due to a new zone being created)
854 * then we wait for them to complete. This ensures that any additional
855 * netstack_register() doesn't cause the create functions to run out of
856 * order.
857 * Note that we do not need such a global wait in the case of the shutdown
858 * and destroy callbacks, since in that case it is sufficient for both
859 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
860 * Returns true if lockp was temporarily dropped while waiting.
861 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		/* Can't block in cv_wait while holding the caller's lock */
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}
886
887 /*
888 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
889 * combination.
890 * Returns true if lockp was temporarily dropped while waiting.
891 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	/* Caller holds ns->netstack_lock, which guards nms_flags */
	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		/* Can't block in cv_wait while holding the caller's lock */
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}
914
915 /*
916 * Get the stack instance used in caller's zone.
917 * Increases the reference count, caller must do a netstack_rele.
918 * It can't be called after zone_destroy() has started.
919 */
920 netstack_t *
921 netstack_get_current(void)
922 {
923 netstack_t *ns;
924
925 ns = curproc->p_zone->zone_netstack;
926 ASSERT(ns != NULL);
927 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
928 return (NULL);
929
930 netstack_hold(ns);
931
932 return (ns);
933 }
934
935 /*
936 * Find a stack instance given the cred.
937 * This is used by the modules to potentially allow for a future when
938 * something other than the zoneid is used to determine the stack.
939 */
940 netstack_t *
941 netstack_find_by_cred(const cred_t *cr)
942 {
943 zoneid_t zoneid = crgetzoneid(cr);
944
945 /* Handle the case when cr_zone is NULL */
946 if (zoneid == (zoneid_t)-1)
947 zoneid = GLOBAL_ZONEID;
948
949 /* For performance ... */
950 if (curproc->p_zone->zone_id == zoneid)
951 return (netstack_get_current());
952 else
953 return (netstack_find_by_zoneid(zoneid));
954 }
955
956 /*
957 * Find a stack instance given the zoneid.
958 * Increases the reference count if found; caller must do a
959 * netstack_rele().
960 *
961 * If there is no exact match then assume the shared stack instance
962 * matches.
963 *
 * Skip the uninitialized ones.
965 */
966 netstack_t *
967 netstack_find_by_zoneid(zoneid_t zoneid)
968 {
969 netstack_t *ns;
970 zone_t *zone;
971
972 zone = zone_find_by_id(zoneid);
973
974 if (zone == NULL)
975 return (NULL);
976
977 ns = zone->zone_netstack;
978 ASSERT(ns != NULL);
979 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
980 ns = NULL;
981 else
982 netstack_hold(ns);
983
984 zone_rele(zone);
985 return (ns);
986 }
987
988 /*
989 * Find a stack instance given the zoneid. Can only be called from
990 * the create callback. See the comments in zone_find_by_id_nolock why
991 * that limitation exists.
992 *
993 * Increases the reference count if found; caller must do a
994 * netstack_rele().
995 *
996 * If there is no exact match then assume the shared stack instance
997 * matches.
998 *
 * Skip the uninitialized ones.
1000 */
1001 netstack_t *
1002 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
1003 {
1004 netstack_t *ns;
1005 zone_t *zone;
1006
1007 zone = zone_find_by_id_nolock(zoneid);
1008
1009 if (zone == NULL)
1010 return (NULL);
1011
1012 ns = zone->zone_netstack;
1013 ASSERT(ns != NULL);
1014
1015 if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
1016 ns = NULL;
1017 else
1018 netstack_hold(ns);
1019
1020 /* zone_find_by_id_nolock does not have a hold on the zone */
1021 return (ns);
1022 }
1023
1024 /*
 * Find a stack instance given the stackid, requiring an exact match.
1026 * Increases the reference count if found; caller must do a
1027 * netstack_rele().
1028 *
 * Skip the uninitialized ones.
1030 */
1031 netstack_t *
1032 netstack_find_by_stackid(netstackid_t stackid)
1033 {
1034 netstack_t *ns;
1035
1036 mutex_enter(&netstack_g_lock);
1037 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1038 mutex_enter(&ns->netstack_lock);
1039 if (ns->netstack_stackid == stackid &&
1040 !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1041 netstack_hold_locked(ns);
1042 mutex_exit(&ns->netstack_lock);
1043 mutex_exit(&netstack_g_lock);
1044 return (ns);
1045 }
1046 mutex_exit(&ns->netstack_lock);
1047 }
1048 mutex_exit(&netstack_g_lock);
1049 return (NULL);
1050 }
1051
1052 boolean_t
1053 netstack_inuse_by_stackid(netstackid_t stackid)
1054 {
1055 netstack_t *ns;
1056 boolean_t rval = B_FALSE;
1057
1058 mutex_enter(&netstack_g_lock);
1059
1060 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1061 if (ns->netstack_stackid == stackid) {
1062 rval = B_TRUE;
1063 break;
1064 }
1065 }
1066
1067 mutex_exit(&netstack_g_lock);
1068
1069 return (rval);
1070 }
1071
1072
1073 static void
1074 netstack_reap(void *arg)
1075 {
1076 /* Indicate we took a semaphore to get here. */
1077 netstack_reap_work((netstack_t *)arg, B_TRUE);
1078 }
1079
1080 static void
1081 netstack_reap_intr(void *arg)
1082 {
1083 /* Indicate we did NOT TAKE a semaphore to get here. */
1084 netstack_reap_work((netstack_t *)arg, B_FALSE);
1085 }
1086
/*
 * Final teardown of a netstack whose refcnt and zone count both reached
 * zero: run the destroy callbacks, unlink it from the global list, and
 * free it.  semaphore_signal indicates whether the caller acquired
 * netstack_reap_limiter (and thus whether we must sema_v it back).
 */
static void
netstack_reap_work(netstack_t *ns, boolean_t semaphore_signal)
{
	netstack_t **nsp;
	boolean_t found;
	int i;

	/*
	 * Time to call the destroy functions and free up
	 * the structure
	 */
	netstack_stack_inactive(ns);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	/* Finally remove from list of netstacks */
	mutex_enter(&netstack_g_lock);
	found = B_FALSE;
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &(*nsp)->netstack_next) {
		if (*nsp == ns) {
			/* Splice this entry out of the singly-linked list. */
			*nsp = ns->netstack_next;
			ns->netstack_next = NULL;
			found = B_TRUE;
			break;
		}
	}
	ASSERT(found);
	mutex_exit(&netstack_g_lock);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	ASSERT(ns->netstack_flags & NSF_CLOSING);

	/* Tear down the per-module condition variables. */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_destroy(&nms->nms_cv);
	}
	mutex_destroy(&ns->netstack_lock);
	cv_destroy(&ns->netstack_cv);
	kmem_free(ns, sizeof (*ns));
	/* Allow another reap to be scheduled. */
	if (semaphore_signal)
		sema_v(&netstack_reap_limiter);
}
1137
1138 void
1139 netstack_rele(netstack_t *ns)
1140 {
1141 int refcnt, numzones;
1142
1143 mutex_enter(&ns->netstack_lock);
1144 ASSERT(ns->netstack_refcnt > 0);
1145 ns->netstack_refcnt--;
1146 /*
1147 * As we drop the lock additional netstack_rele()s can come in
1148 * and decrement the refcnt to zero and free the netstack_t.
1149 * Store pointers in local variables and if we were not the last
1150 * then don't reference the netstack_t after that.
1151 */
1152 refcnt = ns->netstack_refcnt;
1153 numzones = ns->netstack_numzones;
1154 DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1155 mutex_exit(&ns->netstack_lock);
1156
1157 if (refcnt == 0 && numzones == 0) {
1158 boolean_t is_not_intr = !servicing_interrupt();
1159
1160 /*
1161 * Because there are possibilities of kstats being held by
1162 * callers, which would then be immediately freed, but held up
1163 * due to kstat's odd reference model recording the thread, we
1164 * choose to schedule the actual deletion of this netstack as
1165 * a deferred task on the system taskq. This way, any
1166 * store-the-thread-pointer semantics won't trip over
1167 * themselves.
1168 *
1169 * On the off chance this is called in interrupt context, we
1170 * cannot use the semaphore to enforce rate-limiting.
1171 */
1172 if (is_not_intr && sema_tryp(&netstack_reap_limiter) == 0) {
1173 /*
1174 * XXX KEBE SAYS inidicate we're slamming against
1175 * a limit.
1176 */
1177 hrtime_t measurement = gethrtime();
1178
1179 sema_p(&netstack_reap_limiter);
1180 /* Caputre delay in ns. */
1181 DTRACE_PROBE1(netstack__reap__rate__limited,
1182 hrtime_t *, gethrtime() - measurement);
1183 }
1184
1185 if (taskq_dispatch(system_taskq,
1186 is_not_intr ? netstack_reap : netstack_reap_intr, ns,
1187 TQ_NOSLEEP) == NULL) {
1188 /*
1189 * Well shoot, why can't we taskq_dispatch?
1190 * Take our chances with a direct call.
1191 */
1192 DTRACE_PROBE1(netstack__reap__taskq__fail,
1193 netstack_t *, ns);
1194 netstack_reap_work(ns, is_not_intr);
1195 }
1196 }
1197 }
1198
/*
 * Take a reference on the netstack; the caller must already hold
 * ns->netstack_lock.
 */
static void
netstack_hold_locked(netstack_t *ns)
{
	ASSERT(MUTEX_HELD(&ns->netstack_lock));
	ns->netstack_refcnt++;
	/* Catch wrap-around of the reference count. */
	ASSERT(ns->netstack_refcnt > 0);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
1207
/*
 * Take a reference on the netstack, acquiring and dropping
 * ns->netstack_lock around the increment.
 */
void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	netstack_hold_locked(ns);
	mutex_exit(&ns->netstack_lock);
}
1215
1216 /*
1217 * To support kstat_create_netstack() using kstat_zone_add we need
1218 * to track both
1219 * - all zoneids that use the global/shared stack
1220 * - all kstats that have been added for the shared stack
1221 */
1222 kstat_t *
1223 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1224 char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1225 netstackid_t ks_netstackid)
1226 {
1227 kstat_t *ks;
1228
1229 if (ks_netstackid == GLOBAL_NETSTACKID) {
1230 ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1231 ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1232 if (ks != NULL)
1233 netstack_shared_kstat_add(ks);
1234 return (ks);
1235 } else {
1236 zoneid_t zoneid = ks_netstackid;
1237
1238 return (kstat_create_zone(ks_module, ks_instance, ks_name,
1239 ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1240 }
1241 }
1242
1243 void
1244 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1245 {
1246 if (ks_netstackid == GLOBAL_NETSTACKID) {
1247 netstack_shared_kstat_remove(ks);
1248 }
1249 kstat_delete(ks);
1250 }
1251
1252 static void
1253 netstack_shared_zone_add(zoneid_t zoneid)
1254 {
1255 struct shared_zone_list *sz;
1256 struct shared_kstat_list *sk;
1257
1258 sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1259 sz->sz_zoneid = zoneid;
1260
1261 /* Insert in list */
1262 mutex_enter(&netstack_shared_lock);
1263 sz->sz_next = netstack_shared_zones;
1264 netstack_shared_zones = sz;
1265
1266 /*
1267 * Perform kstat_zone_add for each existing shared stack kstat.
1268 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1269 */
1270 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1271 kstat_zone_add(sk->sk_kstat, zoneid);
1272 }
1273 mutex_exit(&netstack_shared_lock);
1274 }
1275
1276 static void
1277 netstack_shared_zone_remove(zoneid_t zoneid)
1278 {
1279 struct shared_zone_list **szp, *sz;
1280 struct shared_kstat_list *sk;
1281
1282 /* Find in list */
1283 mutex_enter(&netstack_shared_lock);
1284 sz = NULL;
1285 for (szp = &netstack_shared_zones; *szp != NULL;
1286 szp = &((*szp)->sz_next)) {
1287 if ((*szp)->sz_zoneid == zoneid) {
1288 sz = *szp;
1289 break;
1290 }
1291 }
1292 /* We must find it */
1293 ASSERT(sz != NULL);
1294 *szp = sz->sz_next;
1295 sz->sz_next = NULL;
1296
1297 /*
1298 * Perform kstat_zone_remove for each existing shared stack kstat.
1299 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1300 */
1301 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1302 kstat_zone_remove(sk->sk_kstat, zoneid);
1303 }
1304 mutex_exit(&netstack_shared_lock);
1305
1306 kmem_free(sz, sizeof (*sz));
1307 }
1308
1309 static void
1310 netstack_shared_kstat_add(kstat_t *ks)
1311 {
1312 struct shared_zone_list *sz;
1313 struct shared_kstat_list *sk;
1314
1315 sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1316 sk->sk_kstat = ks;
1317
1318 /* Insert in list */
1319 mutex_enter(&netstack_shared_lock);
1320 sk->sk_next = netstack_shared_kstats;
1321 netstack_shared_kstats = sk;
1322
1323 /*
1324 * Perform kstat_zone_add for each existing shared stack zone.
1325 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1326 */
1327 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1328 kstat_zone_add(ks, sz->sz_zoneid);
1329 }
1330 mutex_exit(&netstack_shared_lock);
1331 }
1332
1333 static void
1334 netstack_shared_kstat_remove(kstat_t *ks)
1335 {
1336 struct shared_zone_list *sz;
1337 struct shared_kstat_list **skp, *sk;
1338
1339 /* Find in list */
1340 mutex_enter(&netstack_shared_lock);
1341 sk = NULL;
1342 for (skp = &netstack_shared_kstats; *skp != NULL;
1343 skp = &((*skp)->sk_next)) {
1344 if ((*skp)->sk_kstat == ks) {
1345 sk = *skp;
1346 break;
1347 }
1348 }
1349 /* Must find it */
1350 ASSERT(sk != NULL);
1351 *skp = sk->sk_next;
1352 sk->sk_next = NULL;
1353
1354 /*
1355 * Perform kstat_zone_remove for each existing shared stack kstat.
1356 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1357 */
1358 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1359 kstat_zone_remove(ks, sz->sz_zoneid);
1360 }
1361 mutex_exit(&netstack_shared_lock);
1362 kmem_free(sk, sizeof (*sk));
1363 }
1364
1365 /*
1366 * If a zoneid is part of the shared zone, return true
1367 */
1368 static boolean_t
1369 netstack_find_shared_zoneid(zoneid_t zoneid)
1370 {
1371 struct shared_zone_list *sz;
1372
1373 mutex_enter(&netstack_shared_lock);
1374 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1375 if (sz->sz_zoneid == zoneid) {
1376 mutex_exit(&netstack_shared_lock);
1377 return (B_TRUE);
1378 }
1379 }
1380 mutex_exit(&netstack_shared_lock);
1381 return (B_FALSE);
1382 }
1383
1384 /*
1385 * Hide the fact that zoneids and netstackids are allocated from
1386 * the same space in the current implementation.
1387 * We currently do not check that the stackid/zoneids are valid, since there
1388 * is no need for that. But this should only be done for ids that are
1389 * valid.
1390 */
1391 zoneid_t
1392 netstackid_to_zoneid(netstackid_t stackid)
1393 {
1394 return (stackid);
1395 }
1396
1397 netstackid_t
1398 zoneid_to_netstackid(zoneid_t zoneid)
1399 {
1400 if (netstack_find_shared_zoneid(zoneid))
1401 return (GLOBAL_ZONEID);
1402 else
1403 return (zoneid);
1404 }
1405
1406 zoneid_t
1407 netstack_get_zoneid(netstack_t *ns)
1408 {
1409 return (netstackid_to_zoneid(ns->netstack_stackid));
1410 }
1411
1412 /*
1413 * Simplistic support for walking all the handles.
1414 * Example usage:
1415 * netstack_handle_t nh;
1416 * netstack_t *ns;
1417 *
1418 * netstack_next_init(&nh);
1419 * while ((ns = netstack_next(&nh)) != NULL) {
1420 * do something;
1421 * netstack_rele(ns);
1422 * }
1423 * netstack_next_fini(&nh);
1424 */
void
netstack_next_init(netstack_handle_t *handle)
{
	/* The handle is a count of list positions already consumed. */
	*handle = 0;
}
1430
/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
	/* Nothing to release; the handle is just a counter. */
}
1436
/*
 * Return the next live netstack in the walk, with a hold taken; NULL
 * when the walk is exhausted.  The handle records how many list
 * positions have been consumed so far.
 */
netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* Skip instances that are initializing or closing */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		/* Count the skipped entry so the handle stays aligned */
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		/* Resume after this instance on the next call */
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}