1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright (c) 2016, Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/param.h>
  29 #include <sys/sysmacros.h>
  30 #include <sys/vm.h>
  31 #include <sys/proc.h>
  32 #include <sys/tuneable.h>
  33 #include <sys/systm.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/debug.h>
  36 #include <sys/sdt.h>
  37 #include <sys/mutex.h>
  38 #include <sys/bitmap.h>
  39 #include <sys/atomic.h>
  40 #include <sys/kobj.h>
  41 #include <sys/disp.h>
  42 #include <vm/seg_kmem.h>
  43 #include <sys/zone.h>
  44 #include <sys/netstack.h>
  45 
  46 /*
  47  * What we use so that the zones framework can tell us about new zones,
  48  * which we use to create new stacks.
  49  */
  50 static zone_key_t netstack_zone_key;
  51 
  52 static int      netstack_initialized = 0;
  53 
  54 /*
  55  * Track the registered netstacks.
  56  * The global lock protects
  57  * - ns_reg
  58  * - the list starting at netstack_head and following the netstack_next
  59  *   pointers.
  60  */
  61 static kmutex_t netstack_g_lock;
  62 
  63 /*
 * Registry of netstacks with their create/shutdown/destroy functions.
  65  */
  66 static struct netstack_registry ns_reg[NS_MAX];
  67 
  68 /*
  69  * Global list of existing stacks.  We use this when a new zone with
  70  * an exclusive IP instance is created.
  71  *
  72  * Note that in some cases a netstack_t needs to stay around after the zone
  73  * has gone away. This is because there might be outstanding references
  74  * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
  75  * structure and all the foo_stack_t's hanging off of it will be cleaned up
  76  * when the last reference to it is dropped.
  77  * However, the same zone might be rebooted. That is handled using the
  78  * assumption that the zones framework picks a new zoneid each time a zone
  79  * is (re)booted. We assert for that condition in netstack_zone_create().
  80  * Thus the old netstack_t can take its time for things to time out.
  81  */
  82 static netstack_t *netstack_head;
  83 
  84 /*
  85  * To support kstat_create_netstack() using kstat_zone_add we need
  86  * to track both
  87  *  - all zoneids that use the global/shared stack
  88  *  - all kstats that have been added for the shared stack
  89  */
  90 struct shared_zone_list {
  91         struct shared_zone_list *sz_next;
  92         zoneid_t                sz_zoneid;
  93 };
  94 
  95 struct shared_kstat_list {
  96         struct shared_kstat_list *sk_next;
  97         kstat_t                  *sk_kstat;
  98 };
  99 
 100 static kmutex_t netstack_shared_lock;   /* protects the following two */
 101 static struct shared_zone_list  *netstack_shared_zones;
 102 static struct shared_kstat_list *netstack_shared_kstats;
 103 
 104 static void     *netstack_zone_create(zoneid_t zoneid);
 105 static void     netstack_zone_shutdown(zoneid_t zoneid, void *arg);
 106 static void     netstack_zone_destroy(zoneid_t zoneid, void *arg);
 107 
 108 static void     netstack_shared_zone_add(zoneid_t zoneid);
 109 static void     netstack_shared_zone_remove(zoneid_t zoneid);
 110 static void     netstack_shared_kstat_add(kstat_t *ks);
 111 static void     netstack_shared_kstat_remove(kstat_t *ks);
 112 
 113 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
 114 
 115 static void     apply_all_netstacks(int, applyfn_t *);
 116 static void     apply_all_modules(netstack_t *, applyfn_t *);
 117 static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 118 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 119 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 120 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 121 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 122 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 123     kmutex_t *);
 124 
/*
 * One-time initialization of the netstack framework.  Sets up the global
 * locks and registers with the zones framework for zone lifecycle
 * callbacks.  Must run before any zone callback can fire:
 * netstack_zone_create() ASSERTs netstack_initialized.
 */
void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	/* Set before zone_key_create() so the callbacks see us as ready */
	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}
 141 
/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present, thus functions
 * like zone_find_by_* will fail, hence the create function can not
 * use many zones kernel functions including zcmn_err().
 *
 * moduleid must be a unique slot in [0, NS_MAX) that is not currently
 * registered; on return the create callback has been run (or is
 * guaranteed to run) for every existing netstack instance.
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	/* The slot must be free: registered at most once until unregistered */
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
 204 
/*
 * Unregister a module from the framework.  Runs the shutdown and destroy
 * callbacks for every netstack instance whose create callback completed
 * (or was in progress), waits for them all, then clears the registration
 * so the moduleid slot can be reused by a later netstack_register().
 */
void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here. We could actually have a netstack
		 * being created as we speak waiting for us to let go of this
		 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
		 * have gotten to the point of completing it yet. If
		 * NSS_CREATE_NEEDED, we can safely just remove it here and
		 * never create the module. However, if NSS_CREATE_INPROGRESS is
		 * set, we need to still flag this module for shutdown and
		 * deletion, just as though it had reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do a
		 * create, shutdown, or destroy, we ensure that nothing else is
		 * in progress in the system for this netstack and wait for it
		 * to complete. Secondly, because the zone is being created, we
		 * know that the following call to apply_all_netstacks will
		 * block on the zone finishing its initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		/* Schedule a shutdown only if the module provided one */
		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
 307 
/*
 * Lookup and/or allocate a netstack for this zone.
 *
 * Zones with an exclusive IP instance get their own netstack (with
 * stackid == zoneid); all other zones share the global stack.  Returns
 * the netstack_t, which is also recorded in zone->zone_netstack; the
 * zones framework passes it back to our shutdown/destroy callbacks.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t	*zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Look for the stack instance for the global */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack:
			 * zone reboot hands out a fresh zoneid (== stackid).
			 */
			VERIFY(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	/*
	 * NOTE(review): uses zoneid rather than stackid; assumes the two are
	 * equal on this path (exclusive stacks use stackid == zoneid, and
	 * only the global zone first-creates the shared stack) -- confirm
	 * GLOBAL_NETSTACKID equals the global zone's id.
	 */
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}
 410 
/*
 * Zone shutdown callback.  If this is the last zone using the stack
 * instance, run the registered shutdown callbacks for all modules on
 * this netstack; otherwise (shared stack still in use) do nothing.
 */
/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the shutdown callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
 472 
/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then prepare to
 * have the refcnt dropping to zero free the zone.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one taken in
	 * netstack_zone_create().  The final release (possibly later, once
	 * TIME_WAIT connections etc. drop their holds) triggers
	 * netstack_stack_inactive().
	 */
	netstack_rele(ns);
}
 510 
/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		/* Shutdown first, if it never ran for this module */
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
 584 
 585 /*
 586  * Apply a function to all netstacks for a particular moduleid.
 587  *
 588  * If there is any zone activity (due to a zone being created, shutdown,
 589  * or destroyed) we wait for that to complete before we proceed. This ensures
 590  * that the moduleids are processed in order when a zone is created or
 591  * destroyed.
 592  *
 593  * The applyfn has to drop netstack_g_lock if it does some work.
 594  * In that case we don't follow netstack_next,
 595  * even if it is possible to do so without any hazards. This is
 596  * because we want the design to allow for the list of netstacks threaded
 597  * by netstack_next to change in any arbitrary way during the time the
 598  * lock was dropped.
 599  *
 600  * It is safe to restart the loop at netstack_head since the applyfn
 601  * changes netstack_m_state as it processes things, so a subsequent
 602  * pass through will have no effect in applyfn, hence the loop will terminate
 603  * in at worst O(N^2).
 604  */
 605 static void
 606 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
 607 {
 608         netstack_t *ns;
 609 
 610         mutex_enter(&netstack_g_lock);
 611         ns = netstack_head;
 612         while (ns != NULL) {
 613                 if (wait_for_zone_creator(ns, &netstack_g_lock)) {
 614                         /* Lock dropped - restart at head */
 615                         ns = netstack_head;
 616                 } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
 617                         /* Lock dropped - restart at head */
 618                         ns = netstack_head;
 619                 } else {
 620                         ns = ns->netstack_next;
 621                 }
 622         }
 623         mutex_exit(&netstack_g_lock);
 624 }
 625 
 626 /*
 627  * Apply a function to all moduleids for a particular netstack.
 628  *
 629  * Since the netstack linkage doesn't matter in this case we can
 630  * ignore whether the function drops the lock.
 631  */
 632 static void
 633 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
 634 {
 635         int i;
 636 
 637         mutex_enter(&netstack_g_lock);
 638         for (i = 0; i < NS_MAX; i++) {
 639                 /*
 640                  * We don't care whether the lock was dropped
 641                  * since we are not iterating over netstack_head.
 642                  */
 643                 (void) (applyfn)(&netstack_g_lock, ns, i);
 644         }
 645         mutex_exit(&netstack_g_lock);
 646 }
 647 
 648 /* Like the above but in reverse moduleid order */
 649 static void
 650 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
 651 {
 652         int i;
 653 
 654         mutex_enter(&netstack_g_lock);
 655         for (i = NS_MAX-1; i >= 0; i--) {
 656                 /*
 657                  * We don't care whether the lock was dropped
 658                  * since we are not iterating over netstack_head.
 659                  */
 660                 (void) (applyfn)(&netstack_g_lock, ns, i);
 661         }
 662         mutex_exit(&netstack_g_lock);
 663 }
 664 
 665 /*
 666  * Call the create function for the ns and moduleid if CREATE_NEEDED
 667  * is set.
 668  * If some other thread gets here first and sets *_INPROGRESS, then
 669  * we wait for that thread to complete so that we can ensure that
 670  * all the callbacks are done when we've looped over all netstacks/moduleids.
 671  *
 672  * When we call the create function, we temporarily drop the netstack_lock
 673  * held by the caller, and return true to tell the caller it needs to
 674  * re-evalute the state.
 675  */
 676 static boolean_t
 677 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
 678 {
 679         void *result;
 680         netstackid_t stackid;
 681         nm_state_t *nms = &ns->netstack_m_state[moduleid];
 682         boolean_t dropped = B_FALSE;
 683 
 684         ASSERT(MUTEX_HELD(lockp));
 685         mutex_enter(&ns->netstack_lock);
 686 
 687         if (wait_for_nms_inprogress(ns, nms, lockp))
 688                 dropped = B_TRUE;
 689 
 690         if (nms->nms_flags & NSS_CREATE_NEEDED) {
 691                 nms->nms_flags &= ~NSS_CREATE_NEEDED;
 692                 nms->nms_flags |= NSS_CREATE_INPROGRESS;
 693                 DTRACE_PROBE2(netstack__create__inprogress,
 694                     netstack_t *, ns, int, moduleid);
 695                 mutex_exit(&ns->netstack_lock);
 696                 mutex_exit(lockp);
 697                 dropped = B_TRUE;
 698 
 699                 ASSERT(ns_reg[moduleid].nr_create != NULL);
 700                 stackid = ns->netstack_stackid;
 701                 DTRACE_PROBE2(netstack__create__start,
 702                     netstackid_t, stackid,
 703                     netstack_t *, ns);
 704                 result = (ns_reg[moduleid].nr_create)(stackid, ns);
 705                 DTRACE_PROBE2(netstack__create__end,
 706                     void *, result, netstack_t *, ns);
 707 
 708                 ASSERT(result != NULL);
 709                 mutex_enter(lockp);
 710                 mutex_enter(&ns->netstack_lock);
 711                 ns->netstack_modules[moduleid] = result;
 712                 nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
 713                 nms->nms_flags |= NSS_CREATE_COMPLETED;
 714                 cv_broadcast(&nms->nms_cv);
 715                 DTRACE_PROBE2(netstack__create__completed,
 716                     netstack_t *, ns, int, moduleid);
 717                 mutex_exit(&ns->netstack_lock);
 718                 return (dropped);
 719         } else {
 720                 mutex_exit(&ns->netstack_lock);
 721                 return (dropped);
 722         }
 723 }
 724 
 725 /*
 726  * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 727  * is set.
 728  * If some other thread gets here first and sets *_INPROGRESS, then
 729  * we wait for that thread to complete so that we can ensure that
 730  * all the callbacks are done when we've looped over all netstacks/moduleids.
 731  *
 732  * When we call the shutdown function, we temporarily drop the netstack_lock
 733  * held by the caller, and return true to tell the caller it needs to
 734  * re-evalute the state.
 735  */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void * netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	/* Let any in-flight callback for this netstack/module finish first */
	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		/*
		 * Claim the work by moving NEEDED -> INPROGRESS; the
		 * INPROGRESS flag keeps other threads out while we drop
		 * both locks across the callback.
		 */
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		/* Invoke the module's shutdown callback without locks held */
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		/* Reacquire in caller's order: lockp before netstack_lock */
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		/* Wake threads blocked in wait_for_nms_inprogress() */
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		/* Nothing to do for this netstack/module combination */
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
 783 
 784 /*
 785  * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 786  * is set.
 787  * If some other thread gets here first and sets *_INPROGRESS, then
 788  * we wait for that thread to complete so that we can ensure that
 789  * all the callbacks are done when we've looped over all netstacks/moduleids.
 790  *
 791  * When we call the destroy function, we temporarily drop the netstack_lock
 792  * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 794  */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void * netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	/* Let any in-flight callback for this netstack/module finish first */
	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		/*
		 * Claim the work by moving NEEDED -> INPROGRESS; the
		 * INPROGRESS flag keeps other threads out while we drop
		 * both locks across the callback.
		 */
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		/* Invoke the module's destroy callback without locks held */
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		/* Reacquire in caller's order: lockp before netstack_lock */
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		/* The module instance is gone; clear the stale pointer */
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		/* Wake threads blocked in wait_for_nms_inprogress() */
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		/* Nothing to do for this netstack/module combination */
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
 843 
 844 /*
 * If somebody is creating the netstack (due to a new zone being created)
 846  * then we wait for them to complete. This ensures that any additional
 847  * netstack_register() doesn't cause the create functions to run out of
 848  * order.
 849  * Note that we do not need such a global wait in the case of the shutdown
 850  * and destroy callbacks, since in that case it is sufficient for both
 851  * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 852  * Returns true if lockp was temporarily dropped while waiting.
 853  */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		/*
		 * cv_wait() only drops netstack_lock; lockp (if supplied)
		 * must be dropped explicitly so the zone-create thread
		 * can make progress while we sleep.
		 */
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}
 878 
 879 /*
 880  * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 881  * combination.
 882  * Returns true if lockp was temporarily dropped while waiting.
 883  */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	/* Caller holds ns->netstack_lock on entry; it is held on return */
	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		/*
		 * cv_wait() only drops netstack_lock; lockp (if supplied)
		 * must be dropped explicitly so the thread running the
		 * callback can complete and clear the INPROGRESS flag.
		 */
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}
 906 
 907 /*
 908  * Get the stack instance used in caller's zone.
 909  * Increases the reference count, caller must do a netstack_rele.
 910  * It can't be called after zone_destroy() has started.
 911  */
 912 netstack_t *
 913 netstack_get_current(void)
 914 {
 915         netstack_t *ns;
 916 
 917         ns = curproc->p_zone->zone_netstack;
 918         ASSERT(ns != NULL);
 919         if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
 920                 return (NULL);
 921 
 922         netstack_hold(ns);
 923 
 924         return (ns);
 925 }
 926 
 927 /*
 928  * Find a stack instance given the cred.
 929  * This is used by the modules to potentially allow for a future when
 930  * something other than the zoneid is used to determine the stack.
 931  */
 932 netstack_t *
 933 netstack_find_by_cred(const cred_t *cr)
 934 {
 935         zoneid_t zoneid = crgetzoneid(cr);
 936 
 937         /* Handle the case when cr_zone is NULL */
 938         if (zoneid == (zoneid_t)-1)
 939                 zoneid = GLOBAL_ZONEID;
 940 
 941         /* For performance ... */
 942         if (curproc->p_zone->zone_id == zoneid)
 943                 return (netstack_get_current());
 944         else
 945                 return (netstack_find_by_zoneid(zoneid));
 946 }
 947 
 948 /*
 949  * Find a stack instance given the zoneid.
 950  * Increases the reference count if found; caller must do a
 951  * netstack_rele().
 952  *
 953  * If there is no exact match then assume the shared stack instance
 954  * matches.
 955  *
 * Skip the uninitialized ones.
 957  */
 958 netstack_t *
 959 netstack_find_by_zoneid(zoneid_t zoneid)
 960 {
 961         netstack_t *ns;
 962         zone_t *zone;
 963 
 964         zone = zone_find_by_id(zoneid);
 965 
 966         if (zone == NULL)
 967                 return (NULL);
 968 
 969         ns = zone->zone_netstack;
 970         ASSERT(ns != NULL);
 971         if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
 972                 ns = NULL;
 973         else
 974                 netstack_hold(ns);
 975 
 976         zone_rele(zone);
 977         return (ns);
 978 }
 979 
 980 /*
 981  * Find a stack instance given the zoneid. Can only be called from
 982  * the create callback. See the comments in zone_find_by_id_nolock why
 983  * that limitation exists.
 984  *
 985  * Increases the reference count if found; caller must do a
 986  * netstack_rele().
 987  *
 988  * If there is no exact match then assume the shared stack instance
 989  * matches.
 990  *
 * Skip the uninitialized ones.
 992  */
 993 netstack_t *
 994 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
 995 {
 996         netstack_t *ns;
 997         zone_t *zone;
 998 
 999         zone = zone_find_by_id_nolock(zoneid);
1000 
1001         if (zone == NULL)
1002                 return (NULL);
1003 
1004         ns = zone->zone_netstack;
1005         ASSERT(ns != NULL);
1006 
1007         if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
1008                 ns = NULL;
1009         else
1010                 netstack_hold(ns);
1011 
1012         /* zone_find_by_id_nolock does not have a hold on the zone */
1013         return (ns);
1014 }
1015 
1016 /*
 * Find a stack instance given the stackid, requiring an exact match.
1018  * Increases the reference count if found; caller must do a
1019  * netstack_rele().
1020  *
 * Skip the uninitialized ones.
1022  */
1023 netstack_t *
1024 netstack_find_by_stackid(netstackid_t stackid)
1025 {
1026         netstack_t *ns;
1027 
1028         mutex_enter(&netstack_g_lock);
1029         for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1030                 mutex_enter(&ns->netstack_lock);
1031                 if (ns->netstack_stackid == stackid &&
1032                     !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1033                         mutex_exit(&ns->netstack_lock);
1034                         netstack_hold(ns);
1035                         mutex_exit(&netstack_g_lock);
1036                         return (ns);
1037                 }
1038                 mutex_exit(&ns->netstack_lock);
1039         }
1040         mutex_exit(&netstack_g_lock);
1041         return (NULL);
1042 }
1043 
1044 boolean_t
1045 netstack_inuse_by_stackid(netstackid_t stackid)
1046 {
1047         netstack_t *ns;
1048         boolean_t rval = B_FALSE;
1049 
1050         mutex_enter(&netstack_g_lock);
1051 
1052         for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1053                 if (ns->netstack_stackid == stackid) {
1054                         rval = B_TRUE;
1055                         break;
1056                 }
1057         }
1058 
1059         mutex_exit(&netstack_g_lock);
1060 
1061         return (rval);
1062 }
1063 
/*
 * Drop a reference on the netstack. The last release (once all zone
 * references are also gone) runs the destroy callbacks, unlinks the
 * netstack from the global list, and frees it.
 */
void
netstack_rele(netstack_t *ns)
{
	netstack_t **nsp;
	boolean_t found;
	int refcnt, numzones;
	int i;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Time to call the destroy functions and free up
		 * the structure
		 */
		netstack_stack_inactive(ns);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		/* Finally remove from list of netstacks */
		mutex_enter(&netstack_g_lock);
		found = B_FALSE;
		for (nsp = &netstack_head; *nsp != NULL;
		    nsp = &(*nsp)->netstack_next) {
			if (*nsp == ns) {
				/* Unlink by redirecting predecessor's next */
				*nsp = ns->netstack_next;
				ns->netstack_next = NULL;
				found = B_TRUE;
				break;
			}
		}
		ASSERT(found);
		mutex_exit(&netstack_g_lock);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		ASSERT(ns->netstack_flags & NSF_CLOSING);

		/* Tear down per-module condition variables */
		for (i = 0; i < NS_MAX; i++) {
			nm_state_t *nms = &ns->netstack_m_state[i];

			cv_destroy(&nms->nms_cv);
		}
		mutex_destroy(&ns->netstack_lock);
		cv_destroy(&ns->netstack_cv);
		kmem_free(ns, sizeof (*ns));
	}
}
1128 
/*
 * Take an additional reference on the netstack; paired with
 * netstack_rele().
 */
void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	/* Guard against refcnt wrapping */
	ASSERT(ns->netstack_refcnt > 0);
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
1138 
1139 /*
1140  * To support kstat_create_netstack() using kstat_zone_add we need
1141  * to track both
1142  *  - all zoneids that use the global/shared stack
1143  *  - all kstats that have been added for the shared stack
1144  */
1145 kstat_t *
1146 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1147     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1148     netstackid_t ks_netstackid)
1149 {
1150         kstat_t *ks;
1151 
1152         if (ks_netstackid == GLOBAL_NETSTACKID) {
1153                 ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1154                     ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1155                 if (ks != NULL)
1156                         netstack_shared_kstat_add(ks);
1157                 return (ks);
1158         } else {
1159                 zoneid_t zoneid = ks_netstackid;
1160 
1161                 return (kstat_create_zone(ks_module, ks_instance, ks_name,
1162                     ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1163         }
1164 }
1165 
1166 void
1167 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1168 {
1169         if (ks_netstackid == GLOBAL_NETSTACKID) {
1170                 netstack_shared_kstat_remove(ks);
1171         }
1172         kstat_delete(ks);
1173 }
1174 
1175 static void
1176 netstack_shared_zone_add(zoneid_t zoneid)
1177 {
1178         struct shared_zone_list *sz;
1179         struct shared_kstat_list *sk;
1180 
1181         sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1182         sz->sz_zoneid = zoneid;
1183 
1184         /* Insert in list */
1185         mutex_enter(&netstack_shared_lock);
1186         sz->sz_next = netstack_shared_zones;
1187         netstack_shared_zones = sz;
1188 
1189         /*
1190          * Perform kstat_zone_add for each existing shared stack kstat.
1191          * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1192          */
1193         for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1194                 kstat_zone_add(sk->sk_kstat, zoneid);
1195         }
1196         mutex_exit(&netstack_shared_lock);
1197 }
1198 
1199 static void
1200 netstack_shared_zone_remove(zoneid_t zoneid)
1201 {
1202         struct shared_zone_list **szp, *sz;
1203         struct shared_kstat_list *sk;
1204 
1205         /* Find in list */
1206         mutex_enter(&netstack_shared_lock);
1207         sz = NULL;
1208         for (szp = &netstack_shared_zones; *szp != NULL;
1209             szp = &((*szp)->sz_next)) {
1210                 if ((*szp)->sz_zoneid == zoneid) {
1211                         sz = *szp;
1212                         break;
1213                 }
1214         }
1215         /* We must find it */
1216         ASSERT(sz != NULL);
1217         *szp = sz->sz_next;
1218         sz->sz_next = NULL;
1219 
1220         /*
1221          * Perform kstat_zone_remove for each existing shared stack kstat.
1222          * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1223          */
1224         for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1225                 kstat_zone_remove(sk->sk_kstat, zoneid);
1226         }
1227         mutex_exit(&netstack_shared_lock);
1228 
1229         kmem_free(sz, sizeof (*sz));
1230 }
1231 
1232 static void
1233 netstack_shared_kstat_add(kstat_t *ks)
1234 {
1235         struct shared_zone_list *sz;
1236         struct shared_kstat_list *sk;
1237 
1238         sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1239         sk->sk_kstat = ks;
1240 
1241         /* Insert in list */
1242         mutex_enter(&netstack_shared_lock);
1243         sk->sk_next = netstack_shared_kstats;
1244         netstack_shared_kstats = sk;
1245 
1246         /*
1247          * Perform kstat_zone_add for each existing shared stack zone.
1248          * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1249          */
1250         for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1251                 kstat_zone_add(ks, sz->sz_zoneid);
1252         }
1253         mutex_exit(&netstack_shared_lock);
1254 }
1255 
1256 static void
1257 netstack_shared_kstat_remove(kstat_t *ks)
1258 {
1259         struct shared_zone_list *sz;
1260         struct shared_kstat_list **skp, *sk;
1261 
1262         /* Find in list */
1263         mutex_enter(&netstack_shared_lock);
1264         sk = NULL;
1265         for (skp = &netstack_shared_kstats; *skp != NULL;
1266             skp = &((*skp)->sk_next)) {
1267                 if ((*skp)->sk_kstat == ks) {
1268                         sk = *skp;
1269                         break;
1270                 }
1271         }
1272         /* Must find it */
1273         ASSERT(sk != NULL);
1274         *skp = sk->sk_next;
1275         sk->sk_next = NULL;
1276 
1277         /*
1278          * Perform kstat_zone_remove for each existing shared stack kstat.
1279          * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1280          */
1281         for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1282                 kstat_zone_remove(ks, sz->sz_zoneid);
1283         }
1284         mutex_exit(&netstack_shared_lock);
1285         kmem_free(sk, sizeof (*sk));
1286 }
1287 
1288 /*
1289  * If a zoneid is part of the shared zone, return true
1290  */
1291 static boolean_t
1292 netstack_find_shared_zoneid(zoneid_t zoneid)
1293 {
1294         struct shared_zone_list *sz;
1295 
1296         mutex_enter(&netstack_shared_lock);
1297         for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1298                 if (sz->sz_zoneid == zoneid) {
1299                         mutex_exit(&netstack_shared_lock);
1300                         return (B_TRUE);
1301                 }
1302         }
1303         mutex_exit(&netstack_shared_lock);
1304         return (B_FALSE);
1305 }
1306 
1307 /*
1308  * Hide the fact that zoneids and netstackids are allocated from
1309  * the same space in the current implementation.
1310  * We currently do not check that the stackid/zoneids are valid, since there
1311  * is no need for that. But this should only be done for ids that are
1312  * valid.
1313  */
1314 zoneid_t
1315 netstackid_to_zoneid(netstackid_t stackid)
1316 {
1317         return (stackid);
1318 }
1319 
1320 netstackid_t
1321 zoneid_to_netstackid(zoneid_t zoneid)
1322 {
1323         if (netstack_find_shared_zoneid(zoneid))
1324                 return (GLOBAL_ZONEID);
1325         else
1326                 return (zoneid);
1327 }
1328 
/*
 * Return the zoneid owning the netstack; the stackid doubles as the
 * zoneid in the current implementation.
 */
zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
	return (netstackid_to_zoneid(ns->netstack_stackid));
}
1334 
1335 /*
1336  * Simplistic support for walking all the handles.
1337  * Example usage:
1338  *      netstack_handle_t nh;
1339  *      netstack_t *ns;
1340  *
1341  *      netstack_next_init(&nh);
1342  *      while ((ns = netstack_next(&nh)) != NULL) {
1343  *              do something;
1344  *              netstack_rele(ns);
1345  *      }
1346  *      netstack_next_fini(&nh);
1347  */
/*
 * Start a walk at the head of the netstack list; the handle counts how
 * many entries netstack_next() has already consumed.
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}
1353 
/*
 * End a walk started with netstack_next_init(); currently nothing to
 * release, kept for interface symmetry.
 */
/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}
1359 
/*
 * Return the next usable netstack for the walk, with a hold that the
 * caller must drop with netstack_rele(). Returns NULL at end of list.
 */
netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* Skip instances that are uninitialized or being torn down */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		/* Count the skipped entry so the next call moves past it */
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		/* Resume one past this entry on the next call */
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}