8901 netstack_find_by_stackid() drops-and-reacquires
Reviewed by: Jason King <jbk@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Ryan Zezeski <rpz@joyent.com>
    
      
    
    
          --- old/usr/src/uts/common/os/netstack.c
          +++ new/usr/src/uts/common/os/netstack.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  26   26   */
  27   27  
  28   28  #include <sys/param.h>
  29   29  #include <sys/sysmacros.h>
  30   30  #include <sys/vm.h>
  31   31  #include <sys/proc.h>
  32   32  #include <sys/tuneable.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/cmn_err.h>
  35   35  #include <sys/debug.h>
  36   36  #include <sys/sdt.h>
  37   37  #include <sys/mutex.h>
  38   38  #include <sys/bitmap.h>
  39   39  #include <sys/atomic.h>
  40   40  #include <sys/sunddi.h>
  41   41  #include <sys/kobj.h>
  42   42  #include <sys/disp.h>
  43   43  #include <vm/seg_kmem.h>
  44   44  #include <sys/zone.h>
  45   45  #include <sys/netstack.h>
  46   46  
  47   47  /*
   48   48   * The key we use so that the zones framework can tell us about new zones,
  49   49   * which we use to create new stacks.
  50   50   */
  51   51  static zone_key_t netstack_zone_key;
  52   52  
  53   53  static int      netstack_initialized = 0;
  54   54  
  55   55  /*
  56   56   * Track the registered netstacks.
  57   57   * The global lock protects
  58   58   * - ns_reg
  59   59   * - the list starting at netstack_head and following the netstack_next
  60   60   *   pointers.
  61   61   */
  62   62  static kmutex_t netstack_g_lock;
  63   63  
  64   64  /*
  65   65   * Registry of netstacks with their create/shutdown/destory functions.
  66   66   */
  67   67  static struct netstack_registry ns_reg[NS_MAX];
  68   68  
  69   69  /*
  70   70   * Global list of existing stacks.  We use this when a new zone with
  71   71   * an exclusive IP instance is created.
  72   72   *
  73   73   * Note that in some cases a netstack_t needs to stay around after the zone
  74   74   * has gone away. This is because there might be outstanding references
  75   75   * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
  76   76   * structure and all the foo_stack_t's hanging off of it will be cleaned up
  77   77   * when the last reference to it is dropped.
  78   78   * However, the same zone might be rebooted. That is handled using the
  79   79   * assumption that the zones framework picks a new zoneid each time a zone
  80   80   * is (re)booted. We assert for that condition in netstack_zone_create().
  81   81   * Thus the old netstack_t can take its time for things to time out.
  82   82   */
  83   83  static netstack_t *netstack_head;
  84   84  
  85   85  /*
  86   86   * To support kstat_create_netstack() using kstat_zone_add we need
  87   87   * to track both
  88   88   *  - all zoneids that use the global/shared stack
  89   89   *  - all kstats that have been added for the shared stack
  90   90   */
  91   91  struct shared_zone_list {
  92   92          struct shared_zone_list *sz_next;
  93   93          zoneid_t                sz_zoneid;
  94   94  };
  95   95  
  96   96  struct shared_kstat_list {
  97   97          struct shared_kstat_list *sk_next;
  98   98          kstat_t                  *sk_kstat;
  99   99  };
 100  100  
 101  101  static kmutex_t netstack_shared_lock;   /* protects the following two */
 102  102  static struct shared_zone_list  *netstack_shared_zones;
 103  103  static struct shared_kstat_list *netstack_shared_kstats;
 104  104  
 105  105  static void     *netstack_zone_create(zoneid_t zoneid);
 106  106  static void     netstack_zone_shutdown(zoneid_t zoneid, void *arg);
 107  107  static void     netstack_zone_destroy(zoneid_t zoneid, void *arg);
 108  108  
 109  109  static void     netstack_shared_zone_add(zoneid_t zoneid);
 110  110  static void     netstack_shared_zone_remove(zoneid_t zoneid);
 111  111  static void     netstack_shared_kstat_add(kstat_t *ks);
 112  112  static void     netstack_shared_kstat_remove(kstat_t *ks);
 113  113  
 114  114  typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
 115  115  
  
 116  116  static void     apply_all_netstacks(int, applyfn_t *);
 117  117  static void     apply_all_modules(netstack_t *, applyfn_t *);
 118  118  static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 119  119  static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 120  120  static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 121  121  static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 122  122  static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 123  123  static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 124  124      kmutex_t *);
 125  125  
      126 +static void netstack_hold_locked(netstack_t *);
      127 +
 126  128  static ksema_t netstack_reap_limiter;
 127  129  /*
 128  130   * A hard-coded default; since this is not tunable at runtime, making it
 129  131   * an /etc/system tunable is better than nothing.
 130  132   */
 131  133  uint_t netstack_outstanding_reaps = 1024;
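Reviewer note: the semaphore below is initialized from this variable once, in
netstack_init(), so the cap can only be adjusted via /etc/system. A sketch of
how an administrator might raise it (the value shown is illustrative):

        set netstack_outstanding_reaps = 2048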
 132  134  
 133  135  void
 134  136  netstack_init(void)
 135  137  {
 136  138          mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
 137  139          mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
 138  140  
 139  141          sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
 140  142              SEMA_DRIVER, NULL);
 141  143  
 142  144          netstack_initialized = 1;
 143  145  
 144  146          /*
 145  147           * We want to be informed each time a zone is created or
 146  148           * destroyed in the kernel, so we can maintain the
 147  149           * stack instance information.
 148  150           */
 149  151          zone_key_create(&netstack_zone_key, netstack_zone_create,
 150  152              netstack_zone_shutdown, netstack_zone_destroy);
 151  153  }
 152  154  
 153  155  /*
 154  156   * Register a new module with the framework.
 155  157   * This registers interest in changes to the set of netstacks.
 156  158   * The createfn and destroyfn are required, but the shutdownfn can be
 157  159   * NULL.
 158  160   * Note that due to the current zsd implementation, when the create
 159  161   * function is called the zone isn't fully present, thus functions
  160  162   * like zone_find_by_* will fail; hence the create function cannot
  161  163   * use many of the zones framework's kernel functions, including zcmn_err().
 162  164   */
 163  165  void
 164  166  netstack_register(int moduleid,
 165  167      void *(*module_create)(netstackid_t, netstack_t *),
 166  168      void (*module_shutdown)(netstackid_t, void *),
 167  169      void (*module_destroy)(netstackid_t, void *))
 168  170  {
 169  171          netstack_t *ns;
 170  172  
 171  173          ASSERT(netstack_initialized);
 172  174          ASSERT(moduleid >= 0 && moduleid < NS_MAX);
 173  175          ASSERT(module_create != NULL);
 174  176  
 175  177          /*
 176  178           * Make instances created after this point in time run the create
 177  179           * callback.
 178  180           */
 179  181          mutex_enter(&netstack_g_lock);
 180  182          ASSERT(ns_reg[moduleid].nr_create == NULL);
 181  183          ASSERT(ns_reg[moduleid].nr_flags == 0);
 182  184          ns_reg[moduleid].nr_create = module_create;
 183  185          ns_reg[moduleid].nr_shutdown = module_shutdown;
 184  186          ns_reg[moduleid].nr_destroy = module_destroy;
 185  187          ns_reg[moduleid].nr_flags = NRF_REGISTERED;
 186  188  
 187  189          /*
 188  190           * Determine the set of stacks that exist before we drop the lock.
 189  191           * Set NSS_CREATE_NEEDED for each of those.
 190  192           * netstacks which have been deleted will have NSS_CREATE_COMPLETED
 191  193           * set, but check NSF_CLOSING to be sure.
 192  194           */
 193  195          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
 194  196                  nm_state_t *nms = &ns->netstack_m_state[moduleid];
 195  197  
 196  198                  mutex_enter(&ns->netstack_lock);
 197  199                  if (!(ns->netstack_flags & NSF_CLOSING) &&
 198  200                      (nms->nms_flags & NSS_CREATE_ALL) == 0) {
 199  201                          nms->nms_flags |= NSS_CREATE_NEEDED;
 200  202                          DTRACE_PROBE2(netstack__create__needed,
 201  203                              netstack_t *, ns, int, moduleid);
 202  204                  }
 203  205                  mutex_exit(&ns->netstack_lock);
 204  206          }
 205  207          mutex_exit(&netstack_g_lock);
 206  208  
 207  209          /*
 208  210           * At this point in time a new instance can be created or an instance
 209  211           * can be destroyed, or some other module can register or unregister.
 210  212           * Make sure we either run all the create functions for this moduleid
 211  213           * or we wait for any other creators for this moduleid.
 212  214           */
 213  215          apply_all_netstacks(moduleid, netstack_apply_create);
 214  216  }
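Reviewer note: for context, a consumer would typically call netstack_register()
from its module-init path, passing callbacks that match the signatures above. A
minimal sketch, assuming a hypothetical module slot NS_FOO and per-stack payload
foo_stack_t (all "foo" names are illustrative, not part of this change):

        #include <sys/netstack.h>
        #include <sys/kmem.h>

        typedef struct foo_stack {
                netstackid_t fs_stackid;
        } foo_stack_t;

        /* ARGSUSED */
        static void *
        foo_stack_create(netstackid_t stackid, netstack_t *ns)
        {
                foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);

                fs->fs_stackid = stackid;
                /* Must be non-NULL; netstack_apply_create() ASSERTs this. */
                return (fs);
        }

        /* ARGSUSED */
        static void
        foo_stack_destroy(netstackid_t stackid, void *arg)
        {
                kmem_free(arg, sizeof (foo_stack_t));
        }

        void
        foo_init(void)
        {
                /* The shutdown callback may be NULL; create/destroy may not. */
                netstack_register(NS_FOO, foo_stack_create, NULL,
                    foo_stack_destroy);
        }

On module unload the same consumer would call netstack_unregister(NS_FOO),
which runs any pending shutdown/destroy callbacks as described below.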
 215  217  
 216  218  void
 217  219  netstack_unregister(int moduleid)
 218  220  {
 219  221          netstack_t *ns;
 220  222  
 221  223          ASSERT(moduleid >= 0 && moduleid < NS_MAX);
 222  224  
 223  225          ASSERT(ns_reg[moduleid].nr_create != NULL);
 224  226          ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
 225  227  
 226  228          mutex_enter(&netstack_g_lock);
 227  229          /*
 228  230           * Determine the set of stacks that exist before we drop the lock.
 229  231           * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
 230  232           * That ensures that when we return all the callbacks for existing
 231  233           * instances have completed. And since we set NRF_DYING no new
 232  234           * instances can use this module.
 233  235           */
 234  236          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
 235  237                  boolean_t created = B_FALSE;
 236  238                  nm_state_t *nms = &ns->netstack_m_state[moduleid];
 237  239  
 238  240                  mutex_enter(&ns->netstack_lock);
 239  241  
 240  242                  /*
 241  243                   * We need to be careful here. We could actually have a netstack
 242  244                   * being created as we speak waiting for us to let go of this
 243  245                   * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
 244  246                   * have gotten to the point of completing it yet. If
 245  247                   * NSS_CREATE_NEEDED, we can safely just remove it here and
 246  248                   * never create the module. However, if NSS_CREATE_INPROGRESS is
 247  249                   * set, we need to still flag this module for shutdown and
 248  250                   * deletion, just as though it had reached NSS_CREATE_COMPLETED.
 249  251                   *
 250  252                   * It is safe to do that because of two different guarantees
 251  253                   * that exist in the system. The first is that before we do a
 252  254                   * create, shutdown, or destroy, we ensure that nothing else is
 253  255                   * in progress in the system for this netstack and wait for it
 254  256                   * to complete. Secondly, because the zone is being created, we
  255  257   * know that the following call to apply_all_netstacks will block
 256  258                   * on the zone finishing its initialization.
 257  259                   */
 258  260                  if (nms->nms_flags & NSS_CREATE_NEEDED)
 259  261                          nms->nms_flags &= ~NSS_CREATE_NEEDED;
 260  262  
 261  263                  if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
 262  264                      nms->nms_flags & NSS_CREATE_COMPLETED)
 263  265                          created = B_TRUE;
 264  266  
 265  267                  if (ns_reg[moduleid].nr_shutdown != NULL && created &&
 266  268                      (nms->nms_flags & NSS_CREATE_COMPLETED) &&
 267  269                      (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
 268  270                          nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
 269  271                          DTRACE_PROBE2(netstack__shutdown__needed,
 270  272                              netstack_t *, ns, int, moduleid);
 271  273                  }
 272  274                  if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
 273  275                      ns_reg[moduleid].nr_destroy != NULL && created &&
 274  276                      (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
 275  277                          nms->nms_flags |= NSS_DESTROY_NEEDED;
 276  278                          DTRACE_PROBE2(netstack__destroy__needed,
 277  279                              netstack_t *, ns, int, moduleid);
 278  280                  }
 279  281                  mutex_exit(&ns->netstack_lock);
 280  282          }
 281  283          /*
 282  284           * Prevent any new netstack from calling the registered create
 283  285           * function, while keeping the function pointers in place until the
 284  286           * shutdown and destroy callbacks are complete.
 285  287           */
 286  288          ns_reg[moduleid].nr_flags |= NRF_DYING;
 287  289          mutex_exit(&netstack_g_lock);
 288  290  
 289  291          apply_all_netstacks(moduleid, netstack_apply_shutdown);
 290  292          apply_all_netstacks(moduleid, netstack_apply_destroy);
 291  293  
 292  294          /*
 293  295           * Clear the nms_flags so that we can handle this module
 294  296           * being loaded again.
 295  297           * Also remove the registered functions.
 296  298           */
 297  299          mutex_enter(&netstack_g_lock);
 298  300          ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
 299  301          ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
 300  302          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
 301  303                  nm_state_t *nms = &ns->netstack_m_state[moduleid];
 302  304  
 303  305                  mutex_enter(&ns->netstack_lock);
 304  306                  if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
 305  307                          nms->nms_flags = 0;
 306  308                          DTRACE_PROBE2(netstack__destroy__done,
 307  309                              netstack_t *, ns, int, moduleid);
 308  310                  }
 309  311                  mutex_exit(&ns->netstack_lock);
 310  312          }
 311  313  
 312  314          ns_reg[moduleid].nr_create = NULL;
 313  315          ns_reg[moduleid].nr_shutdown = NULL;
 314  316          ns_reg[moduleid].nr_destroy = NULL;
 315  317          ns_reg[moduleid].nr_flags = 0;
 316  318          mutex_exit(&netstack_g_lock);
 317  319  }
 318  320  
 319  321  /*
 320  322   * Lookup and/or allocate a netstack for this zone.
 321  323   */
 322  324  static void *
 323  325  netstack_zone_create(zoneid_t zoneid)
 324  326  {
 325  327          netstackid_t stackid;
 326  328          netstack_t *ns;
 327  329          netstack_t **nsp;
 328  330          zone_t  *zone;
 329  331          int i;
 330  332  
 331  333          ASSERT(netstack_initialized);
 332  334  
 333  335          zone = zone_find_by_id_nolock(zoneid);
 334  336          ASSERT(zone != NULL);
 335  337  
 336  338          if (zone->zone_flags & ZF_NET_EXCL) {
 337  339                  stackid = zoneid;
 338  340          } else {
 339  341                  /* Look for the stack instance for the global */
 340  342                  stackid = GLOBAL_NETSTACKID;
 341  343          }
 342  344  
 343  345          /* Allocate even if it isn't needed; simplifies locking */
 344  346          ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
 345  347  
 346  348          /* Look if there is a matching stack instance */
 347  349          mutex_enter(&netstack_g_lock);
 348  350          for (nsp = &netstack_head; *nsp != NULL;
 349  351              nsp = &((*nsp)->netstack_next)) {
 350  352                  if ((*nsp)->netstack_stackid == stackid) {
 351  353                          /*
 352  354                           * Should never find a pre-existing exclusive stack
 353  355                           */
 354  356                          VERIFY(stackid == GLOBAL_NETSTACKID);
 355  357                          kmem_free(ns, sizeof (netstack_t));
 356  358                          ns = *nsp;
 357  359                          mutex_enter(&ns->netstack_lock);
 358  360                          ns->netstack_numzones++;
 359  361                          mutex_exit(&ns->netstack_lock);
 360  362                          mutex_exit(&netstack_g_lock);
 361  363                          DTRACE_PROBE1(netstack__inc__numzones,
 362  364                              netstack_t *, ns);
 363  365                          /* Record that we have a new shared stack zone */
 364  366                          netstack_shared_zone_add(zoneid);
 365  367                          zone->zone_netstack = ns;
 366  368                          return (ns);
 367  369                  }
 368  370          }
 369  371          /* Not found */
 370  372          mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
 371  373          cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
 372  374          ns->netstack_stackid = zoneid;
 373  375          ns->netstack_numzones = 1;
 374  376          ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
 375  377          ns->netstack_flags = NSF_UNINIT;
 376  378          *nsp = ns;
 377  379          zone->zone_netstack = ns;
 378  380  
 379  381          mutex_enter(&ns->netstack_lock);
 380  382          /*
 381  383           * Mark this netstack as having a CREATE running so
 382  384           * any netstack_register/netstack_unregister waits for
 383  385           * the existing create callbacks to complete in moduleid order
 384  386           */
 385  387          ns->netstack_flags |= NSF_ZONE_CREATE;
 386  388  
 387  389          /*
 388  390           * Determine the set of module create functions that need to be
 389  391           * called before we drop the lock.
 390  392           * Set NSS_CREATE_NEEDED for each of those.
 391  393           * Skip any with NRF_DYING set, since those are in the process of
 392  394           * going away, by checking for flags being exactly NRF_REGISTERED.
 393  395           */
 394  396          for (i = 0; i < NS_MAX; i++) {
 395  397                  nm_state_t *nms = &ns->netstack_m_state[i];
 396  398  
 397  399                  cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
 398  400  
 399  401                  if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
 400  402                      (nms->nms_flags & NSS_CREATE_ALL) == 0) {
 401  403                          nms->nms_flags |= NSS_CREATE_NEEDED;
 402  404                          DTRACE_PROBE2(netstack__create__needed,
 403  405                              netstack_t *, ns, int, i);
 404  406                  }
 405  407          }
 406  408          mutex_exit(&ns->netstack_lock);
 407  409          mutex_exit(&netstack_g_lock);
 408  410  
 409  411          apply_all_modules(ns, netstack_apply_create);
 410  412  
 411  413          /* Tell any waiting netstack_register/netstack_unregister to proceed */
 412  414          mutex_enter(&ns->netstack_lock);
 413  415          ns->netstack_flags &= ~NSF_UNINIT;
 414  416          ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
 415  417          ns->netstack_flags &= ~NSF_ZONE_CREATE;
 416  418          cv_broadcast(&ns->netstack_cv);
 417  419          mutex_exit(&ns->netstack_lock);
 418  420  
 419  421          return (ns);
 420  422  }
 421  423  
 422  424  /* ARGSUSED */
 423  425  static void
 424  426  netstack_zone_shutdown(zoneid_t zoneid, void *arg)
 425  427  {
 426  428          netstack_t *ns = (netstack_t *)arg;
 427  429          int i;
 428  430  
 429  431          ASSERT(arg != NULL);
 430  432  
 431  433          mutex_enter(&ns->netstack_lock);
 432  434          ASSERT(ns->netstack_numzones > 0);
 433  435          if (ns->netstack_numzones != 1) {
 434  436                  /* Stack instance being used by other zone */
 435  437                  mutex_exit(&ns->netstack_lock);
 436  438                  ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
 437  439                  return;
 438  440          }
 439  441          mutex_exit(&ns->netstack_lock);
 440  442  
 441  443          mutex_enter(&netstack_g_lock);
 442  444          mutex_enter(&ns->netstack_lock);
 443  445          /*
 444  446           * Mark this netstack as having a SHUTDOWN running so
 445  447           * any netstack_register/netstack_unregister waits for
  446  448   * the existing shutdown callbacks to complete in moduleid order
 447  449           */
 448  450          ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
 449  451          ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
 450  452  
 451  453          /*
 452  454           * Determine the set of stacks that exist before we drop the lock.
 453  455           * Set NSS_SHUTDOWN_NEEDED for each of those.
 454  456           */
 455  457          for (i = 0; i < NS_MAX; i++) {
 456  458                  nm_state_t *nms = &ns->netstack_m_state[i];
 457  459  
 458  460                  if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
 459  461                      ns_reg[i].nr_shutdown != NULL &&
 460  462                      (nms->nms_flags & NSS_CREATE_COMPLETED) &&
 461  463                      (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
 462  464                          nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
 463  465                          DTRACE_PROBE2(netstack__shutdown__needed,
 464  466                              netstack_t *, ns, int, i);
 465  467                  }
 466  468          }
 467  469          mutex_exit(&ns->netstack_lock);
 468  470          mutex_exit(&netstack_g_lock);
 469  471  
 470  472          /*
 471  473           * Call the shutdown function for all registered modules for this
 472  474           * netstack.
 473  475           */
 474  476          apply_all_modules_reverse(ns, netstack_apply_shutdown);
 475  477  
 476  478          /* Tell any waiting netstack_register/netstack_unregister to proceed */
 477  479          mutex_enter(&ns->netstack_lock);
 478  480          ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
 479  481          ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
 480  482          cv_broadcast(&ns->netstack_cv);
 481  483          mutex_exit(&ns->netstack_lock);
 482  484  }
 483  485  
 484  486  /*
 485  487   * Common routine to release a zone.
  486  488   * If this was the last zone using the stack instance, then arrange for
  487  489   * the refcnt dropping to zero to free the stack.
 488  490   */
 489  491  /* ARGSUSED */
 490  492  static void
 491  493  netstack_zone_destroy(zoneid_t zoneid, void *arg)
 492  494  {
 493  495          netstack_t *ns = (netstack_t *)arg;
 494  496  
 495  497          ASSERT(arg != NULL);
 496  498  
 497  499          mutex_enter(&ns->netstack_lock);
 498  500          ASSERT(ns->netstack_numzones > 0);
 499  501          ns->netstack_numzones--;
 500  502          if (ns->netstack_numzones != 0) {
 501  503                  /* Stack instance being used by other zone */
 502  504                  mutex_exit(&ns->netstack_lock);
 503  505                  ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
  504  506                  /* Record that a shared stack zone has gone away */
 505  507                  netstack_shared_zone_remove(zoneid);
 506  508                  return;
 507  509          }
 508  510          /*
 509  511           * Set CLOSING so that netstack_find_by will not find it.
 510  512           */
 511  513          ns->netstack_flags |= NSF_CLOSING;
 512  514          mutex_exit(&ns->netstack_lock);
 513  515          DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
 514  516          /* No other thread can call zone_destroy for this stack */
 515  517  
 516  518          /*
  517  519           * Decrease refcnt to account for the one in netstack_zone_create()
 518  520           */
 519  521          netstack_rele(ns);
 520  522  }
 521  523  
 522  524  /*
 523  525   * Called when the reference count drops to zero.
 524  526   * Call the destroy functions for each registered module.
 525  527   */
 526  528  static void
 527  529  netstack_stack_inactive(netstack_t *ns)
 528  530  {
 529  531          int i;
 530  532  
 531  533          mutex_enter(&netstack_g_lock);
 532  534          mutex_enter(&ns->netstack_lock);
 533  535          /*
 534  536           * Mark this netstack as having a DESTROY running so
 535  537           * any netstack_register/netstack_unregister waits for
 536  538           * the existing destroy callbacks to complete in reverse moduleid order
 537  539           */
 538  540          ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
 539  541          ns->netstack_flags |= NSF_ZONE_DESTROY;
 540  542          /*
 541  543           * If the shutdown callback wasn't called earlier (e.g., if this is
 542  544           * a netstack shared between multiple zones), then we schedule it now.
 543  545           *
 544  546           * Determine the set of stacks that exist before we drop the lock.
 545  547           * Set NSS_DESTROY_NEEDED for each of those. That
 546  548           * ensures that when we return all the callbacks for existing
 547  549           * instances have completed.
 548  550           */
 549  551          for (i = 0; i < NS_MAX; i++) {
 550  552                  nm_state_t *nms = &ns->netstack_m_state[i];
 551  553  
 552  554                  if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
 553  555                      ns_reg[i].nr_shutdown != NULL &&
 554  556                      (nms->nms_flags & NSS_CREATE_COMPLETED) &&
 555  557                      (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
 556  558                          nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
 557  559                          DTRACE_PROBE2(netstack__shutdown__needed,
 558  560                              netstack_t *, ns, int, i);
 559  561                  }
 560  562  
 561  563                  if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
 562  564                      ns_reg[i].nr_destroy != NULL &&
 563  565                      (nms->nms_flags & NSS_CREATE_COMPLETED) &&
 564  566                      (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
 565  567                          nms->nms_flags |= NSS_DESTROY_NEEDED;
 566  568                          DTRACE_PROBE2(netstack__destroy__needed,
 567  569                              netstack_t *, ns, int, i);
 568  570                  }
 569  571          }
 570  572          mutex_exit(&ns->netstack_lock);
 571  573          mutex_exit(&netstack_g_lock);
 572  574  
 573  575          /*
 574  576           * Call the shutdown and destroy functions for all registered modules
 575  577           * for this netstack.
 576  578           *
 577  579           * Since there are some ordering dependencies between the modules we
 578  580           * tear them down in the reverse order of what was used to create them.
 579  581           *
 580  582           * Since a netstack_t is never reused (when a zone is rebooted it gets
 581  583           * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
 582  584           * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
 583  585           * That is different than in the netstack_unregister() case.
 584  586           */
 585  587          apply_all_modules_reverse(ns, netstack_apply_shutdown);
 586  588          apply_all_modules_reverse(ns, netstack_apply_destroy);
 587  589  
 588  590          /* Tell any waiting netstack_register/netstack_unregister to proceed */
 589  591          mutex_enter(&ns->netstack_lock);
 590  592          ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
 591  593          ns->netstack_flags &= ~NSF_ZONE_DESTROY;
 592  594          cv_broadcast(&ns->netstack_cv);
 593  595          mutex_exit(&ns->netstack_lock);
 594  596  }
 595  597  
 596  598  /*
 597  599   * Apply a function to all netstacks for a particular moduleid.
 598  600   *
 599  601   * If there is any zone activity (due to a zone being created, shutdown,
 600  602   * or destroyed) we wait for that to complete before we proceed. This ensures
 601  603   * that the moduleids are processed in order when a zone is created or
 602  604   * destroyed.
 603  605   *
 604  606   * The applyfn has to drop netstack_g_lock if it does some work.
 605  607   * In that case we don't follow netstack_next,
 606  608   * even if it is possible to do so without any hazards. This is
 607  609   * because we want the design to allow for the list of netstacks threaded
 608  610   * by netstack_next to change in any arbitrary way during the time the
 609  611   * lock was dropped.
 610  612   *
 611  613   * It is safe to restart the loop at netstack_head since the applyfn
 612  614   * changes netstack_m_state as it processes things, so a subsequent
 613  615   * pass through will have no effect in applyfn, hence the loop will terminate
 614  616   * in at worst O(N^2).
 615  617   */
 616  618  static void
 617  619  apply_all_netstacks(int moduleid, applyfn_t *applyfn)
 618  620  {
 619  621          netstack_t *ns;
 620  622  
 621  623          mutex_enter(&netstack_g_lock);
 622  624          ns = netstack_head;
 623  625          while (ns != NULL) {
 624  626                  if (wait_for_zone_creator(ns, &netstack_g_lock)) {
 625  627                          /* Lock dropped - restart at head */
 626  628                          ns = netstack_head;
 627  629                  } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
 628  630                          /* Lock dropped - restart at head */
 629  631                          ns = netstack_head;
 630  632                  } else {
 631  633                          ns = ns->netstack_next;
 632  634                  }
 633  635          }
 634  636          mutex_exit(&netstack_g_lock);
 635  637  }
 636  638  
 637  639  /*
 638  640   * Apply a function to all moduleids for a particular netstack.
 639  641   *
 640  642   * Since the netstack linkage doesn't matter in this case we can
 641  643   * ignore whether the function drops the lock.
 642  644   */
 643  645  static void
 644  646  apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
 645  647  {
 646  648          int i;
 647  649  
 648  650          mutex_enter(&netstack_g_lock);
 649  651          for (i = 0; i < NS_MAX; i++) {
 650  652                  /*
 651  653                   * We don't care whether the lock was dropped
 652  654                   * since we are not iterating over netstack_head.
 653  655                   */
 654  656                  (void) (applyfn)(&netstack_g_lock, ns, i);
 655  657          }
 656  658          mutex_exit(&netstack_g_lock);
 657  659  }
 658  660  
 659  661  /* Like the above but in reverse moduleid order */
 660  662  static void
 661  663  apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
 662  664  {
 663  665          int i;
 664  666  
 665  667          mutex_enter(&netstack_g_lock);
 666  668          for (i = NS_MAX-1; i >= 0; i--) {
 667  669                  /*
 668  670                   * We don't care whether the lock was dropped
 669  671                   * since we are not iterating over netstack_head.
 670  672                   */
 671  673                  (void) (applyfn)(&netstack_g_lock, ns, i);
 672  674          }
 673  675          mutex_exit(&netstack_g_lock);
 674  676  }
 675  677  
 676  678  /*
 677  679   * Call the create function for the ns and moduleid if CREATE_NEEDED
 678  680   * is set.
 679  681   * If some other thread gets here first and sets *_INPROGRESS, then
 680  682   * we wait for that thread to complete so that we can ensure that
 681  683   * all the callbacks are done when we've looped over all netstacks/moduleids.
 682  684   *
 683  685   * When we call the create function, we temporarily drop the netstack_lock
 684  686   * held by the caller, and return true to tell the caller it needs to
  686  688   * re-evaluate the state.
 686  688   */
 687  689  static boolean_t
 688  690  netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
 689  691  {
 690  692          void *result;
 691  693          netstackid_t stackid;
 692  694          nm_state_t *nms = &ns->netstack_m_state[moduleid];
 693  695          boolean_t dropped = B_FALSE;
 694  696  
 695  697          ASSERT(MUTEX_HELD(lockp));
 696  698          mutex_enter(&ns->netstack_lock);
 697  699  
 698  700          if (wait_for_nms_inprogress(ns, nms, lockp))
 699  701                  dropped = B_TRUE;
 700  702  
 701  703          if (nms->nms_flags & NSS_CREATE_NEEDED) {
 702  704                  nms->nms_flags &= ~NSS_CREATE_NEEDED;
 703  705                  nms->nms_flags |= NSS_CREATE_INPROGRESS;
 704  706                  DTRACE_PROBE2(netstack__create__inprogress,
 705  707                      netstack_t *, ns, int, moduleid);
 706  708                  mutex_exit(&ns->netstack_lock);
 707  709                  mutex_exit(lockp);
 708  710                  dropped = B_TRUE;
 709  711  
 710  712                  ASSERT(ns_reg[moduleid].nr_create != NULL);
 711  713                  stackid = ns->netstack_stackid;
 712  714                  DTRACE_PROBE2(netstack__create__start,
 713  715                      netstackid_t, stackid,
 714  716                      netstack_t *, ns);
 715  717                  result = (ns_reg[moduleid].nr_create)(stackid, ns);
 716  718                  DTRACE_PROBE2(netstack__create__end,
 717  719                      void *, result, netstack_t *, ns);
 718  720  
 719  721                  ASSERT(result != NULL);
 720  722                  mutex_enter(lockp);
 721  723                  mutex_enter(&ns->netstack_lock);
 722  724                  ns->netstack_modules[moduleid] = result;
 723  725                  nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
 724  726                  nms->nms_flags |= NSS_CREATE_COMPLETED;
 725  727                  cv_broadcast(&nms->nms_cv);
 726  728                  DTRACE_PROBE2(netstack__create__completed,
 727  729                      netstack_t *, ns, int, moduleid);
 728  730                  mutex_exit(&ns->netstack_lock);
 729  731                  return (dropped);
 730  732          } else {
 731  733                  mutex_exit(&ns->netstack_lock);
 732  734                  return (dropped);
 733  735          }
 734  736  }
 735  737  
 736  738  /*
 737  739   * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 738  740   * is set.
 739  741   * If some other thread gets here first and sets *_INPROGRESS, then
 740  742   * we wait for that thread to complete so that we can ensure that
 741  743   * all the callbacks are done when we've looped over all netstacks/moduleids.
 742  744   *
 743  745   * When we call the shutdown function, we temporarily drop the netstack_lock
 744  746   * held by the caller, and return true to tell the caller it needs to
  745  747   * re-evaluate the state.
 746  748   */
 747  749  static boolean_t
 748  750  netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
 749  751  {
 750  752          netstackid_t stackid;
 751  753          void * netstack_module;
 752  754          nm_state_t *nms = &ns->netstack_m_state[moduleid];
 753  755          boolean_t dropped = B_FALSE;
 754  756  
 755  757          ASSERT(MUTEX_HELD(lockp));
 756  758          mutex_enter(&ns->netstack_lock);
 757  759  
 758  760          if (wait_for_nms_inprogress(ns, nms, lockp))
 759  761                  dropped = B_TRUE;
 760  762  
 761  763          if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
 762  764                  nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
 763  765                  nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
 764  766                  DTRACE_PROBE2(netstack__shutdown__inprogress,
 765  767                      netstack_t *, ns, int, moduleid);
 766  768                  mutex_exit(&ns->netstack_lock);
 767  769                  mutex_exit(lockp);
 768  770                  dropped = B_TRUE;
 769  771  
 770  772                  ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
 771  773                  stackid = ns->netstack_stackid;
 772  774                  netstack_module = ns->netstack_modules[moduleid];
 773  775                  DTRACE_PROBE2(netstack__shutdown__start,
 774  776                      netstackid_t, stackid,
 775  777                      void *, netstack_module);
 776  778                  (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
 777  779                  DTRACE_PROBE1(netstack__shutdown__end,
 778  780                      netstack_t *, ns);
 779  781  
 780  782                  mutex_enter(lockp);
 781  783                  mutex_enter(&ns->netstack_lock);
 782  784                  nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
 783  785                  nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
 784  786                  cv_broadcast(&nms->nms_cv);
 785  787                  DTRACE_PROBE2(netstack__shutdown__completed,
 786  788                      netstack_t *, ns, int, moduleid);
 787  789                  mutex_exit(&ns->netstack_lock);
 788  790                  return (dropped);
 789  791          } else {
 790  792                  mutex_exit(&ns->netstack_lock);
 791  793                  return (dropped);
 792  794          }
 793  795  }
 794  796  
 795  797  /*
 796  798   * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 797  799   * is set.
 798  800   * If some other thread gets here first and sets *_INPROGRESS, then
 799  801   * we wait for that thread to complete so that we can ensure that
 800  802   * all the callbacks are done when we've looped over all netstacks/moduleids.
 801  803   *
 802  804   * When we call the destroy function, we temporarily drop the netstack_lock
 803  805   * held by the caller, and return true to tell the caller it needs to
  804  806   * re-evaluate the state.
 805  807   */
 806  808  static boolean_t
 807  809  netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
 808  810  {
 809  811          netstackid_t stackid;
 810  812          void * netstack_module;
 811  813          nm_state_t *nms = &ns->netstack_m_state[moduleid];
 812  814          boolean_t dropped = B_FALSE;
 813  815  
 814  816          ASSERT(MUTEX_HELD(lockp));
 815  817          mutex_enter(&ns->netstack_lock);
 816  818  
 817  819          if (wait_for_nms_inprogress(ns, nms, lockp))
 818  820                  dropped = B_TRUE;
 819  821  
 820  822          if (nms->nms_flags & NSS_DESTROY_NEEDED) {
 821  823                  nms->nms_flags &= ~NSS_DESTROY_NEEDED;
 822  824                  nms->nms_flags |= NSS_DESTROY_INPROGRESS;
 823  825                  DTRACE_PROBE2(netstack__destroy__inprogress,
 824  826                      netstack_t *, ns, int, moduleid);
 825  827                  mutex_exit(&ns->netstack_lock);
 826  828                  mutex_exit(lockp);
 827  829                  dropped = B_TRUE;
 828  830  
 829  831                  ASSERT(ns_reg[moduleid].nr_destroy != NULL);
 830  832                  stackid = ns->netstack_stackid;
 831  833                  netstack_module = ns->netstack_modules[moduleid];
 832  834                  DTRACE_PROBE2(netstack__destroy__start,
 833  835                      netstackid_t, stackid,
 834  836                      void *, netstack_module);
 835  837                  (ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
 836  838                  DTRACE_PROBE1(netstack__destroy__end,
 837  839                      netstack_t *, ns);
 838  840  
 839  841                  mutex_enter(lockp);
 840  842                  mutex_enter(&ns->netstack_lock);
 841  843                  ns->netstack_modules[moduleid] = NULL;
 842  844                  nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
 843  845                  nms->nms_flags |= NSS_DESTROY_COMPLETED;
 844  846                  cv_broadcast(&nms->nms_cv);
 845  847                  DTRACE_PROBE2(netstack__destroy__completed,
 846  848                      netstack_t *, ns, int, moduleid);
 847  849                  mutex_exit(&ns->netstack_lock);
 848  850                  return (dropped);
 849  851          } else {
 850  852                  mutex_exit(&ns->netstack_lock);
 851  853                  return (dropped);
 852  854          }
 853  855  }
 854  856  
 855  857  /*
  856  858   * If somebody is creating the netstack (due to a new zone being created)
 857  859   * then we wait for them to complete. This ensures that any additional
 858  860   * netstack_register() doesn't cause the create functions to run out of
 859  861   * order.
 860  862   * Note that we do not need such a global wait in the case of the shutdown
 861  863   * and destroy callbacks, since in that case it is sufficient for both
 862  864   * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 863  865   * Returns true if lockp was temporarily dropped while waiting.
 864  866   */
 865  867  static boolean_t
 866  868  wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
 867  869  {
 868  870          boolean_t dropped = B_FALSE;
 869  871  
 870  872          mutex_enter(&ns->netstack_lock);
 871  873          while (ns->netstack_flags & NSF_ZONE_CREATE) {
 872  874                  DTRACE_PROBE1(netstack__wait__zone__inprogress,
 873  875                      netstack_t *, ns);
 874  876                  if (lockp != NULL) {
 875  877                          dropped = B_TRUE;
 876  878                          mutex_exit(lockp);
 877  879                  }
 878  880                  cv_wait(&ns->netstack_cv, &ns->netstack_lock);
 879  881                  if (lockp != NULL) {
 880  882                          /* First drop netstack_lock to preserve order */
 881  883                          mutex_exit(&ns->netstack_lock);
 882  884                          mutex_enter(lockp);
 883  885                          mutex_enter(&ns->netstack_lock);
 884  886                  }
 885  887          }
 886  888          mutex_exit(&ns->netstack_lock);
 887  889          return (dropped);
 888  890  }
 889  891  
 890  892  /*
 891  893   * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 892  894   * combination.
 893  895   * Returns true if lockp was temporarily dropped while waiting.
 894  896   */
 895  897  static boolean_t
 896  898  wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
 897  899  {
 898  900          boolean_t dropped = B_FALSE;
 899  901  
 900  902          while (nms->nms_flags & NSS_ALL_INPROGRESS) {
 901  903                  DTRACE_PROBE2(netstack__wait__nms__inprogress,
 902  904                      netstack_t *, ns, nm_state_t *, nms);
 903  905                  if (lockp != NULL) {
 904  906                          dropped = B_TRUE;
 905  907                          mutex_exit(lockp);
 906  908                  }
 907  909                  cv_wait(&nms->nms_cv, &ns->netstack_lock);
 908  910                  if (lockp != NULL) {
 909  911                          /* First drop netstack_lock to preserve order */
 910  912                          mutex_exit(&ns->netstack_lock);
 911  913                          mutex_enter(lockp);
 912  914                          mutex_enter(&ns->netstack_lock);
 913  915                  }
 914  916          }
 915  917          return (dropped);
 916  918  }
 917  919  
 918  920  /*
 919  921   * Get the stack instance used in caller's zone.
  
 920  922   * Increases the reference count, caller must do a netstack_rele.
 921  923   * It can't be called after zone_destroy() has started.
 922  924   */
 923  925  netstack_t *
 924  926  netstack_get_current(void)
 925  927  {
 926  928          netstack_t *ns;
 927  929  
 928  930          ns = curproc->p_zone->zone_netstack;
 929  931          ASSERT(ns != NULL);
 930      -        if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
 931      -                return (NULL);
 932      -
 933      -        netstack_hold(ns);
 934      -
 935      -        return (ns);
      932 +        return (netstack_hold_if_active(ns));
 936  933  }
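Reviewer note: with this change the NULL-check-plus-hold is centralized in
netstack_hold_if_active() (added later in this diff). Callers must still handle
a NULL return and drop the hold when done; a minimal usage sketch (the error
value chosen is illustrative):

        netstack_t *ns = netstack_get_current();

        if (ns == NULL)
                return (ENXIO); /* stack is uninitialized or closing */
        /* ... use ns, e.g. ns->netstack_modules[moduleid] ... */
        netstack_rele(ns);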
 937  934  
 938  935  /*
 939  936   * Find a stack instance given the cred.
 940  937   * This is used by the modules to potentially allow for a future when
 941  938   * something other than the zoneid is used to determine the stack.
 942  939   */
 943  940  netstack_t *
 944  941  netstack_find_by_cred(const cred_t *cr)
 945  942  {
 946  943          zoneid_t zoneid = crgetzoneid(cr);
 947  944  
 948  945          /* Handle the case when cr_zone is NULL */
 949  946          if (zoneid == (zoneid_t)-1)
 950  947                  zoneid = GLOBAL_ZONEID;
 951  948  
 952  949          /* For performance ... */
 953  950          if (curproc->p_zone->zone_id == zoneid)
 954  951                  return (netstack_get_current());
 955  952          else
 956  953                  return (netstack_find_by_zoneid(zoneid));
  
 957  954  }
 958  955  
 959  956  /*
 960  957   * Find a stack instance given the zoneid.
 961  958   * Increases the reference count if found; caller must do a
 962  959   * netstack_rele().
 963  960   *
 964  961   * If there is no exact match then assume the shared stack instance
 965  962   * matches.
 966  963   *
 967      - * Skip the unitialized ones.
      964 + * Skip the uninitialized and closing ones.
 968  965   */
 969  966  netstack_t *
 970  967  netstack_find_by_zoneid(zoneid_t zoneid)
 971  968  {
 972  969          netstack_t *ns;
 973  970          zone_t *zone;
 974  971  
 975  972          zone = zone_find_by_id(zoneid);
 976  973  
 977  974          if (zone == NULL)
 978  975                  return (NULL);
 979  976  
 980      -        ns = zone->zone_netstack;
 981      -        ASSERT(ns != NULL);
 982      -        if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
 983      -                ns = NULL;
 984      -        else
 985      -                netstack_hold(ns);
      977 +        ASSERT(zone->zone_netstack != NULL);
      978 +        ns = netstack_hold_if_active(zone->zone_netstack);
 986  979  
 987  980          zone_rele(zone);
 988  981          return (ns);
 989  982  }
 990  983  
 991  984  /*
 992  985   * Find a stack instance given the zoneid. Can only be called from
 993  986   * the create callback. See the comments in zone_find_by_id_nolock why
 994  987   * that limitation exists.
 995  988   *
 996  989   * Increases the reference count if found; caller must do a
  
 997  990   * netstack_rele().
 998  991   *
 999  992   * If there is no exact match then assume the shared stack instance
1000  993   * matches.
1001  994   *
 1002  995   * Skip the uninitialized ones.
1003  996   */
1004  997  netstack_t *
1005  998  netstack_find_by_zoneid_nolock(zoneid_t zoneid)
1006  999  {
1007      -        netstack_t *ns;
1008 1000          zone_t *zone;
1009 1001  
1010 1002          zone = zone_find_by_id_nolock(zoneid);
1011 1003  
1012 1004          if (zone == NULL)
1013 1005                  return (NULL);
1014 1006  
1015      -        ns = zone->zone_netstack;
1016      -        ASSERT(ns != NULL);
1017      -
1018      -        if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
1019      -                ns = NULL;
1020      -        else
1021      -                netstack_hold(ns);
1022      -
     1007 +        ASSERT(zone->zone_netstack != NULL);
1023 1008          /* zone_find_by_id_nolock does not have a hold on the zone */
1024      -        return (ns);
     1009 +        return (netstack_hold_if_active(zone->zone_netstack));
1025 1010  }
1026 1011  
1027 1012  /*
 1028 1013   * Find a stack instance given the stackid, looking for an exact match.
1029 1014   * Increases the reference count if found; caller must do a
1030 1015   * netstack_rele().
1031 1016   *
 1032 1017   * Skip the uninitialized ones.
1033 1018   */
1034 1019  netstack_t *
1035 1020  netstack_find_by_stackid(netstackid_t stackid)
1036 1021  {
1037 1022          netstack_t *ns;
1038 1023  
1039 1024          mutex_enter(&netstack_g_lock);
1040 1025          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
     1026 +                /* Can't use hold_if_active because of stackid check. */
1041 1027                  mutex_enter(&ns->netstack_lock);
1042 1028                  if (ns->netstack_stackid == stackid &&
1043 1029                      !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
     1030 +                        netstack_hold_locked(ns);
1044 1031                          mutex_exit(&ns->netstack_lock);
1045      -                        netstack_hold(ns);
1046 1032                          mutex_exit(&netstack_g_lock);
1047 1033                          return (ns);
1048 1034                  }
1049 1035                  mutex_exit(&ns->netstack_lock);
1050 1036          }
1051 1037          mutex_exit(&netstack_g_lock);
1052 1038          return (NULL);
1053 1039  }
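Reviewer note: this function is the heart of the 8901 fix. The old code dropped
ns->netstack_lock and only then called netstack_hold(), which re-enters the same
lock; in that window a final netstack_rele() from another thread could take the
refcnt to zero and schedule the reap, after which the hold would resurrect a
dying netstack. Side by side, the two orderings:

        /* Before (drop-and-reacquire; racy): */
        mutex_exit(&ns->netstack_lock);
        netstack_hold(ns);              /* re-enters netstack_lock */

        /* After: the hold lands while netstack_lock is still held. */
        netstack_hold_locked(ns);
        mutex_exit(&ns->netstack_lock);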
1054 1040  
1055 1041  boolean_t
1056 1042  netstack_inuse_by_stackid(netstackid_t stackid)
1057 1043  {
1058 1044          netstack_t *ns;
1059 1045          boolean_t rval = B_FALSE;
1060 1046  
1061 1047          mutex_enter(&netstack_g_lock);
1062 1048  
1063 1049          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1064 1050                  if (ns->netstack_stackid == stackid) {
1065 1051                          rval = B_TRUE;
1066 1052                          break;
1067 1053                  }
1068 1054          }
1069 1055  
1070 1056          mutex_exit(&netstack_g_lock);
1071 1057  
1072 1058          return (rval);
1073 1059  }
1074 1060  
1075 1061  
1076 1062  static void
1077 1063  netstack_reap(void *arg)
1078 1064  {
1079 1065          netstack_t **nsp, *ns = (netstack_t *)arg;
1080 1066          boolean_t found;
1081 1067          int i;
1082 1068  
1083 1069          /*
1084 1070           * Time to call the destroy functions and free up
1085 1071           * the structure
1086 1072           */
1087 1073          netstack_stack_inactive(ns);
1088 1074  
1089 1075          /* Make sure nothing increased the references */
1090 1076          ASSERT(ns->netstack_refcnt == 0);
1091 1077          ASSERT(ns->netstack_numzones == 0);
1092 1078  
1093 1079          /* Finally remove from list of netstacks */
1094 1080          mutex_enter(&netstack_g_lock);
1095 1081          found = B_FALSE;
1096 1082          for (nsp = &netstack_head; *nsp != NULL;
1097 1083              nsp = &(*nsp)->netstack_next) {
1098 1084                  if (*nsp == ns) {
1099 1085                          *nsp = ns->netstack_next;
1100 1086                          ns->netstack_next = NULL;
1101 1087                          found = B_TRUE;
1102 1088                          break;
1103 1089                  }
1104 1090          }
1105 1091          ASSERT(found);
1106 1092          mutex_exit(&netstack_g_lock);
1107 1093  
1108 1094          /* Make sure nothing increased the references */
1109 1095          ASSERT(ns->netstack_refcnt == 0);
1110 1096          ASSERT(ns->netstack_numzones == 0);
1111 1097  
1112 1098          ASSERT(ns->netstack_flags & NSF_CLOSING);
1113 1099  
1114 1100          for (i = 0; i < NS_MAX; i++) {
1115 1101                  nm_state_t *nms = &ns->netstack_m_state[i];
1116 1102  
1117 1103                  cv_destroy(&nms->nms_cv);
1118 1104          }
1119 1105          mutex_destroy(&ns->netstack_lock);
1120 1106          cv_destroy(&ns->netstack_cv);
1121 1107          kmem_free(ns, sizeof (*ns));
1122 1108          /* Allow another reap to be scheduled. */
1123 1109          sema_v(&netstack_reap_limiter);
1124 1110  }
1125 1111  
1126 1112  void
1127 1113  netstack_rele(netstack_t *ns)
1128 1114  {
1129 1115          int refcnt, numzones;
1130 1116  
1131 1117          mutex_enter(&ns->netstack_lock);
1132 1118          ASSERT(ns->netstack_refcnt > 0);
1133 1119          ns->netstack_refcnt--;
1134 1120          /*
1135 1121           * As we drop the lock additional netstack_rele()s can come in
1136 1122           * and decrement the refcnt to zero and free the netstack_t.
1137 1123           * Store pointers in local variables and if we were not the last
1138 1124           * then don't reference the netstack_t after that.
1139 1125           */
1140 1126          refcnt = ns->netstack_refcnt;
1141 1127          numzones = ns->netstack_numzones;
1142 1128          DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1143 1129          mutex_exit(&ns->netstack_lock);
1144 1130  
1145 1131          if (refcnt == 0 && numzones == 0) {
1146 1132                  /*
 1147 1133                   * Because callers may re-enter various netstack
 1148 1134                   * structures, odd reference models or other factors
 1149 1135                   * could cause a lockup.  We therefore schedule the
 1150 1136                   * actual deletion of this netstack as a deferred task
 1151 1137                   * on the system taskq.  This way, any such reference
 1152 1138                   * models won't trip over themselves.
1153 1139                   *
1154 1140                   * Assume we aren't in a high-priority interrupt context, so
1155 1141                   * we can use KM_SLEEP and semaphores.
1156 1142                   */
1157 1143                  if (sema_tryp(&netstack_reap_limiter) == 0) {
1158 1144                          /*
1159 1145                           * Indicate we're slamming against a limit.
1160 1146                           */
1161 1147                          hrtime_t measurement = gethrtime();
1162 1148  
1163 1149                          sema_p(&netstack_reap_limiter);
 1164 1150                          /* Capture the delay in nanoseconds. */
  
1165 1151                          DTRACE_PROBE1(netstack__reap__rate__limited,
1166 1152                              hrtime_t, gethrtime() - measurement);
1167 1153                  }
1168 1154  
1169 1155                  /* TQ_SLEEP should prevent taskq_dispatch() from failing. */
1170 1156                  (void) taskq_dispatch(system_taskq, netstack_reap, ns,
1171 1157                      TQ_SLEEP);
1172 1158          }
1173 1159  }
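
The rate-limiting idiom above generalizes: a counting semaphore caps the
number of outstanding deferred jobs, sema_tryp() detects when the cap has
been hit so the slow path can be instrumented, and the worker sema_v()s
when it finishes (as netstack_reap() does above).  A minimal sketch of the
same pattern; the names "reap_limiter", "do_reap", and "schedule_reap" are
illustrative, not from netstack.c:

        /* Hedged sketch: cap outstanding deferred jobs. */
        static ksema_t reap_limiter;    /* sema_init()ed to the cap */

        static void
        do_reap(void *arg)
        {
                /* ... actual teardown work ... */
                sema_v(&reap_limiter);  /* allow another job to queue */
        }

        static void
        schedule_reap(void *arg)
        {
                if (sema_tryp(&reap_limiter) == 0) {
                        /* At the cap: record how long we block. */
                        hrtime_t start = gethrtime();

                        sema_p(&reap_limiter);
                        DTRACE_PROBE1(reap__rate__limited, hrtime_t,
                            gethrtime() - start);
                }
                /* TQ_SLEEP waits for resources rather than failing. */
                (void) taskq_dispatch(system_taskq, do_reap, arg, TQ_SLEEP);
        }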
1174 1160  
     1161 +static void
     1162 +netstack_hold_locked(netstack_t *ns)
     1163 +{
     1164 +        ASSERT(MUTEX_HELD(&ns->netstack_lock));
     1165 +        ns->netstack_refcnt++;
     1166 +        ASSERT(ns->netstack_refcnt > 0);
     1167 +        DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
     1168 +}
     1169 +
     1170 +/*
     1171 + * If the passed-in netstack isn't active (i.e. it's uninitialized or closing),
     1172 + * return NULL, otherwise return it with its reference held.  Common code
     1173 + * for many netstack_find*() functions.
     1174 + */
     1175 +netstack_t *
     1176 +netstack_hold_if_active(netstack_t *ns)
     1177 +{
     1178 +        netstack_t *retval;
     1179 +
     1180 +        mutex_enter(&ns->netstack_lock);
     1181 +        if (ns->netstack_flags & (NSF_UNINIT | NSF_CLOSING)) {
     1182 +                retval = NULL;
     1183 +        } else {
     1184 +                netstack_hold_locked(ns);
     1185 +                retval = ns;
     1186 +        }
     1187 +        mutex_exit(&ns->netstack_lock);
     1188 +
     1189 +        return (retval);
     1190 +}
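
This helper lets the various netstack_find*() functions take their
reference while ns->netstack_lock is still held, rather than dropping and
reacquiring locks around a bare netstack_hold() (the drop-and-reacquire
race named in this fix's synopsis).  A hedged sketch of how a finder might
use it, modeled on the walker further below; this is not a verbatim copy
of netstack_find_by_stackid():

        /* Sketch: return the stack matching stackid held, or NULL. */
        netstack_t *
        find_by_stackid_sketch(netstackid_t stackid)
        {
                netstack_t *ns;

                mutex_enter(&netstack_g_lock);
                for (ns = netstack_head; ns != NULL;
                    ns = ns->netstack_next) {
                        if (ns->netstack_stackid != stackid)
                                continue;
                        /*
                         * Hold is taken under ns->netstack_lock; NULL
                         * means the stack is uninitialized or closing.
                         */
                        ns = netstack_hold_if_active(ns);
                        break;
                }
                mutex_exit(&netstack_g_lock);
                return (ns);
        }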
     1191 +
1175 1192  void
1176 1193  netstack_hold(netstack_t *ns)
1177 1194  {
1178 1195          mutex_enter(&ns->netstack_lock);
1179      -        ns->netstack_refcnt++;
1180      -        ASSERT(ns->netstack_refcnt > 0);
     1196 +        netstack_hold_locked(ns);
1181 1197          mutex_exit(&ns->netstack_lock);
1182      -        DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1183 1198  }
1184 1199  
1185 1200  /*
1186 1201   * To support kstat_create_netstack() using kstat_zone_add we need
1187 1202   * to track both
1188 1203   *  - all zoneids that use the global/shared stack
1189 1204   *  - all kstats that have been added for the shared stack
1190 1205   */
1191 1206  kstat_t *
1192 1207  kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1193 1208      char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1194 1209      netstackid_t ks_netstackid)
1195 1210  {
1196 1211          kstat_t *ks;
1197 1212  
1198 1213          if (ks_netstackid == GLOBAL_NETSTACKID) {
1199 1214                  ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1200 1215                      ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1201 1216                  if (ks != NULL)
1202 1217                          netstack_shared_kstat_add(ks);
1203 1218                  return (ks);
1204 1219          } else {
1205 1220                  zoneid_t zoneid = ks_netstackid;
1206 1221  
1207 1222                  return (kstat_create_zone(ks_module, ks_instance, ks_name,
1208 1223                      ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1209 1224          }
1210 1225  }
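
A hedged usage sketch: a networking module creates a per-stack kstat by
passing the stack's id, and for the shared stack the bookkeeping below
makes the kstat visible in every zone that uses it.  The module and kstat
names are illustrative, and "ns" is assumed to point at the relevant
netstack_t:

        /* Sketch: create, install, and later delete a per-stack kstat. */
        kstat_t *ksp;

        ksp = kstat_create_netstack("mymod", 0, "mystats", "net",
            KSTAT_TYPE_NAMED, 4, 0, ns->netstack_stackid);
        if (ksp != NULL) {
                kstat_install(ksp);
                /* ... */
                kstat_delete_netstack(ksp, ns->netstack_stackid);
        }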
1211 1226  
1212 1227  void
1213 1228  kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1214 1229  {
1215 1230          if (ks_netstackid == GLOBAL_NETSTACKID) {
1216 1231                  netstack_shared_kstat_remove(ks);
1217 1232          }
1218 1233          kstat_delete(ks);
1219 1234  }
1220 1235  
1221 1236  static void
1222 1237  netstack_shared_zone_add(zoneid_t zoneid)
1223 1238  {
1224 1239          struct shared_zone_list *sz;
1225 1240          struct shared_kstat_list *sk;
1226 1241  
1227 1242          sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1228 1243          sz->sz_zoneid = zoneid;
1229 1244  
1230 1245          /* Insert in list */
1231 1246          mutex_enter(&netstack_shared_lock);
1232 1247          sz->sz_next = netstack_shared_zones;
1233 1248          netstack_shared_zones = sz;
1234 1249  
1235 1250          /*
1236 1251           * Perform kstat_zone_add for each existing shared stack kstat.
1237 1252           * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1238 1253           */
1239 1254          for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1240 1255                  kstat_zone_add(sk->sk_kstat, zoneid);
1241 1256          }
1242 1257          mutex_exit(&netstack_shared_lock);
1243 1258  }
1244 1259  
1245 1260  static void
1246 1261  netstack_shared_zone_remove(zoneid_t zoneid)
1247 1262  {
1248 1263          struct shared_zone_list **szp, *sz;
1249 1264          struct shared_kstat_list *sk;
1250 1265  
1251 1266          /* Find in list */
1252 1267          mutex_enter(&netstack_shared_lock);
1253 1268          sz = NULL;
1254 1269          for (szp = &netstack_shared_zones; *szp != NULL;
1255 1270              szp = &((*szp)->sz_next)) {
1256 1271                  if ((*szp)->sz_zoneid == zoneid) {
1257 1272                          sz = *szp;
1258 1273                          break;
1259 1274                  }
1260 1275          }
1261 1276          /* We must find it */
1262 1277          ASSERT(sz != NULL);
1263 1278          *szp = sz->sz_next;
1264 1279          sz->sz_next = NULL;
1265 1280  
1266 1281          /*
1267 1282           * Perform kstat_zone_remove for each existing shared stack kstat.
1268 1283           * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1269 1284           */
1270 1285          for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1271 1286                  kstat_zone_remove(sk->sk_kstat, zoneid);
1272 1287          }
1273 1288          mutex_exit(&netstack_shared_lock);
1274 1289  
1275 1290          kmem_free(sz, sizeof (*sz));
1276 1291  }
1277 1292  
1278 1293  static void
1279 1294  netstack_shared_kstat_add(kstat_t *ks)
1280 1295  {
1281 1296          struct shared_zone_list *sz;
1282 1297          struct shared_kstat_list *sk;
1283 1298  
1284 1299          sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1285 1300          sk->sk_kstat = ks;
1286 1301  
1287 1302          /* Insert in list */
1288 1303          mutex_enter(&netstack_shared_lock);
1289 1304          sk->sk_next = netstack_shared_kstats;
1290 1305          netstack_shared_kstats = sk;
1291 1306  
1292 1307          /*
1293 1308           * Perform kstat_zone_add for each existing shared stack zone.
1294 1309           * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1295 1310           */
1296 1311          for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1297 1312                  kstat_zone_add(ks, sz->sz_zoneid);
1298 1313          }
1299 1314          mutex_exit(&netstack_shared_lock);
1300 1315  }
1301 1316  
1302 1317  static void
1303 1318  netstack_shared_kstat_remove(kstat_t *ks)
1304 1319  {
1305 1320          struct shared_zone_list *sz;
1306 1321          struct shared_kstat_list **skp, *sk;
1307 1322  
1308 1323          /* Find in list */
1309 1324          mutex_enter(&netstack_shared_lock);
1310 1325          sk = NULL;
1311 1326          for (skp = &netstack_shared_kstats; *skp != NULL;
1312 1327              skp = &((*skp)->sk_next)) {
1313 1328                  if ((*skp)->sk_kstat == ks) {
1314 1329                          sk = *skp;
1315 1330                          break;
1316 1331                  }
1317 1332          }
1318 1333          /* Must find it */
1319 1334          ASSERT(sk != NULL);
1320 1335          *skp = sk->sk_next;
1321 1336          sk->sk_next = NULL;
1322 1337  
1323 1338          /*
1324 1339           * Perform kstat_zone_remove for each existing shared stack zone.
1325 1340           * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1326 1341           */
1327 1342          for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1328 1343                  kstat_zone_remove(ks, sz->sz_zoneid);
1329 1344          }
1330 1345          mutex_exit(&netstack_shared_lock);
1331 1346          kmem_free(sk, sizeof (*sk));
1332 1347  }
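
These four add/remove functions maintain a cross-product invariant: every
(shared zone, shared kstat) pair has had kstat_zone_add() applied, and
removing either element undoes its row or column.  For reference, the two
singly-linked list nodes look like the following, reconstructed from the
field accesses above (the actual definitions appear earlier in
netstack.c):

        struct shared_zone_list {
                struct shared_zone_list *sz_next;
                zoneid_t sz_zoneid;
        };

        struct shared_kstat_list {
                struct shared_kstat_list *sk_next;
                kstat_t *sk_kstat;
        };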
1333 1348  
1334 1349  /*
1335 1350   * Return B_TRUE if the zoneid uses the global/shared stack.
1336 1351   */
1337 1352  static boolean_t
1338 1353  netstack_find_shared_zoneid(zoneid_t zoneid)
1339 1354  {
1340 1355          struct shared_zone_list *sz;
1341 1356  
1342 1357          mutex_enter(&netstack_shared_lock);
1343 1358          for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1344 1359                  if (sz->sz_zoneid == zoneid) {
1345 1360                          mutex_exit(&netstack_shared_lock);
1346 1361                          return (B_TRUE);
1347 1362                  }
1348 1363          }
1349 1364          mutex_exit(&netstack_shared_lock);
1350 1365          return (B_FALSE);
1351 1366  }
1352 1367  
1353 1368  /*
1354 1369   * Hide the fact that zoneids and netstackids are allocated from
1355 1370   * the same space in the current implementation.
1356 1371   * We currently do not check that the stackid/zoneid is valid, since
1357 1372   * there is no need to.  But these conversions should only be applied
1358 1373   * to ids that are valid.
1359 1374   */
1360 1375  zoneid_t
1361 1376  netstackid_to_zoneid(netstackid_t stackid)
1362 1377  {
1363 1378          return (stackid);
1364 1379  }
1365 1380  
1366 1381  netstackid_t
1367 1382  zoneid_to_netstackid(zoneid_t zoneid)
1368 1383  {
1369 1384          if (netstack_find_shared_zoneid(zoneid))
1370 1385                  return (GLOBAL_ZONEID);
1371 1386          else
1372 1387                  return (zoneid);
1373 1388  }
1374 1389  
1375 1390  zoneid_t
1376 1391  netstack_get_zoneid(netstack_t *ns)
1377 1392  {
1378 1393          return (netstackid_to_zoneid(ns->netstack_stackid));
1379 1394  }
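
A hedged sketch of these conversions in use.  Because a zone on the shared
stack maps to the global id, zoneid -> netstackid -> zoneid is not an
identity for such zones; the zoneid value here is hypothetical:

        zoneid_t zid = 5;       /* hypothetical non-global zoneid */
        netstackid_t sid = zoneid_to_netstackid(zid);
        zoneid_t back = netstackid_to_zoneid(sid);
        /*
         * If zone 5 uses the shared stack, sid == GLOBAL_ZONEID and
         * back == GLOBAL_ZONEID rather than 5; otherwise both equal 5.
         */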
1380 1395  
1381 1396  /*
1382 1397   * Simplistic support for walking all the handles.
1383 1398   * Example usage:
1384 1399   *      netstack_handle_t nh;
1385 1400   *      netstack_t *ns;
1386 1401   *
1387 1402   *      netstack_next_init(&nh);
1388 1403   *      while ((ns = netstack_next(&nh)) != NULL) {
1389 1404   *              do something;
1390 1405   *              netstack_rele(ns);
1391 1406   *      }
1392 1407   *      netstack_next_fini(&nh);
1393 1408   */
1394 1409  void
1395 1410  netstack_next_init(netstack_handle_t *handle)
1396 1411  {
1397 1412          *handle = 0;
1398 1413  }
1399 1414  
1400 1415  /* ARGSUSED */
1401 1416  void
1402 1417  netstack_next_fini(netstack_handle_t *handle)
1403 1418  {
1404 1419  }
1405 1420  
1406 1421  netstack_t *
1407 1422  netstack_next(netstack_handle_t *handle)
1408 1423  {
1409 1424          netstack_t *ns;
1410 1425          int i, end;
1411 1426  
1412 1427          end = *handle;
  
1413 1428          /* Walk the list, skipping the first *handle instances */
1414 1429  
1415 1430          /* See whether there is a matching stack instance */
1416 1431          mutex_enter(&netstack_g_lock);
1417 1432          ns = netstack_head;
1418 1433          for (i = 0; i < end; i++) {
1419 1434                  if (ns == NULL)
1420 1435                          break;
1421 1436                  ns = ns->netstack_next;
1422 1437          }
1423      -        /* skip those with that aren't really here */
     1438 +        /*
     1439 +         * Skip those that aren't really here (uninitialized or closing).
     1440 +         * Can't use hold_if_active because of "end" tracking.
     1441 +         */
1424 1442          while (ns != NULL) {
1425 1443                  mutex_enter(&ns->netstack_lock);
1426 1444                  if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
     1445 +                        *handle = end + 1;
     1446 +                        netstack_hold_locked(ns);
1427 1447                          mutex_exit(&ns->netstack_lock);
1428 1448                          break;
1429 1449                  }
1430 1450                  mutex_exit(&ns->netstack_lock);
1431 1451                  end++;
1432 1452                  ns = ns->netstack_next;
1433 1453          }
1434      -        if (ns != NULL) {
1435      -                *handle = end + 1;
1436      -                netstack_hold(ns);
1437      -        }
1438 1454          mutex_exit(&netstack_g_lock);
1439 1455          return (ns);
1440 1456  }
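
Because each netstack_next() call re-walks from netstack_head and skips
*handle entries, the handle is a position, not a snapshot: stacks created
or destroyed mid-walk may be seen or missed.  A hedged sketch of a full
walk, following the usage comment above ("count_netstacks_sketch" is an
illustrative name):

        /* Sketch: count the currently-active stacks. */
        int
        count_netstacks_sketch(void)
        {
                netstack_handle_t nh;
                netstack_t *ns;
                int n = 0;

                netstack_next_init(&nh);
                while ((ns = netstack_next(&nh)) != NULL) {
                        n++;
                        netstack_rele(ns);      /* drop the walker's hold */
                }
                netstack_next_fini(&nh);
                return (n);
        }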
    