OS-XXXX netstack_find_by_stackid() drops-and-reacquires
    
      
          --- old/usr/src/uts/common/os/netstack.c
          +++ new/usr/src/uts/common/os/netstack.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   * Copyright (c) 2016, Joyent, Inc.  All rights reserved.
  26   26   */
  27   27  
  28   28  #include <sys/param.h>
  29   29  #include <sys/sysmacros.h>
  30   30  #include <sys/vm.h>
  31   31  #include <sys/proc.h>
  32   32  #include <sys/tuneable.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/cmn_err.h>
  35   35  #include <sys/debug.h>
  36   36  #include <sys/sdt.h>
  37   37  #include <sys/mutex.h>
  38   38  #include <sys/bitmap.h>
  39   39  #include <sys/atomic.h>
  40   40  #include <sys/sunddi.h>
  41   41  #include <sys/kobj.h>
  42   42  #include <sys/disp.h>
  43   43  #include <vm/seg_kmem.h>
  44   44  #include <sys/zone.h>
  45   45  #include <sys/netstack.h>
  46   46  
  47   47  /*
  48   48   * What we use so that the zones framework can tell us about new zones,
  49   49   * which we use to create new stacks.
  50   50   */
  51   51  static zone_key_t netstack_zone_key;
  52   52  
  53   53  static int      netstack_initialized = 0;
  54   54  
  55   55  /*
  56   56   * Track the registered netstacks.
  57   57   * The global lock protects
  58   58   * - ns_reg
  59   59   * - the list starting at netstack_head and following the netstack_next
  60   60   *   pointers.
  61   61   */
  62   62  static kmutex_t netstack_g_lock;
  63   63  
  64   64  /*
  65   65   * Registry of netstacks with their create/shutdown/destroy functions.
  66   66   */
  67   67  static struct netstack_registry ns_reg[NS_MAX];
  68   68  
  69   69  /*
  70   70   * Global list of existing stacks.  We use this when a new zone with
  71   71   * an exclusive IP instance is created.
  72   72   *
  73   73   * Note that in some cases a netstack_t needs to stay around after the zone
  74   74   * has gone away. This is because there might be outstanding references
  75   75   * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
  76   76   * structure and all the foo_stack_t's hanging off of it will be cleaned up
  77   77   * when the last reference to it is dropped.
  78   78   * However, the same zone might be rebooted. That is handled using the
  79   79   * assumption that the zones framework picks a new zoneid each time a zone
  80   80   * is (re)booted. We assert for that condition in netstack_zone_create().
  81   81   * Thus the old netstack_t can take its time for things to time out.
  82   82   */
  83   83  static netstack_t *netstack_head;
  84   84  
  85   85  /*
  86   86   * To support kstat_create_netstack() using kstat_zone_add we need
  87   87   * to track both
  88   88   *  - all zoneids that use the global/shared stack
  89   89   *  - all kstats that have been added for the shared stack
  90   90   */
  91   91  struct shared_zone_list {
  92   92          struct shared_zone_list *sz_next;
  93   93          zoneid_t                sz_zoneid;
  94   94  };
  95   95  
  96   96  struct shared_kstat_list {
  97   97          struct shared_kstat_list *sk_next;
  98   98          kstat_t                  *sk_kstat;
  99   99  };
 100  100  
 101  101  static kmutex_t netstack_shared_lock;   /* protects the following two */
 102  102  static struct shared_zone_list  *netstack_shared_zones;
 103  103  static struct shared_kstat_list *netstack_shared_kstats;
 104  104  
 105  105  static void     *netstack_zone_create(zoneid_t zoneid);
 106  106  static void     netstack_zone_shutdown(zoneid_t zoneid, void *arg);
 107  107  static void     netstack_zone_destroy(zoneid_t zoneid, void *arg);
 108  108  
 109  109  static void     netstack_shared_zone_add(zoneid_t zoneid);
 110  110  static void     netstack_shared_zone_remove(zoneid_t zoneid);
 111  111  static void     netstack_shared_kstat_add(kstat_t *ks);
 112  112  static void     netstack_shared_kstat_remove(kstat_t *ks);
 113  113  
 114  114  typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
 115  115  
  
  
 116  116  static void     apply_all_netstacks(int, applyfn_t *);
 117  117  static void     apply_all_modules(netstack_t *, applyfn_t *);
 118  118  static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 119  119  static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 120  120  static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 121  121  static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 122  122  static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 123  123  static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 124  124      kmutex_t *);
 125  125  
      126 +static void netstack_hold_locked(netstack_t *);
 126  127  static void netstack_reap_work(netstack_t *, boolean_t);
 127  128  ksema_t netstack_reap_limiter;
 128  129  
 129  130  void
 130  131  netstack_init(void)
 131  132  {
 132  133          mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
 133  134          mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
 134  135  
 135  136          /* XXX KEBE SAYS hard-coded constant needs to be fixed. */
 136  137          sema_init(&netstack_reap_limiter, 1024, NULL, SEMA_DRIVER, NULL);
 137  138  
 138  139          netstack_initialized = 1;
 139  140  
 140  141          /*
 141  142           * We want to be informed each time a zone is created or
 142  143           * destroyed in the kernel, so we can maintain the
 143  144           * stack instance information.
 144  145           */
 145  146          zone_key_create(&netstack_zone_key, netstack_zone_create,
 146  147              netstack_zone_shutdown, netstack_zone_destroy);
 147  148  }
 148  149  
 149  150  /*
 150  151   * Register a new module with the framework.
 151  152   * This registers interest in changes to the set of netstacks.
 152  153   * The createfn and destroyfn are required, but the shutdownfn can be
 153  154   * NULL.
 154  155   * Note that due to the current zsd implementation, when the create
 155  156   * function is called the zone isn't fully present, thus functions
 156  157   * like zone_find_by_* will fail, hence the create function cannot
 157  158   * use many zones kernel functions including zcmn_err().
 158  159   */
 159  160  void
 160  161  netstack_register(int moduleid,
 161  162      void *(*module_create)(netstackid_t, netstack_t *),
 162  163      void (*module_shutdown)(netstackid_t, void *),
 163  164      void (*module_destroy)(netstackid_t, void *))
 164  165  {
 165  166          netstack_t *ns;
 166  167  
 167  168          ASSERT(netstack_initialized);
 168  169          ASSERT(moduleid >= 0 && moduleid < NS_MAX);
 169  170          ASSERT(module_create != NULL);
 170  171  
 171  172          /*
 172  173           * Make instances created after this point in time run the create
 173  174           * callback.
 174  175           */
 175  176          mutex_enter(&netstack_g_lock);
 176  177          ASSERT(ns_reg[moduleid].nr_create == NULL);
 177  178          ASSERT(ns_reg[moduleid].nr_flags == 0);
 178  179          ns_reg[moduleid].nr_create = module_create;
 179  180          ns_reg[moduleid].nr_shutdown = module_shutdown;
 180  181          ns_reg[moduleid].nr_destroy = module_destroy;
 181  182          ns_reg[moduleid].nr_flags = NRF_REGISTERED;
 182  183  
 183  184          /*
 184  185           * Determine the set of stacks that exist before we drop the lock.
 185  186           * Set NSS_CREATE_NEEDED for each of those.
 186  187           * netstacks which have been deleted will have NSS_CREATE_COMPLETED
 187  188           * set, but check NSF_CLOSING to be sure.
 188  189           */
 189  190          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
 190  191                  nm_state_t *nms = &ns->netstack_m_state[moduleid];
 191  192  
 192  193                  mutex_enter(&ns->netstack_lock);
 193  194                  if (!(ns->netstack_flags & NSF_CLOSING) &&
 194  195                      (nms->nms_flags & NSS_CREATE_ALL) == 0) {
 195  196                          nms->nms_flags |= NSS_CREATE_NEEDED;
 196  197                          DTRACE_PROBE2(netstack__create__needed,
 197  198                              netstack_t *, ns, int, moduleid);
 198  199                  }
 199  200                  mutex_exit(&ns->netstack_lock);
 200  201          }
 201  202          mutex_exit(&netstack_g_lock);
 202  203  
 203  204          /*
 204  205           * At this point in time a new instance can be created or an instance
 205  206           * can be destroyed, or some other module can register or unregister.
 206  207           * Make sure we either run all the create functions for this moduleid
 207  208           * or we wait for any other creators for this moduleid.
 208  209           */
 209  210          apply_all_netstacks(moduleid, netstack_apply_create);
 210  211  }
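
A minimal sketch (not part of this change) of how a hypothetical client module would use the interface described above; NS_FOO, foo_stack_t, and the foo_* names are illustrative, not identifiers from the tree:

        typedef struct foo_stack {
                netstack_t *fs_netstack;        /* backpointer; no ref held */
        } foo_stack_t;

        static void *
        foo_stack_create(netstackid_t stackid, netstack_t *ns)
        {
                foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);

                fs->fs_netstack = ns;
                /* Must be non-NULL; netstack_apply_create() asserts it. */
                return (fs);
        }

        static void
        foo_stack_shutdown(netstackid_t stackid, void *arg)
        {
                /* Quiesce the instance: stop timers, drop external refs. */
        }

        static void
        foo_stack_destroy(netstackid_t stackid, void *arg)
        {
                kmem_free(arg, sizeof (foo_stack_t));
        }

        void
        foo_init(void)
        {
                /*
                 * Runs foo_stack_create() for every existing netstack
                 * before returning; stacks created later pick the module
                 * up automatically.
                 */
                netstack_register(NS_FOO, foo_stack_create,
                    foo_stack_shutdown, foo_stack_destroy);
        }
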
 211  212  
 212  213  void
 213  214  netstack_unregister(int moduleid)
 214  215  {
 215  216          netstack_t *ns;
 216  217  
 217  218          ASSERT(moduleid >= 0 && moduleid < NS_MAX);
 218  219  
 219  220          ASSERT(ns_reg[moduleid].nr_create != NULL);
 220  221          ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
 221  222  
 222  223          mutex_enter(&netstack_g_lock);
 223  224          /*
 224  225           * Determine the set of stacks that exist before we drop the lock.
 225  226           * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
 226  227           * That ensures that when we return all the callbacks for existing
 227  228           * instances have completed. And since we set NRF_DYING no new
 228  229           * instances can use this module.
 229  230           */
 230  231          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
 231  232                  boolean_t created = B_FALSE;
 232  233                  nm_state_t *nms = &ns->netstack_m_state[moduleid];
 233  234  
 234  235                  mutex_enter(&ns->netstack_lock);
 235  236  
 236  237                  /*
 237  238                   * We need to be careful here. We could actually have a netstack
 238  239                   * being created as we speak waiting for us to let go of this
 239  240                   * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
 240  241                   * have gotten to the point of completing it yet. If
 241  242                   * NSS_CREATE_NEEDED, we can safely just remove it here and
 242  243                   * never create the module. However, if NSS_CREATE_INPROGRESS is
 243  244                   * set, we need to still flag this module for shutdown and
 244  245                   * deletion, just as though it had reached NSS_CREATE_COMPLETED.
 245  246                   *
 246  247                   * It is safe to do that because of two different guarantees
 247  248                   * that exist in the system. The first is that before we do a
 248  249                   * create, shutdown, or destroy, we ensure that nothing else is
 249  250                   * in progress in the system for this netstack and wait for it
 250  251                   * to complete. Secondly, because the zone is being created, we
 251  252                   know that the following call to apply_all_netstacks will block
 252  253                   * on the zone finishing its initialization.
 253  254                   */
 254  255                  if (nms->nms_flags & NSS_CREATE_NEEDED)
 255  256                          nms->nms_flags &= ~NSS_CREATE_NEEDED;
 256  257  
 257  258                  if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
 258  259                      nms->nms_flags & NSS_CREATE_COMPLETED)
 259  260                          created = B_TRUE;
 260  261  
 261  262                  if (ns_reg[moduleid].nr_shutdown != NULL && created &&
 262  263                      (nms->nms_flags & NSS_CREATE_COMPLETED) &&
 263  264                      (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
 264  265                          nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
 265  266                          DTRACE_PROBE2(netstack__shutdown__needed,
 266  267                              netstack_t *, ns, int, moduleid);
 267  268                  }
 268  269                  if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
 269  270                      ns_reg[moduleid].nr_destroy != NULL && created &&
 270  271                      (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
 271  272                          nms->nms_flags |= NSS_DESTROY_NEEDED;
 272  273                          DTRACE_PROBE2(netstack__destroy__needed,
 273  274                              netstack_t *, ns, int, moduleid);
 274  275                  }
 275  276                  mutex_exit(&ns->netstack_lock);
 276  277          }
 277  278          /*
 278  279           * Prevent any new netstack from calling the registered create
 279  280           * function, while keeping the function pointers in place until the
 280  281           * shutdown and destroy callbacks are complete.
 281  282           */
 282  283          ns_reg[moduleid].nr_flags |= NRF_DYING;
 283  284          mutex_exit(&netstack_g_lock);
 284  285  
 285  286          apply_all_netstacks(moduleid, netstack_apply_shutdown);
 286  287          apply_all_netstacks(moduleid, netstack_apply_destroy);
 287  288  
 288  289          /*
 289  290           * Clear the nms_flags so that we can handle this module
 290  291           * being loaded again.
 291  292           * Also remove the registered functions.
 292  293           */
 293  294          mutex_enter(&netstack_g_lock);
 294  295          ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
 295  296          ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
 296  297          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
 297  298                  nm_state_t *nms = &ns->netstack_m_state[moduleid];
 298  299  
 299  300                  mutex_enter(&ns->netstack_lock);
 300  301                  if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
 301  302                          nms->nms_flags = 0;
 302  303                          DTRACE_PROBE2(netstack__destroy__done,
 303  304                              netstack_t *, ns, int, moduleid);
 304  305                  }
 305  306                  mutex_exit(&ns->netstack_lock);
 306  307          }
 307  308  
 308  309          ns_reg[moduleid].nr_create = NULL;
 309  310          ns_reg[moduleid].nr_shutdown = NULL;
 310  311          ns_reg[moduleid].nr_destroy = NULL;
 311  312          ns_reg[moduleid].nr_flags = 0;
 312  313          mutex_exit(&netstack_g_lock);
 313  314  }
 314  315  
 315  316  /*
 316  317   * Lookup and/or allocate a netstack for this zone.
 317  318   */
 318  319  static void *
 319  320  netstack_zone_create(zoneid_t zoneid)
 320  321  {
 321  322          netstackid_t stackid;
 322  323          netstack_t *ns;
 323  324          netstack_t **nsp;
 324  325          zone_t  *zone;
 325  326          int i;
 326  327  
 327  328          ASSERT(netstack_initialized);
 328  329  
 329  330          zone = zone_find_by_id_nolock(zoneid);
 330  331          ASSERT(zone != NULL);
 331  332  
 332  333          if (zone->zone_flags & ZF_NET_EXCL) {
 333  334                  stackid = zoneid;
 334  335          } else {
 335  336                  /* Look for the stack instance for the global */
 336  337                  stackid = GLOBAL_NETSTACKID;
 337  338          }
 338  339  
 339  340          /* Allocate even if it isn't needed; simplifies locking */
 340  341          ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
 341  342  
 342  343          /* See if there is a matching stack instance */
 343  344          mutex_enter(&netstack_g_lock);
 344  345          for (nsp = &netstack_head; *nsp != NULL;
 345  346              nsp = &((*nsp)->netstack_next)) {
 346  347                  if ((*nsp)->netstack_stackid == stackid) {
 347  348                          /*
 348  349                           * Should never find a pre-existing exclusive stack
 349  350                           */
 350  351                          VERIFY(stackid == GLOBAL_NETSTACKID);
 351  352                          kmem_free(ns, sizeof (netstack_t));
 352  353                          ns = *nsp;
 353  354                          mutex_enter(&ns->netstack_lock);
 354  355                          ns->netstack_numzones++;
 355  356                          mutex_exit(&ns->netstack_lock);
 356  357                          mutex_exit(&netstack_g_lock);
 357  358                          DTRACE_PROBE1(netstack__inc__numzones,
 358  359                              netstack_t *, ns);
 359  360                          /* Record that we have a new shared stack zone */
 360  361                          netstack_shared_zone_add(zoneid);
 361  362                          zone->zone_netstack = ns;
 362  363                          return (ns);
 363  364                  }
 364  365          }
 365  366          /* Not found */
 366  367          mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
 367  368          cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
 368  369          ns->netstack_stackid = zoneid;
 369  370          ns->netstack_numzones = 1;
 370  371          ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
 371  372          ns->netstack_flags = NSF_UNINIT;
 372  373          *nsp = ns;
 373  374          zone->zone_netstack = ns;
 374  375  
 375  376          mutex_enter(&ns->netstack_lock);
 376  377          /*
 377  378           * Mark this netstack as having a CREATE running so
 378  379           * any netstack_register/netstack_unregister waits for
 379  380           * the existing create callbacks to complete in moduleid order
 380  381           */
 381  382          ns->netstack_flags |= NSF_ZONE_CREATE;
 382  383  
 383  384          /*
 384  385           * Determine the set of module create functions that need to be
 385  386           * called before we drop the lock.
 386  387           * Set NSS_CREATE_NEEDED for each of those.
 387  388           * Skip any with NRF_DYING set, since those are in the process of
 388  389           * going away, by checking for flags being exactly NRF_REGISTERED.
 389  390           */
 390  391          for (i = 0; i < NS_MAX; i++) {
 391  392                  nm_state_t *nms = &ns->netstack_m_state[i];
 392  393  
 393  394                  cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
 394  395  
 395  396                  if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
 396  397                      (nms->nms_flags & NSS_CREATE_ALL) == 0) {
 397  398                          nms->nms_flags |= NSS_CREATE_NEEDED;
 398  399                          DTRACE_PROBE2(netstack__create__needed,
 399  400                              netstack_t *, ns, int, i);
 400  401                  }
 401  402          }
 402  403          mutex_exit(&ns->netstack_lock);
 403  404          mutex_exit(&netstack_g_lock);
 404  405  
 405  406          apply_all_modules(ns, netstack_apply_create);
 406  407  
 407  408          /* Tell any waiting netstack_register/netstack_unregister to proceed */
 408  409          mutex_enter(&ns->netstack_lock);
 409  410          ns->netstack_flags &= ~NSF_UNINIT;
 410  411          ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
 411  412          ns->netstack_flags &= ~NSF_ZONE_CREATE;
 412  413          cv_broadcast(&ns->netstack_cv);
 413  414          mutex_exit(&ns->netstack_lock);
 414  415  
 415  416          return (ns);
 416  417  }
 417  418  
 418  419  /* ARGSUSED */
 419  420  static void
 420  421  netstack_zone_shutdown(zoneid_t zoneid, void *arg)
 421  422  {
 422  423          netstack_t *ns = (netstack_t *)arg;
 423  424          int i;
 424  425  
 425  426          ASSERT(arg != NULL);
 426  427  
 427  428          mutex_enter(&ns->netstack_lock);
 428  429          ASSERT(ns->netstack_numzones > 0);
 429  430          if (ns->netstack_numzones != 1) {
 430  431                  /* Stack instance being used by other zone */
 431  432                  mutex_exit(&ns->netstack_lock);
 432  433                  ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
 433  434                  return;
 434  435          }
 435  436          mutex_exit(&ns->netstack_lock);
 436  437  
 437  438          mutex_enter(&netstack_g_lock);
 438  439          mutex_enter(&ns->netstack_lock);
 439  440          /*
 440  441           * Mark this netstack as having a SHUTDOWN running so
 441  442           * any netstack_register/netstack_unregister waits for
 442  443           * the existing shutdown callbacks to complete in moduleid order
 443  444           */
 444  445          ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
 445  446          ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
 446  447  
 447  448          /*
 448  449           * Determine the set of stacks that exist before we drop the lock.
 449  450           * Set NSS_SHUTDOWN_NEEDED for each of those.
 450  451           */
 451  452          for (i = 0; i < NS_MAX; i++) {
 452  453                  nm_state_t *nms = &ns->netstack_m_state[i];
 453  454  
 454  455                  if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
 455  456                      ns_reg[i].nr_shutdown != NULL &&
 456  457                      (nms->nms_flags & NSS_CREATE_COMPLETED) &&
 457  458                      (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
 458  459                          nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
 459  460                          DTRACE_PROBE2(netstack__shutdown__needed,
 460  461                              netstack_t *, ns, int, i);
 461  462                  }
 462  463          }
 463  464          mutex_exit(&ns->netstack_lock);
 464  465          mutex_exit(&netstack_g_lock);
 465  466  
 466  467          /*
 467  468           * Call the shutdown function for all registered modules for this
 468  469           * netstack.
 469  470           */
 470  471          apply_all_modules_reverse(ns, netstack_apply_shutdown);
 471  472  
 472  473          /* Tell any waiting netstack_register/netstack_unregister to proceed */
 473  474          mutex_enter(&ns->netstack_lock);
 474  475          ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
 475  476          ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
 476  477          cv_broadcast(&ns->netstack_cv);
 477  478          mutex_exit(&ns->netstack_lock);
 478  479  }
 479  480  
 480  481  /*
 481  482   * Common routine to release a zone.
 482  483   * If this was the last zone using the stack instance then prepare to
 483  484   * have the refcnt dropping to zero free the zone.
 484  485   */
 485  486  /* ARGSUSED */
 486  487  static void
 487  488  netstack_zone_destroy(zoneid_t zoneid, void *arg)
 488  489  {
 489  490          netstack_t *ns = (netstack_t *)arg;
 490  491  
 491  492          ASSERT(arg != NULL);
 492  493  
 493  494          mutex_enter(&ns->netstack_lock);
 494  495          ASSERT(ns->netstack_numzones > 0);
 495  496          ns->netstack_numzones--;
 496  497          if (ns->netstack_numzones != 0) {
 497  498                  /* Stack instance being used by other zone */
 498  499                  mutex_exit(&ns->netstack_lock);
 499  500                  ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
 500  501                  /* Record that a shared stack zone has gone away */
 501  502                  netstack_shared_zone_remove(zoneid);
 502  503                  return;
 503  504          }
 504  505          /*
 505  506           * Set CLOSING so that netstack_find_by will not find it.
 506  507           */
 507  508          ns->netstack_flags |= NSF_CLOSING;
 508  509          mutex_exit(&ns->netstack_lock);
 509  510          DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
 510  511          /* No other thread can call zone_destroy for this stack */
 511  512  
 512  513          /*
 513  514           * Decrease refcnt to account for the one in netstack_zone_create()
 514  515           */
 515  516          netstack_rele(ns);
 516  517  }
 517  518  
 518  519  /*
 519  520   * Called when the reference count drops to zero.
 520  521   * Call the destroy functions for each registered module.
 521  522   */
 522  523  static void
 523  524  netstack_stack_inactive(netstack_t *ns)
 524  525  {
 525  526          int i;
 526  527  
 527  528          mutex_enter(&netstack_g_lock);
 528  529          mutex_enter(&ns->netstack_lock);
 529  530          /*
 530  531           * Mark this netstack as having a DESTROY running so
 531  532           * any netstack_register/netstack_unregister waits for
 532  533           * the existing destroy callbacks to complete in reverse moduleid order
 533  534           */
 534  535          ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
 535  536          ns->netstack_flags |= NSF_ZONE_DESTROY;
 536  537          /*
 537  538           * If the shutdown callback wasn't called earlier (e.g., if this is
 538  539           * a netstack shared between multiple zones), then we schedule it now.
 539  540           *
 540  541           * Determine the set of stacks that exist before we drop the lock.
 541  542           * Set NSS_DESTROY_NEEDED for each of those. That
 542  543           * ensures that when we return all the callbacks for existing
 543  544           * instances have completed.
 544  545           */
 545  546          for (i = 0; i < NS_MAX; i++) {
 546  547                  nm_state_t *nms = &ns->netstack_m_state[i];
 547  548  
 548  549                  if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
 549  550                      ns_reg[i].nr_shutdown != NULL &&
 550  551                      (nms->nms_flags & NSS_CREATE_COMPLETED) &&
 551  552                      (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
 552  553                          nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
 553  554                          DTRACE_PROBE2(netstack__shutdown__needed,
 554  555                              netstack_t *, ns, int, i);
 555  556                  }
 556  557  
 557  558                  if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
 558  559                      ns_reg[i].nr_destroy != NULL &&
 559  560                      (nms->nms_flags & NSS_CREATE_COMPLETED) &&
 560  561                      (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
 561  562                          nms->nms_flags |= NSS_DESTROY_NEEDED;
 562  563                          DTRACE_PROBE2(netstack__destroy__needed,
 563  564                              netstack_t *, ns, int, i);
 564  565                  }
 565  566          }
 566  567          mutex_exit(&ns->netstack_lock);
 567  568          mutex_exit(&netstack_g_lock);
 568  569  
 569  570          /*
 570  571           * Call the shutdown and destroy functions for all registered modules
 571  572           * for this netstack.
 572  573           *
 573  574           * Since there are some ordering dependencies between the modules we
 574  575           * tear them down in the reverse order of what was used to create them.
 575  576           *
 576  577           * Since a netstack_t is never reused (when a zone is rebooted it gets
 577  578           * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
 578  579           * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
 579  580           * That is different than in the netstack_unregister() case.
 580  581           */
 581  582          apply_all_modules_reverse(ns, netstack_apply_shutdown);
 582  583          apply_all_modules_reverse(ns, netstack_apply_destroy);
 583  584  
 584  585          /* Tell any waiting netstack_register/netstack_unregister to proceed */
 585  586          mutex_enter(&ns->netstack_lock);
 586  587          ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
 587  588          ns->netstack_flags &= ~NSF_ZONE_DESTROY;
 588  589          cv_broadcast(&ns->netstack_cv);
 589  590          mutex_exit(&ns->netstack_lock);
 590  591  }
 591  592  
 592  593  /*
 593  594   * Apply a function to all netstacks for a particular moduleid.
 594  595   *
 595  596   * If there is any zone activity (due to a zone being created, shutdown,
 596  597   * or destroyed) we wait for that to complete before we proceed. This ensures
 597  598   * that the moduleids are processed in order when a zone is created or
 598  599   * destroyed.
 599  600   *
 600  601   * The applyfn has to drop netstack_g_lock if it does some work.
 601  602   * In that case we don't follow netstack_next,
 602  603   * even if it is possible to do so without any hazards. This is
 603  604   * because we want the design to allow for the list of netstacks threaded
 604  605   * by netstack_next to change in any arbitrary way during the time the
 605  606   * lock was dropped.
 606  607   *
 607  608   * It is safe to restart the loop at netstack_head since the applyfn
 608  609   * changes netstack_m_state as it processes things, so a subsequent
 609  610   * pass through will have no effect in applyfn, hence the loop will terminate
 610  611   * in at worst O(N^2).
 611  612   */
 612  613  static void
 613  614  apply_all_netstacks(int moduleid, applyfn_t *applyfn)
 614  615  {
 615  616          netstack_t *ns;
 616  617  
 617  618          mutex_enter(&netstack_g_lock);
 618  619          ns = netstack_head;
 619  620          while (ns != NULL) {
 620  621                  if (wait_for_zone_creator(ns, &netstack_g_lock)) {
 621  622                          /* Lock dropped - restart at head */
 622  623                          ns = netstack_head;
 623  624                  } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
 624  625                          /* Lock dropped - restart at head */
 625  626                          ns = netstack_head;
 626  627                  } else {
 627  628                          ns = ns->netstack_next;
 628  629                  }
 629  630          }
 630  631          mutex_exit(&netstack_g_lock);
 631  632  }
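
The loop above leans on a simple applyfn contract: return B_TRUE exactly when netstack_g_lock was dropped, and record progress in netstack_m_state before dropping it, so each restart from netstack_head strictly shrinks the remaining work. A skeleton of that contract, assuming a hypothetical work_is_needed() check (the real instances follow below):

        static boolean_t
        example_applyfn(kmutex_t *lockp, netstack_t *ns, int moduleid)
        {
                boolean_t dropped = B_FALSE;

                ASSERT(MUTEX_HELD(lockp));
                if (work_is_needed(ns, moduleid)) {
                        /* Mark the work in-progress, then ... */
                        mutex_exit(lockp);
                        dropped = B_TRUE;
                        /* ... do the work, then retake the lock. */
                        mutex_enter(lockp);
                }
                return (dropped);
        }
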
 632  633  
 633  634  /*
 634  635   * Apply a function to all moduleids for a particular netstack.
 635  636   *
 636  637   * Since the netstack linkage doesn't matter in this case we can
 637  638   * ignore whether the function drops the lock.
 638  639   */
 639  640  static void
 640  641  apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
 641  642  {
 642  643          int i;
 643  644  
 644  645          mutex_enter(&netstack_g_lock);
 645  646          for (i = 0; i < NS_MAX; i++) {
 646  647                  /*
 647  648                   * We don't care whether the lock was dropped
 648  649                   * since we are not iterating over netstack_head.
 649  650                   */
 650  651                  (void) (applyfn)(&netstack_g_lock, ns, i);
 651  652          }
 652  653          mutex_exit(&netstack_g_lock);
 653  654  }
 654  655  
 655  656  /* Like the above but in reverse moduleid order */
 656  657  static void
 657  658  apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
 658  659  {
 659  660          int i;
 660  661  
 661  662          mutex_enter(&netstack_g_lock);
 662  663          for (i = NS_MAX-1; i >= 0; i--) {
 663  664                  /*
 664  665                   * We don't care whether the lock was dropped
 665  666                   * since we are not iterating over netstack_head.
 666  667                   */
 667  668                  (void) (applyfn)(&netstack_g_lock, ns, i);
 668  669          }
 669  670          mutex_exit(&netstack_g_lock);
 670  671  }
 671  672  
 672  673  /*
 673  674   * Call the create function for the ns and moduleid if CREATE_NEEDED
 674  675   * is set.
 675  676   * If some other thread gets here first and sets *_INPROGRESS, then
 676  677   * we wait for that thread to complete so that we can ensure that
 677  678   * all the callbacks are done when we've looped over all netstacks/moduleids.
 678  679   *
 679  680   * When we call the create function, we temporarily drop the netstack_lock
 680  681   * held by the caller, and return true to tell the caller it needs to
 681  682   * re-evaluate the state.
 682  683   */
 683  684  static boolean_t
 684  685  netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
 685  686  {
 686  687          void *result;
 687  688          netstackid_t stackid;
 688  689          nm_state_t *nms = &ns->netstack_m_state[moduleid];
 689  690          boolean_t dropped = B_FALSE;
 690  691  
 691  692          ASSERT(MUTEX_HELD(lockp));
 692  693          mutex_enter(&ns->netstack_lock);
 693  694  
 694  695          if (wait_for_nms_inprogress(ns, nms, lockp))
 695  696                  dropped = B_TRUE;
 696  697  
 697  698          if (nms->nms_flags & NSS_CREATE_NEEDED) {
 698  699                  nms->nms_flags &= ~NSS_CREATE_NEEDED;
 699  700                  nms->nms_flags |= NSS_CREATE_INPROGRESS;
 700  701                  DTRACE_PROBE2(netstack__create__inprogress,
 701  702                      netstack_t *, ns, int, moduleid);
 702  703                  mutex_exit(&ns->netstack_lock);
 703  704                  mutex_exit(lockp);
 704  705                  dropped = B_TRUE;
 705  706  
 706  707                  ASSERT(ns_reg[moduleid].nr_create != NULL);
 707  708                  stackid = ns->netstack_stackid;
 708  709                  DTRACE_PROBE2(netstack__create__start,
 709  710                      netstackid_t, stackid,
 710  711                      netstack_t *, ns);
 711  712                  result = (ns_reg[moduleid].nr_create)(stackid, ns);
 712  713                  DTRACE_PROBE2(netstack__create__end,
 713  714                      void *, result, netstack_t *, ns);
 714  715  
 715  716                  ASSERT(result != NULL);
 716  717                  mutex_enter(lockp);
 717  718                  mutex_enter(&ns->netstack_lock);
 718  719                  ns->netstack_modules[moduleid] = result;
 719  720                  nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
 720  721                  nms->nms_flags |= NSS_CREATE_COMPLETED;
 721  722                  cv_broadcast(&nms->nms_cv);
 722  723                  DTRACE_PROBE2(netstack__create__completed,
 723  724                      netstack_t *, ns, int, moduleid);
 724  725                  mutex_exit(&ns->netstack_lock);
 725  726                  return (dropped);
 726  727          } else {
 727  728                  mutex_exit(&ns->netstack_lock);
 728  729                  return (dropped);
 729  730          }
 730  731  }
 731  732  
 732  733  /*
 733  734   * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 734  735   * is set.
 735  736   * If some other thread gets here first and sets *_INPROGRESS, then
 736  737   * we wait for that thread to complete so that we can ensure that
 737  738   * all the callbacks are done when we've looped over all netstacks/moduleids.
 738  739   *
 739  740   * When we call the shutdown function, we temporarily drop the netstack_lock
 740  741   * held by the caller, and return true to tell the caller it needs to
 741  742   * re-evaluate the state.
 742  743   */
 743  744  static boolean_t
 744  745  netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
 745  746  {
 746  747          netstackid_t stackid;
 747  748          void * netstack_module;
 748  749          nm_state_t *nms = &ns->netstack_m_state[moduleid];
 749  750          boolean_t dropped = B_FALSE;
 750  751  
 751  752          ASSERT(MUTEX_HELD(lockp));
 752  753          mutex_enter(&ns->netstack_lock);
 753  754  
 754  755          if (wait_for_nms_inprogress(ns, nms, lockp))
 755  756                  dropped = B_TRUE;
 756  757  
 757  758          if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
 758  759                  nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
 759  760                  nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
 760  761                  DTRACE_PROBE2(netstack__shutdown__inprogress,
 761  762                      netstack_t *, ns, int, moduleid);
 762  763                  mutex_exit(&ns->netstack_lock);
 763  764                  mutex_exit(lockp);
 764  765                  dropped = B_TRUE;
 765  766  
 766  767                  ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
 767  768                  stackid = ns->netstack_stackid;
 768  769                  netstack_module = ns->netstack_modules[moduleid];
 769  770                  DTRACE_PROBE2(netstack__shutdown__start,
 770  771                      netstackid_t, stackid,
 771  772                      void *, netstack_module);
 772  773                  (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
 773  774                  DTRACE_PROBE1(netstack__shutdown__end,
 774  775                      netstack_t *, ns);
 775  776  
 776  777                  mutex_enter(lockp);
 777  778                  mutex_enter(&ns->netstack_lock);
 778  779                  nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
 779  780                  nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
 780  781                  cv_broadcast(&nms->nms_cv);
 781  782                  DTRACE_PROBE2(netstack__shutdown__completed,
 782  783                      netstack_t *, ns, int, moduleid);
 783  784                  mutex_exit(&ns->netstack_lock);
 784  785                  return (dropped);
 785  786          } else {
 786  787                  mutex_exit(&ns->netstack_lock);
 787  788                  return (dropped);
 788  789          }
 789  790  }
 790  791  
 791  792  /*
 792  793   * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 793  794   * is set.
 794  795   * If some other thread gets here first and sets *_INPROGRESS, then
 795  796   * we wait for that thread to complete so that we can ensure that
 796  797   * all the callbacks are done when we've looped over all netstacks/moduleids.
 797  798   *
 798  799   * When we call the destroy function, we temporarily drop the netstack_lock
 799  800   * held by the caller, and return true to tell the caller it needs to
 800  801   * re-evaluate the state.
 801  802   */
 802  803  static boolean_t
 803  804  netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
 804  805  {
 805  806          netstackid_t stackid;
 806  807          void * netstack_module;
 807  808          nm_state_t *nms = &ns->netstack_m_state[moduleid];
 808  809          boolean_t dropped = B_FALSE;
 809  810  
 810  811          ASSERT(MUTEX_HELD(lockp));
 811  812          mutex_enter(&ns->netstack_lock);
 812  813  
 813  814          if (wait_for_nms_inprogress(ns, nms, lockp))
 814  815                  dropped = B_TRUE;
 815  816  
 816  817          if (nms->nms_flags & NSS_DESTROY_NEEDED) {
 817  818                  nms->nms_flags &= ~NSS_DESTROY_NEEDED;
 818  819                  nms->nms_flags |= NSS_DESTROY_INPROGRESS;
 819  820                  DTRACE_PROBE2(netstack__destroy__inprogress,
 820  821                      netstack_t *, ns, int, moduleid);
 821  822                  mutex_exit(&ns->netstack_lock);
 822  823                  mutex_exit(lockp);
 823  824                  dropped = B_TRUE;
 824  825  
 825  826                  ASSERT(ns_reg[moduleid].nr_destroy != NULL);
 826  827                  stackid = ns->netstack_stackid;
 827  828                  netstack_module = ns->netstack_modules[moduleid];
 828  829                  DTRACE_PROBE2(netstack__destroy__start,
 829  830                      netstackid_t, stackid,
 830  831                      void *, netstack_module);
 831  832                  (ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
 832  833                  DTRACE_PROBE1(netstack__destroy__end,
 833  834                      netstack_t *, ns);
 834  835  
 835  836                  mutex_enter(lockp);
 836  837                  mutex_enter(&ns->netstack_lock);
 837  838                  ns->netstack_modules[moduleid] = NULL;
 838  839                  nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
 839  840                  nms->nms_flags |= NSS_DESTROY_COMPLETED;
 840  841                  cv_broadcast(&nms->nms_cv);
 841  842                  DTRACE_PROBE2(netstack__destroy__completed,
 842  843                      netstack_t *, ns, int, moduleid);
 843  844                  mutex_exit(&ns->netstack_lock);
 844  845                  return (dropped);
 845  846          } else {
 846  847                  mutex_exit(&ns->netstack_lock);
 847  848                  return (dropped);
 848  849          }
 849  850  }
 850  851  
 851  852  /*
 852  853   * If somebody is creating the netstack (due to a new zone being created)
 853  854   * then we wait for them to complete. This ensures that any additional
 854  855   * netstack_register() doesn't cause the create functions to run out of
 855  856   * order.
 856  857   * Note that we do not need such a global wait in the case of the shutdown
 857  858   * and destroy callbacks, since in that case it is sufficient for both
 858  859   * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 859  860   * Returns true if lockp was temporarily dropped while waiting.
 860  861   */
 861  862  static boolean_t
 862  863  wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
 863  864  {
 864  865          boolean_t dropped = B_FALSE;
 865  866  
 866  867          mutex_enter(&ns->netstack_lock);
 867  868          while (ns->netstack_flags & NSF_ZONE_CREATE) {
 868  869                  DTRACE_PROBE1(netstack__wait__zone__inprogress,
 869  870                      netstack_t *, ns);
 870  871                  if (lockp != NULL) {
 871  872                          dropped = B_TRUE;
 872  873                          mutex_exit(lockp);
 873  874                  }
 874  875                  cv_wait(&ns->netstack_cv, &ns->netstack_lock);
 875  876                  if (lockp != NULL) {
 876  877                          /* First drop netstack_lock to preserve order */
 877  878                          mutex_exit(&ns->netstack_lock);
 878  879                          mutex_enter(lockp);
 879  880                          mutex_enter(&ns->netstack_lock);
 880  881                  }
 881  882          }
 882  883          mutex_exit(&ns->netstack_lock);
 883  884          return (dropped);
 884  885  }
 885  886  
 886  887  /*
 887  888   * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 888  889   * combination.
 889  890   * Returns true if lockp was temporarily dropped while waiting.
 890  891   */
 891  892  static boolean_t
 892  893  wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
 893  894  {
 894  895          boolean_t dropped = B_FALSE;
 895  896  
 896  897          while (nms->nms_flags & NSS_ALL_INPROGRESS) {
 897  898                  DTRACE_PROBE2(netstack__wait__nms__inprogress,
 898  899                      netstack_t *, ns, nm_state_t *, nms);
 899  900                  if (lockp != NULL) {
 900  901                          dropped = B_TRUE;
 901  902                          mutex_exit(lockp);
 902  903                  }
 903  904                  cv_wait(&nms->nms_cv, &ns->netstack_lock);
 904  905                  if (lockp != NULL) {
 905  906                          /* First drop netstack_lock to preserve order */
 906  907                          mutex_exit(&ns->netstack_lock);
 907  908                          mutex_enter(lockp);
 908  909                          mutex_enter(&ns->netstack_lock);
 909  910                  }
 910  911          }
 911  912          return (dropped);
 912  913  }
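
Both wait functions use the same lock-order dance, since the ordering in this file is netstack_g_lock before ns->netstack_lock and cv_wait() wakes up holding only the inner lock. Distilled, with outer and inner as stand-in names:

        mutex_exit(&outer);             /* can't sleep holding both locks */
        cv_wait(&cv, &inner);           /* wakes up holding only inner */
        mutex_exit(&inner);             /* drop inner first ... */
        mutex_enter(&outer);            /* ... retake outer ... */
        mutex_enter(&inner);            /* ... then inner: order preserved */

Retaking the outer lock while still holding the inner one would invert the documented order and invite deadlock; that is what the "First drop netstack_lock to preserve order" comments are about.
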
 913  914  
 914  915  /*
 915  916   * Get the stack instance used in caller's zone.
 916  917   * Increases the reference count, caller must do a netstack_rele.
 917  918   * It can't be called after zone_destroy() has started.
 918  919   */
 919  920  netstack_t *
 920  921  netstack_get_current(void)
 921  922  {
 922  923          netstack_t *ns;
 923  924  
 924  925          ns = curproc->p_zone->zone_netstack;
 925  926          ASSERT(ns != NULL);
 926  927          if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
 927  928                  return (NULL);
 928  929  
 929  930          netstack_hold(ns);
 930  931  
 931  932          return (ns);
 932  933  }
 933  934  
 934  935  /*
 935  936   * Find a stack instance given the cred.
 936  937   * This is used by the modules to potentially allow for a future when
 937  938   * something other than the zoneid is used to determine the stack.
 938  939   */
 939  940  netstack_t *
 940  941  netstack_find_by_cred(const cred_t *cr)
 941  942  {
 942  943          zoneid_t zoneid = crgetzoneid(cr);
 943  944  
 944  945          /* Handle the case when cr_zone is NULL */
 945  946          if (zoneid == (zoneid_t)-1)
 946  947                  zoneid = GLOBAL_ZONEID;
 947  948  
 948  949          /* For performance ... */
 949  950          if (curproc->p_zone->zone_id == zoneid)
 950  951                  return (netstack_get_current());
 951  952          else
 952  953                  return (netstack_find_by_zoneid(zoneid));
 953  954  }
 954  955  
 955  956  /*
 956  957   * Find a stack instance given the zoneid.
 957  958   * Increases the reference count if found; caller must do a
 958  959   * netstack_rele().
 959  960   *
 960  961   * If there is no exact match then assume the shared stack instance
 961  962   * matches.
 962  963   *
 963  964   * Skip the uninitialized ones.
 964  965   */
 965  966  netstack_t *
 966  967  netstack_find_by_zoneid(zoneid_t zoneid)
 967  968  {
 968  969          netstack_t *ns;
 969  970          zone_t *zone;
 970  971  
 971  972          zone = zone_find_by_id(zoneid);
 972  973  
 973  974          if (zone == NULL)
 974  975                  return (NULL);
 975  976  
 976  977          ns = zone->zone_netstack;
 977  978          ASSERT(ns != NULL);
 978  979          if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
 979  980                  ns = NULL;
 980  981          else
 981  982                  netstack_hold(ns);
 982  983  
 983  984          zone_rele(zone);
 984  985          return (ns);
 985  986  }
 986  987  
 987  988  /*
 988  989   * Find a stack instance given the zoneid. Can only be called from
 989  990   * the create callback. See the comments in zone_find_by_id_nolock why
 990  991   * that limitation exists.
 991  992   *
 992  993   * Increases the reference count if found; caller must do a
 993  994   * netstack_rele().
 994  995   *
 995  996   * If there is no exact match then assume the shared stack instance
 996  997   * matches.
 997  998   *
 998  999   * Skip the uninitialized ones.
 999 1000   */
1000 1001  netstack_t *
1001 1002  netstack_find_by_zoneid_nolock(zoneid_t zoneid)
1002 1003  {
1003 1004          netstack_t *ns;
1004 1005          zone_t *zone;
1005 1006  
1006 1007          zone = zone_find_by_id_nolock(zoneid);
1007 1008  
1008 1009          if (zone == NULL)
1009 1010                  return (NULL);
1010 1011  
1011 1012          ns = zone->zone_netstack;
1012 1013          ASSERT(ns != NULL);
1013 1014  
1014 1015          if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
1015 1016                  ns = NULL;
1016 1017          else
1017 1018                  netstack_hold(ns);
1018 1019  
1019 1020          /* zone_find_by_id_nolock does not have a hold on the zone */
1020 1021          return (ns);
1021 1022  }
1022 1023  
1023 1024  /*
1024 1025   * Find a stack instance given the stackid; requires an exact match.
1025 1026   * Increases the reference count if found; caller must do a
1026 1027   * netstack_rele().
1027 1028   *
1028 1029   * Skip the uninitialized ones.
1029 1030   */
  
  
1030 1031  netstack_t *
1031 1032  netstack_find_by_stackid(netstackid_t stackid)
1032 1033  {
1033 1034          netstack_t *ns;
1034 1035  
1035 1036          mutex_enter(&netstack_g_lock);
1036 1037          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1037 1038                  mutex_enter(&ns->netstack_lock);
1038 1039                  if (ns->netstack_stackid == stackid &&
1039 1040                      !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
     1041 +                        netstack_hold_locked(ns);
1040 1042                          mutex_exit(&ns->netstack_lock);
1041      -                        netstack_hold(ns);
1042 1043                          mutex_exit(&netstack_g_lock);
1043 1044                          return (ns);
1044 1045                  }
1045 1046                  mutex_exit(&ns->netstack_lock);
1046 1047          }
1047 1048          mutex_exit(&netstack_g_lock);
1048 1049          return (NULL);
1049 1050  }
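
This hunk is the fix named in the synopsis. An illustrative reconstruction of the before and after sequences (not literal tree code) shows the window being closed: the old code dropped ns->netstack_lock and only then called netstack_hold(), which re-enters the same lock, so a concurrent netstack_rele() could take netstack_refcnt to zero and schedule the reap in between:

        /* Before: drop, then reacquire inside netstack_hold() -- racy. */
        mutex_exit(&ns->netstack_lock);
        /* window: netstack_rele() elsewhere can reap ns here */
        netstack_hold(ns);

        /* After: the reference is taken under the same hold of the lock
         * that checked NSF_UNINIT and NSF_CLOSING. */
        netstack_hold_locked(ns);
        mutex_exit(&ns->netstack_lock);
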
1050 1051  
1051 1052  boolean_t
1052 1053  netstack_inuse_by_stackid(netstackid_t stackid)
1053 1054  {
1054 1055          netstack_t *ns;
1055 1056          boolean_t rval = B_FALSE;
1056 1057  
1057 1058          mutex_enter(&netstack_g_lock);
1058 1059  
1059 1060          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1060 1061                  if (ns->netstack_stackid == stackid) {
1061 1062                          rval = B_TRUE;
1062 1063                          break;
1063 1064                  }
1064 1065          }
1065 1066  
1066 1067          mutex_exit(&netstack_g_lock);
1067 1068  
1068 1069          return (rval);
1069 1070  }
1070 1071  
1071 1072  
1072 1073  static void
1073 1074  netstack_reap(void *arg)
1074 1075  {
1075 1076          /* Indicate we took a semaphore to get here. */
1076 1077          netstack_reap_work((netstack_t *)arg, B_TRUE);
1077 1078  }
1078 1079  
1079 1080  static void
1080 1081  netstack_reap_intr(void *arg)
1081 1082  {
1082 1083          /* Indicate we did NOT TAKE a semaphore to get here. */
1083 1084          netstack_reap_work((netstack_t *)arg, B_FALSE);
1084 1085  }
1085 1086  
1086 1087  static void
1087 1088  netstack_reap_work(netstack_t *ns, boolean_t semaphore_signal)
1088 1089  {
1089 1090          netstack_t **nsp;
1090 1091          boolean_t found;
1091 1092          int i;
1092 1093  
1093 1094          /*
1094 1095           * Time to call the destroy functions and free up
1095 1096           * the structure
1096 1097           */
1097 1098          netstack_stack_inactive(ns);
1098 1099  
1099 1100          /* Make sure nothing increased the references */
1100 1101          ASSERT(ns->netstack_refcnt == 0);
1101 1102          ASSERT(ns->netstack_numzones == 0);
1102 1103  
1103 1104          /* Finally remove from list of netstacks */
1104 1105          mutex_enter(&netstack_g_lock);
1105 1106          found = B_FALSE;
1106 1107          for (nsp = &netstack_head; *nsp != NULL;
1107 1108               nsp = &(*nsp)->netstack_next) {
1108 1109                  if (*nsp == ns) {
1109 1110                          *nsp = ns->netstack_next;
1110 1111                          ns->netstack_next = NULL;
1111 1112                          found = B_TRUE;
1112 1113                          break;
1113 1114                  }
1114 1115          }
1115 1116          ASSERT(found);
1116 1117          mutex_exit(&netstack_g_lock);
1117 1118  
1118 1119          /* Make sure nothing increased the references */
1119 1120          ASSERT(ns->netstack_refcnt == 0);
1120 1121          ASSERT(ns->netstack_numzones == 0);
1121 1122  
1122 1123          ASSERT(ns->netstack_flags & NSF_CLOSING);
1123 1124  
1124 1125          for (i = 0; i < NS_MAX; i++) {
1125 1126                  nm_state_t *nms = &ns->netstack_m_state[i];
1126 1127  
1127 1128                  cv_destroy(&nms->nms_cv);
1128 1129          }
1129 1130          mutex_destroy(&ns->netstack_lock);
1130 1131          cv_destroy(&ns->netstack_cv);
1131 1132          kmem_free(ns, sizeof (*ns));
1132 1133          /* Allow another reap to be scheduled. */
1133 1134          if (semaphore_signal)
1134 1135                  sema_v(&netstack_reap_limiter);
1135 1136  }
1136 1137  
1137 1138  void
1138 1139  netstack_rele(netstack_t *ns)
1139 1140  {
1140 1141          int refcnt, numzones;
1141 1142  
1142 1143          mutex_enter(&ns->netstack_lock);
1143 1144          ASSERT(ns->netstack_refcnt > 0);
1144 1145          ns->netstack_refcnt--;
1145 1146          /*
1146 1147           * As we drop the lock additional netstack_rele()s can come in
1147 1148           * and decrement the refcnt to zero and free the netstack_t.
1148 1149           * Store pointers in local variables and if we were not the last
1149 1150           * then don't reference the netstack_t after that.
1150 1151           */
1151 1152          refcnt = ns->netstack_refcnt;
1152 1153          numzones = ns->netstack_numzones;
1153 1154          DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1154 1155          mutex_exit(&ns->netstack_lock);
1155 1156  
1156 1157          if (refcnt == 0 && numzones == 0) {
1157 1158                  boolean_t is_not_intr = !servicing_interrupt();
1158 1159  
1159 1160                  /*
1160 1161                   * Because there are possibilities of kstats being held by
1161 1162                   * callers, which would then be immediately freed, but held up
1162 1163                   * due to kstat's odd reference model recording the thread, we
1163 1164                   * choose to schedule the actual deletion of this netstack as
1164 1165                   * a deferred task on the system taskq.  This way, any
1165 1166                   * store-the-thread-pointer semantics won't trip over
1166 1167                   * themselves.
1167 1168                   *
1168 1169                   * On the off chance this is called in interrupt context, we
1169 1170                   * cannot use the semaphore to enforce rate-limiting.
1170 1171                   */
1171 1172                  if (is_not_intr && sema_tryp(&netstack_reap_limiter) == 0) {
1172 1173                          /*
1173 1174                           * XXX KEBE SAYS indicate we're slamming against
1174 1175                           * a limit.
1175 1176                           */
1176 1177                          hrtime_t measurement = gethrtime();
1177 1178  
1178 1179                          sema_p(&netstack_reap_limiter);
1179 1180                          /* Capture delay in ns. */
1180 1181                          DTRACE_PROBE1(netstack__reap__rate__limited,
1181 1182                              hrtime_t *, gethrtime() - measurement);
1182 1183                  }
1183 1184  
1184 1185                  if (taskq_dispatch(system_taskq,
1185 1186                      is_not_intr ? netstack_reap : netstack_reap_intr, ns,
1186 1187                      TQ_NOSLEEP) == NULL) {
1187 1188                          /*
  
  
1188 1189                           * Well shoot, why can't we taskq_dispatch?
1189 1190                           * Take our chances with a direct call.
1190 1191                           */
1191 1192                          DTRACE_PROBE1(netstack__reap__taskq__fail,
1192 1193                              netstack_t *, ns);
1193 1194                          netstack_reap_work(ns, is_not_intr);
1194 1195                  }
1195 1196          }
1196 1197  }
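To make the reference contract above concrete: every lookup or hold must be balanced by exactly one netstack_rele(), and since the final rele may free the netstack_t from taskq context, the caller must not touch it afterwards. A minimal caller-side sketch, using netstack_get_current(), assumed here to return a held netstack or NULL:

        netstack_t *ns;

        if ((ns = netstack_get_current()) != NULL) {
                /* use the stack's state while the hold is in place */
                netstack_rele(ns);      /* may schedule the deferred reap */
                /* ns must not be dereferenced past this point */
        }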
1197 1198  
     1199 +static void
     1200 +netstack_hold_locked(netstack_t *ns)
     1201 +{
     1202 +        ASSERT(MUTEX_HELD(&ns->netstack_lock));
     1203 +        ns->netstack_refcnt++;
     1204 +        ASSERT(ns->netstack_refcnt > 0);
     1205 +        DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
     1206 +}
     1207 +
1198 1208  void
1199 1209  netstack_hold(netstack_t *ns)
1200 1210  {
1201 1211          mutex_enter(&ns->netstack_lock);
1202      -        ns->netstack_refcnt++;
1203      -        ASSERT(ns->netstack_refcnt > 0);
     1212 +        netstack_hold_locked(ns);
1204 1213          mutex_exit(&ns->netstack_lock);
1205      -        DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1206 1214  }
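netstack_hold_locked() exists so that a lookup can take its reference while ns->netstack_lock is still held, rather than dropping the per-stack lock and calling netstack_hold() afterwards; that is the drop-and-reacquire window this change is named for. The netstack_find_by_stackid() hunk itself falls in an elided region, so the following is a hedged reconstruction of the pattern the new helper enables, not the actual hunk:

        netstack_t *
        netstack_find_by_stackid(netstackid_t stackid)
        {
                netstack_t *ns;

                mutex_enter(&netstack_g_lock);
                for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
                        mutex_enter(&ns->netstack_lock);
                        if (ns->netstack_stackid == stackid &&
                            !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
                                /*
                                 * Take the reference before dropping
                                 * netstack_lock; no window remains in
                                 * which another thread can drive the
                                 * refcnt to zero and free the netstack_t.
                                 */
                                netstack_hold_locked(ns);
                                mutex_exit(&ns->netstack_lock);
                                mutex_exit(&netstack_g_lock);
                                return (ns);
                        }
                        mutex_exit(&ns->netstack_lock);
                }
                mutex_exit(&netstack_g_lock);
                return (NULL);
        }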
1207 1215  
1208 1216  /*
1209 1217   * To support kstat_create_netstack() using kstat_zone_add we need
1210 1218   * to track both
1211 1219   *  - all zoneids that use the global/shared stack
1212 1220   *  - all kstats that have been added for the shared stack
1213 1221   */
1214 1222  kstat_t *
1215 1223  kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1216 1224      char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1217 1225      netstackid_t ks_netstackid)
1218 1226  {
1219 1227          kstat_t *ks;
1220 1228  
1221 1229          if (ks_netstackid == GLOBAL_NETSTACKID) {
1222 1230                  ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1223 1231                      ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1224 1232                  if (ks != NULL)
1225 1233                          netstack_shared_kstat_add(ks);
1226 1234                  return (ks);
1227 1235          } else {
1228 1236                  zoneid_t zoneid = ks_netstackid;
1229 1237  
1230 1238                  return (kstat_create_zone(ks_module, ks_instance, ks_name,
1231 1239                      ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1232 1240          }
1233 1241  }
1234 1242  
1235 1243  void
1236 1244  kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1237 1245  {
1238 1246          if (ks_netstackid == GLOBAL_NETSTACKID) {
1239 1247                  netstack_shared_kstat_remove(ks);
1240 1248          }
1241 1249          kstat_delete(ks);
1242 1250  }
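A hedged usage sketch for the pair above; the module, name, and class strings are invented, and ns is assumed to be a held netstack_t. Creating against the stack's netstackid_t (rather than a plain zoneid) is what lets the shared-stack bookkeeping below make the kstat visible in every zone that uses the shared stack:

        kstat_t *ks;

        /* Names here are illustrative, not from the source. */
        ks = kstat_create_netstack("mymod", 0, "mystats", "net",
            KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL,
            ns->netstack_stackid);
        if (ks != NULL)
                kstat_install(ks);
        /* ... later, tear it down with the same stackid ... */
        kstat_delete_netstack(ks, ns->netstack_stackid);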
1243 1251  
1244 1252  static void
1245 1253  netstack_shared_zone_add(zoneid_t zoneid)
1246 1254  {
1247 1255          struct shared_zone_list *sz;
1248 1256          struct shared_kstat_list *sk;
1249 1257  
1250 1258          sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1251 1259          sz->sz_zoneid = zoneid;
1252 1260  
1253 1261          /* Insert in list */
1254 1262          mutex_enter(&netstack_shared_lock);
1255 1263          sz->sz_next = netstack_shared_zones;
1256 1264          netstack_shared_zones = sz;
1257 1265  
1258 1266          /*
1259 1267           * Perform kstat_zone_add for each existing shared stack kstat.
1260 1268           * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1261 1269           */
1262 1270          for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1263 1271                  kstat_zone_add(sk->sk_kstat, zoneid);
1264 1272          }
1265 1273          mutex_exit(&netstack_shared_lock);
1266 1274  }
1267 1275  
1268 1276  static void
1269 1277  netstack_shared_zone_remove(zoneid_t zoneid)
1270 1278  {
1271 1279          struct shared_zone_list **szp, *sz;
1272 1280          struct shared_kstat_list *sk;
1273 1281  
1274 1282          /* Find in list */
1275 1283          mutex_enter(&netstack_shared_lock);
1276 1284          sz = NULL;
1277 1285          for (szp = &netstack_shared_zones; *szp != NULL;
1278 1286              szp = &((*szp)->sz_next)) {
1279 1287                  if ((*szp)->sz_zoneid == zoneid) {
1280 1288                          sz = *szp;
1281 1289                          break;
1282 1290                  }
1283 1291          }
1284 1292          /* We must find it */
1285 1293          ASSERT(sz != NULL);
1286 1294          *szp = sz->sz_next;
1287 1295          sz->sz_next = NULL;
1288 1296  
1289 1297          /*
1290 1298           * Perform kstat_zone_remove for each existing shared stack kstat.
1291 1299           * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1292 1300           */
1293 1301          for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1294 1302                  kstat_zone_remove(sk->sk_kstat, zoneid);
1295 1303          }
1296 1304          mutex_exit(&netstack_shared_lock);
1297 1305  
1298 1306          kmem_free(sz, sizeof (*sz));
1299 1307  }
1300 1308  
1301 1309  static void
1302 1310  netstack_shared_kstat_add(kstat_t *ks)
1303 1311  {
1304 1312          struct shared_zone_list *sz;
1305 1313          struct shared_kstat_list *sk;
1306 1314  
1307 1315          sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1308 1316          sk->sk_kstat = ks;
1309 1317  
1310 1318          /* Insert in list */
1311 1319          mutex_enter(&netstack_shared_lock);
1312 1320          sk->sk_next = netstack_shared_kstats;
1313 1321          netstack_shared_kstats = sk;
1314 1322  
1315 1323          /*
1316 1324           * Perform kstat_zone_add for each existing shared stack zone.
1317 1325           * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1318 1326           */
1319 1327          for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1320 1328                  kstat_zone_add(ks, sz->sz_zoneid);
1321 1329          }
1322 1330          mutex_exit(&netstack_shared_lock);
1323 1331  }
1324 1332  
1325 1333  static void
1326 1334  netstack_shared_kstat_remove(kstat_t *ks)
1327 1335  {
1328 1336          struct shared_zone_list *sz;
1329 1337          struct shared_kstat_list **skp, *sk;
1330 1338  
1331 1339          /* Find in list */
1332 1340          mutex_enter(&netstack_shared_lock);
1333 1341          sk = NULL;
1334 1342          for (skp = &netstack_shared_kstats; *skp != NULL;
1335 1343              skp = &((*skp)->sk_next)) {
1336 1344                  if ((*skp)->sk_kstat == ks) {
1337 1345                          sk = *skp;
1338 1346                          break;
1339 1347                  }
1340 1348          }
1341 1349          /* Must find it */
1342 1350          ASSERT(sk != NULL);
1343 1351          *skp = sk->sk_next;
1344 1352          sk->sk_next = NULL;
1345 1353  
1346 1354          /*
1347 1355           * Perform kstat_zone_remove for each existing shared stack kstat.
1348 1356           * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1349 1357           */
1350 1358          for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1351 1359                  kstat_zone_remove(ks, sz->sz_zoneid);
1352 1360          }
1353 1361          mutex_exit(&netstack_shared_lock);
1354 1362          kmem_free(sk, sizeof (*sk));
1355 1363  }
1356 1364  
1357 1365  /*
1358 1366   * If a zoneid uses the shared (global) stack, return true
1359 1367   */
1360 1368  static boolean_t
1361 1369  netstack_find_shared_zoneid(zoneid_t zoneid)
1362 1370  {
1363 1371          struct shared_zone_list *sz;
1364 1372  
1365 1373          mutex_enter(&netstack_shared_lock);
1366 1374          for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1367 1375                  if (sz->sz_zoneid == zoneid) {
1368 1376                          mutex_exit(&netstack_shared_lock);
1369 1377                          return (B_TRUE);
1370 1378                  }
1371 1379          }
1372 1380          mutex_exit(&netstack_shared_lock);
1373 1381          return (B_FALSE);
1374 1382  }
1375 1383  
1376 1384  /*
1377 1385   * Hide the fact that zoneids and netstackids are allocated from
1378 1386   * the same space in the current implementation.
1379 1387   * We currently do not check that the stackids/zoneids are valid, since
1380 1388   * there is no need to.  Callers, however, should only pass in ids that
1381 1389   * are valid.
1382 1390   */
1383 1391  zoneid_t
1384 1392  netstackid_to_zoneid(netstackid_t stackid)
1385 1393  {
1386 1394          return (stackid);
1387 1395  }
1388 1396  
1389 1397  netstackid_t
1390 1398  zoneid_to_netstackid(zoneid_t zoneid)
1391 1399  {
1392 1400          if (netstack_find_shared_zoneid(zoneid))
1393 1401                  return (GLOBAL_ZONEID);
1394 1402          else
1395 1403                  return (zoneid);
1396 1404  }
1397 1405  
1398 1406  zoneid_t
1399 1407  netstack_get_zoneid(netstack_t *ns)
1400 1408  {
1401 1409          return (netstackid_to_zoneid(ns->netstack_stackid));
1402 1410  }
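Tying the three mapping functions together, a small hedged round-trip example (zoneid is assumed valid, per the comment above):

        netstackid_t stackid = zoneid_to_netstackid(zoneid);
        netstack_t *ns = netstack_find_by_stackid(stackid);

        if (ns != NULL) {
                /* The identity mapping makes the round trip exact. */
                ASSERT(netstack_get_zoneid(ns) ==
                    netstackid_to_zoneid(stackid));
                netstack_rele(ns);
        }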
1403 1411  
1404 1412  /*
1405 1413   * Simplistic support for walking all the handles.
1406 1414   * Example usage:
1407 1415   *      netstack_handle_t nh;
1408 1416   *      netstack_t *ns;
1409 1417   *
1410 1418   *      netstack_next_init(&nh);
1411 1419   *      while ((ns = netstack_next(&nh)) != NULL) {
1412 1420   *              do something;
1413 1421   *              netstack_rele(ns);
1414 1422   *      }
1415 1423   *      netstack_next_fini(&nh);
1416 1424   */
1417 1425  void
1418 1426  netstack_next_init(netstack_handle_t *handle)
1419 1427  {
1420 1428          *handle = 0;
1421 1429  }
1422 1430  
1423 1431  /* ARGSUSED */
1424 1432  void
1425 1433  netstack_next_fini(netstack_handle_t *handle)
1426 1434  {
1427 1435  }
1428 1436  
1429 1437  netstack_t *
1430 1438  netstack_next(netstack_handle_t *handle)
1431 1439  {
1432 1440          netstack_t *ns;
1433 1441          int i, end;
1434 1442  
1435 1443          end = *handle;
1436 1444          /* Walk the list, skipping the first *handle instances */
1437 1445  
1438 1446          /* See if there is a matching stack instance */
1439 1447          mutex_enter(&netstack_g_lock);
1440 1448          ns = netstack_head;
1441 1449          for (i = 0; i < end; i++) {
1442 1450                  if (ns == NULL)
1443 1451                          break;
1444 1452                  ns = ns->netstack_next;
1445 1453          }
1446 1454          /* skip those that aren't really here */
1447 1455          while (ns != NULL) {
1448 1456                  mutex_enter(&ns->netstack_lock);
1449 1457                  if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
1450 1458                          mutex_exit(&ns->netstack_lock);
1451 1459                          break;
1452 1460                  }
1453 1461                  mutex_exit(&ns->netstack_lock);
1454 1462                  end++;
1455 1463                  ns = ns->netstack_next;
1456 1464          }
1457 1465          if (ns != NULL) {
1458 1466                  *handle = end + 1;
1459 1467                  netstack_hold(ns);
1460 1468          }
1461 1469          mutex_exit(&netstack_g_lock);
1462 1470          return (ns);
1463 1471  }
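Expanding the usage comment above into a concrete, hedged example that counts the stacks currently visible to the walker. Because the handle is just a skip count, the walk is not atomic with respect to stack creation and deletion; the hold taken by netstack_next() keeps each returned stack alive until the matching rele:

        netstack_handle_t nh;
        netstack_t *ns;
        int nstacks = 0;

        netstack_next_init(&nh);
        while ((ns = netstack_next(&nh)) != NULL) {
                nstacks++;
                netstack_rele(ns);
        }
        netstack_next_fini(&nh);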
     248 lines elided