8900 deadlock between netstack teardown and kstat read
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 20,30 ****
   */
  
  /*
   * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
   * Use is subject to license terms.
!  * Copyright (c) 2016, Joyent, Inc. All rights reserved.
   */
  
  #include <sys/param.h>
  #include <sys/sysmacros.h>
  #include <sys/vm.h>
--- 20,30 ----
   */
  
  /*
   * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
   * Use is subject to license terms.
!  * Copyright (c) 2017, Joyent, Inc. All rights reserved.
   */
  
  #include <sys/param.h>
  #include <sys/sysmacros.h>
  #include <sys/vm.h>
*** 35,44 ****
--- 35,45 ----
  #include <sys/debug.h>
  #include <sys/sdt.h>
  #include <sys/mutex.h>
  #include <sys/bitmap.h>
  #include <sys/atomic.h>
+ #include <sys/sunddi.h>
  #include <sys/kobj.h>
  #include <sys/disp.h>
  #include <vm/seg_kmem.h>
  #include <sys/zone.h>
  #include <sys/netstack.h>
*** 120,135 ****
--- 121,146 ----
  static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
  static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
  static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
      kmutex_t *);
  
+ static ksema_t netstack_reap_limiter;
+ /*
+  * Hard-coded constant, but since this is not tunable in real-time, it seems
+  * making it an /etc/system tunable is better than nothing.
+  */
+ uint_t netstack_outstanding_reaps = 1024;
+ 
  void
  netstack_init(void)
  {
  	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
  	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
  
+ 	sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
+ 	    SEMA_DRIVER, NULL);
+ 
  	netstack_initialized = 1;
  
  	/*
  	 * We want to be informed each time a zone is created or
  	 * destroyed in the kernel, so we can maintain the
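The netstack_reap_limiter added above is a counting semaphore that caps how many
netstack reaps may be outstanding at once, seeded with netstack_outstanding_reaps
tokens in netstack_init(). As a purely illustrative aside, the same throttle
pattern can be restated in userland C with POSIX semaphores. This is a minimal
sketch, not part of the change: task_limiter and MAX_OUTSTANDING are invented
names, and sem_init/sem_trywait/sem_wait/sem_post stand in for the kernel DDI's
sema_init/sema_tryp/sema_p/sema_v.

    /* Userland sketch of the counting-semaphore throttle used above. */
    #include <semaphore.h>
    #include <stdio.h>

    #define	MAX_OUTSTANDING	1024	/* analog of netstack_outstanding_reaps */

    static sem_t task_limiter;	/* analog of netstack_reap_limiter */

    int
    main(void)
    {
    	/* Seed the full budget, as sema_init() does in netstack_init(). */
    	if (sem_init(&task_limiter, 0, MAX_OUTSTANDING) != 0) {
    		perror("sem_init");
    		return (1);
    	}

    	/*
    	 * Fast path: take a token without blocking, like sema_tryp().
    	 * If the budget is exhausted, block until a completed task
    	 * returns a token.
    	 */
    	if (sem_trywait(&task_limiter) != 0)
    		(void) sem_wait(&task_limiter);	/* analog of sema_p() */

    	/* ... dispatch the deferred work here ... */

    	/* A finished task returns its token, like sema_v() in the reap. */
    	(void) sem_post(&task_limiter);

    	(void) sem_destroy(&task_limiter);
    	return (0);
    }

Seeding the semaphore with the full budget means dispatchers only block once the
limit is actually hit; each completed task returns a token, which is exactly the
role sema_v() plays at the end of netstack_reap() in a later hunk.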
*** 1059,1092 ****
  	mutex_exit(&netstack_g_lock);
  	return (rval);
  }
  
! void
! netstack_rele(netstack_t *ns)
  {
! 	netstack_t **nsp;
  	boolean_t found;
- 	int refcnt, numzones;
  	int i;
  
- 	mutex_enter(&ns->netstack_lock);
- 	ASSERT(ns->netstack_refcnt > 0);
- 	ns->netstack_refcnt--;
  	/*
- 	 * As we drop the lock additional netstack_rele()s can come in
- 	 * and decrement the refcnt to zero and free the netstack_t.
- 	 * Store pointers in local variables and if we were not the last
- 	 * then don't reference the netstack_t after that.
- 	 */
- 	refcnt = ns->netstack_refcnt;
- 	numzones = ns->netstack_numzones;
- 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
- 	mutex_exit(&ns->netstack_lock);
- 
- 	if (refcnt == 0 && numzones == 0) {
- 		/*
  	 * Time to call the destroy functions and free up
  	 * the structure
  	 */
  	netstack_stack_inactive(ns);
--- 1070,1088 ----
  	mutex_exit(&netstack_g_lock);
  	return (rval);
  }
  
! static void
! netstack_reap(void *arg)
  {
! 	netstack_t **nsp, *ns = (netstack_t *)arg;
  	boolean_t found;
  	int i;
  
  	/*
  	 * Time to call the destroy functions and free up
  	 * the structure
  	 */
  	netstack_stack_inactive(ns);
*** 1121,1131 ****
--- 1117,1177 ----
  		cv_destroy(&nms->nms_cv);
  	}
  	mutex_destroy(&ns->netstack_lock);
  	cv_destroy(&ns->netstack_cv);
  	kmem_free(ns, sizeof (*ns));
+ 	/* Allow another reap to be scheduled. */
+ 	sema_v(&netstack_reap_limiter);
+ }
+ 
+ void
+ netstack_rele(netstack_t *ns)
+ {
+ 	int refcnt, numzones;
+ 
+ 	mutex_enter(&ns->netstack_lock);
+ 	ASSERT(ns->netstack_refcnt > 0);
+ 	ns->netstack_refcnt--;
+ 	/*
+ 	 * As we drop the lock additional netstack_rele()s can come in
+ 	 * and decrement the refcnt to zero and free the netstack_t.
+ 	 * Store pointers in local variables and if we were not the last
+ 	 * then don't reference the netstack_t after that.
+ 	 */
+ 	refcnt = ns->netstack_refcnt;
+ 	numzones = ns->netstack_numzones;
+ 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
+ 	mutex_exit(&ns->netstack_lock);
+ 
+ 	if (refcnt == 0 && numzones == 0) {
+ 		/*
+ 		 * Because there are possibilities of re-entrancy in various
+ 		 * netstack structures by callers, which might cause a lock up
+ 		 * due to odd reference models, or other factors, we choose to
+ 		 * schedule the actual deletion of this netstack as a deferred
+ 		 * task on the system taskq.  This way, any such reference
+ 		 * models won't trip over themselves.
+ 		 *
+ 		 * Assume we aren't in a high-priority interrupt context, so
+ 		 * we can use KM_SLEEP and semaphores.
+ 		 */
+ 		if (sema_tryp(&netstack_reap_limiter) == 0) {
+ 			/*
+ 			 * Indicate we're slamming against a limit.
+ 			 */
+ 			hrtime_t measurement = gethrtime();
+ 
+ 			sema_p(&netstack_reap_limiter);
+ 			/* Capture delay in ns. */
+ 			DTRACE_PROBE1(netstack__reap__rate__limited,
+ 			    hrtime_t, gethrtime() - measurement);
+ 		}
+ 
+ 		/* TQ_SLEEP should prevent taskq_dispatch() from failing. */
+ 		(void) taskq_dispatch(system_taskq, netstack_reap, ns,
+ 		    TQ_SLEEP);
  	}
  }
  
  void
  netstack_hold(netstack_t *ns)
  {
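The heart of the fix is in this hunk: when the last reference drops,
netstack_rele() no longer runs the teardown inline, where a re-entrant caller
(such as a kstat reader holding netstack locks) could deadlock against it;
instead it hands the netstack to the system taskq. The following is a minimal
userland C sketch of that deferred-destruction pattern, under stated
assumptions: a toy obj_t type and a single worker thread stand in for the
netstack and system_taskq, and every name here (obj_t, obj_rele,
reap_queue_put, reaper) is hypothetical.

    /* Userland sketch of deferring destruction to a worker thread. */
    #include <pthread.h>
    #include <stdlib.h>

    typedef struct obj {
    	pthread_mutex_t	o_lock;
    	int		o_refcnt;
    	struct obj	*o_next;	/* reap-queue linkage */
    } obj_t;

    /* A trivial work queue drained by a worker; stands in for system_taskq. */
    static pthread_mutex_t	reap_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t	reap_cv = PTHREAD_COND_INITIALIZER;
    static obj_t		*reap_head;

    static void
    reap_queue_put(obj_t *op)
    {
    	(void) pthread_mutex_lock(&reap_lock);
    	op->o_next = reap_head;
    	reap_head = op;
    	(void) pthread_cond_signal(&reap_cv);
    	(void) pthread_mutex_unlock(&reap_lock);
    }

    void
    obj_rele(obj_t *op)
    {
    	int refcnt;

    	/* Drop the reference under the lock, as netstack_rele() does. */
    	(void) pthread_mutex_lock(&op->o_lock);
    	refcnt = --op->o_refcnt;
    	(void) pthread_mutex_unlock(&op->o_lock);

    	/*
    	 * Last reference: queue the destroy for the worker instead of
    	 * running it here, so a re-entrant caller cannot deadlock.
    	 */
    	if (refcnt == 0)
    		reap_queue_put(op);
    }

    void *
    reaper(void *arg)
    {
    	for (;;) {
    		obj_t *op;

    		(void) pthread_mutex_lock(&reap_lock);
    		while (reap_head == NULL)
    			(void) pthread_cond_wait(&reap_cv, &reap_lock);
    		op = reap_head;
    		reap_head = op->o_next;
    		(void) pthread_mutex_unlock(&reap_lock);

    		/* The actual teardown runs here, like netstack_reap(). */
    		(void) pthread_mutex_destroy(&op->o_lock);
    		free(op);
    	}
    	return (arg);	/* not reached */
    }

The key property is that obj_rele() holds no locks by the time the worker runs
the teardown, so a destructor that re-acquires subsystem locks cannot deadlock
against its caller; the semaphore in the earlier hunks then bounds how much
deferred work can pile up on the queue.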