8900 deadlock between netstack teardown and kstat read
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
@@ -20,11 +20,11 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2016, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
@@ -35,10 +35,11 @@
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
+#include <sys/sunddi.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>
@@ -120,16 +121,26 @@
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
kmutex_t *);
+static ksema_t netstack_reap_limiter;
+/*
+ * Hard-coded constant, but since this is not tunable in real-time, it seems
+ * making it an /etc/system tunable is better than nothing.
+ */
+uint_t netstack_outstanding_reaps = 1024;
+
void
netstack_init(void)
{
mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
+ sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
+ SEMA_DRIVER, NULL);
+
netstack_initialized = 1;
/*
* We want to be informed each time a zone is created or
* destroyed in the kernel, so we can maintain the
@@ -1059,34 +1070,19 @@
mutex_exit(&netstack_g_lock);
return (rval);
}
-void
-netstack_rele(netstack_t *ns)
+
+static void
+netstack_reap(void *arg)
{
- netstack_t **nsp;
+ netstack_t **nsp, *ns = (netstack_t *)arg;
boolean_t found;
- int refcnt, numzones;
int i;
- mutex_enter(&ns->netstack_lock);
- ASSERT(ns->netstack_refcnt > 0);
- ns->netstack_refcnt--;
/*
- * As we drop the lock additional netstack_rele()s can come in
- * and decrement the refcnt to zero and free the netstack_t.
- * Store pointers in local variables and if we were not the last
- * then don't reference the netstack_t after that.
- */
- refcnt = ns->netstack_refcnt;
- numzones = ns->netstack_numzones;
- DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
- mutex_exit(&ns->netstack_lock);
-
- if (refcnt == 0 && numzones == 0) {
- /*
* Time to call the destroy functions and free up
* the structure
*/
netstack_stack_inactive(ns);
@@ -1121,11 +1117,61 @@
cv_destroy(&nms->nms_cv);
}
mutex_destroy(&ns->netstack_lock);
cv_destroy(&ns->netstack_cv);
kmem_free(ns, sizeof (*ns));
+ /* Allow another reap to be scheduled. */
+ sema_v(&netstack_reap_limiter);
+}
+
+void
+netstack_rele(netstack_t *ns)
+{
+ int refcnt, numzones;
+
+ mutex_enter(&ns->netstack_lock);
+ ASSERT(ns->netstack_refcnt > 0);
+ ns->netstack_refcnt--;
+ /*
+ * As we drop the lock additional netstack_rele()s can come in
+ * and decrement the refcnt to zero and free the netstack_t.
+ * Store pointers in local variables and if we were not the last
+ * then don't reference the netstack_t after that.
+ */
+ refcnt = ns->netstack_refcnt;
+ numzones = ns->netstack_numzones;
+ DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
+ mutex_exit(&ns->netstack_lock);
+
+ if (refcnt == 0 && numzones == 0) {
+ /*
+ * Because there are possibilities of re-entrancy in various
+ * netstack structures by callers, which might cause a lock up
+ * due to odd reference models, or other factors, we choose to
+ * schedule the actual deletion of this netstack as a deferred
+ * task on the system taskq. This way, any such reference
+ * models won't trip over themselves.
+ *
+ * Assume we aren't in a high-priority interrupt context, so
+ * we can use KM_SLEEP and semaphores.
+ */
+ if (sema_tryp(&netstack_reap_limiter) == 0) {
+ /*
+ * Indicate we're slamming against a limit.
+ */
+ hrtime_t measurement = gethrtime();
+
+ sema_p(&netstack_reap_limiter);
+ /* Capture delay in ns. */
+ DTRACE_PROBE1(netstack__reap__rate__limited,
+ hrtime_t, gethrtime() - measurement);
}
+
+ /* TQ_SLEEP should prevent taskq_dispatch() from failing. */
+ (void) taskq_dispatch(system_taskq, netstack_reap, ns,
+ TQ_SLEEP);
+ }
}
void
netstack_hold(netstack_t *ns)
{