Print this page
OS-XXXX netstack_find_by_stackid() drops-and-reacquires
OS-5423 deadlock between netstack teardown and kstat read
@@ -35,10 +35,11 @@
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
+#include <sys/sunddi.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>
@@ -120,16 +121,23 @@
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
kmutex_t *);
+static void netstack_hold_locked(netstack_t *);
+static void netstack_reap_work(netstack_t *, boolean_t);
+ksema_t netstack_reap_limiter;
+
void
netstack_init(void)
{
mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
+ /* XXX KEBE SAYS hard-coded constant needs to be fixed. */
+ sema_init(&netstack_reap_limiter, 1024, NULL, SEMA_DRIVER, NULL);
+
netstack_initialized = 1;
/*
* We want to be informed each time a zone is created or
* destroyed in the kernel, so we can maintain the
@@ -1028,12 +1036,12 @@
mutex_enter(&netstack_g_lock);
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
mutex_enter(&ns->netstack_lock);
if (ns->netstack_stackid == stackid &&
!(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
+ netstack_hold_locked(ns);
mutex_exit(&ns->netstack_lock);
- netstack_hold(ns);
mutex_exit(&netstack_g_lock);
return (ns);
}
mutex_exit(&ns->netstack_lock);
}
@@ -1059,34 +1067,33 @@
mutex_exit(&netstack_g_lock);
return (rval);
}
-void
-netstack_rele(netstack_t *ns)
+
+static void
+netstack_reap(void *arg)
{
+ /* Indicate we took a semaphore to get here. */
+ netstack_reap_work((netstack_t *)arg, B_TRUE);
+}
+
+static void
+netstack_reap_intr(void *arg)
+{
+ /* Indicate we did NOT TAKE a semaphore to get here. */
+ netstack_reap_work((netstack_t *)arg, B_FALSE);
+}
+
+static void
+netstack_reap_work(netstack_t *ns, boolean_t semaphore_signal)
+{
netstack_t **nsp;
boolean_t found;
- int refcnt, numzones;
int i;
- mutex_enter(&ns->netstack_lock);
- ASSERT(ns->netstack_refcnt > 0);
- ns->netstack_refcnt--;
/*
- * As we drop the lock additional netstack_rele()s can come in
- * and decrement the refcnt to zero and free the netstack_t.
- * Store pointers in local variables and if we were not the last
- * then don't reference the netstack_t after that.
- */
- refcnt = ns->netstack_refcnt;
- numzones = ns->netstack_numzones;
- DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
- mutex_exit(&ns->netstack_lock);
-
- if (refcnt == 0 && numzones == 0) {
- /*
* Time to call the destroy functions and free up
* the structure
*/
netstack_stack_inactive(ns);
@@ -1121,23 +1128,93 @@
cv_destroy(&nms->nms_cv);
}
mutex_destroy(&ns->netstack_lock);
cv_destroy(&ns->netstack_cv);
kmem_free(ns, sizeof (*ns));
- }
+ /* Allow another reap to be scheduled. */
+ if (semaphore_signal)
+ sema_v(&netstack_reap_limiter);
}
void
-netstack_hold(netstack_t *ns)
+netstack_rele(netstack_t *ns)
{
+ int refcnt, numzones;
+
mutex_enter(&ns->netstack_lock);
- ns->netstack_refcnt++;
ASSERT(ns->netstack_refcnt > 0);
+ ns->netstack_refcnt--;
+ /*
+ * As we drop the lock additional netstack_rele()s can come in
+ * and decrement the refcnt to zero and free the netstack_t.
+ * Store pointers in local variables and if we were not the last
+ * then don't reference the netstack_t after that.
+ */
+ refcnt = ns->netstack_refcnt;
+ numzones = ns->netstack_numzones;
+ DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
mutex_exit(&ns->netstack_lock);
+
+ if (refcnt == 0 && numzones == 0) {
+ boolean_t is_not_intr = !servicing_interrupt();
+
+ /*
+ * Because there are possibilities of kstats being held by
+ * callers, which would then be immediately freed, but held up
+ * due to kstat's odd reference model recording the thread, we
+ * choose to schedule the actual deletion of this netstack as
+ * a deferred task on the system taskq. This way, any
+ * store-the-thread-pointer semantics won't trip over
+ * themselves.
+ *
+ * On the off chance this is called in interrupt context, we
+ * cannot use the semaphore to enforce rate-limiting.
+ */
+ if (is_not_intr && sema_tryp(&netstack_reap_limiter) == 0) {
+ /*
+			 * XXX KEBE SAYS indicate we're slamming against
+ * a limit.
+ */
+ hrtime_t measurement = gethrtime();
+
+ sema_p(&netstack_reap_limiter);
+			/* Capture delay in ns. */
+ DTRACE_PROBE1(netstack__reap__rate__limited,
+ hrtime_t *, gethrtime() - measurement);
+ }
+
+ if (taskq_dispatch(system_taskq,
+ is_not_intr ? netstack_reap : netstack_reap_intr, ns,
+ TQ_NOSLEEP) == NULL) {
+ /*
+ * Well shoot, why can't we taskq_dispatch?
+ * Take our chances with a direct call.
+ */
+ DTRACE_PROBE1(netstack__reap__taskq__fail,
+ netstack_t *, ns);
+ netstack_reap_work(ns, is_not_intr);
+ }
+ }
+}
+
+static void
+netstack_hold_locked(netstack_t *ns)
+{
+ ASSERT(MUTEX_HELD(&ns->netstack_lock));
+ ns->netstack_refcnt++;
+ ASSERT(ns->netstack_refcnt > 0);
DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
+void
+netstack_hold(netstack_t *ns)
+{
+ mutex_enter(&ns->netstack_lock);
+ netstack_hold_locked(ns);
+ mutex_exit(&ns->netstack_lock);
+}
+
/*
* To support kstat_create_netstack() using kstat_zone_add we need
* to track both
* - all zoneids that use the global/shared stack
* - all kstats that have been added for the shared stack