8900 deadlock between netstack teardown and kstat read
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 20,30 ****
   */
  
  /*
   * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
   * Use is subject to license terms.
!  * Copyright (c) 2016, Joyent, Inc. All rights reserved.
   */
  
  #include <sys/param.h>
  #include <sys/sysmacros.h>
  #include <sys/vm.h>
--- 20,30 ----
   */
  
  /*
   * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
   * Use is subject to license terms.
!  * Copyright (c) 2017, Joyent, Inc. All rights reserved.
   */
  
  #include <sys/param.h>
  #include <sys/sysmacros.h>
  #include <sys/vm.h>
*** 35,44 ****
--- 35,45 ----
  #include <sys/debug.h>
  #include <sys/sdt.h>
  #include <sys/mutex.h>
  #include <sys/bitmap.h>
  #include <sys/atomic.h>
+ #include <sys/sunddi.h>
  #include <sys/kobj.h>
  #include <sys/disp.h>
  #include <vm/seg_kmem.h>
  #include <sys/zone.h>
  #include <sys/netstack.h>
*** 120,135 ****
--- 121,146 ----
  static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
  static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
  static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
      kmutex_t *);
  
+ static ksema_t netstack_reap_limiter;
+ /*
+  * Hard-coded constant, but since this is not tunable in real-time, it seems
+  * making it an /etc/system tunable is better than nothing.
+  */
+ uint_t netstack_outstanding_reaps = 1024;
+ 
  void
  netstack_init(void)
  {
  	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
  	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
  
+ 	sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
+ 	    SEMA_DRIVER, NULL);
+ 
  	netstack_initialized = 1;
  
  	/*
  	 * We want to be informed each time a zone is created or
  	 * destroyed in the kernel, so we can maintain the
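The netstack_reap_limiter added above is a counting semaphore that caps how many
netstack reaps may be outstanding at once, seeded with netstack_outstanding_reaps
tokens in netstack_init(). As a purely illustrative aside, the same throttle
pattern can be restated in userland C with POSIX semaphores. This is a minimal
sketch, not part of the change: task_limiter and MAX_OUTSTANDING are invented
names, and sem_init/sem_trywait/sem_wait/sem_post stand in for the kernel DDI's
sema_init/sema_tryp/sema_p/sema_v.

    /* Userland sketch of the counting-semaphore throttle used above. */
    #include <semaphore.h>
    #include <stdio.h>

    #define	MAX_OUTSTANDING	1024	/* analog of netstack_outstanding_reaps */

    static sem_t task_limiter;	/* analog of netstack_reap_limiter */

    int
    main(void)
    {
    	/* Seed the full budget, as sema_init() does in netstack_init(). */
    	if (sem_init(&task_limiter, 0, MAX_OUTSTANDING) != 0) {
    		perror("sem_init");
    		return (1);
    	}

    	/*
    	 * Fast path: take a token without blocking, like sema_tryp().
    	 * If the budget is exhausted, block until a completed task
    	 * returns a token.
    	 */
    	if (sem_trywait(&task_limiter) != 0)
    		(void) sem_wait(&task_limiter);	/* analog of sema_p() */

    	/* ... dispatch the deferred work here ... */

    	/* A finished task returns its token, like sema_v() in the reap. */
    	(void) sem_post(&task_limiter);

    	(void) sem_destroy(&task_limiter);
    	return (0);
    }

Seeding the semaphore with the full budget means dispatchers only block once the
limit is actually hit; each completed task returns a token, which is exactly the
role sema_v() plays at the end of netstack_reap() in a later hunk.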
*** 1059,1092 ****
  	mutex_exit(&netstack_g_lock);
  	return (rval);
  }
  
! void
! netstack_rele(netstack_t *ns)
  {
! 	netstack_t **nsp;
  	boolean_t found;
- 	int refcnt, numzones;
  	int i;
  
- 	mutex_enter(&ns->netstack_lock);
- 	ASSERT(ns->netstack_refcnt > 0);
- 	ns->netstack_refcnt--;
  	/*
- 	 * As we drop the lock additional netstack_rele()s can come in
- 	 * and decrement the refcnt to zero and free the netstack_t.
- 	 * Store pointers in local variables and if we were not the last
- 	 * then don't reference the netstack_t after that.
- 	 */
- 	refcnt = ns->netstack_refcnt;
- 	numzones = ns->netstack_numzones;
- 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
- 	mutex_exit(&ns->netstack_lock);
- 
- 	if (refcnt == 0 && numzones == 0) {
- 		/*
  	 * Time to call the destroy functions and free up
  	 * the structure
  	 */
  	netstack_stack_inactive(ns);
--- 1070,1088 ----
  	mutex_exit(&netstack_g_lock);
  	return (rval);
  }
  
! static void
! netstack_reap(void *arg)
  {
! 	netstack_t **nsp, *ns = (netstack_t *)arg;
  	boolean_t found;
  	int i;
  
  	/*
  	 * Time to call the destroy functions and free up
  	 * the structure
  	 */
  	netstack_stack_inactive(ns);
*** 1121,1131 ****
--- 1117,1177 ----
  		cv_destroy(&nms->nms_cv);
  	}
  	mutex_destroy(&ns->netstack_lock);
  	cv_destroy(&ns->netstack_cv);
  	kmem_free(ns, sizeof (*ns));
+ 	/* Allow another reap to be scheduled. */
+ 	sema_v(&netstack_reap_limiter);
+ }
+ 
+ void
+ netstack_rele(netstack_t *ns)
+ {
+ 	int refcnt, numzones;
+ 
+ 	mutex_enter(&ns->netstack_lock);
+ 	ASSERT(ns->netstack_refcnt > 0);
+ 	ns->netstack_refcnt--;
+ 	/*
+ 	 * As we drop the lock additional netstack_rele()s can come in
+ 	 * and decrement the refcnt to zero and free the netstack_t.
+ 	 * Store pointers in local variables and if we were not the last
+ 	 * then don't reference the netstack_t after that.
+ 	 */
+ 	refcnt = ns->netstack_refcnt;
+ 	numzones = ns->netstack_numzones;
+ 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
+ 	mutex_exit(&ns->netstack_lock);
+ 
+ 	if (refcnt == 0 && numzones == 0) {
+ 		/*
+ 		 * Because there are possibilities of re-entrancy in various
+ 		 * netstack structures by callers, which might cause a lock up
+ 		 * due to odd reference models, or other factors, we choose to
+ 		 * schedule the actual deletion of this netstack as a deferred
+ 		 * task on the system taskq.  This way, any such reference
+ 		 * models won't trip over themselves.
+ 		 *
+ 		 * Assume we aren't in a high-priority interrupt context, so
+ 		 * we can use KM_SLEEP and semaphores.
+ 		 */
+ 		if (sema_tryp(&netstack_reap_limiter) == 0) {
+ 			/*
+ 			 * Indicate we're slamming against a limit.
+ 			 */
+ 			hrtime_t measurement = gethrtime();
+ 
+ 			sema_p(&netstack_reap_limiter);
+ 			/* Capture delay in ns. */
+ 			DTRACE_PROBE1(netstack__reap__rate__limited,
+ 			    hrtime_t, gethrtime() - measurement);
+ 		}
+ 
+ 		/* TQ_SLEEP should prevent taskq_dispatch() from failing. */
+ 		(void) taskq_dispatch(system_taskq, netstack_reap, ns,
+ 		    TQ_SLEEP);
  	}
  }
  
  void
  netstack_hold(netstack_t *ns)
  {
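The heart of the fix is in this hunk: when the last reference drops,
netstack_rele() no longer runs the teardown inline, where a re-entrant caller
(such as a kstat reader holding netstack locks) could deadlock against it;
instead it hands the netstack to the system taskq. The following is a minimal
userland C sketch of that deferred-destruction pattern, under stated
assumptions: a toy obj_t type and a single worker thread stand in for the
netstack and system_taskq, and every name here (obj_t, obj_rele,
reap_queue_put, reaper) is hypothetical.

    /* Userland sketch of deferring destruction to a worker thread. */
    #include <pthread.h>
    #include <stdlib.h>

    typedef struct obj {
    	pthread_mutex_t	o_lock;
    	int		o_refcnt;
    	struct obj	*o_next;	/* reap-queue linkage */
    } obj_t;

    /* A trivial work queue drained by a worker; stands in for system_taskq. */
    static pthread_mutex_t	reap_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t	reap_cv = PTHREAD_COND_INITIALIZER;
    static obj_t		*reap_head;

    static void
    reap_queue_put(obj_t *op)
    {
    	(void) pthread_mutex_lock(&reap_lock);
    	op->o_next = reap_head;
    	reap_head = op;
    	(void) pthread_cond_signal(&reap_cv);
    	(void) pthread_mutex_unlock(&reap_lock);
    }

    void
    obj_rele(obj_t *op)
    {
    	int refcnt;

    	/* Drop the reference under the lock, as netstack_rele() does. */
    	(void) pthread_mutex_lock(&op->o_lock);
    	refcnt = --op->o_refcnt;
    	(void) pthread_mutex_unlock(&op->o_lock);

    	/*
    	 * Last reference: queue the destroy for the worker instead of
    	 * running it here, so a re-entrant caller cannot deadlock.
    	 */
    	if (refcnt == 0)
    		reap_queue_put(op);
    }

    void *
    reaper(void *arg)
    {
    	for (;;) {
    		obj_t *op;

    		(void) pthread_mutex_lock(&reap_lock);
    		while (reap_head == NULL)
    			(void) pthread_cond_wait(&reap_cv, &reap_lock);
    		op = reap_head;
    		reap_head = op->o_next;
    		(void) pthread_mutex_unlock(&reap_lock);

    		/* The actual teardown runs here, like netstack_reap(). */
    		(void) pthread_mutex_destroy(&op->o_lock);
    		free(op);
    	}
    	return (arg);	/* not reached */
    }

The key property is that obj_rele() holds no locks by the time the worker runs
the teardown, so a destructor that re-acquires subsystem locks cannot deadlock
against its caller; the semaphore in the earlier hunks then bounds how much
deferred work can pile up on the queue.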