Print this page
8900 deadlock between netstack teardown and kstat read
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright (c) 2016, Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/param.h>
  29 #include <sys/sysmacros.h>
  30 #include <sys/vm.h>
  31 #include <sys/proc.h>
  32 #include <sys/tuneable.h>
  33 #include <sys/systm.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/debug.h>
  36 #include <sys/sdt.h>
  37 #include <sys/mutex.h>
  38 #include <sys/bitmap.h>
  39 #include <sys/atomic.h>

  40 #include <sys/kobj.h>
  41 #include <sys/disp.h>
  42 #include <vm/seg_kmem.h>
  43 #include <sys/zone.h>
  44 #include <sys/netstack.h>
  45 
  46 /*
  47  * What we use so that the zones framework can tell us about new zones,
  48  * which we use to create new stacks.
  49  */
  50 static zone_key_t netstack_zone_key;
  51 
  52 static int      netstack_initialized = 0;
  53 
  54 /*
  55  * Track the registered netstacks.
  56  * The global lock protects
  57  * - ns_reg
  58  * - the list starting at netstack_head and following the netstack_next
  59  *   pointers.


 105 static void     netstack_zone_shutdown(zoneid_t zoneid, void *arg);
 106 static void     netstack_zone_destroy(zoneid_t zoneid, void *arg);
 107 
 108 static void     netstack_shared_zone_add(zoneid_t zoneid);
 109 static void     netstack_shared_zone_remove(zoneid_t zoneid);
 110 static void     netstack_shared_kstat_add(kstat_t *ks);
 111 static void     netstack_shared_kstat_remove(kstat_t *ks);
 112 
 113 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
 114 
 115 static void     apply_all_netstacks(int, applyfn_t *);
 116 static void     apply_all_modules(netstack_t *, applyfn_t *);
 117 static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 118 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 119 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 120 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 121 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 122 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 123     kmutex_t *);
 124 







 125 void
 126 netstack_init(void)
 127 {
 128         mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
 129         mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
 130 



 131         netstack_initialized = 1;
 132 
 133         /*
 134          * We want to be informed each time a zone is created or
 135          * destroyed in the kernel, so we can maintain the
 136          * stack instance information.
 137          */
 138         zone_key_create(&netstack_zone_key, netstack_zone_create,
 139             netstack_zone_shutdown, netstack_zone_destroy);
 140 }
 141 
 142 /*
 143  * Register a new module with the framework.
 144  * This registers interest in changes to the set of netstacks.
 145  * The createfn and destroyfn are required, but the shutdownfn can be
 146  * NULL.
 147  * Note that due to the current zsd implementation, when the create
 148  * function is called the zone isn't fully present, thus functions
 149  * like zone_find_by_* will fail, hence the create function can not
 150  * use many zones kernel functions including zcmn_err().


1044 boolean_t
1045 netstack_inuse_by_stackid(netstackid_t stackid)
1046 {
1047         netstack_t *ns;
1048         boolean_t rval = B_FALSE;
1049 
1050         mutex_enter(&netstack_g_lock);
1051 
1052         for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1053                 if (ns->netstack_stackid == stackid) {
1054                         rval = B_TRUE;
1055                         break;
1056                 }
1057         }
1058 
1059         mutex_exit(&netstack_g_lock);
1060 
1061         return (rval);
1062 }
1063 
1064 void
1065 netstack_rele(netstack_t *ns)

1066 {
1067         netstack_t **nsp;
1068         boolean_t found;
1069         int refcnt, numzones;
1070         int i;
1071 
1072         mutex_enter(&ns->netstack_lock);
1073         ASSERT(ns->netstack_refcnt > 0);
1074         ns->netstack_refcnt--;
1075         /*
1076          * As we drop the lock additional netstack_rele()s can come in
1077          * and decrement the refcnt to zero and free the netstack_t.
1078          * Store pointers in local variables and if we were not the last
1079          * then don't reference the netstack_t after that.
1080          */
1081         refcnt = ns->netstack_refcnt;
1082         numzones = ns->netstack_numzones;
1083         DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1084         mutex_exit(&ns->netstack_lock);
1085 
1086         if (refcnt == 0 && numzones == 0) {
1087                 /*
1088                  * Time to call the destroy functions and free up
1089                  * the structure
1090                  */
1091                 netstack_stack_inactive(ns);
1092 
1093                 /* Make sure nothing increased the references */
1094                 ASSERT(ns->netstack_refcnt == 0);
1095                 ASSERT(ns->netstack_numzones == 0);
1096 
1097                 /* Finally remove from list of netstacks */
1098                 mutex_enter(&netstack_g_lock);
1099                 found = B_FALSE;
1100                 for (nsp = &netstack_head; *nsp != NULL;
1101                     nsp = &(*nsp)->netstack_next) {
1102                         if (*nsp == ns) {
1103                                 *nsp = ns->netstack_next;
1104                                 ns->netstack_next = NULL;
1105                                 found = B_TRUE;
1106                                 break;
1107                         }
1108                 }
1109                 ASSERT(found);
1110                 mutex_exit(&netstack_g_lock);
1111 
1112                 /* Make sure nothing increased the references */
1113                 ASSERT(ns->netstack_refcnt == 0);
1114                 ASSERT(ns->netstack_numzones == 0);
1115 
1116                 ASSERT(ns->netstack_flags & NSF_CLOSING);
1117 
1118                 for (i = 0; i < NS_MAX; i++) {
1119                         nm_state_t *nms = &ns->netstack_m_state[i];
1120 
1121                         cv_destroy(&nms->nms_cv);
1122                 }
1123                 mutex_destroy(&ns->netstack_lock);
1124                 cv_destroy(&ns->netstack_cv);
1125                 kmem_free(ns, sizeof (*ns));













































1126         }





1127 }
1128 
1129 void
1130 netstack_hold(netstack_t *ns)
1131 {
1132         mutex_enter(&ns->netstack_lock);
1133         ns->netstack_refcnt++;
1134         ASSERT(ns->netstack_refcnt > 0);
1135         mutex_exit(&ns->netstack_lock);
1136         DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1137 }
1138 
1139 /*
1140  * To support kstat_create_netstack() using kstat_zone_add we need
1141  * to track both
1142  *  - all zoneids that use the global/shared stack
1143  *  - all kstats that have been added for the shared stack
1144  */
1145 kstat_t *
1146 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,




   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/param.h>
  29 #include <sys/sysmacros.h>
  30 #include <sys/vm.h>
  31 #include <sys/proc.h>
  32 #include <sys/tuneable.h>
  33 #include <sys/systm.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/debug.h>
  36 #include <sys/sdt.h>
  37 #include <sys/mutex.h>
  38 #include <sys/bitmap.h>
  39 #include <sys/atomic.h>
  40 #include <sys/sunddi.h>
  41 #include <sys/kobj.h>
  42 #include <sys/disp.h>
  43 #include <vm/seg_kmem.h>
  44 #include <sys/zone.h>
  45 #include <sys/netstack.h>
  46 
  47 /*
  48  * What we use so that the zones framework can tell us about new zones,
  49  * which we use to create new stacks.
  50  */
  51 static zone_key_t netstack_zone_key;
  52 
  53 static int      netstack_initialized = 0;
  54 
  55 /*
  56  * Track the registered netstacks.
  57  * The global lock protects
  58  * - ns_reg
  59  * - the list starting at netstack_head and following the netstack_next
  60  *   pointers.


 106 static void     netstack_zone_shutdown(zoneid_t zoneid, void *arg);
 107 static void     netstack_zone_destroy(zoneid_t zoneid, void *arg);
 108 
 109 static void     netstack_shared_zone_add(zoneid_t zoneid);
 110 static void     netstack_shared_zone_remove(zoneid_t zoneid);
 111 static void     netstack_shared_kstat_add(kstat_t *ks);
 112 static void     netstack_shared_kstat_remove(kstat_t *ks);
 113 
 114 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
 115 
 116 static void     apply_all_netstacks(int, applyfn_t *);
 117 static void     apply_all_modules(netstack_t *, applyfn_t *);
 118 static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 119 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 120 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 121 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 122 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 123 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 124     kmutex_t *);
 125 
 126 static ksema_t netstack_reap_limiter;
 127 /*
 128  * Hard-coded constant, but since this is not tunable in real-time, it seems
 129  * making it an /etc/system tunable is better than nothing.
 130  */
 131 uint_t netstack_outstanding_reaps = 1024;
 132 
 133 void
 134 netstack_init(void)
 135 {
 136         mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
 137         mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
 138 
 139         sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
 140             SEMA_DRIVER, NULL);
 141 
 142         netstack_initialized = 1;
 143 
 144         /*
 145          * We want to be informed each time a zone is created or
 146          * destroyed in the kernel, so we can maintain the
 147          * stack instance information.
 148          */
 149         zone_key_create(&netstack_zone_key, netstack_zone_create,
 150             netstack_zone_shutdown, netstack_zone_destroy);
 151 }
 152 
 153 /*
 154  * Register a new module with the framework.
 155  * This registers interest in changes to the set of netstacks.
 156  * The createfn and destroyfn are required, but the shutdownfn can be
 157  * NULL.
 158  * Note that due to the current zsd implementation, when the create
 159  * function is called the zone isn't fully present, thus functions
 160  * like zone_find_by_* will fail, hence the create function can not
 161  * use many zones kernel functions including zcmn_err().


1055 boolean_t
1056 netstack_inuse_by_stackid(netstackid_t stackid)
1057 {
1058         netstack_t *ns;
1059         boolean_t rval = B_FALSE;
1060 
1061         mutex_enter(&netstack_g_lock);
1062 
1063         for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1064                 if (ns->netstack_stackid == stackid) {
1065                         rval = B_TRUE;
1066                         break;
1067                 }
1068         }
1069 
1070         mutex_exit(&netstack_g_lock);
1071 
1072         return (rval);
1073 }
1074 
1075 
1076 static void
1077 netstack_reap(void *arg)
1078 {
1079         netstack_t **nsp, *ns = (netstack_t *)arg;
1080         boolean_t found;

1081         int i;
1082 



1083         /*












1084          * Time to call the destroy functions and free up
1085          * the structure
1086          */
1087         netstack_stack_inactive(ns);
1088 
1089         /* Make sure nothing increased the references */
1090         ASSERT(ns->netstack_refcnt == 0);
1091         ASSERT(ns->netstack_numzones == 0);
1092 
1093         /* Finally remove from list of netstacks */
1094         mutex_enter(&netstack_g_lock);
1095         found = B_FALSE;
1096         for (nsp = &netstack_head; *nsp != NULL;
1097             nsp = &(*nsp)->netstack_next) {
1098                 if (*nsp == ns) {
1099                         *nsp = ns->netstack_next;
1100                         ns->netstack_next = NULL;
1101                         found = B_TRUE;
1102                         break;
1103                 }
1104         }
1105         ASSERT(found);
1106         mutex_exit(&netstack_g_lock);
1107 
1108         /* Make sure nothing increased the references */
1109         ASSERT(ns->netstack_refcnt == 0);
1110         ASSERT(ns->netstack_numzones == 0);
1111 
1112         ASSERT(ns->netstack_flags & NSF_CLOSING);
1113 
1114         for (i = 0; i < NS_MAX; i++) {
1115                 nm_state_t *nms = &ns->netstack_m_state[i];
1116 
1117                 cv_destroy(&nms->nms_cv);
1118         }
1119         mutex_destroy(&ns->netstack_lock);
1120         cv_destroy(&ns->netstack_cv);
1121         kmem_free(ns, sizeof (*ns));
1122         /* Allow another reap to be scheduled. */
1123         sema_v(&netstack_reap_limiter);
1124 }
1125 
1126 void
1127 netstack_rele(netstack_t *ns)
1128 {
1129         int refcnt, numzones;
1130 
1131         mutex_enter(&ns->netstack_lock);
1132         ASSERT(ns->netstack_refcnt > 0);
1133         ns->netstack_refcnt--;
1134         /*
1135          * As we drop the lock additional netstack_rele()s can come in
1136          * and decrement the refcnt to zero and free the netstack_t.
1137          * Store pointers in local variables and if we were not the last
1138          * then don't reference the netstack_t after that.
1139          */
1140         refcnt = ns->netstack_refcnt;
1141         numzones = ns->netstack_numzones;
1142         DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1143         mutex_exit(&ns->netstack_lock);
1144 
1145         if (refcnt == 0 && numzones == 0) {
1146                 /*
1147                  * Because there are possibilities of re-entrancy in various
1148                  * netstack structures by callers, which might cause a lock up
1149                  * due to odd reference models, or other factors, we choose to
1150                  * schedule the actual deletion of this netstack as a deferred
1151                  * task on the system taskq.  This way, any such reference
1152                  * models won't trip over themselves.
1153                  *
1154                  * Assume we aren't in a high-priority interrupt context, so
1155                  * we can use KM_SLEEP and semaphores.
1156                  */
1157                 if (sema_tryp(&netstack_reap_limiter) == 0) {
1158                         /*
1159                          * Indicate we're slamming against a limit.
1160                          */
1161                         hrtime_t measurement = gethrtime();
1162 
1163                         sema_p(&netstack_reap_limiter);
1164                         /* Capture delay in ns. */
1165                         DTRACE_PROBE1(netstack__reap__rate__limited,
1166                             hrtime_t, gethrtime() - measurement);
1167                 }
1168 
1169                 /* TQ_SLEEP should prevent taskq_dispatch() from failing. */
1170                 (void) taskq_dispatch(system_taskq, netstack_reap, ns,
1171                     TQ_SLEEP);
1172         }
1173 }
1174 
1175 void
1176 netstack_hold(netstack_t *ns)
1177 {
1178         mutex_enter(&ns->netstack_lock);
1179         ns->netstack_refcnt++;
1180         ASSERT(ns->netstack_refcnt > 0);
1181         mutex_exit(&ns->netstack_lock);
1182         DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1183 }
1184 
1185 /*
1186  * To support kstat_create_netstack() using kstat_zone_add we need
1187  * to track both
1188  *  - all zoneids that use the global/shared stack
1189  *  - all kstats that have been added for the shared stack
1190  */
1191 kstat_t *
1192 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,