Print this page
OS-XXXX netstack_find_by_stackid() drops-and-reacquires
OS-5423 deadlock between netstack teardown and kstat read

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/netstack.c
          +++ new/usr/src/uts/common/os/netstack.c
↓ open down ↓ 29 lines elided ↑ open up ↑
  30   30  #include <sys/vm.h>
  31   31  #include <sys/proc.h>
  32   32  #include <sys/tuneable.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/cmn_err.h>
  35   35  #include <sys/debug.h>
  36   36  #include <sys/sdt.h>
  37   37  #include <sys/mutex.h>
  38   38  #include <sys/bitmap.h>
  39   39  #include <sys/atomic.h>
       40 +#include <sys/sunddi.h>
  40   41  #include <sys/kobj.h>
  41   42  #include <sys/disp.h>
  42   43  #include <vm/seg_kmem.h>
  43   44  #include <sys/zone.h>
  44   45  #include <sys/netstack.h>
  45   46  
  46   47  /*
  47   48   * What we use so that the zones framework can tell us about new zones,
  48   49   * which we use to create new stacks.
  49   50   */
↓ open down ↓ 65 lines elided ↑ open up ↑
 115  116  static void     apply_all_netstacks(int, applyfn_t *);
 116  117  static void     apply_all_modules(netstack_t *, applyfn_t *);
 117  118  static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 118  119  static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 119  120  static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 120  121  static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 121  122  static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 122  123  static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 123  124      kmutex_t *);
 124  125  
      126 +static void netstack_hold_locked(netstack_t *);
      127 +static void netstack_reap_work(netstack_t *, boolean_t);
      128 +ksema_t netstack_reap_limiter;
      129 +
 125  130  void
 126  131  netstack_init(void)
 127  132  {
 128  133          mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
 129  134          mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
 130  135  
      136 +        /* XXX KEBE SAYS hard-coded constant needs to be fixed. */
      137 +        sema_init(&netstack_reap_limiter, 1024, NULL, SEMA_DRIVER, NULL);
      138 +
 131  139          netstack_initialized = 1;
 132  140  
 133  141          /*
 134  142           * We want to be informed each time a zone is created or
 135  143           * destroyed in the kernel, so we can maintain the
 136  144           * stack instance information.
 137  145           */
 138  146          zone_key_create(&netstack_zone_key, netstack_zone_create,
 139  147              netstack_zone_shutdown, netstack_zone_destroy);
 140  148  }
↓ open down ↓ 882 lines elided ↑ open up ↑
1023 1031  netstack_t *
1024 1032  netstack_find_by_stackid(netstackid_t stackid)
1025 1033  {
1026 1034          netstack_t *ns;
1027 1035  
1028 1036          mutex_enter(&netstack_g_lock);
1029 1037          for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1030 1038                  mutex_enter(&ns->netstack_lock);
1031 1039                  if (ns->netstack_stackid == stackid &&
1032 1040                      !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
     1041 +                        netstack_hold_locked(ns);
1033 1042                          mutex_exit(&ns->netstack_lock);
1034      -                        netstack_hold(ns);
1035 1043                          mutex_exit(&netstack_g_lock);
1036 1044                          return (ns);
1037 1045                  }
1038 1046                  mutex_exit(&ns->netstack_lock);
1039 1047          }
1040 1048          mutex_exit(&netstack_g_lock);
1041 1049          return (NULL);
1042 1050  }
1043 1051  
1044 1052  boolean_t
↓ open down ↓ 9 lines elided ↑ open up ↑
1054 1062                          rval = B_TRUE;
1055 1063                          break;
1056 1064                  }
1057 1065          }
1058 1066  
1059 1067          mutex_exit(&netstack_g_lock);
1060 1068  
1061 1069          return (rval);
1062 1070  }
1063 1071  
1064      -void
1065      -netstack_rele(netstack_t *ns)
     1072 +
     1073 +static void
     1074 +netstack_reap(void *arg)
1066 1075  {
     1076 +        /* Indicate we took a semaphore to get here. */
     1077 +        netstack_reap_work((netstack_t *)arg, B_TRUE);
     1078 +}
     1079 +
     1080 +static void
     1081 +netstack_reap_intr(void *arg)
     1082 +{
     1083 +        /* Indicate we did NOT TAKE a semaphore to get here. */
     1084 +        netstack_reap_work((netstack_t *)arg, B_FALSE);
     1085 +}
     1086 +
     1087 +static void
     1088 +netstack_reap_work(netstack_t *ns, boolean_t semaphore_signal)
     1089 +{
1067 1090          netstack_t **nsp;
1068 1091          boolean_t found;
1069      -        int refcnt, numzones;
1070 1092          int i;
1071 1093  
     1094 +        /*
     1095 +         * Time to call the destroy functions and free up
     1096 +         * the structure
     1097 +         */
     1098 +        netstack_stack_inactive(ns);
     1099 +
     1100 +        /* Make sure nothing increased the references */
     1101 +        ASSERT(ns->netstack_refcnt == 0);
     1102 +        ASSERT(ns->netstack_numzones == 0);
     1103 +
     1104 +        /* Finally remove from list of netstacks */
     1105 +        mutex_enter(&netstack_g_lock);
     1106 +        found = B_FALSE;
     1107 +        for (nsp = &netstack_head; *nsp != NULL;
     1108 +             nsp = &(*nsp)->netstack_next) {
     1109 +                if (*nsp == ns) {
     1110 +                        *nsp = ns->netstack_next;
     1111 +                        ns->netstack_next = NULL;
     1112 +                        found = B_TRUE;
     1113 +                        break;
     1114 +                }
     1115 +        }
     1116 +        ASSERT(found);
     1117 +        mutex_exit(&netstack_g_lock);
     1118 +
     1119 +        /* Make sure nothing increased the references */
     1120 +        ASSERT(ns->netstack_refcnt == 0);
     1121 +        ASSERT(ns->netstack_numzones == 0);
     1122 +
     1123 +        ASSERT(ns->netstack_flags & NSF_CLOSING);
     1124 +
     1125 +        for (i = 0; i < NS_MAX; i++) {
     1126 +                nm_state_t *nms = &ns->netstack_m_state[i];
     1127 +
     1128 +                cv_destroy(&nms->nms_cv);
     1129 +        }
     1130 +        mutex_destroy(&ns->netstack_lock);
     1131 +        cv_destroy(&ns->netstack_cv);
     1132 +        kmem_free(ns, sizeof (*ns));
     1133 +        /* Allow another reap to be scheduled. */
     1134 +        if (semaphore_signal)
     1135 +                sema_v(&netstack_reap_limiter);
     1136 +}
     1137 +
     1138 +void
     1139 +netstack_rele(netstack_t *ns)
     1140 +{
     1141 +        int refcnt, numzones;
     1142 +
1072 1143          mutex_enter(&ns->netstack_lock);
1073 1144          ASSERT(ns->netstack_refcnt > 0);
1074 1145          ns->netstack_refcnt--;
1075 1146          /*
1076 1147           * As we drop the lock additional netstack_rele()s can come in
1077 1148           * and decrement the refcnt to zero and free the netstack_t.
1078 1149           * Store pointers in local variables and if we were not the last
1079 1150           * then don't reference the netstack_t after that.
1080 1151           */
1081 1152          refcnt = ns->netstack_refcnt;
1082 1153          numzones = ns->netstack_numzones;
1083 1154          DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1084 1155          mutex_exit(&ns->netstack_lock);
1085 1156  
1086 1157          if (refcnt == 0 && numzones == 0) {
     1158 +                boolean_t is_not_intr = !servicing_interrupt();
     1159 +
1087 1160                  /*
1088      -                 * Time to call the destroy functions and free up
1089      -                 * the structure
     1161 +                 * The caller may be holding a kstat that tearing down this
     1162 +                 * netstack would immediately try to free; because kstat's
     1163 +                 * odd reference model records the holding thread, that free
     1164 +                 * would be held up.  We therefore schedule the actual
     1165 +                 * deletion of this netstack as a deferred task on the system
     1166 +                 * taskq.  This way, any store-the-thread-pointer semantics
     1167 +                 * won't trip over themselves.
     1168 +                 *
     1169 +                 * On the off chance this is called in interrupt context, we
     1170 +                 * cannot use the semaphore to enforce rate-limiting.
1090 1171                   */
1091      -                netstack_stack_inactive(ns);
     1172 +                if (is_not_intr && sema_tryp(&netstack_reap_limiter) == 0) {
     1173 +                        /*
     1174 +                         * XXX KEBE SAYS indicate we're slamming against
     1175 +                         * a limit.
     1176 +                         */
     1177 +                        hrtime_t measurement = gethrtime();
1092 1178  
1093      -                /* Make sure nothing increased the references */
1094      -                ASSERT(ns->netstack_refcnt == 0);
1095      -                ASSERT(ns->netstack_numzones == 0);
1096      -
1097      -                /* Finally remove from list of netstacks */
1098      -                mutex_enter(&netstack_g_lock);
1099      -                found = B_FALSE;
1100      -                for (nsp = &netstack_head; *nsp != NULL;
1101      -                    nsp = &(*nsp)->netstack_next) {
1102      -                        if (*nsp == ns) {
1103      -                                *nsp = ns->netstack_next;
1104      -                                ns->netstack_next = NULL;
1105      -                                found = B_TRUE;
1106      -                                break;
1107      -                        }
     1179 +                        sema_p(&netstack_reap_limiter);
     1180 +                        /* Capture delay in ns. */
     1181 +                        DTRACE_PROBE1(netstack__reap__rate__limited,
     1182 +                            hrtime_t *, gethrtime() - measurement);
1108 1183                  }
1109      -                ASSERT(found);
1110      -                mutex_exit(&netstack_g_lock);
1111 1184  
1112      -                /* Make sure nothing increased the references */
1113      -                ASSERT(ns->netstack_refcnt == 0);
1114      -                ASSERT(ns->netstack_numzones == 0);
1115      -
1116      -                ASSERT(ns->netstack_flags & NSF_CLOSING);
1117      -
1118      -                for (i = 0; i < NS_MAX; i++) {
1119      -                        nm_state_t *nms = &ns->netstack_m_state[i];
1120      -
1121      -                        cv_destroy(&nms->nms_cv);
     1185 +                if (taskq_dispatch(system_taskq,
     1186 +                    is_not_intr ? netstack_reap : netstack_reap_intr, ns,
     1187 +                    TQ_NOSLEEP) == NULL) {
     1188 +                        /*
     1189 +                         * Well shoot, why can't we taskq_dispatch?
     1190 +                         * Take our chances with a direct call.
     1191 +                         */
     1192 +                        DTRACE_PROBE1(netstack__reap__taskq__fail,
     1193 +                            netstack_t *, ns);
     1194 +                        netstack_reap_work(ns, is_not_intr);
1122 1195                  }
1123      -                mutex_destroy(&ns->netstack_lock);
1124      -                cv_destroy(&ns->netstack_cv);
1125      -                kmem_free(ns, sizeof (*ns));
1126 1196          }
1127 1197  }
1128 1198  
     1199 +static void
     1200 +netstack_hold_locked(netstack_t *ns)
     1201 +{
     1202 +        ASSERT(MUTEX_HELD(&ns->netstack_lock));
     1203 +        ns->netstack_refcnt++;
     1204 +        ASSERT(ns->netstack_refcnt > 0);
     1205 +        DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
     1206 +}
     1207 +
1129 1208  void
1130 1209  netstack_hold(netstack_t *ns)
1131 1210  {
1132 1211          mutex_enter(&ns->netstack_lock);
1133      -        ns->netstack_refcnt++;
1134      -        ASSERT(ns->netstack_refcnt > 0);
     1212 +        netstack_hold_locked(ns);
1135 1213          mutex_exit(&ns->netstack_lock);
1136      -        DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1137 1214  }
1138 1215  
1139 1216  /*
1140 1217   * To support kstat_create_netstack() using kstat_zone_add we need
1141 1218   * to track both
1142 1219   *  - all zoneids that use the global/shared stack
1143 1220   *  - all kstats that have been added for the shared stack
1144 1221   */
1145 1222  kstat_t *
1146 1223  kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
↓ open down ↓ 248 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX