Print this page
8900 deadlock between netstack teardown and kstat read
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Ryan Zezeski <rpz@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/netstack.c
          +++ new/usr/src/uts/common/os/netstack.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25      - * Copyright (c) 2016, Joyent, Inc.  All rights reserved.
       25 + * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  26   26   */
  27   27  
  28   28  #include <sys/param.h>
  29   29  #include <sys/sysmacros.h>
  30   30  #include <sys/vm.h>
  31   31  #include <sys/proc.h>
  32   32  #include <sys/tuneable.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/cmn_err.h>
  35   35  #include <sys/debug.h>
  36   36  #include <sys/sdt.h>
  37   37  #include <sys/mutex.h>
  38   38  #include <sys/bitmap.h>
  39   39  #include <sys/atomic.h>
       40 +#include <sys/sunddi.h>
  40   41  #include <sys/kobj.h>
  41   42  #include <sys/disp.h>
  42   43  #include <vm/seg_kmem.h>
  43   44  #include <sys/zone.h>
  44   45  #include <sys/netstack.h>
  45   46  
  46   47  /*
  47   48   * What we use so that the zones framework can tell us about new zones,
  48   49   * which we use to create new stacks.
  49   50   */
↓ open down ↓ 65 lines elided ↑ open up ↑
 115  116  static void     apply_all_netstacks(int, applyfn_t *);
 116  117  static void     apply_all_modules(netstack_t *, applyfn_t *);
 117  118  static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 118  119  static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 119  120  static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 120  121  static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 121  122  static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 122  123  static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 123  124      kmutex_t *);
 124  125  
      126 +static ksema_t netstack_reap_limiter;
      127 +/*
      128 + * Limit on outstanding netstack reaps.  It is read once, by netstack_init(),
      129 + * so it cannot be tuned at run time; it can still be set in /etc/system.
      130 + */
      131 +uint_t netstack_outstanding_reaps = 1024;
      132 +
 125  133  void
 126  134  netstack_init(void)
 127  135  {
 128  136          mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
 129  137          mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
 130  138  
      139 +        sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
      140 +            SEMA_DRIVER, NULL);
      141 +
 131  142          netstack_initialized = 1;
 132  143  
 133  144          /*
 134  145           * We want to be informed each time a zone is created, shut
 135  146           * down, or destroyed in the kernel, so we can maintain the
 136  147           * stack instance information.
 137  148           */
 138  149          zone_key_create(&netstack_zone_key, netstack_zone_create,
 139  150              netstack_zone_shutdown, netstack_zone_destroy);
 140  151  }
↓ open down ↓ 913 lines elided ↑ open up ↑
1054 1065                          rval = B_TRUE;
1055 1066                          break;
1056 1067                  }
1057 1068          }
1058 1069  
1059 1070          mutex_exit(&netstack_g_lock);
1060 1071  
1061 1072          return (rval);
1062 1073  }
1063 1074  
     1075 +
     1076 +static void
     1077 +netstack_reap(void *arg)
     1078 +{
     1079 +        netstack_t **nsp, *ns = (netstack_t *)arg;
     1080 +        boolean_t found;
     1081 +        int i;
     1082 +
     1083 +        /*
     1084 +         * Deferred (taskq) half of netstack_rele(): time to call the
     1085 +         * destroy functions, unlink ns, and free up the structure.
     1086 +         */
     1087 +        netstack_stack_inactive(ns);
     1088 +
     1089 +        /* Make sure nothing increased the references */
     1090 +        ASSERT(ns->netstack_refcnt == 0);
     1091 +        ASSERT(ns->netstack_numzones == 0);
     1092 +
     1093 +        /* Finally remove from list of netstacks */
     1094 +        mutex_enter(&netstack_g_lock);
     1095 +        found = B_FALSE;
     1096 +        for (nsp = &netstack_head; *nsp != NULL;
     1097 +            nsp = &(*nsp)->netstack_next) {
     1098 +                if (*nsp == ns) {
     1099 +                        *nsp = ns->netstack_next;
     1100 +                        ns->netstack_next = NULL;
     1101 +                        found = B_TRUE;
     1102 +                        break;
     1103 +                }
     1104 +        }
     1105 +        ASSERT(found);
     1106 +        mutex_exit(&netstack_g_lock);
     1107 +
     1108 +        /* Re-check after unlinking that nothing increased the references */
     1109 +        ASSERT(ns->netstack_refcnt == 0);
     1110 +        ASSERT(ns->netstack_numzones == 0);
     1111 +
     1112 +        ASSERT(ns->netstack_flags & NSF_CLOSING);
     1113 +
     1114 +        for (i = 0; i < NS_MAX; i++) {
     1115 +                nm_state_t *nms = &ns->netstack_m_state[i];
     1116 +
     1117 +                cv_destroy(&nms->nms_cv);
     1118 +        }
     1119 +        mutex_destroy(&ns->netstack_lock);
     1120 +        cv_destroy(&ns->netstack_cv);
     1121 +        kmem_free(ns, sizeof (*ns));
     1122 +        /* Release our slot so netstack_rele() can schedule another reap. */
     1123 +        sema_v(&netstack_reap_limiter);
     1124 +}
     1125 +
1064 1126  void
1065 1127  netstack_rele(netstack_t *ns)
1066 1128  {
1067      -        netstack_t **nsp;
1068      -        boolean_t found;
1069 1129          int refcnt, numzones;
1070      -        int i;
1071 1130  
1072 1131          mutex_enter(&ns->netstack_lock);
1073 1132          ASSERT(ns->netstack_refcnt > 0);
1074 1133          ns->netstack_refcnt--;
1075 1134          /*
1076 1135           * As we drop the lock additional netstack_rele()s can come in
1077 1136           * and decrement the refcnt to zero and free the netstack_t.
1078 1137           * Store the counts in local variables and if we were not the last
1079 1138           * then don't reference the netstack_t after that.
1080 1139           */
1081 1140          refcnt = ns->netstack_refcnt;
1082 1141          numzones = ns->netstack_numzones;
1083 1142          DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1084 1143          mutex_exit(&ns->netstack_lock);
1085 1144  
1086 1145          if (refcnt == 0 && numzones == 0) {
1087 1146                  /*
1088      -                 * Time to call the destroy functions and free up
1089      -                 * the structure
     1147 +                 * Callers may re-enter various netstack structures, which
     1148 +                 * could cause a lock-up due to odd reference models or other
     1149 +                 * factors.  To avoid that, we schedule the actual deletion of
     1150 +                 * this netstack (netstack_reap()) as a deferred task on the
     1151 +                 * system taskq.  This way, any such reference models won't
     1152 +                 * trip over themselves.
     1153 +                 *
     1154 +                 * Assume we aren't in a high-priority interrupt context, so
     1155 +                 * we can block in sema_p() and dispatch with TQ_SLEEP.
1090 1156                   */
1091      -                netstack_stack_inactive(ns);
     1157 +                if (sema_tryp(&netstack_reap_limiter) == 0) {
     1158 +                        /*
     1159 +                         * Indicate we're slamming against a limit.
     1160 +                         */
     1161 +                        hrtime_t measurement = gethrtime();
1092 1162  
1093      -                /* Make sure nothing increased the references */
1094      -                ASSERT(ns->netstack_refcnt == 0);
1095      -                ASSERT(ns->netstack_numzones == 0);
1096      -
1097      -                /* Finally remove from list of netstacks */
1098      -                mutex_enter(&netstack_g_lock);
1099      -                found = B_FALSE;
1100      -                for (nsp = &netstack_head; *nsp != NULL;
1101      -                    nsp = &(*nsp)->netstack_next) {
1102      -                        if (*nsp == ns) {
1103      -                                *nsp = ns->netstack_next;
1104      -                                ns->netstack_next = NULL;
1105      -                                found = B_TRUE;
1106      -                                break;
1107      -                        }
     1163 +                        sema_p(&netstack_reap_limiter);
     1164 +                        /* Capture delay in nanoseconds. */
     1165 +                        DTRACE_PROBE1(netstack__reap__rate__limited,
     1166 +                            hrtime_t, gethrtime() - measurement);
1108 1167                  }
1109      -                ASSERT(found);
1110      -                mutex_exit(&netstack_g_lock);
1111 1168  
1112      -                /* Make sure nothing increased the references */
1113      -                ASSERT(ns->netstack_refcnt == 0);
1114      -                ASSERT(ns->netstack_numzones == 0);
1115      -
1116      -                ASSERT(ns->netstack_flags & NSF_CLOSING);
1117      -
1118      -                for (i = 0; i < NS_MAX; i++) {
1119      -                        nm_state_t *nms = &ns->netstack_m_state[i];
1120      -
1121      -                        cv_destroy(&nms->nms_cv);
1122      -                }
1123      -                mutex_destroy(&ns->netstack_lock);
1124      -                cv_destroy(&ns->netstack_cv);
1125      -                kmem_free(ns, sizeof (*ns));
     1169 +                /* TQ_SLEEP should prevent taskq_dispatch() from failing. */
     1170 +                (void) taskq_dispatch(system_taskq, netstack_reap, ns,
     1171 +                    TQ_SLEEP);
1126 1172          }
1127 1173  }
1128 1174  
1129 1175  void
1130 1176  netstack_hold(netstack_t *ns)
1131 1177  {
1132 1178          mutex_enter(&ns->netstack_lock);
1133 1179          ns->netstack_refcnt++;
1134 1180          ASSERT(ns->netstack_refcnt > 0);
1135 1181          mutex_exit(&ns->netstack_lock);
↓ open down ↓ 259 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX