20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright (c) 2016, Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/param.h>
  29 #include <sys/sysmacros.h>
  30 #include <sys/vm.h>
  31 #include <sys/proc.h>
  32 #include <sys/tuneable.h>
  33 #include <sys/systm.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/debug.h>
  36 #include <sys/sdt.h>
  37 #include <sys/mutex.h>
  38 #include <sys/bitmap.h>
  39 #include <sys/atomic.h>
  40 #include <sys/kobj.h>
  41 #include <sys/disp.h>
  42 #include <vm/seg_kmem.h>
  43 #include <sys/zone.h>
  44 #include <sys/netstack.h>
  45 
  46 /*
  47  * What we use so that the zones framework can tell us about new zones,
  48  * which we use to create new stacks.
  49  */
  50 static zone_key_t netstack_zone_key;
  51 
  52 static int      netstack_initialized = 0;
  53 
  54 /*
  55  * Track the registered netstacks.
  56  * The global lock protects
  57  * - ns_reg
  58  * - the list starting at netstack_head and following the netstack_next
  59  *   pointers.
 
 
 105 static void     netstack_zone_shutdown(zoneid_t zoneid, void *arg);
 106 static void     netstack_zone_destroy(zoneid_t zoneid, void *arg);
 107 
 108 static void     netstack_shared_zone_add(zoneid_t zoneid);
 109 static void     netstack_shared_zone_remove(zoneid_t zoneid);
 110 static void     netstack_shared_kstat_add(kstat_t *ks);
 111 static void     netstack_shared_kstat_remove(kstat_t *ks);
 112 
 113 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
 114 
 115 static void     apply_all_netstacks(int, applyfn_t *);
 116 static void     apply_all_modules(netstack_t *, applyfn_t *);
 117 static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 118 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 119 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 120 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 121 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 122 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 123     kmutex_t *);
 124 
 125 void
 126 netstack_init(void)
 127 {
 128         mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
 129         mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
 130 
 131         netstack_initialized = 1;
 132 
 133         /*
 134          * We want to be informed each time a zone is created or
 135          * destroyed in the kernel, so we can maintain the
 136          * stack instance information.
 137          */
 138         zone_key_create(&netstack_zone_key, netstack_zone_create,
 139             netstack_zone_shutdown, netstack_zone_destroy);
 140 }
 141 
 142 /*
 143  * Register a new module with the framework.
 144  * This registers interest in changes to the set of netstacks.
 145  * The createfn and destroyfn are required, but the shutdownfn can be
 146  * NULL.
 147  * Note that due to the current zsd implementation, when the create
 148  * function is called the zone isn't fully present, thus functions
 149  * like zone_find_by_* will fail, hence the create function can not
 150  * use many zones kernel functions including zcmn_err().
 
1013         return (ns);
1014 }
1015 
1016 /*
1017  * Find a stack instance given the stackid with exact match?
1018  * Increases the reference count if found; caller must do a
1019  * netstack_rele().
1020  *
1021  * Skip the unitialized ones.
1022  */
1023 netstack_t *
1024 netstack_find_by_stackid(netstackid_t stackid)
1025 {
1026         netstack_t *ns;
1027 
1028         mutex_enter(&netstack_g_lock);
1029         for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1030                 mutex_enter(&ns->netstack_lock);
1031                 if (ns->netstack_stackid == stackid &&
1032                     !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1033                         mutex_exit(&ns->netstack_lock);
1034                         netstack_hold(ns);
1035                         mutex_exit(&netstack_g_lock);
1036                         return (ns);
1037                 }
1038                 mutex_exit(&ns->netstack_lock);
1039         }
1040         mutex_exit(&netstack_g_lock);
1041         return (NULL);
1042 }
1043 
1044 boolean_t
1045 netstack_inuse_by_stackid(netstackid_t stackid)
1046 {
1047         netstack_t *ns;
1048         boolean_t rval = B_FALSE;
1049 
1050         mutex_enter(&netstack_g_lock);
1051 
1052         for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1053                 if (ns->netstack_stackid == stackid) {
1054                         rval = B_TRUE;
1055                         break;
1056                 }
1057         }
1058 
1059         mutex_exit(&netstack_g_lock);
1060 
1061         return (rval);
1062 }
1063 
1064 void
1065 netstack_rele(netstack_t *ns)
1066 {
1067         netstack_t **nsp;
1068         boolean_t found;
1069         int refcnt, numzones;
1070         int i;
1071 
1072         mutex_enter(&ns->netstack_lock);
1073         ASSERT(ns->netstack_refcnt > 0);
1074         ns->netstack_refcnt--;
1075         /*
1076          * As we drop the lock additional netstack_rele()s can come in
1077          * and decrement the refcnt to zero and free the netstack_t.
1078          * Store pointers in local variables and if we were not the last
1079          * then don't reference the netstack_t after that.
1080          */
1081         refcnt = ns->netstack_refcnt;
1082         numzones = ns->netstack_numzones;
1083         DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1084         mutex_exit(&ns->netstack_lock);
1085 
1086         if (refcnt == 0 && numzones == 0) {
1087                 /*
1088                  * Time to call the destroy functions and free up
1089                  * the structure
1090                  */
1091                 netstack_stack_inactive(ns);
1092 
1093                 /* Make sure nothing increased the references */
1094                 ASSERT(ns->netstack_refcnt == 0);
1095                 ASSERT(ns->netstack_numzones == 0);
1096 
1097                 /* Finally remove from list of netstacks */
1098                 mutex_enter(&netstack_g_lock);
1099                 found = B_FALSE;
1100                 for (nsp = &netstack_head; *nsp != NULL;
1101                     nsp = &(*nsp)->netstack_next) {
1102                         if (*nsp == ns) {
1103                                 *nsp = ns->netstack_next;
1104                                 ns->netstack_next = NULL;
1105                                 found = B_TRUE;
1106                                 break;
1107                         }
1108                 }
1109                 ASSERT(found);
1110                 mutex_exit(&netstack_g_lock);
1111 
1112                 /* Make sure nothing increased the references */
1113                 ASSERT(ns->netstack_refcnt == 0);
1114                 ASSERT(ns->netstack_numzones == 0);
1115 
1116                 ASSERT(ns->netstack_flags & NSF_CLOSING);
1117 
1118                 for (i = 0; i < NS_MAX; i++) {
1119                         nm_state_t *nms = &ns->netstack_m_state[i];
1120 
1121                         cv_destroy(&nms->nms_cv);
1122                 }
1123                 mutex_destroy(&ns->netstack_lock);
1124                 cv_destroy(&ns->netstack_cv);
1125                 kmem_free(ns, sizeof (*ns));
1126         }
1127 }
1128 
1129 void
1130 netstack_hold(netstack_t *ns)
1131 {
1132         mutex_enter(&ns->netstack_lock);
1133         ns->netstack_refcnt++;
1134         ASSERT(ns->netstack_refcnt > 0);
1135         mutex_exit(&ns->netstack_lock);
1136         DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1137 }
1138 
1139 /*
1140  * To support kstat_create_netstack() using kstat_zone_add we need
1141  * to track both
1142  *  - all zoneids that use the global/shared stack
1143  *  - all kstats that have been added for the shared stack
1144  */
1145 kstat_t *
1146 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1147     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1148     netstackid_t ks_netstackid)
1149 {
1150         kstat_t *ks;
1151 
1152         if (ks_netstackid == GLOBAL_NETSTACKID) {
1153                 ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1154                     ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1155                 if (ks != NULL)
1156                         netstack_shared_kstat_add(ks);
1157                 return (ks);
1158         } else {
 
 | 
 
 
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright (c) 2016, Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/param.h>
  29 #include <sys/sysmacros.h>
  30 #include <sys/vm.h>
  31 #include <sys/proc.h>
  32 #include <sys/tuneable.h>
  33 #include <sys/systm.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/debug.h>
  36 #include <sys/sdt.h>
  37 #include <sys/mutex.h>
  38 #include <sys/bitmap.h>
  39 #include <sys/atomic.h>
  40 #include <sys/sunddi.h>
  41 #include <sys/kobj.h>
  42 #include <sys/disp.h>
  43 #include <vm/seg_kmem.h>
  44 #include <sys/zone.h>
  45 #include <sys/netstack.h>
  46 
  47 /*
  48  * What we use so that the zones framework can tell us about new zones,
  49  * which we use to create new stacks.
  50  */
  51 static zone_key_t netstack_zone_key;
  52 
  53 static int      netstack_initialized = 0;
  54 
  55 /*
  56  * Track the registered netstacks.
  57  * The global lock protects
  58  * - ns_reg
  59  * - the list starting at netstack_head and following the netstack_next
  60  *   pointers.
 
 
 106 static void     netstack_zone_shutdown(zoneid_t zoneid, void *arg);
 107 static void     netstack_zone_destroy(zoneid_t zoneid, void *arg);
 108 
 109 static void     netstack_shared_zone_add(zoneid_t zoneid);
 110 static void     netstack_shared_zone_remove(zoneid_t zoneid);
 111 static void     netstack_shared_kstat_add(kstat_t *ks);
 112 static void     netstack_shared_kstat_remove(kstat_t *ks);
 113 
 114 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
 115 
 116 static void     apply_all_netstacks(int, applyfn_t *);
 117 static void     apply_all_modules(netstack_t *, applyfn_t *);
 118 static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
 119 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
 120 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
 121 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
 122 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
 123 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
 124     kmutex_t *);
 125 
 126 static void netstack_hold_locked(netstack_t *);
 127 static void netstack_reap_work(netstack_t *, boolean_t);
 128 ksema_t netstack_reap_limiter;
 129 
 130 void
 131 netstack_init(void)
 132 {
 133         mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
 134         mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
 135 
 136         /* XXX KEBE SAYS hard-coded constant needs to be fixed. */
 137         sema_init(&netstack_reap_limiter, 1024, NULL, SEMA_DRIVER, NULL);
 138 
 139         netstack_initialized = 1;
 140 
 141         /*
 142          * We want to be informed each time a zone is created or
 143          * destroyed in the kernel, so we can maintain the
 144          * stack instance information.
 145          */
 146         zone_key_create(&netstack_zone_key, netstack_zone_create,
 147             netstack_zone_shutdown, netstack_zone_destroy);
 148 }
 149 
 150 /*
 151  * Register a new module with the framework.
 152  * This registers interest in changes to the set of netstacks.
 153  * The createfn and destroyfn are required, but the shutdownfn can be
 154  * NULL.
 155  * Note that due to the current zsd implementation, when the create
 156  * function is called the zone isn't fully present, thus functions
 157  * like zone_find_by_* will fail, hence the create function can not
 158  * use many zones kernel functions including zcmn_err().
 
1021         return (ns);
1022 }
1023 
1024 /*
1025  * Find a stack instance given the stackid with exact match?
1026  * Increases the reference count if found; caller must do a
1027  * netstack_rele().
1028  *
1029  * Skip the unitialized ones.
1030  */
1031 netstack_t *
1032 netstack_find_by_stackid(netstackid_t stackid)
1033 {
1034         netstack_t *ns;
1035 
1036         mutex_enter(&netstack_g_lock);
1037         for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1038                 mutex_enter(&ns->netstack_lock);
1039                 if (ns->netstack_stackid == stackid &&
1040                     !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1041                         netstack_hold_locked(ns);
1042                         mutex_exit(&ns->netstack_lock);
1043                         mutex_exit(&netstack_g_lock);
1044                         return (ns);
1045                 }
1046                 mutex_exit(&ns->netstack_lock);
1047         }
1048         mutex_exit(&netstack_g_lock);
1049         return (NULL);
1050 }
1051 
1052 boolean_t
1053 netstack_inuse_by_stackid(netstackid_t stackid)
1054 {
1055         netstack_t *ns;
1056         boolean_t rval = B_FALSE;
1057 
1058         mutex_enter(&netstack_g_lock);
1059 
1060         for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1061                 if (ns->netstack_stackid == stackid) {
1062                         rval = B_TRUE;
1063                         break;
1064                 }
1065         }
1066 
1067         mutex_exit(&netstack_g_lock);
1068 
1069         return (rval);
1070 }
1071 
1072 
1073 static void
1074 netstack_reap(void *arg)
1075 {
1076         /* Indicate we took a semaphore to get here. */
1077         netstack_reap_work((netstack_t *)arg, B_TRUE);
1078 }
1079 
1080 static void
1081 netstack_reap_intr(void *arg)
1082 {
1083         /* Indicate we did NOT TAKE a semaphore to get here. */
1084         netstack_reap_work((netstack_t *)arg, B_FALSE);
1085 }
1086 
1087 static void
1088 netstack_reap_work(netstack_t *ns, boolean_t semaphore_signal)
1089 {
1090         netstack_t **nsp;
1091         boolean_t found;
1092         int i;
1093 
1094         /*
1095          * Time to call the destroy functions and free up
1096          * the structure
1097          */
1098         netstack_stack_inactive(ns);
1099 
1100         /* Make sure nothing increased the references */
1101         ASSERT(ns->netstack_refcnt == 0);
1102         ASSERT(ns->netstack_numzones == 0);
1103 
1104         /* Finally remove from list of netstacks */
1105         mutex_enter(&netstack_g_lock);
1106         found = B_FALSE;
1107         for (nsp = &netstack_head; *nsp != NULL;
1108              nsp = &(*nsp)->netstack_next) {
1109                 if (*nsp == ns) {
1110                         *nsp = ns->netstack_next;
1111                         ns->netstack_next = NULL;
1112                         found = B_TRUE;
1113                         break;
1114                 }
1115         }
1116         ASSERT(found);
1117         mutex_exit(&netstack_g_lock);
1118 
1119         /* Make sure nothing increased the references */
1120         ASSERT(ns->netstack_refcnt == 0);
1121         ASSERT(ns->netstack_numzones == 0);
1122 
1123         ASSERT(ns->netstack_flags & NSF_CLOSING);
1124 
1125         for (i = 0; i < NS_MAX; i++) {
1126                 nm_state_t *nms = &ns->netstack_m_state[i];
1127 
1128                 cv_destroy(&nms->nms_cv);
1129         }
1130         mutex_destroy(&ns->netstack_lock);
1131         cv_destroy(&ns->netstack_cv);
1132         kmem_free(ns, sizeof (*ns));
1133         /* Allow another reap to be scheduled. */
1134         if (semaphore_signal)
1135                 sema_v(&netstack_reap_limiter);
1136 }
1137 
1138 void
1139 netstack_rele(netstack_t *ns)
1140 {
1141         int refcnt, numzones;
1142 
1143         mutex_enter(&ns->netstack_lock);
1144         ASSERT(ns->netstack_refcnt > 0);
1145         ns->netstack_refcnt--;
1146         /*
1147          * As we drop the lock additional netstack_rele()s can come in
1148          * and decrement the refcnt to zero and free the netstack_t.
1149          * Store pointers in local variables and if we were not the last
1150          * then don't reference the netstack_t after that.
1151          */
1152         refcnt = ns->netstack_refcnt;
1153         numzones = ns->netstack_numzones;
1154         DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1155         mutex_exit(&ns->netstack_lock);
1156 
1157         if (refcnt == 0 && numzones == 0) {
1158                 boolean_t is_not_intr = !servicing_interrupt();
1159 
1160                 /*
1161                  * Because there are possibilities of kstats being held by
1162                  * callers, which would then be immediately freed, but held up
1163                  * due to kstat's odd reference model recording the thread, we
1164                  * choose to schedule the actual deletion of this netstack as
1165                  * a deferred task on the system taskq.  This way, any
1166                  * store-the-thread-pointer semantics won't trip over
1167                  * themselves.
1168                  *
1169                  * On the off chance this is called in interrupt context, we
1170                  * cannot use the semaphore to enforce rate-limiting.
1171                  */
1172                 if (is_not_intr && sema_tryp(&netstack_reap_limiter) == 0) {
1173                         /*
1174                          * XXX KEBE SAYS inidicate we're slamming against
1175                          * a limit.
1176                          */
1177                         hrtime_t measurement = gethrtime();
1178 
1179                         sema_p(&netstack_reap_limiter);
1180                         /* Caputre delay in ns. */
1181                         DTRACE_PROBE1(netstack__reap__rate__limited,
1182                             hrtime_t *, gethrtime() - measurement);
1183                 }
1184 
1185                 if (taskq_dispatch(system_taskq,
1186                     is_not_intr ? netstack_reap : netstack_reap_intr, ns,
1187                     TQ_NOSLEEP) == NULL) {
1188                         /*
1189                          * Well shoot, why can't we taskq_dispatch?
1190                          * Take our chances with a direct call.
1191                          */
1192                         DTRACE_PROBE1(netstack__reap__taskq__fail,
1193                             netstack_t *, ns);
1194                         netstack_reap_work(ns, is_not_intr);
1195                 }
1196         }
1197 }
1198 
1199 static void
1200 netstack_hold_locked(netstack_t *ns)
1201 {
1202         ASSERT(MUTEX_HELD(&ns->netstack_lock));
1203         ns->netstack_refcnt++;
1204         ASSERT(ns->netstack_refcnt > 0);
1205         DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1206 }
1207 
1208 void
1209 netstack_hold(netstack_t *ns)
1210 {
1211         mutex_enter(&ns->netstack_lock);
1212         netstack_hold_locked(ns);
1213         mutex_exit(&ns->netstack_lock);
1214 }
1215 
1216 /*
1217  * To support kstat_create_netstack() using kstat_zone_add we need
1218  * to track both
1219  *  - all zoneids that use the global/shared stack
1220  *  - all kstats that have been added for the shared stack
1221  */
1222 kstat_t *
1223 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1224     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1225     netstackid_t ks_netstackid)
1226 {
1227         kstat_t *ks;
1228 
1229         if (ks_netstackid == GLOBAL_NETSTACKID) {
1230                 ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1231                     ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1232                 if (ks != NULL)
1233                         netstack_shared_kstat_add(ks);
1234                 return (ks);
1235         } else {
 
 |