Print this page
OS-7088 cyclics corked on overlay socket with full queue
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/io/overlay/overlay_mux.c
+++ new/usr/src/uts/common/io/overlay/overlay_mux.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2015 Joyent, Inc.
14 14 */
15 15
16 16 /*
17 17 * Overlay device ksocket multiplexer.
18 18 *
19 19 * For more information, see the big theory statement in
20 20 * uts/common/io/overlay/overlay.c
21 21 */
22 22
23 23 #include <sys/types.h>
24 24 #include <sys/socket.h>
25 25 #include <sys/ksynch.h>
26 26 #include <sys/ksocket.h>
27 27 #include <sys/avl.h>
28 28 #include <sys/list.h>
29 29 #include <sys/sysmacros.h>
30 30 #include <sys/strsubr.h>
31 31 #include <sys/strsun.h>
32 32 #include <sys/tihdr.h>
33 33
34 34 #include <sys/overlay_impl.h>
35 35
36 36 #include <sys/sdt.h>
37 37
38 38 #define OVERLAY_FREEMSG(mp, reason) \
39 39 DTRACE_PROBE2(overlay__fremsg, mblk_t *, mp, char *, reason)
40 40
41 41 static list_t overlay_mux_list;
42 42 static kmutex_t overlay_mux_lock;
43 43
44 44 void
45 45 overlay_mux_init(void)
46 46 {
47 47 list_create(&overlay_mux_list, sizeof (overlay_mux_t),
48 48 offsetof(overlay_mux_t, omux_lnode));
49 49 mutex_init(&overlay_mux_lock, NULL, MUTEX_DRIVER, NULL);
50 50 }
51 51
52 52 void
53 53 overlay_mux_fini(void)
54 54 {
55 55 mutex_destroy(&overlay_mux_lock);
56 56 list_destroy(&overlay_mux_list);
57 57 }
58 58
59 59 static int
60 60 overlay_mux_comparator(const void *a, const void *b)
61 61 {
62 62 const overlay_dev_t *odl, *odr;
63 63 odl = a;
64 64 odr = b;
65 65 if (odl->odd_vid > odr->odd_vid)
66 66 return (1);
67 67 else if (odl->odd_vid < odr->odd_vid)
68 68 return (-1);
69 69 else
70 70 return (0);
71 71 }
72 72
73 73 /*
74 74 * This is the central receive data path. We need to decode the packet, if we
75 75 * can, and then deliver it to the appropriate overlay.
76 76 */
77 77 /* ARGSUSED */
78 78 static boolean_t
79 79 overlay_mux_recv(ksocket_t ks, mblk_t *mpchain, size_t msgsize, int oob,
80 80 void *arg)
81 81 {
82 82 mblk_t *mp, *nmp, *fmp;
83 83 overlay_mux_t *mux = arg;
84 84
85 85 /*
86 86 * We may have a received a chain of messages. Each messsage in the
87 87 * chain will likely have a T_unitdata_ind attached to it as an M_PROTO.
88 88 * If we aren't getting that, we should probably drop that for the
89 89 * moment.
90 90 */
91 91 for (mp = mpchain; mp != NULL; mp = nmp) {
92 92 struct T_unitdata_ind *tudi;
93 93 ovep_encap_info_t infop;
94 94 overlay_dev_t od, *odd;
95 95 int ret;
96 96
97 97 nmp = mp->b_next;
98 98 mp->b_next = NULL;
99 99
100 100 if (DB_TYPE(mp) != M_PROTO) {
101 101 OVERLAY_FREEMSG(mp, "first one isn't M_PROTO");
102 102 freemsg(mp);
103 103 continue;
104 104 }
105 105
106 106 if (mp->b_cont == NULL) {
107 107 OVERLAY_FREEMSG(mp, "missing a b_cont");
108 108 freemsg(mp);
109 109 continue;
110 110 }
111 111
112 112 tudi = (struct T_unitdata_ind *)mp->b_rptr;
113 113 if (tudi->PRIM_type != T_UNITDATA_IND) {
114 114 OVERLAY_FREEMSG(mp, "Not a T_unitdata_ind *");
115 115 freemsg(mp);
116 116 continue;
117 117 }
118 118
119 119 /*
120 120 * In the future, we'll care about the source information
121 121 * for purposes of telling varpd for oob invalidation. But for
122 122 * now, just drop that block.
123 123 */
124 124 fmp = mp;
125 125 mp = fmp->b_cont;
126 126 fmp->b_cont = NULL;
127 127 freemsg(fmp);
128 128
129 129 /*
130 130 * Decap and deliver.
131 131 */
132 132 bzero(&infop, sizeof (ovep_encap_info_t));
133 133 ret = mux->omux_plugin->ovp_ops->ovpo_decap(NULL, mp, &infop);
134 134 if (ret != 0) {
135 135 OVERLAY_FREEMSG(mp, "decap failed");
136 136 freemsg(mp);
137 137 continue;
138 138 }
139 139 if (MBLKL(mp) > infop.ovdi_hdr_size) {
140 140 mp->b_rptr += infop.ovdi_hdr_size;
141 141 } else {
142 142 while (infop.ovdi_hdr_size != 0) {
143 143 size_t rem, blkl;
144 144
145 145 if (mp == NULL)
146 146 break;
147 147
148 148 blkl = MBLKL(mp);
149 149 rem = MIN(infop.ovdi_hdr_size, blkl);
150 150 infop.ovdi_hdr_size -= rem;
151 151 mp->b_rptr += rem;
152 152 if (rem == blkl) {
153 153 fmp = mp;
154 154 mp = fmp->b_cont;
155 155 fmp->b_cont = NULL;
156 156 OVERLAY_FREEMSG(mp,
157 157 "freed a fmp block");
158 158 freemsg(fmp);
159 159 }
160 160 }
161 161 if (mp == NULL) {
162 162 OVERLAY_FREEMSG(mp, "freed it all...");
163 163 continue;
164 164 }
165 165 }
166 166
167 167
168 168 od.odd_vid = infop.ovdi_id;
169 169 mutex_enter(&mux->omux_lock);
170 170 odd = avl_find(&mux->omux_devices, &od, NULL);
171 171 if (odd == NULL) {
172 172 mutex_exit(&mux->omux_lock);
173 173 OVERLAY_FREEMSG(mp, "no matching vid");
174 174 freemsg(mp);
175 175 continue;
176 176 }
177 177 mutex_enter(&odd->odd_lock);
178 178 if ((odd->odd_flags & OVERLAY_F_MDDROP) ||
179 179 !(odd->odd_flags & OVERLAY_F_IN_MUX)) {
180 180 mutex_exit(&odd->odd_lock);
181 181 mutex_exit(&mux->omux_lock);
182 182 OVERLAY_FREEMSG(mp, "dev dropped");
183 183 freemsg(mp);
184 184 continue;
185 185 }
186 186 overlay_io_start(odd, OVERLAY_F_IN_RX);
187 187 mutex_exit(&odd->odd_lock);
188 188 mutex_exit(&mux->omux_lock);
189 189
190 190 mac_rx(odd->odd_mh, NULL, mp);
191 191
192 192 mutex_enter(&odd->odd_lock);
193 193 overlay_io_done(odd, OVERLAY_F_IN_RX);
194 194 mutex_exit(&odd->odd_lock);
195 195 }
196 196
197 197 return (B_TRUE);
198 198 }
199 199
200 200 /*
201 201 * Register a given device with a socket backend. If no such device socket
202 202 * exists, create a new one.
203 203 */
204 204 overlay_mux_t *
205 205 overlay_mux_open(overlay_plugin_t *opp, int domain, int family, int protocol,
206 206 struct sockaddr *addr, socklen_t len, int *errp)
207 207 {
208 208 int err;
209 209 overlay_mux_t *mux;
210 210 ksocket_t ksock;
211 211
212 212 if (errp == NULL)
213 213 errp = &err;
214 214
215 215 mutex_enter(&overlay_mux_lock);
216 216 for (mux = list_head(&overlay_mux_list); mux != NULL;
217 217 mux = list_next(&overlay_mux_list, mux)) {
218 218 if (domain == mux->omux_domain &&
219 219 family == mux->omux_family &&
220 220 protocol == mux->omux_protocol &&
221 221 len == mux->omux_alen &&
222 222 bcmp(addr, mux->omux_addr, len) == 0) {
223 223
224 224 if (opp != mux->omux_plugin) {
225 225 *errp = EEXIST;
226 226 return (NULL);
227 227 }
228 228
229 229 mutex_enter(&mux->omux_lock);
230 230 mux->omux_count++;
231 231 mutex_exit(&mux->omux_lock);
232 232 mutex_exit(&overlay_mux_lock);
233 233 *errp = 0;
234 234 return (mux);
235 235 }
236 236 }
237 237
238 238 /*
239 239 * Today we aren't zone-aware and only exist in the global zone. When we
240 240 * allow for things to exist in the non-global zone, we'll want to use a
241 241 * credential that's actually specific to the zone.
242 242 */
243 243 *errp = ksocket_socket(&ksock, domain, family, protocol, KSOCKET_SLEEP,
244 244 kcred);
245 245 if (*errp != 0) {
246 246 mutex_exit(&overlay_mux_lock);
247 247 return (NULL);
248 248 }
249 249
250 250 *errp = ksocket_bind(ksock, addr, len, kcred);
251 251 if (*errp != 0) {
252 252 mutex_exit(&overlay_mux_lock);
253 253 ksocket_close(ksock, kcred);
254 254 return (NULL);
255 255 }
256 256
257 257 /*
258 258 * Ask our lower layer to optionally toggle anything they need on this
259 259 * socket. Because a socket is owned by a single type of plugin, we can
260 260 * then ask it to perform any additional socket set up it'd like to do.
261 261 */
262 262 if (opp->ovp_ops->ovpo_sockopt != NULL &&
263 263 (*errp = opp->ovp_ops->ovpo_sockopt(ksock)) != 0) {
264 264 mutex_exit(&overlay_mux_lock);
265 265 ksocket_close(ksock, kcred);
266 266 return (NULL);
267 267 }
268 268
269 269 mux = kmem_alloc(sizeof (overlay_mux_t), KM_SLEEP);
270 270 list_link_init(&mux->omux_lnode);
271 271 mux->omux_ksock = ksock;
272 272 mux->omux_plugin = opp;
273 273 mux->omux_domain = domain;
274 274 mux->omux_family = family;
275 275 mux->omux_protocol = protocol;
276 276 mux->omux_addr = kmem_alloc(len, KM_SLEEP);
277 277 bcopy(addr, mux->omux_addr, len);
278 278 mux->omux_alen = len;
279 279 mux->omux_count = 1;
280 280 avl_create(&mux->omux_devices, overlay_mux_comparator,
281 281 sizeof (overlay_dev_t), offsetof(overlay_dev_t, odd_muxnode));
282 282 mutex_init(&mux->omux_lock, NULL, MUTEX_DRIVER, NULL);
283 283
284 284
285 285 /* Once this is called, we need to expect to rx data */
286 286 *errp = ksocket_krecv_set(ksock, overlay_mux_recv, mux);
287 287 if (*errp != 0) {
288 288 ksocket_close(ksock, kcred);
289 289 mutex_destroy(&mux->omux_lock);
290 290 avl_destroy(&mux->omux_devices);
291 291 kmem_free(mux->omux_addr, len);
292 292 kmem_free(mux, sizeof (overlay_mux_t));
293 293 return (NULL);
294 294 }
295 295
296 296 list_insert_tail(&overlay_mux_list, mux);
297 297 mutex_exit(&overlay_mux_lock);
298 298
299 299 *errp = 0;
300 300 return (mux);
301 301 }
302 302
303 303 void
304 304 overlay_mux_close(overlay_mux_t *mux)
305 305 {
306 306 mutex_enter(&overlay_mux_lock);
307 307 mutex_enter(&mux->omux_lock);
308 308 mux->omux_count--;
309 309 if (mux->omux_count != 0) {
310 310 mutex_exit(&mux->omux_lock);
311 311 mutex_exit(&overlay_mux_lock);
312 312 return;
313 313 }
314 314 list_remove(&overlay_mux_list, mux);
315 315 mutex_exit(&mux->omux_lock);
316 316 mutex_exit(&overlay_mux_lock);
317 317
318 318 ksocket_close(mux->omux_ksock, kcred);
319 319 avl_destroy(&mux->omux_devices);
320 320 kmem_free(mux->omux_addr, mux->omux_alen);
321 321 kmem_free(mux, sizeof (overlay_mux_t));
322 322 }
323 323
324 324 void
325 325 overlay_mux_add_dev(overlay_mux_t *mux, overlay_dev_t *odd)
326 326 {
327 327 mutex_enter(&mux->omux_lock);
328 328 avl_add(&mux->omux_devices, odd);
329 329 mutex_exit(&mux->omux_lock);
330 330 }
331 331
332 332 void
333 333 overlay_mux_remove_dev(overlay_mux_t *mux, overlay_dev_t *odd)
334 334 {
335 335 mutex_enter(&mux->omux_lock);
336 336 avl_remove(&mux->omux_devices, odd);
337 337 mutex_exit(&mux->omux_lock);
|
↓ open down ↓ |
337 lines elided |
↑ open up ↑ |
338 338 }
339 339
340 340 int
341 341 overlay_mux_tx(overlay_mux_t *mux, struct msghdr *hdr, mblk_t *mp)
342 342 {
343 343 int ret;
344 344
345 345 /*
346 346 * It'd be nice to be able to use MSG_MBLK_QUICKRELE, unfortunately,
347 347 * that isn't actually supported by UDP at this time.
348 + *
349 + * Send with MSG_DONTWAIT to indicate clogged UDP sockets upstack.
348 350 */
349 - ret = ksocket_sendmblk(mux->omux_ksock, hdr, 0, &mp, kcred);
351 + ret = ksocket_sendmblk(mux->omux_ksock, hdr, MSG_DONTWAIT, &mp, kcred);
352 + /*
353 + * NOTE: ksocket_sendmblk() may send partial packets downstack,
354 + * returning what's not sent in &mp (i.e. mp pre-call might be a
355 + * b_cont of mp post-call). We can't hold up this message (it's a
356 + * datagram), so we drop, and let the caller cope.
357 + */
350 358 if (ret != 0)
351 359 freemsg(mp);
352 360
353 361 return (ret);
354 362 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX