1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2015 Joyent, Inc.
14 */
15
16 /*
17 * Shootdown processing logic.
18 *
19 * For more information, see the big theory statement in
20 * lib/varpd/svp/common/libvarpd_svp.c.
21 */
22
23 #include <umem.h>
24 #include <sys/uuid.h>
25 #include <assert.h>
26 #include <strings.h>
27 #include <errno.h>
28 #include <sys/debug.h>
29
30 #include <libvarpd_provider.h>
31 #include <libvarpd_svp.h>
32
/*
 * Tunables controlling how often we poll the remote for shootdown log entries.
 * Both are loaded directly into the shootdown timer's st_value.
 *
 * svp_shootdown_base: used when we have determined that there is nothing left
 * for us to do. We wait this many seconds, plus a random additional amount of
 * up to svp_shootdown_base seconds, before asking again.
 *
 * svp_shootdown_cont: used instead whenever there is actually some work going
 * on, so that we come back around quickly.
 */
static int svp_shootdown_base = 5;
static int svp_shootdown_cont = 1;
41
/*
 * Size, in bytes, of both our logack and logrm buffers. A shootdown buffer of
 * this size gives us approximately 18 or so VL3 entries, or 32 VL2 entries, or
 * some combination thereof. While it's a bit of overkill, we simply use the
 * same sized buffer for the list of uuids that we pass along to remove the log
 * entries that we've acted upon.
 */
static int svp_shootdown_buf = 1024;
50
51 static void
52 svp_shootdown_schedule(svp_sdlog_t *sdl, boolean_t cont)
53 {
54 assert(MUTEX_HELD(&sdl->sdl_lock));
55
56 if (cont == B_TRUE) {
57 sdl->sdl_timer.st_value = svp_shootdown_cont;
58 } else {
59 sdl->sdl_timer.st_value = svp_shootdown_base +
60 arc4random_uniform(svp_shootdown_base + 1);
61 }
62 svp_timer_add(&sdl->sdl_timer);
63 }
64
65 void
66 svp_shootdown_lrm_cb(svp_remote_t *srp, svp_status_t status)
67 {
68 svp_sdlog_t *sdl = &srp->sr_shoot;
69
70 mutex_enter(&sdl->sdl_lock);
71 sdl->sdl_flags &= ~SVP_SD_RUNNING;
72 svp_shootdown_schedule(sdl, B_TRUE);
73 mutex_exit(&sdl->sdl_lock);
74
75 if (status != SVP_S_OK) {
76 (void) bunyan_warn(svp_bunyan, "SVP_R_LOG_RM failed",
77 BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
78 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
79 BUNYAN_T_INT32, "status", status,
80 BUNYAN_T_END);
81 }
82 }
83
84 static void
85 svp_shootdown_ref(svp_sdlog_t *sdl)
86 {
87 mutex_enter(&sdl->sdl_lock);
88 sdl->sdl_ref++;
89 mutex_exit(&sdl->sdl_lock);
90 }
91
92 static void
93 svp_shootdown_rele(svp_sdlog_t *sdl)
94 {
95 svp_lrm_req_t *svrr = sdl->sdl_logrm;
96 boolean_t next;
97
98 mutex_enter(&sdl->sdl_lock);
99 VERIFY(sdl->sdl_ref > 0);
100 sdl->sdl_ref--;
101 if (sdl->sdl_ref > 0) {
102 mutex_exit(&sdl->sdl_lock);
103 return;
104 }
105
106 /*
107 * At this point we know that we hold the last reference, therefore it's
108 * safe for us to go ahead and clean up and move on and attempt to
109 * deliver the reply. We always deliver the reply by going through the
110 * timer. This can be rather important as the final reference may be
111 * coming through a failed query and it's not always safe for us to
112 * callback into the remote routines from this context.
113 *
114 * We should only do this if we have a non-zero number of entries to
115 * take down.
116 */
117 sdl->sdl_flags &= ~SVP_SD_RUNNING;
118 if (svrr->svrr_count > 0) {
119 sdl->sdl_flags |= SVP_SD_DORM;
120 next = B_TRUE;
121 } else {
122 next = B_FALSE;
123 }
124 svp_shootdown_schedule(sdl, next);
125 mutex_exit(&sdl->sdl_lock);
126 }
127
128 /*
129 * This is a callback used to indicate that the VL3 lookup has completed and an
130 * entry, if any, has been injected. If the command succeeded, eg. we got that
131 * the status was OK or that it was not found, then we will add it to he list to
132 * shoot down. Otherwise, there's nothing else for us to really do here.
133 */
134 void
135 svp_shootdown_vl3_cb(svp_status_t status, svp_log_vl3_t *vl3, svp_sdlog_t *sdl)
136 {
137 svp_lrm_req_t *svrr = sdl->sdl_logrm;
138
139 mutex_enter(&sdl->sdl_lock);
140 if (status == SVP_S_OK || status == SVP_S_NOTFOUND) {
141 bcopy(vl3->svl3_id, &svrr->svrr_ids[svrr->svrr_count * 16],
142 UUID_LEN);
143 svrr->svrr_count++;
144 }
145 mutex_exit(&sdl->sdl_lock);
146
147 svp_shootdown_rele(sdl);
148 }
149
150 static int
151 svp_shootdown_logr_shoot(void *data, svp_log_type_t type, void *arg)
152 {
153 svp_sdlog_t *sdl = arg;
154 svp_remote_t *srp = sdl->sdl_remote;
155 svp_lrm_req_t *svrr = sdl->sdl_logrm;
156
157 if (type != SVP_LOG_VL2 && type != SVP_LOG_VL3 && type != SVP_LOG_ROUTE)
158 libvarpd_panic("encountered unknown type: %d\n", type);
159
160 if (type == SVP_LOG_VL2) {
161 svp_log_vl2_t *svl2 = data;
162 svp_remote_shootdown_vl2(srp, svl2);
163 mutex_enter(&sdl->sdl_lock);
164 bcopy(svl2->svl2_id, &svrr->svrr_ids[svrr->svrr_count * 16],
165 UUID_LEN);
166 svrr->svrr_count++;
167 mutex_exit(&sdl->sdl_lock);
168 } else if (type == SVP_LOG_VL3) {
169 svp_log_vl3_t *svl3 = data;
170
171 /* Take a hold for the duration of this request */
172 svp_shootdown_ref(sdl);
173 svp_remote_shootdown_vl3(srp, svl3, sdl);
174 } else {
175 svp_log_route_t *svlr = data;
176
177 svp_remote_shootdown_route(srp, svlr);
178 mutex_enter(&sdl->sdl_lock);
179 bcopy(svlr->svlr_id, &svrr->svrr_ids[svrr->svrr_count * 16],
180 UUID_LEN);
181 svrr->svrr_count++;
182 mutex_exit(&sdl->sdl_lock);
183 }
184
185 return (0);
186 }
187
188 static int
189 svp_shootdown_logr_count(void *data, svp_log_type_t type, void *arg)
190 {
191 uint_t *u = arg;
192 *u = *u + 1;
193 return (0);
194 }
195
196
197 static int
198 svp_shootdown_logr_iter(svp_remote_t *srp, void *buf, size_t len,
199 int (*cb)(void *, svp_log_type_t, void *), void *arg, uint16_t version)
200 {
201 int ret;
202 off_t cboff = 0;
203 uint32_t *typep, type;
204 svp_log_vl2_t *svl2;
205 svp_log_vl3_t *svl3;
206
207 /* Adjust for initial status word */
208 assert(len >= sizeof (uint32_t));
209 len -= sizeof (uint32_t);
210 cboff += sizeof (uint32_t);
211
212 while (len > 0) {
213 size_t opsz;
214 char *typestring;
215
216 if (len < sizeof (uint32_t)) {
217 (void) bunyan_warn(svp_bunyan,
218 "failed to get initial shootdown tag",
219 BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
220 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
221 BUNYAN_T_INT32, "response_size", cboff + len,
222 BUNYAN_T_INT32, "response_offset", cboff,
223 BUNYAN_T_END);
224 return (-1);
225 }
226
227 typep = buf + cboff;
228 type = ntohl(*typep);
229 switch (type) {
230 case SVP_LOG_VL2:
231 opsz = sizeof (svp_log_vl2_t);
232 typestring = "svp_log_vl2_t";
233 break;
234 case SVP_LOG_VL3:
235 opsz = sizeof (svp_log_vl3_t);
236 typestring = "svp_log_vl3_t";
237 break;
238 case SVP_LOG_ROUTE:
239 if (version < SVP_VERSION_TWO) {
240 (void) bunyan_warn(svp_bunyan,
241 "insufficient version for SVP_LOG_ROUTE",
242 BUNYAN_T_UINT32, "version", version,
243 BUNYAN_T_STRING, "remote_host",
244 srp->sr_hostname,
245 BUNYAN_T_INT32, "remote_port",
246 srp->sr_rport,
247 BUNYAN_T_INT32, "response_size",
248 cboff + len,
249 BUNYAN_T_INT32, "response_offset", cboff,
250 BUNYAN_T_END);
251 return (-1);
252 }
253 opsz = sizeof (svp_log_route_t);
254 typestring = "svp_log_route_t";
255 break;
256 default:
257 (void) bunyan_warn(svp_bunyan,
258 "unknown log structure type",
259 BUNYAN_T_STRING, "remote_host",
260 srp->sr_hostname,
261 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
262 BUNYAN_T_INT32, "response_size", cboff + len,
263 BUNYAN_T_INT32, "response_offset", cboff,
264 BUNYAN_T_INT32, "structure_type", type,
265 BUNYAN_T_END);
266 return (-1);
267 }
268 if (len < opsz) {
269 (void) bunyan_warn(svp_bunyan,
270 "not enough data for",
271 BUNYAN_T_STRING, "", typestring,
272 BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
273 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
274 BUNYAN_T_INT32, "response_size", cboff + len,
275 BUNYAN_T_INT32, "response_offset", cboff,
276 BUNYAN_T_END);
277 return (-1);
278 }
279 if ((ret = cb((void *)typep, type, arg)) != 0)
280 return (ret);
281
282 len -= opsz;
283 cboff += opsz;
284 }
285
286 return (0);
287 }
288
289 void
290 svp_shootdown_logr_cb(svp_remote_t *srp, svp_status_t status, void *cbdata,
291 size_t cbsize, uint16_t version)
292 {
293 uint_t count;
294 svp_sdlog_t *sdl = &srp->sr_shoot;
295
296 if (status != SVP_S_OK) {
297 (void) bunyan_warn(svp_bunyan,
298 "log request not OK",
299 BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
300 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
301 BUNYAN_T_INT32, "response_size", cbsize,
302 BUNYAN_T_INT32, "status", status,
303 BUNYAN_T_END);
304 mutex_enter(&sdl->sdl_lock);
305 sdl->sdl_flags &= ~SVP_SD_RUNNING;
306 svp_shootdown_schedule(sdl, B_FALSE);
307 mutex_exit(&sdl->sdl_lock);
308 return;
309 }
310
311 /*
312 * First go ahead and count the number of entries. This effectively
313 * allows us to validate that all the data is valid, if this fails, then
314 * we fail the request.
315 */
316 count = 0;
317 if ((svp_shootdown_logr_iter(srp, cbdata, cbsize,
318 svp_shootdown_logr_count, &count, version)) != 0) {
319 mutex_enter(&sdl->sdl_lock);
320 sdl->sdl_flags &= ~SVP_SD_RUNNING;
321 svp_shootdown_schedule(sdl, B_FALSE);
322 mutex_exit(&sdl->sdl_lock);
323 return;
324 }
325
326 /*
327 * If we have no entries, then we're also done.
328 */
329 if (count == 0) {
330 mutex_enter(&sdl->sdl_lock);
331 sdl->sdl_flags &= ~SVP_SD_RUNNING;
332 svp_shootdown_schedule(sdl, B_FALSE);
333 mutex_exit(&sdl->sdl_lock);
334 return;
335 }
336
337 /*
338 * We have work to do. Because we may have asynchronous VL3 tasks, we're
339 * going to first grab a reference before we do the iteration. Then, for
340 * each asynchronous VL3 request we make, that'll also grab a hold. Once
341 * we're done with the iteration, we'll drop our hold. If that's the
342 * last one, it'll move on accordingly.
343 */
344 svp_shootdown_ref(sdl);
345 bzero(sdl->sdl_logrm, svp_shootdown_buf);
346
347 /*
348 * If this fails, we're going to determine what to do next based on the
349 * number of entries that were entered into the log removal. At this
350 * point success or failure don't really look different, all it changes
351 * is how many entries we have to remove.
352 */
353 (void) svp_shootdown_logr_iter(srp, cbdata, cbsize,
354 svp_shootdown_logr_shoot, sdl, version);
355
356 /*
357 * Now that we're done with our work, release the hold. If we don't have
358 * any vl3 tasks outstanding, this'll trigger the next phase of the log
359 * removals.
360 */
361 svp_shootdown_rele(sdl);
362 }
363
364 static void
365 svp_shootdown_timer(void *arg)
366 {
367 svp_sdlog_t *sdl = arg;
368 svp_remote_t *srp = sdl->sdl_remote;
369 boolean_t init = B_TRUE;
370
371 mutex_enter(&sdl->sdl_lock);
372
373 /*
374 * If we've been asked to quiesce, we're done.
375 */
376 if ((sdl->sdl_flags & SVP_SD_QUIESCE) != 0) {
377 mutex_exit(&sdl->sdl_lock);
378 return;
379 }
380
381 /*
382 * We shouldn't be able to have ourselves currently be running and reach
383 * here. If that's the case, we should immediately panic.
384 */
385 if ((sdl->sdl_flags & SVP_SD_RUNNING) != 0) {
386 libvarpd_panic("remote %p shootdown timer fired while still "
387 "running", srp);
388 }
389
390 if ((sdl->sdl_flags & SVP_SD_DORM) != 0) {
391 sdl->sdl_flags &= ~SVP_SD_DORM;
392 init = B_FALSE;
393 }
394
395 sdl->sdl_flags |= SVP_SD_RUNNING;
396 mutex_exit(&sdl->sdl_lock);
397
398 if (init == B_FALSE) {
399 svp_lrm_req_t *svrr = sdl->sdl_logrm;
400
401 bzero(&sdl->sdl_query, sizeof (svp_query_t));
402 svp_remote_lrm_request(sdl->sdl_remote, &sdl->sdl_query, svrr,
403 sizeof (*svrr) + 16 * svrr->svrr_count);
404 } else {
405 bzero(&sdl->sdl_query, sizeof (svp_query_t));
406 svp_remote_log_request(srp, &sdl->sdl_query, sdl->sdl_logack,
407 svp_shootdown_buf);
408 }
409 }
410
411 void
412 svp_shootdown_fini(svp_remote_t *srp)
413 {
414 svp_sdlog_t *sdl = &srp->sr_shoot;
415
416 mutex_enter(&sdl->sdl_lock);
417 sdl->sdl_flags |= SVP_SD_QUIESCE;
418 mutex_exit(&sdl->sdl_lock);
419
420 svp_timer_remove(&sdl->sdl_timer);
421
422 mutex_enter(&sdl->sdl_lock);
423
424 /*
425 * Normally svp_timer_remove would be enough. However, the query could
426 * have been put out again outside of the svp_timer interface. Therefore
427 * we still need to check for SVP_SD_RUNNING.
428 */
429 while (sdl->sdl_flags & SVP_SD_RUNNING)
430 (void) cond_wait(&sdl->sdl_cond, &sdl->sdl_lock);
431 mutex_exit(&sdl->sdl_lock);
432
433 umem_free(sdl->sdl_logack, svp_shootdown_buf);
434 umem_free(sdl->sdl_logrm, svp_shootdown_buf);
435 sdl->sdl_logack = NULL;
436 sdl->sdl_logrm = NULL;
437 (void) cond_destroy(&sdl->sdl_cond);
438 (void) mutex_destroy(&sdl->sdl_lock);
439 }
440
441 void
442 svp_shootdown_start(svp_remote_t *srp)
443 {
444 svp_sdlog_t *sdl = &srp->sr_shoot;
445
446 mutex_enter(&sdl->sdl_lock);
447 svp_shootdown_schedule(sdl, B_FALSE);
448 mutex_exit(&sdl->sdl_lock);
449 }
450
451 int
452 svp_shootdown_init(svp_remote_t *srp)
453 {
454 int ret;
455 svp_sdlog_t *sdl = &srp->sr_shoot;
456 if ((ret = mutex_init(&sdl->sdl_lock, USYNC_THREAD | LOCK_ERRORCHECK,
457 NULL)) != 0)
458 return (ret);
459
460 if ((ret = cond_init(&sdl->sdl_cond, USYNC_THREAD, NULL)) != 0) {
461 (void) mutex_destroy(&sdl->sdl_lock);
462 return (ret);
463 }
464
465 if ((sdl->sdl_logack = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
466 NULL) {
467 ret = errno;
468 (void) cond_destroy(&sdl->sdl_cond);
469 (void) mutex_destroy(&sdl->sdl_lock);
470 return (ret);
471 }
472
473 if ((sdl->sdl_logrm = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
474 NULL) {
475 ret = errno;
476 umem_free(sdl->sdl_logack, svp_shootdown_buf);
477 (void) cond_destroy(&sdl->sdl_cond);
478 (void) mutex_destroy(&sdl->sdl_lock);
479 return (ret);
480 }
481
482 sdl->sdl_remote = srp;
483 sdl->sdl_timer.st_oneshot = B_TRUE;
484 sdl->sdl_timer.st_func = svp_shootdown_timer;
485 sdl->sdl_timer.st_arg = sdl;
486
487 return (0);
488 }