1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2015 Joyent, Inc.
14 */
15
16 /*
17 * Shootdown processing logic.
18 *
19 * For more information, see the big theory statement in
20 * lib/varpd/svp/common/libvarpd_svp.c.
21 */
22
23 #include <umem.h>
24 #include <sys/uuid.h>
25 #include <assert.h>
26 #include <strings.h>
27 #include <errno.h>
28 #include <sys/debug.h>
29
30 #include <libvarpd_provider.h>
31 #include <libvarpd_svp.h>
32
/*
 * Timer tunables, both expressed in seconds.
 *
 * svp_shootdown_base is used when there was nothing left to do: the next
 * request is delayed by svp_shootdown_base seconds plus up to an additional
 * svp_shootdown_base seconds.
 */
static int svp_shootdown_base = 5;

/*
 * svp_shootdown_cont is the delay used instead when there is still work going
 * on.
 */
static int svp_shootdown_cont = 1;
41
/*
 * Size, in bytes, of both the logack and the logrm buffers. A shootdown
 * buffer of this size gives us approximately 18 or so VL3 entries and 32 VL2
 * entries, or some combination thereof. The list of uuids that we pass along
 * to remove the log entries that we've acted upon does not need this much
 * room, but for simplicity it uses the same sized buffer.
 */
static int svp_shootdown_buf = 1024;
50
51 static void
52 svp_shootdown_schedule(svp_sdlog_t *sdl, boolean_t cont)
53 {
54 assert(MUTEX_HELD(&sdl->sdl_lock));
55
56 if (cont == B_TRUE) {
57 sdl->sdl_timer.st_value = svp_shootdown_cont;
58 } else {
59 sdl->sdl_timer.st_value = svp_shootdown_base +
60 arc4random_uniform(svp_shootdown_base + 1);
61 }
62 svp_timer_add(&sdl->sdl_timer);
63 }
64
65 void
66 svp_shootdown_lrm_cb(svp_remote_t *srp, svp_status_t status)
67 {
68 svp_sdlog_t *sdl = &srp->sr_shoot;
69
70 mutex_enter(&sdl->sdl_lock);
71 sdl->sdl_flags &= ~SVP_SD_RUNNING;
72 svp_shootdown_schedule(sdl, B_TRUE);
73 mutex_exit(&sdl->sdl_lock);
74
75 if (status != SVP_S_OK) {
76 (void) bunyan_warn(svp_bunyan, "SVP_R_LOG_RM failed",
77 BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
78 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
79 BUNYAN_T_INT32, "status", status,
80 BUNYAN_T_END);
81 }
82 }
83
84 static void
85 svp_shootdown_ref(svp_sdlog_t *sdl)
86 {
87 mutex_enter(&sdl->sdl_lock);
88 sdl->sdl_ref++;
89 mutex_exit(&sdl->sdl_lock);
90 }
91
92 static void
93 svp_shootdown_rele(svp_sdlog_t *sdl)
94 {
95 svp_lrm_req_t *svrr = sdl->sdl_logrm;
96 boolean_t next;
97
98 mutex_enter(&sdl->sdl_lock);
99 VERIFY(sdl->sdl_ref > 0);
100 sdl->sdl_ref--;
101 if (sdl->sdl_ref > 0) {
102 mutex_exit(&sdl->sdl_lock);
103 return;
104 }
105
106 /*
107 * At this point we know that we hold the last reference, therefore it's
108 * safe for us to go ahead and clean up and move on and attempt to
109 * deliver the reply. We always deliver the reply by going through the
110 * timer. This can be rather important as the final reference may be
111 * coming through a failed query and it's not always safe for us to
112 * callback into the remote routines from this context.
113 *
114 * We should only do this if we have a non-zero number of entries to
115 * take down.
116 */
117 sdl->sdl_flags &= ~SVP_SD_RUNNING;
118 if (svrr->svrr_count > 0) {
119 sdl->sdl_flags |= SVP_SD_DORM;
120 next = B_TRUE;
121 } else {
122 next = B_FALSE;
123 }
124 svp_shootdown_schedule(sdl, next);
125 mutex_exit(&sdl->sdl_lock);
126 }
127
128 /*
129 * This is a callback used to indicate that the VL3 lookup has completed and an
130 * entry, if any, has been injected. If the command succeeded, eg. we got that
131 * the status was OK or that it was not found, then we will add it to he list to
132 * shoot down. Otherwise, there's nothing else for us to really do here.
133 */
134 void
135 svp_shootdown_vl3_cb(svp_status_t status, svp_log_vl3_t *vl3, svp_sdlog_t *sdl)
136 {
137 svp_lrm_req_t *svrr = sdl->sdl_logrm;
138
139 mutex_enter(&sdl->sdl_lock);
140 if (status == SVP_S_OK || status == SVP_S_NOTFOUND) {
141 bcopy(vl3->svl3_id, &svrr->svrr_ids[svrr->svrr_count * 16],
142 UUID_LEN);
143 svrr->svrr_count++;
144 }
145 mutex_exit(&sdl->sdl_lock);
146
147 svp_shootdown_rele(sdl);
148 }
149
150 static int
151 svp_shootdown_logr_shoot(void *data, svp_log_type_t type, void *arg)
152 {
153 svp_sdlog_t *sdl = arg;
154 svp_remote_t *srp = sdl->sdl_remote;
155 svp_lrm_req_t *svrr = sdl->sdl_logrm;
156
157 if (type != SVP_LOG_VL2 && type != SVP_LOG_VL3)
158 libvarpd_panic("encountered unknown type: %d\n", type);
159
160 if (type == SVP_LOG_VL2) {
161 svp_log_vl2_t *svl2 = data;
162 svp_remote_shootdown_vl2(srp, svl2);
163 mutex_enter(&sdl->sdl_lock);
164 bcopy(svl2->svl2_id, &svrr->svrr_ids[svrr->svrr_count * 16],
165 UUID_LEN);
166 svrr->svrr_count++;
167 mutex_exit(&sdl->sdl_lock);
168 } else {
169 svp_log_vl3_t *svl3 = data;
170
171 /* Take a hold for the duration of this request */
172 svp_shootdown_ref(sdl);
173 svp_remote_shootdown_vl3(srp, svl3, sdl);
174 }
175
176 return (0);
177 }
178
179 static int
180 svp_shootdown_logr_count(void *data, svp_log_type_t type, void *arg)
181 {
182 uint_t *u = arg;
183 *u = *u + 1;
184 return (0);
185 }
186
187
188 static int
189 svp_shootdown_logr_iter(svp_remote_t *srp, void *buf, size_t len,
190 int (*cb)(void *, svp_log_type_t, void *), void *arg)
191 {
192 int ret;
193 off_t cboff = 0;
194 uint32_t *typep, type;
195 svp_log_vl2_t *svl2;
196 svp_log_vl3_t *svl3;
197
198 /* Adjust for initial status word */
199 assert(len >= sizeof (uint32_t));
200 len -= sizeof (uint32_t);
201 cboff += sizeof (uint32_t);
202
203 while (len > 0) {
204 size_t opsz;
205
206 if (len < sizeof (uint32_t)) {
207 (void) bunyan_warn(svp_bunyan,
208 "failed to get initial shootdown tag",
209 BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
210 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
211 BUNYAN_T_INT32, "response_size", cboff + len,
212 BUNYAN_T_INT32, "response_offset", cboff,
213 BUNYAN_T_END);
214 return (-1);
215 }
216
217 typep = buf + cboff;
218 type = ntohl(*typep);
219 if (type == SVP_LOG_VL2) {
220 opsz = sizeof (svp_log_vl2_t);
221 if (len < opsz) {
222 (void) bunyan_warn(svp_bunyan,
223 "not enough data for svp_log_vl2_t",
224 BUNYAN_T_STRING, "remote_host",
225 srp->sr_hostname,
226 BUNYAN_T_INT32, "remote_port",
227 srp->sr_rport,
228 BUNYAN_T_INT32, "response_size",
229 cboff + len,
230 BUNYAN_T_INT32, "response_offset", cboff,
231 BUNYAN_T_END);
232 return (-1);
233 }
234 svl2 = (void *)typep;
235 if ((ret = cb(svl2, type, arg)) != 0)
236 return (ret);
237 } else if (type == SVP_LOG_VL3) {
238
239 opsz = sizeof (svp_log_vl3_t);
240 if (len < opsz) {
241 (void) bunyan_warn(svp_bunyan,
242 "not enough data for svp_log_vl3_t",
243 BUNYAN_T_STRING, "remote_host",
244 srp->sr_hostname,
245 BUNYAN_T_INT32, "remote_port",
246 srp->sr_rport,
247 BUNYAN_T_INT32, "response_size",
248 cboff + len,
249 BUNYAN_T_INT32, "response_offset", cboff,
250 BUNYAN_T_END);
251 return (-1);
252 }
253 svl3 = (void *)typep;
254 if ((ret = cb(svl3, type, arg)) != 0)
255 return (ret);
256 } else {
257 (void) bunyan_warn(svp_bunyan,
258 "unknown log structure type",
259 BUNYAN_T_STRING, "remote_host",
260 srp->sr_hostname,
261 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
262 BUNYAN_T_INT32, "response_size", cboff + len,
263 BUNYAN_T_INT32, "response_offset", cboff,
264 BUNYAN_T_INT32, "structure_type", type,
265 BUNYAN_T_END);
266 return (-1);
267 }
268 len -= opsz;
269 cboff += opsz;
270 }
271
272 return (0);
273 }
274
275 void
276 svp_shootdown_logr_cb(svp_remote_t *srp, svp_status_t status, void *cbdata,
277 size_t cbsize)
278 {
279 uint_t count;
280 svp_sdlog_t *sdl = &srp->sr_shoot;
281
282 if (status != SVP_S_OK) {
283 (void) bunyan_warn(svp_bunyan,
284 "log request not OK",
285 BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
286 BUNYAN_T_INT32, "remote_port", srp->sr_rport,
287 BUNYAN_T_INT32, "response_size", cbsize,
288 BUNYAN_T_INT32, "status", status,
289 BUNYAN_T_END);
290 mutex_enter(&sdl->sdl_lock);
291 sdl->sdl_flags &= ~SVP_SD_RUNNING;
292 svp_shootdown_schedule(sdl, B_FALSE);
293 mutex_exit(&sdl->sdl_lock);
294 return;
295 }
296
297 /*
298 * First go ahead and count the number of entries. This effectively
299 * allows us to validate that all the data is valid, if this fails, then
300 * we fail the request.
301 */
302 count = 0;
303 if ((svp_shootdown_logr_iter(srp, cbdata, cbsize,
304 svp_shootdown_logr_count, &count)) != 0) {
305 mutex_enter(&sdl->sdl_lock);
306 sdl->sdl_flags &= ~SVP_SD_RUNNING;
307 svp_shootdown_schedule(sdl, B_FALSE);
308 mutex_exit(&sdl->sdl_lock);
309 return;
310 }
311
312 /*
313 * If we have no entries, then we're also done.
314 */
315 if (count == 0) {
316 mutex_enter(&sdl->sdl_lock);
317 sdl->sdl_flags &= ~SVP_SD_RUNNING;
318 svp_shootdown_schedule(sdl, B_FALSE);
319 mutex_exit(&sdl->sdl_lock);
320 return;
321 }
322
323 /*
324 * We have work to do. Because we may have asynchronous VL3 tasks, we're
325 * going to first grab a reference before we do the iteration. Then, for
326 * each asynchronous VL3 request we make, that'll also grab a hold. Once
327 * we're done with the iteration, we'll drop our hold. If that's the
328 * last one, it'll move on accordingly.
329 */
330 svp_shootdown_ref(sdl);
331 bzero(sdl->sdl_logrm, svp_shootdown_buf);
332
333 /*
334 * If this fails, we're going to determine what to do next based on the
335 * number of entries that were entered into the log removal. At this
336 * point success or failure don't really look different, all it changes
337 * is how many entries we have to remove.
338 */
339 (void) svp_shootdown_logr_iter(srp, cbdata, cbsize,
340 svp_shootdown_logr_shoot, sdl);
341
342 /*
343 * Now that we're done with our work, release the hold. If we don't have
344 * any vl3 tasks outstanding, this'll trigger the next phase of the log
345 * removals.
346 */
347 svp_shootdown_rele(sdl);
348 }
349
350 static void
351 svp_shootdown_timer(void *arg)
352 {
353 svp_sdlog_t *sdl = arg;
354 svp_remote_t *srp = sdl->sdl_remote;
355 boolean_t init = B_TRUE;
356
357 mutex_enter(&sdl->sdl_lock);
358
359 /*
360 * If we've been asked to quiesce, we're done.
361 */
362 if ((sdl->sdl_flags & SVP_SD_QUIESCE) != 0) {
363 mutex_exit(&sdl->sdl_lock);
364 return;
365 }
366
367 /*
368 * We shouldn't be able to have ourselves currently be running and reach
369 * here. If that's the case, we should immediately panic.
370 */
371 if ((sdl->sdl_flags & SVP_SD_RUNNING) != 0) {
372 libvarpd_panic("remote %p shootdown timer fired while still "
373 "running", srp);
374 }
375
376 if ((sdl->sdl_flags & SVP_SD_DORM) != 0) {
377 sdl->sdl_flags &= ~SVP_SD_DORM;
378 init = B_FALSE;
379 }
380
381 sdl->sdl_flags |= SVP_SD_RUNNING;
382 mutex_exit(&sdl->sdl_lock);
383
384 if (init == B_FALSE) {
385 svp_lrm_req_t *svrr = sdl->sdl_logrm;
386
387 bzero(&sdl->sdl_query, sizeof (svp_query_t));
388 svp_remote_lrm_request(sdl->sdl_remote, &sdl->sdl_query, svrr,
389 sizeof (*svrr) + 16 * svrr->svrr_count);
390 } else {
391 bzero(&sdl->sdl_query, sizeof (svp_query_t));
392 svp_remote_log_request(srp, &sdl->sdl_query, sdl->sdl_logack,
393 svp_shootdown_buf);
394 }
395 }
396
397 void
398 svp_shootdown_fini(svp_remote_t *srp)
399 {
400 svp_sdlog_t *sdl = &srp->sr_shoot;
401
402 mutex_enter(&sdl->sdl_lock);
403 sdl->sdl_flags |= SVP_SD_QUIESCE;
404 mutex_exit(&sdl->sdl_lock);
405
406 svp_timer_remove(&sdl->sdl_timer);
407
408 mutex_enter(&sdl->sdl_lock);
409
410 /*
411 * Normally svp_timer_remove would be enough. However, the query could
412 * have been put out again outside of the svp_timer interface. Therefore
413 * we still need to check for SVP_SD_RUNNING.
414 */
415 while (sdl->sdl_flags & SVP_SD_RUNNING)
416 (void) cond_wait(&sdl->sdl_cond, &sdl->sdl_lock);
417 mutex_exit(&sdl->sdl_lock);
418
419 umem_free(sdl->sdl_logack, svp_shootdown_buf);
420 umem_free(sdl->sdl_logrm, svp_shootdown_buf);
421 sdl->sdl_logack = NULL;
422 sdl->sdl_logrm = NULL;
423 (void) cond_destroy(&sdl->sdl_cond);
424 (void) mutex_destroy(&sdl->sdl_lock);
425 }
426
427 void
428 svp_shootdown_start(svp_remote_t *srp)
429 {
430 svp_sdlog_t *sdl = &srp->sr_shoot;
431
432 mutex_enter(&sdl->sdl_lock);
433 svp_shootdown_schedule(sdl, B_FALSE);
434 mutex_exit(&sdl->sdl_lock);
435 }
436
437 int
438 svp_shootdown_init(svp_remote_t *srp)
439 {
440 int ret;
441 svp_sdlog_t *sdl = &srp->sr_shoot;
442 if ((ret = mutex_init(&sdl->sdl_lock, USYNC_THREAD | LOCK_ERRORCHECK,
443 NULL)) != 0)
444 return (ret);
445
446 if ((ret = cond_init(&sdl->sdl_cond, USYNC_THREAD, NULL)) != 0) {
447 (void) mutex_destroy(&sdl->sdl_lock);
448 return (ret);
449 }
450
451 if ((sdl->sdl_logack = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
452 NULL) {
453 ret = errno;
454 (void) cond_destroy(&sdl->sdl_cond);
455 (void) mutex_destroy(&sdl->sdl_lock);
456 return (ret);
457 }
458
459 if ((sdl->sdl_logrm = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
460 NULL) {
461 ret = errno;
462 umem_free(sdl->sdl_logack, svp_shootdown_buf);
463 (void) cond_destroy(&sdl->sdl_cond);
464 (void) mutex_destroy(&sdl->sdl_lock);
465 return (ret);
466 }
467
468 sdl->sdl_remote = srp;
469 sdl->sdl_timer.st_oneshot = B_TRUE;
470 sdl->sdl_timer.st_func = svp_shootdown_timer;
471 sdl->sdl_timer.st_arg = sdl;
472
473 return (0);
474 }