/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2018 Joyent, Inc.
 */

/*
 * Remote backend management
 *
 * For more information, see the big theory statement in
 * lib/varpd/svp/common/libvarpd_svp.c.
 */

#include <umem.h>
#include <strings.h>
#include <string.h>
#include <stddef.h>
#include <thread.h>
#include <synch.h>
#include <assert.h>
#include <sys/socket.h>
#include <netdb.h>
#include <errno.h>
#include <libidspace.h>

#include <libvarpd_provider.h>
#include <libvarpd_svp.h>

typedef struct svp_shoot_vl3 {
	svp_query_t		ssv_query;
	struct sockaddr_in6	ssv_sock;
	svp_log_vl3_t		*ssv_vl3;
	svp_sdlog_t		*ssv_log;
} svp_shoot_vl3_t;

static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
static avl_tree_t svp_remote_tree;
static svp_timer_t svp_dns_timer;
static id_space_t *svp_idspace;
static int svp_dns_timer_rate = 30;	/* seconds */

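/*
 * Allocate a request ID from the shared svp_idspace for use in an outgoing
 * SVP header.
 */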
id_t
svp_id_alloc(void)
{
	return (id_alloc(svp_idspace));
}

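/*
 * Build a human-readable message describing why this remote is degraded; it
 * is ultimately passed to libvarpd_fma_degrade().
 */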
static void
svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
    size_t buflen)
{
	switch (state) {
	case SVP_RD_DNS_FAIL:
		(void) snprintf(buf, buflen, "failed to resolve or find "
		    "entries for hostname %s", srp->sr_hostname);
		break;
	case SVP_RD_REMOTE_FAIL:
		(void) snprintf(buf, buflen, "cannot reach any remote peers");
		break;
	default:
		(void) snprintf(buf, buflen, "unknown error state: %d", state);
	}
}

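/*
 * AVL comparator for remotes: order by hostname, then remote port, then
 * underlay IP address.
 */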
static int
svp_remote_comparator(const void *l, const void *r)
{
	int ret;
	const svp_remote_t *lr = l, *rr = r;

	ret = strcmp(lr->sr_hostname, rr->sr_hostname);
	if (ret > 0)
		return (1);
	else if (ret < 0)
		return (-1);

	if (lr->sr_rport > rr->sr_rport)
		return (1);
	else if (lr->sr_rport < rr->sr_rport)
		return (-1);

	ret = memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr));
	if (ret > 0)
		return (1);
	else if (ret < 0)
		return (-1);

	return (0);
}

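/*
 * Return the request ID that was allocated for this query's header.
 */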
void
svp_query_release(svp_query_t *sqp)
{
	id_free(svp_idspace, sqp->sq_header.svp_id);
}

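/*
 * Free a remote that has already been removed from svp_remote_tree, waiting
 * first for any outstanding DNS lookup to finish.
 */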
static void
svp_remote_destroy(svp_remote_t *srp)
{
	size_t len;

	/*
	 * Clean up any outstanding DNS information. At this point we know
	 * that we're not in the remote tree, which means that
	 * svp_remote_dns_timer cannot queue us. However, if any of our DNS
	 * related state flags are set, we have to wait for them to clear.
	 */
	mutex_enter(&srp->sr_lock);
	while (srp->sr_state &
	    (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
		(void) cond_wait(&srp->sr_cond, &srp->sr_lock);
	}
	mutex_exit(&srp->sr_lock);
	svp_shootdown_fini(srp);

	if (cond_destroy(&srp->sr_cond) != 0)
		libvarpd_panic("failed to destroy cond sr_cond");

	if (mutex_destroy(&srp->sr_lock) != 0)
		libvarpd_panic("failed to destroy mutex sr_lock");

	if (srp->sr_addrinfo != NULL)
		freeaddrinfo(srp->sr_addrinfo);
	len = strlen(srp->sr_hostname) + 1;
	umem_free(srp->sr_hostname, len);
	umem_free(srp, sizeof (svp_remote_t));
}

static int
svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
    svp_remote_t **outp)
{
	size_t hlen;
	svp_remote_t *remote;

	assert(MUTEX_HELD(&svp_remote_lock));

	remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
	if (remote == NULL) {
		mutex_exit(&svp_remote_lock);
		return (ENOMEM);
	}

	if (svp_shootdown_init(remote) != 0) {
		umem_free(remote, sizeof (svp_remote_t));
		mutex_exit(&svp_remote_lock);
		return (ENOMEM);
	}

	hlen = strlen(host) + 1;
	remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
	if (remote->sr_hostname == NULL) {
		svp_shootdown_fini(remote);
		umem_free(remote, sizeof (svp_remote_t));
		mutex_exit(&svp_remote_lock);
		return (ENOMEM);
	}
	remote->sr_rport = port;
	if (mutex_init(&remote->sr_lock,
	    USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
		libvarpd_panic("failed to create mutex sr_lock");
	if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
		libvarpd_panic("failed to create cond sr_cond");
	list_create(&remote->sr_conns, sizeof (svp_conn_t),
	    offsetof(svp_conn_t, sc_rlist));
	avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
	    offsetof(svp_t, svp_rlink));
	(void) strlcpy(remote->sr_hostname, host, hlen);
	remote->sr_count = 1;
	remote->sr_uip = *uip;

	svp_shootdown_start(remote);

	*outp = remote;
	return (0);
}

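/*
 * Look up (or create) the remote for a given hostname, port, and underlay
 * address, returning a referenced svp_remote_t that the caller must
 * eventually drop with svp_remote_release().
 */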
int
svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
    svp_remote_t **outp)
{
	int ret;
	svp_remote_t lookup, *remote;

	lookup.sr_hostname = host;
	lookup.sr_rport = port;
	lookup.sr_uip = *uip;
	mutex_enter(&svp_remote_lock);
	remote = avl_find(&svp_remote_tree, &lookup, NULL);
	if (remote != NULL) {
		assert(remote->sr_count > 0);
		remote->sr_count++;
		*outp = remote;
		mutex_exit(&svp_remote_lock);
		return (0);
	}

	if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
		mutex_exit(&svp_remote_lock);
		return (ret);
	}

	avl_add(&svp_remote_tree, *outp);
	mutex_exit(&svp_remote_lock);

	/* Make sure DNS is up to date */
	svp_host_queue(*outp);

	return (0);
}

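/*
 * Release a hold on a remote; the last release tears it down.
 */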
void
svp_remote_release(svp_remote_t *srp)
{
	mutex_enter(&svp_remote_lock);
	mutex_enter(&srp->sr_lock);
	srp->sr_count--;
	if (srp->sr_count != 0) {
		mutex_exit(&srp->sr_lock);
		mutex_exit(&svp_remote_lock);
		return;
	}
	mutex_exit(&srp->sr_lock);

	avl_remove(&svp_remote_tree, srp);
	mutex_exit(&svp_remote_lock);
	svp_remote_destroy(srp);
}

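/*
 * Associate an svp_t instance with this remote, checking that all required
 * lookup callbacks are present and that its virtual network ID is unique
 * within the remote.
 */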
int
svp_remote_attach(svp_remote_t *srp, svp_t *svp)
{
	svp_t check;
	avl_index_t where;

	mutex_enter(&srp->sr_lock);
	if (svp->svp_remote != NULL)
		libvarpd_panic("attempted to attach an svp_t that already "
		    "has a remote");

	/*
	 * We require everything except shootdowns
	 */
	if (svp->svp_cb.scb_vl2_lookup == NULL)
		libvarpd_panic("missing callback scb_vl2_lookup");
	if (svp->svp_cb.scb_vl3_lookup == NULL)
		libvarpd_panic("missing callback scb_vl3_lookup");
	if (svp->svp_cb.scb_vl2_invalidate == NULL)
		libvarpd_panic("missing callback scb_vl2_invalidate");
	if (svp->svp_cb.scb_vl3_inject == NULL)
		libvarpd_panic("missing callback scb_vl3_inject");
	if (svp->svp_cb.scb_route_lookup == NULL)
		libvarpd_panic("missing callback scb_route_lookup");

	check.svp_vid = svp->svp_vid;
	if (avl_find(&srp->sr_tree, &check, &where) != NULL)
		libvarpd_panic("found duplicate entry with vid %ld",
		    svp->svp_vid);
	avl_insert(&srp->sr_tree, svp, where);
	svp->svp_remote = srp;
	mutex_exit(&srp->sr_lock);

	return (0);
}

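/*
 * Undo svp_remote_attach(): remove the svp_t from the remote's tree and drop
 * the corresponding hold on the remote.
 */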
void
svp_remote_detach(svp_t *svp)
{
	svp_t *lookup;
	svp_remote_t *srp = svp->svp_remote;

	if (srp == NULL)
		libvarpd_panic("trying to detach remote when none exists");

	mutex_enter(&srp->sr_lock);
	lookup = avl_find(&srp->sr_tree, svp, NULL);
	if (lookup == NULL || lookup != svp)
		libvarpd_panic("inconsistent remote avl tree...");
	avl_remove(&srp->sr_tree, svp);
	svp->svp_remote = NULL;
	mutex_exit(&srp->sr_lock);
	svp_remote_release(srp);
}

/*
 * See if the request can be sent over the connection's supported version.
 * Scribble the version in the request itself. NOTE that we do not check the
 * version that already exists in sqp->sq_header.svp_ver, as we may be called
 * from svp_remote_reassign() (and change versions when arriving at a new
 * connection).
 */
static boolean_t
svp_outbound_version_check(int version, svp_query_t *sqp)
{
	uint16_t op = ntohs(sqp->sq_header.svp_op);

	/*
	 * As of v1 -> v2, we really only need to restrict SVP_R_ROUTE_REQ
	 * as v2-only. Reflect that here.
	 *
	 * NOTE that if any message semantics change between future versions
	 * (e.g. "in v3 SVP_R_VL2_REQ takes on additional work"), we'll
	 * need to more-deeply inspect the query. It's possible that the
	 * svp_op space is big enough to just continue op-only inspections.
	 */

	assert(version > 0 && version <= SVP_CURRENT_VERSION);

	if (op != SVP_R_ROUTE_REQ || version >= SVP_VERSION_TWO) {
		sqp->sq_header.svp_ver = htons(version);
		return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Walk the list of connections and find the first one that's available AND
 * version-appropriate for the message, then move the matched connection to
 * the back of the list so it's less likely to be used again.
 */
static boolean_t
svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
{
	svp_conn_t *scp;

	assert(MUTEX_HELD(&srp->sr_lock));
	for (scp = list_head(&srp->sr_conns); scp != NULL;
	    scp = list_next(&srp->sr_conns, scp)) {
		mutex_enter(&scp->sc_lock);
		if (scp->sc_cstate != SVP_CS_ACTIVE ||
		    !svp_outbound_version_check(scp->sc_version, sqp)) {
			mutex_exit(&scp->sc_lock);
			continue;
		}
		svp_conn_queue(scp, sqp);
		mutex_exit(&scp->sc_lock);
		list_remove(&srp->sr_conns, scp);
		list_insert_tail(&srp->sr_conns, scp);
		return (B_TRUE);
	}

	return (B_FALSE);
}

static void
svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
{
	svp_t *svp = sqp->sq_svp;
	svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;

	if (sqp->sq_status == SVP_S_OK)
		svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
		    (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
		    arg);
	else
		svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
}

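/*
 * Issue an SVP_R_VL2_REQ (MAC to underlay address) lookup for an svp_t; the
 * answer is delivered asynchronously through the scb_vl2_lookup() callback.
 */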
void
svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
    void *arg)
{
	svp_remote_t *srp;
	svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;

	srp = svp->svp_remote;
	sqp->sq_func = svp_remote_vl2_lookup_cb;
	sqp->sq_arg = arg;
	sqp->sq_svp = svp;
	sqp->sq_state = SVP_QUERY_INIT;
	sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
	sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
	sqp->sq_header.svp_id = id_alloc(svp_idspace);
	if (sqp->sq_header.svp_id == (id_t)-1)
		libvarpd_panic("failed to allocate from svp_idspace: %d",
		    errno);
	sqp->sq_header.svp_crc32 = htonl(0);
	sqp->sq_rdata = vl2r;
	sqp->sq_rsize = sizeof (svp_vl2_req_t);
	sqp->sq_wdata = NULL;
	sqp->sq_wsize = 0;

	bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
	vl2r->sl2r_vnetid = ntohl(svp->svp_vid);

	mutex_enter(&srp->sr_lock);
	if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
		svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, 0, arg);
	mutex_exit(&srp->sr_lock);
}

static void
svp_remote_route_lookup_cb(svp_query_t *sqp, void *arg)
{
	svp_t *svp = sqp->sq_svp;
	svp_route_ack_t *sra = (svp_route_ack_t *)sqp->sq_wdata;

	if (sqp->sq_status == SVP_S_OK) {
		svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
		    sra->sra_dcid, sra->sra_vnetid, sra->sra_vlan,
		    sra->sra_srcmac, sra->sra_dstmac, sra->sra_port,
		    sra->sra_ip, sra->sra_src_pfx, sra->sra_dst_pfx, arg);
	} else {
		svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
		    0, 0, 0, NULL, NULL, 0, NULL, 0, 0, arg);
	}
}

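/*
 * Issue an SVP_R_ROUTE_REQ lookup for a source/destination pair on the given
 * virtual network and VLAN. The answer is delivered through the
 * scb_route_lookup() callback. Note that this request is only valid on
 * connections that speak version 2 or later (see
 * svp_outbound_version_check()).
 */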
void
svp_remote_route_lookup(svp_t *svp, svp_query_t *sqp,
    const struct in6_addr *src, const struct in6_addr *dst, uint32_t vnetid,
    uint16_t vlan, void *arg)
{
	svp_remote_t *srp;
	svp_route_req_t *srr = &sqp->sq_rdun.sqd_rr;

	srp = svp->svp_remote;
	sqp->sq_func = svp_remote_route_lookup_cb;
	sqp->sq_arg = arg;
	sqp->sq_svp = svp;
	sqp->sq_state = SVP_QUERY_INIT;
	sqp->sq_header.svp_op = htons(SVP_R_ROUTE_REQ);
	sqp->sq_header.svp_size = htonl(sizeof (svp_route_req_t));
	sqp->sq_header.svp_id = id_alloc(svp_idspace);
	if (sqp->sq_header.svp_id == (id_t)-1)
		libvarpd_panic("failed to allocate from svp_idspace: %d",
		    errno);
	sqp->sq_header.svp_crc32 = htonl(0);
	sqp->sq_rdata = srr;

	bcopy(src, srr->srr_srcip, sizeof (struct in6_addr));
	bcopy(dst, srr->srr_dstip, sizeof (struct in6_addr));
	/* Caller should've checked both are the same type... */
	srr->srr_vnetid = vnetid;
	srr->srr_vlan = vlan;
	srr->srr_pad = 0;

	mutex_enter(&srp->sr_lock);
	if (!svp_remote_conn_queue(srp, sqp)) {
		sqp->sq_status = SVP_S_FATAL;
		sqp->sq_func(sqp, arg);
	}
	mutex_exit(&srp->sr_lock);
}

static void
svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
{
	svp_t *svp = sqp->sq_svp;
	svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;

	if (sqp->sq_status == SVP_S_OK)
		svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
		    (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
		    arg);
	else
		svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
		    arg);
}

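/*
 * Common construction and queueing of an SVP_R_VL3_REQ, shared by look-ups on
 * behalf of an svp_t and by internally generated log requests.
 */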
static void
svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
    const struct sockaddr *addr, svp_query_f func, void *arg, uint32_t vid)
{
	svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;

	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		libvarpd_panic("unexpected sa_family for the vl3 lookup");

	sqp->sq_func = func;
	sqp->sq_arg = arg;
	sqp->sq_state = SVP_QUERY_INIT;
	sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
	sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
	sqp->sq_header.svp_id = id_alloc(svp_idspace);
	if (sqp->sq_header.svp_id == (id_t)-1)
		libvarpd_panic("failed to allocate from svp_idspace: %d",
		    errno);
	sqp->sq_header.svp_crc32 = htonl(0);
	sqp->sq_rdata = vl3r;
	sqp->sq_rsize = sizeof (svp_vl3_req_t);
	sqp->sq_wdata = NULL;
	sqp->sq_wsize = 0;

	if (addr->sa_family == AF_INET6) {
		struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;

		vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
		bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
		    sizeof (struct in6_addr));
	} else {
		struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
		struct in6_addr v6;

		vl3r->sl3r_type = htonl(SVP_VL3_IP);
		IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
		bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
	}
	vl3r->sl3r_vnetid = htonl(vid);

	mutex_enter(&srp->sr_lock);
	if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
		sqp->sq_status = SVP_S_FATAL;
		sqp->sq_func(sqp, arg);
	}
	mutex_exit(&srp->sr_lock);
}

/*
 * This is a request to do a VL3 look-up that originated internally, as opposed
 * to coming from varpd. As such we need a slightly different query callback
 * function upon completion and don't go through the normal path with the
 * svp_t.
 */
void
svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
    const struct sockaddr *addr, svp_query_f func, void *arg)
{
	svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
}

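/*
 * VL3 look-up on behalf of an svp_t, i.e. one that originated from varpd.
 */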
void
svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
    const struct sockaddr *addr, void *arg)
{
	svp_remote_t *srp = svp->svp_remote;

	sqp->sq_svp = svp;
	svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
	    arg, svp->svp_vid);
}

static void
svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
{
	svp_remote_t *srp = sqp->sq_arg;

	assert(sqp->sq_wdata != NULL);
	if (sqp->sq_status == SVP_S_OK)
		svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
		    sqp->sq_size);
	else
		svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0);
}

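/*
 * Issue an SVP_R_LOG_REQ to fetch shootdown log entries from the remote into
 * the supplied buffer; completion is reported via svp_shootdown_logr_cb().
 */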
void
svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
    size_t buflen)
{
	svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
	boolean_t queued;

	sqp->sq_func = svp_remote_log_request_cb;
	sqp->sq_state = SVP_QUERY_INIT;
	sqp->sq_arg = srp;
	sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
	sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
	sqp->sq_header.svp_id = id_alloc(svp_idspace);
	if (sqp->sq_header.svp_id == (id_t)-1)
		libvarpd_panic("failed to allocate from svp_idspace: %d",
		    errno);
	sqp->sq_header.svp_crc32 = htonl(0);
	sqp->sq_rdata = logr;
	sqp->sq_rsize = sizeof (svp_log_req_t);
	sqp->sq_wdata = buf;
	sqp->sq_wsize = buflen;

	logr->svlr_count = htonl(buflen);
	bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));

	/*
	 * If this fails, there isn't much that we can do. Invoke the callback
	 * with a fatal status.
	 */
	mutex_enter(&srp->sr_lock);
	queued = svp_remote_conn_queue(srp, sqp);
	mutex_exit(&srp->sr_lock);

	if (queued == B_FALSE)
		svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
}

static void
svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
{
	svp_remote_t *srp = arg;

	svp_shootdown_lrm_cb(srp, sqp->sq_status);
}

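/*
 * Issue an SVP_R_LOG_RM to remove log records that have been fully processed.
 * The caller provides a pre-built svp_lrm_req_t in buf; we only fix up the
 * record count's byte order before sending.
 */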
void
svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
    size_t buflen)
{
	boolean_t queued;
	svp_lrm_req_t *svrr = buf;

	sqp->sq_func = svp_remote_lrm_request_cb;
	sqp->sq_state = SVP_QUERY_INIT;
	sqp->sq_arg = srp;
	sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
	sqp->sq_header.svp_size = htonl(buflen);
	sqp->sq_header.svp_id = id_alloc(svp_idspace);
	if (sqp->sq_header.svp_id == (id_t)-1)
		libvarpd_panic("failed to allocate from svp_idspace: %d",
		    errno);
	sqp->sq_header.svp_crc32 = htonl(0);
	sqp->sq_rdata = buf;
	sqp->sq_rsize = buflen;
	sqp->sq_wdata = NULL;
	sqp->sq_wsize = 0;

	/*
	 * We need to fix up the count to be in proper network order.
	 */
	svrr->svrr_count = htonl(svrr->svrr_count);

	/*
	 * If this fails, there isn't much that we can do. Invoke the callback
	 * with a fatal status.
	 */
	mutex_enter(&srp->sr_lock);
	queued = svp_remote_conn_queue(srp, sqp);
	mutex_exit(&srp->sr_lock);

	if (queued == B_FALSE)
		svp_shootdown_lrm_cb(srp, SVP_S_FATAL);
}

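/*
 * Periodic timer callback that queues every known remote for another round of
 * host name resolution, so that DNS changes are eventually picked up.
 */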
/* ARGSUSED */
void
svp_remote_dns_timer(void *unused)
{
	svp_remote_t *s;

	mutex_enter(&svp_remote_lock);
	for (s = avl_first(&svp_remote_tree); s != NULL;
	    s = AVL_NEXT(&svp_remote_tree, s)) {
		svp_host_queue(s);
	}
	mutex_exit(&svp_remote_lock);
}

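/*
 * Called with the results of a DNS lookup for this remote. Bump the
 * generation, create connections for any new addresses, and fall out any
 * connection whose address no longer appears (i.e. from an older generation).
 */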
void
svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
{
	struct addrinfo *a;
	svp_conn_t *scp;
	int ngen;

	mutex_enter(&srp->sr_lock);
	srp->sr_gen++;
	ngen = srp->sr_gen;
	mutex_exit(&srp->sr_lock);

	for (a = newaddrs; a != NULL; a = a->ai_next) {
		struct in6_addr in6;
		struct in6_addr *addrp;

		if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
			continue;

		if (a->ai_family == AF_INET) {
			struct sockaddr_in *v4;
			v4 = (struct sockaddr_in *)a->ai_addr;
			addrp = &in6;
			IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
		} else {
			struct sockaddr_in6 *v6;
			v6 = (struct sockaddr_in6 *)a->ai_addr;
			addrp = &v6->sin6_addr;
		}

		mutex_enter(&srp->sr_lock);
		for (scp = list_head(&srp->sr_conns); scp != NULL;
		    scp = list_next(&srp->sr_conns, scp)) {
			mutex_enter(&scp->sc_lock);
			if (bcmp(addrp, &scp->sc_addr,
			    sizeof (struct in6_addr)) == 0) {
				scp->sc_gen = ngen;
				mutex_exit(&scp->sc_lock);
				break;
			}
			mutex_exit(&scp->sc_lock);
		}

		/*
		 * We need to be careful in the assumptions that we make here,
		 * as there's a good chance that svp_conn_create will
		 * drop the svp_remote_t`sr_lock to kick off its effective
		 * event loop.
		 */
		if (scp == NULL)
			(void) svp_conn_create(srp, addrp);
		mutex_exit(&srp->sr_lock);
	}

	/*
	 * Now it's time to clean things up. We do not tear down connections
	 * that are still current, instead allowing them to stay around on the
	 * assumption that they're still useful. We only fall out connections
	 * that belong to an older generation.
	 */
	mutex_enter(&srp->sr_lock);
	for (scp = list_head(&srp->sr_conns); scp != NULL;
	    scp = list_next(&srp->sr_conns, scp)) {
		boolean_t fall = B_FALSE;

		mutex_enter(&scp->sc_lock);
		if (scp->sc_gen < srp->sr_gen)
			fall = B_TRUE;
		mutex_exit(&scp->sc_lock);
		if (fall == B_TRUE)
			svp_conn_fallout(scp);
	}
	mutex_exit(&srp->sr_lock);
}

/*
 * This connection is in the process of being reset, so we need to reassign
 * all of its queries to other connections or mark them as fatal. Note that
 * the first query was the one in flight when this failed; we always mark it
 * as failed to avoid trying to reset its state.
 */
void
svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
{
	boolean_t first = B_TRUE;
	svp_query_t *sqp;

	assert(MUTEX_HELD(&srp->sr_lock));

	/*
	 * As we try to reassign all of its queries, remove it from the list.
	 */
	list_remove(&srp->sr_conns, scp);

	while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
		if (first == B_TRUE) {
			first = B_FALSE;
			sqp->sq_status = SVP_S_FATAL;
			sqp->sq_func(sqp, sqp->sq_arg);
			continue;
		}

		sqp->sq_acttime = -1;

		/*
		 * We may want to maintain a queue of these for some time
		 * rather than just failing them all.
		 */
		if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
			sqp->sq_status = SVP_S_FATAL;
			sqp->sq_func(sqp, sqp->sq_arg);
		}
	}

	/*
	 * Now that we're done, go ahead and re-insert.
	 */
	list_insert_tail(&srp->sr_conns, scp);
}

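/*
 * Record a new reason for this remote being degraded. If it changes the
 * lowest-numbered outstanding reason, regenerate the FMA message for every
 * attached svp_t.
 */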
void
svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
{
	int sf, nf;
	char buf[256];

	assert(MUTEX_HELD(&srp->sr_lock));

	if (flag == SVP_RD_ALL || flag == 0)
		libvarpd_panic("invalid flag passed to degrade");

	if ((flag & srp->sr_degrade) != 0) {
		return;
	}

	sf = ffs(srp->sr_degrade);
	nf = ffs(flag);
	srp->sr_degrade |= flag;
	if (sf == 0 || sf > nf) {
		svp_t *svp;

		svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));

		for (svp = avl_first(&srp->sr_tree); svp != NULL;
		    svp = AVL_NEXT(&srp->sr_tree, svp)) {
			libvarpd_fma_degrade(svp->svp_hdl, buf);
		}
	}
}

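/*
 * Clear a degradation reason. If none remain, restore all attached instances;
 * otherwise, update the FMA message if the lowest-numbered reason changed.
 */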
void
svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
{
	int sf, nf;

	assert(MUTEX_HELD(&srp->sr_lock));
	sf = ffs(srp->sr_degrade);
	if ((srp->sr_degrade & flag) != flag)
		return;
	srp->sr_degrade &= ~flag;
	nf = ffs(srp->sr_degrade);

	/*
	 * If we're now empty, restore the device. If we are still degraded,
	 * but we now have a higher base than we used to, change the message.
	 */
	if (srp->sr_degrade == 0) {
		svp_t *svp;

		for (svp = avl_first(&srp->sr_tree); svp != NULL;
		    svp = AVL_NEXT(&srp->sr_tree, svp)) {
			libvarpd_fma_restore(svp->svp_hdl);
		}
	} else if (nf != sf) {
		svp_t *svp;
		char buf[256];

		svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
		for (svp = avl_first(&srp->sr_tree); svp != NULL;
		    svp = AVL_NEXT(&srp->sr_tree, svp)) {
			libvarpd_fma_degrade(svp->svp_hdl, buf);
		}
	}
}

void
svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
{
	svp_shoot_vl3_t *squery = arg;
	svp_log_vl3_t *svl3 = squery->ssv_vl3;
	svp_sdlog_t *sdl = squery->ssv_log;

	if (sqp->sq_status == SVP_S_OK) {
		svp_t *svp, lookup;
		svp_remote_t *srp = sdl->sdl_remote;
		svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;

		lookup.svp_vid = ntohl(svl3->svl3_vnetid);
		mutex_enter(&srp->sr_lock);
		if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
			svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
			    (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
			    NULL);
		}
		mutex_exit(&srp->sr_lock);
	}

	svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);

	umem_free(squery, sizeof (svp_shoot_vl3_t));
}

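/*
 * Process a VL3 shootdown log entry by issuing a fresh VL3 lookup for the
 * logged IP; svp_remote_shootdown_vl3_cb() injects the result when it arrives.
 */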
void
svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
    svp_sdlog_t *sdl)
{
	svp_shoot_vl3_t *squery;

	squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
	if (squery == NULL) {
		svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
		return;
	}

	squery->ssv_vl3 = svl3;
	squery->ssv_log = sdl;
	squery->ssv_sock.sin6_family = AF_INET6;
	bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
	    sizeof (svl3->svl3_ip));
	svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
	    (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
	    squery);
}

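/*
 * Process a VL2 shootdown log entry by invalidating the MAC address on the
 * corresponding virtual network, if that network is attached here.
 */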
void
svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
{
	svp_t *svp, lookup;

	lookup.svp_vid = ntohl(svl2->svl2_vnetid);
	mutex_enter(&srp->sr_lock);
	if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
		svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
	}
	mutex_exit(&srp->sr_lock);
}

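/*
 * Set up global remote state: the request ID space, the tree of remotes, and
 * the periodic DNS timer.
 */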
int
svp_remote_init(void)
{
	svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
	if (svp_idspace == NULL)
		return (errno);
	avl_create(&svp_remote_tree, svp_remote_comparator,
	    sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
	svp_dns_timer.st_func = svp_remote_dns_timer;
	svp_dns_timer.st_arg = NULL;
	svp_dns_timer.st_oneshot = B_FALSE;
	svp_dns_timer.st_value = svp_dns_timer_rate;
	svp_timer_add(&svp_dns_timer);
	return (0);
}

void
svp_remote_fini(void)
{
	svp_timer_remove(&svp_dns_timer);
	avl_destroy(&svp_remote_tree);
	if (svp_idspace != NULL)
		id_space_destroy(svp_idspace);
}