1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 */
15
16 /*
17 * Remote backend management
18 *
19 * For more information, see the big theory statement in
20 * lib/varpd/svp/common/libvarpd_svp.c.
21 */
22
23 #include <umem.h>
24 #include <strings.h>
25 #include <string.h>
26 #include <stddef.h>
27 #include <thread.h>
28 #include <synch.h>
29 #include <assert.h>
30 #include <sys/socket.h>
31 #include <netdb.h>
32 #include <errno.h>
33 #include <libidspace.h>
34
35 #include <libvarpd_provider.h>
36 #include <libvarpd_svp.h>
37
/*
 * Per-request state for a VL3 lookup that is issued on behalf of a shootdown
 * log entry. It is allocated in svp_remote_shootdown_vl3() and freed in
 * svp_remote_shootdown_vl3_cb().
 */
typedef struct svp_shoot_vl3 {
	svp_query_t		ssv_query;	/* query used for the lookup */
	struct sockaddr_in6	ssv_sock;	/* address being looked up */
	svp_log_vl3_t		*ssv_vl3;	/* log entry being processed */
	svp_sdlog_t		*ssv_log;	/* shootdown log for the entry */
} svp_shoot_vl3_t;
44
/* Held while svp_remote_tree is searched, modified or walked below. */
static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
/* All known remotes, ordered by svp_remote_comparator(). */
static avl_tree_t svp_remote_tree;
/* Timer that fires svp_remote_dns_timer(); set up in svp_remote_init(). */
static svp_timer_t svp_dns_timer;
/* ID space that supplies the svp_id placed in each request header. */
static id_space_t *svp_idspace;
/* Value loaded into svp_dns_timer.st_value. */
static int svp_dns_timer_rate = 30;	/* seconds */
50
51 id_t
52 svp_id_alloc(void)
53 {
54 return (id_alloc(svp_idspace));
55 }
56
57 static void
58 svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
59 size_t buflen)
60 {
61 switch (state) {
62 case SVP_RD_DNS_FAIL:
63 (void) snprintf(buf, buflen, "failed to resolve or find "
64 "entries for hostname %s", srp->sr_hostname);
65 break;
66 case SVP_RD_REMOTE_FAIL:
67 (void) snprintf(buf, buflen, "cannot reach any remote peers");
68 break;
69 default:
70 (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
71 }
72 }
73
74 static int
75 svp_remote_comparator(const void *l, const void *r)
76 {
77 int ret;
78 const svp_remote_t *lr = l, *rr = r;
79
80 ret = strcmp(lr->sr_hostname, rr->sr_hostname);
81 if (ret > 0)
82 return (1);
83 else if (ret < 0)
84 return (-1);
85
86 if (lr->sr_rport > rr->sr_rport)
87 return (1);
88 else if (lr->sr_rport < rr->sr_rport)
89 return (-1);
90
91 return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
92 }
93
94 void
95 svp_query_release(svp_query_t *sqp)
96 {
97 id_free(svp_idspace, sqp->sq_header.svp_id);
98 }
99
100 static void
101 svp_remote_destroy(svp_remote_t *srp)
102 {
103 size_t len;
104
105 /*
106 * Clean up any unrelated DNS information. At this point we know that
107 * we're not in the remote tree. That means, that svp_remote_dns_timer
108 * cannot queue us. However, if any of our DNS related state flags are
109 * set, we have to hang out.
110 */
111 mutex_enter(&srp->sr_lock);
112 while (srp->sr_state &
113 (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
114 (void) cond_wait(&srp->sr_cond, &srp->sr_lock);
115 }
116 mutex_exit(&srp->sr_lock);
117 svp_shootdown_fini(srp);
118
119 if (cond_destroy(&srp->sr_cond) != 0)
120 libvarpd_panic("failed to destroy cond sr_cond");
121
122 if (mutex_destroy(&srp->sr_lock) != 0)
123 libvarpd_panic("failed to destroy mutex sr_lock");
124
125 if (srp->sr_addrinfo != NULL)
126 freeaddrinfo(srp->sr_addrinfo);
127 len = strlen(srp->sr_hostname) + 1;
128 umem_free(srp->sr_hostname, len);
129 umem_free(srp, sizeof (svp_remote_t));
130 }
131
132 static int
133 svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
134 svp_remote_t **outp)
135 {
136 size_t hlen;
137 svp_remote_t *remote;
138
139 assert(MUTEX_HELD(&svp_remote_lock));
140
141 remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
142 if (remote == NULL) {
143 mutex_exit(&svp_remote_lock);
144 return (ENOMEM);
145 }
146
147 if (svp_shootdown_init(remote) != 0) {
148 umem_free(remote, sizeof (svp_remote_t));
149 mutex_exit(&svp_remote_lock);
150 return (ENOMEM);
151 }
152
153 hlen = strlen(host) + 1;
154 remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
155 if (remote->sr_hostname == NULL) {
156 svp_shootdown_fini(remote);
157 umem_free(remote, sizeof (svp_remote_t));
158 mutex_exit(&svp_remote_lock);
159 return (ENOMEM);
160 }
161 remote->sr_rport = port;
162 if (mutex_init(&remote->sr_lock,
163 USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
164 libvarpd_panic("failed to create mutex sr_lock");
165 if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
166 libvarpd_panic("failed to create cond sr_cond");
167 list_create(&remote->sr_conns, sizeof (svp_conn_t),
168 offsetof(svp_conn_t, sc_rlist));
169 avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
170 offsetof(svp_t, svp_rlink));
171 (void) strlcpy(remote->sr_hostname, host, hlen);
172 remote->sr_count = 1;
173 remote->sr_uip = *uip;
174
175 svp_shootdown_start(remote);
176
177 *outp = remote;
178 return (0);
179 }
180
181 int
182 svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
183 svp_remote_t **outp)
184 {
185 int ret;
186 svp_remote_t lookup, *remote;
187
188 lookup.sr_hostname = host;
189 lookup.sr_rport = port;
190 lookup.sr_uip = *uip;
191 mutex_enter(&svp_remote_lock);
192 remote = avl_find(&svp_remote_tree, &lookup, NULL);
193 if (remote != NULL) {
194 assert(remote->sr_count > 0);
195 remote->sr_count++;
196 *outp = remote;
197 mutex_exit(&svp_remote_lock);
198 return (0);
199 }
200
201 if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
202 mutex_exit(&svp_remote_lock);
203 return (ret);
204 }
205
206 avl_add(&svp_remote_tree, *outp);
207 mutex_exit(&svp_remote_lock);
208
209 /* Make sure DNS is up to date */
210 svp_host_queue(*outp);
211
212 return (0);
213 }
214
215 void
216 svp_remote_release(svp_remote_t *srp)
217 {
218 mutex_enter(&svp_remote_lock);
219 mutex_enter(&srp->sr_lock);
220 srp->sr_count--;
221 if (srp->sr_count != 0) {
222 mutex_exit(&srp->sr_lock);
223 mutex_exit(&svp_remote_lock);
224 return;
225 }
226 mutex_exit(&srp->sr_lock);
227
228 avl_remove(&svp_remote_tree, srp);
229 mutex_exit(&svp_remote_lock);
230 svp_remote_destroy(srp);
231 }
232
233 int
234 svp_remote_attach(svp_remote_t *srp, svp_t *svp)
235 {
236 svp_t check;
237 avl_index_t where;
238
239 mutex_enter(&srp->sr_lock);
240 if (svp->svp_remote != NULL)
241 libvarpd_panic("failed to create mutex sr_lock");
242
243 /*
244 * We require everything except shootdowns
245 */
246 if (svp->svp_cb.scb_vl2_lookup == NULL)
247 libvarpd_panic("missing callback scb_vl2_lookup");
248 if (svp->svp_cb.scb_vl3_lookup == NULL)
249 libvarpd_panic("missing callback scb_vl3_lookup");
250 if (svp->svp_cb.scb_vl2_invalidate == NULL)
251 libvarpd_panic("missing callback scb_vl2_invalidate");
252 if (svp->svp_cb.scb_vl3_inject == NULL)
253 libvarpd_panic("missing callback scb_vl3_inject");
254 if (svp->svp_cb.scb_route_lookup == NULL)
255 libvarpd_panic("missing callback scb_route_lookup");
256
257 check.svp_vid = svp->svp_vid;
258 if (avl_find(&srp->sr_tree, &check, &where) != NULL)
259 libvarpd_panic("found duplicate entry with vid %ld",
260 svp->svp_vid);
261 avl_insert(&srp->sr_tree, svp, where);
262 svp->svp_remote = srp;
263 mutex_exit(&srp->sr_lock);
264
265 return (0);
266 }
267
268 void
269 svp_remote_detach(svp_t *svp)
270 {
271 svp_t *lookup;
272 svp_remote_t *srp = svp->svp_remote;
273
274 if (srp == NULL)
275 libvarpd_panic("trying to detach remote when none exists");
276
277 mutex_enter(&srp->sr_lock);
278 lookup = avl_find(&srp->sr_tree, svp, NULL);
279 if (lookup == NULL || lookup != svp)
280 libvarpd_panic("inconsitent remote avl tree...");
281 avl_remove(&srp->sr_tree, svp);
282 svp->svp_remote = NULL;
283 mutex_exit(&srp->sr_lock);
284 svp_remote_release(srp);
285 }
286
287 /*
288 * See if the request can be sent over the connection's supported version.
289 * Scribble the version in the request itself. NOTE that we do not check the
290 * version that already exists in sqp->sq_header.svp_ver, as we may be called
291 * from svp_remote_reassign() (and change versions when arriving at a new
292 * connection).
293 */
294 static boolean_t
295 svp_outbound_version_check(int version, svp_query_t *sqp)
296 {
297 uint16_t op = htons(sqp->sq_header.svp_op);
298
299 /*
300 * As of v1 -> v2, we really only need to restrict SVP_R_ROUTE_REQ
301 * as v2-only. Reflect that here.
302 *
303 * NOTE that if any message semantics change between versions,
304 * (e.g. "in v3 SVP_R_VL2_REQ takes on additional work"), we'll
305 * need to more-deeply inspect the query. It's possible that the
306 * svp_op space is big enough to just continue op-only inspections.
307 */
308
309 assert(version > 0 && version <= SVP_CURRENT_VERSION);
310
311 if (op != SVP_R_ROUTE_REQ || version >= SVP_VERSION_TWO) {
312 sqp->sq_header.svp_ver = htons(version);
313 return (B_TRUE);
314 }
315
316 return (B_FALSE);
317 }
318
319 /*
320 * Walk the list of connections and find the first one that's available AND
321 * version-appropriate for the message, then move the matched connection to
322 * the back of the list so it's less likely to be used again.
323 */
324 static boolean_t
325 svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
326 {
327 svp_conn_t *scp;
328
329 assert(MUTEX_HELD(&srp->sr_lock));
330 for (scp = list_head(&srp->sr_conns); scp != NULL;
331 scp = list_next(&srp->sr_conns, scp)) {
332 mutex_enter(&scp->sc_lock);
333 if (scp->sc_cstate != SVP_CS_ACTIVE ||
334 !svp_outbound_version_check(scp->sc_version, sqp)) {
335 mutex_exit(&scp->sc_lock);
336 continue;
337 }
338 svp_conn_queue(scp, sqp);
339 mutex_exit(&scp->sc_lock);
340 list_remove(&srp->sr_conns, scp);
341 list_insert_tail(&srp->sr_conns, scp);
342 return (B_TRUE);
343 }
344
345 return (B_FALSE);
346 }
347
348 static void
349 svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
350 {
351 svp_t *svp = sqp->sq_svp;
352 svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
353
354 if (sqp->sq_status == SVP_S_OK)
355 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
356 (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
357 arg);
358 else
359 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
360 }
361
362 void
363 svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
364 void *arg)
365 {
366 svp_remote_t *srp;
367 svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
368
369 srp = svp->svp_remote;
370 sqp->sq_func = svp_remote_vl2_lookup_cb;
371 sqp->sq_arg = arg;
372 sqp->sq_svp = svp;
373 sqp->sq_state = SVP_QUERY_INIT;
374 sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
375 sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
376 sqp->sq_header.svp_id = id_alloc(svp_idspace);
377 if (sqp->sq_header.svp_id == (id_t)-1)
378 libvarpd_panic("failed to allcoate from svp_idspace: %d",
379 errno);
380 sqp->sq_header.svp_crc32 = 0;
381 sqp->sq_rdata = vl2r;
382 sqp->sq_rsize = sizeof (svp_vl2_req_t);
383 sqp->sq_wdata = NULL;
384 sqp->sq_wsize = 0;
385
386 bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
387 vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
388
389 mutex_enter(&srp->sr_lock);
390 if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
391 svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
392 mutex_exit(&srp->sr_lock);
393 }
394
395 static void
396 svp_remote_route_lookup_cb(svp_query_t *sqp, void *arg)
397 {
398 svp_t *svp = sqp->sq_svp;
399 svp_route_ack_t *sra = (svp_route_ack_t *)sqp->sq_wdata;
400
401 /*
402 * Do the ntoh*()-ing here.
403 */
404 if (sqp->sq_status == SVP_S_OK) {
405 svp->svp_cb.scb_route_lookup(svp, ntohl(sqp->sq_status),
406 ntohl(sra->sra_dcid), ntohl(sra->sra_vnetid),
407 ntohs(sra->sra_vlan), sra->sra_srcmac, sra->sra_dstmac,
408 ntohs(sra->sra_port), sra->sra_ip, sra->sra_src_pfx,
409 sra->sra_dst_pfx, arg);
410 } else {
411 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
412 0, 0, 0, NULL, NULL, 0, NULL, 0, 0, arg);
413 }
414 }
415
416 void
417 svp_remote_route_lookup(svp_t *svp, svp_query_t *sqp,
418 const struct in6_addr *src, const struct in6_addr *dst, uint32_t vnetid,
419 uint16_t vlan, void *arg)
420 {
421 svp_remote_t *srp;
422 svp_route_req_t *srr = &sqp->sq_rdun.sqd_rr;
423
424 srp = svp->svp_remote;
425 sqp->sq_func = svp_remote_route_lookup_cb;
426 sqp->sq_arg = arg;
427 sqp->sq_svp = svp;
428 sqp->sq_state = SVP_QUERY_INIT;
429 sqp->sq_header.svp_op = htons(SVP_R_ROUTE_REQ);
430 sqp->sq_header.svp_size = htonl(sizeof (svp_route_req_t));
431 sqp->sq_header.svp_id = id_alloc(svp_idspace);
432 if (sqp->sq_header.svp_id == (id_t)-1)
433 libvarpd_panic("failed to allcoate from svp_idspace: %d",
434 errno);
435 sqp->sq_header.svp_crc32 = 0;
436 sqp->sq_rdata = srr;
437 sqp->sq_rsize = sizeof (svp_route_req_t);
438 sqp->sq_wdata = NULL;
439 sqp->sq_wsize = 0;
440
441 bcopy(src, srr->srr_srcip, sizeof (struct in6_addr));
442 bcopy(dst, srr->srr_dstip, sizeof (struct in6_addr));
443 /* Caller should've checked both are the same type... */
444 srr->srr_vnetid = htonl(vnetid);
445 srr->srr_vlan = htons(vlan);
446 srr->srr_pad = 0;
447
448 mutex_enter(&srp->sr_lock);
449 if (!svp_remote_conn_queue(srp, sqp)) {
450 sqp->sq_status = SVP_S_FATAL;
451 sqp->sq_func(sqp, arg);
452 }
453 mutex_exit(&srp->sr_lock);
454 }
455
456 static void
457 svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
458 {
459 svp_t *svp = sqp->sq_svp;
460 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
461
462 if (sqp->sq_status == SVP_S_OK)
463 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
464 (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
465 arg);
466 else
467 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
468 arg);
469 }
470
471 static void
472 svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
473 const struct sockaddr *addr, svp_query_f func, void *arg, uint32_t vid)
474 {
475 svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
476
477 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
478 libvarpd_panic("unexpected sa_family for the vl3 lookup");
479
480 sqp->sq_func = func;
481 sqp->sq_arg = arg;
482 sqp->sq_state = SVP_QUERY_INIT;
483 sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
484 sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
485 sqp->sq_header.svp_id = id_alloc(svp_idspace);
486 if (sqp->sq_header.svp_id == (id_t)-1)
487 libvarpd_panic("failed to allcoate from svp_idspace: %d",
488 errno);
489 sqp->sq_header.svp_crc32 = 0;
490 sqp->sq_rdata = vl3r;
491 sqp->sq_rsize = sizeof (svp_vl3_req_t);
492 sqp->sq_wdata = NULL;
493 sqp->sq_wsize = 0;
494
495 if (addr->sa_family == AF_INET6) {
496 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
497 vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
498 bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
499 sizeof (struct in6_addr));
500 } else {
501 struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
502 struct in6_addr v6;
503
504 vl3r->sl3r_type = htonl(SVP_VL3_IP);
505 IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
506 bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
507 }
508 vl3r->sl3r_vnetid = htonl(vid);
509
510 mutex_enter(&srp->sr_lock);
511 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
512 sqp->sq_status = SVP_S_FATAL;
513 sqp->sq_func(sqp, arg);
514 }
515 mutex_exit(&srp->sr_lock);
516 }
517
518 /*
519 * This is a request to do a VL3 look-up that originated internally as opposed
520 * to coming from varpd. As such we need a slightly different query callback
521 * function upon completion and don't go through the normal path with the svp_t.
522 */
523 void
524 svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
525 const struct sockaddr *addr, svp_query_f func, void *arg)
526 {
527 svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
528 }
529
530 void
531 svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
532 const struct sockaddr *addr, void *arg)
533 {
534 svp_remote_t *srp = svp->svp_remote;
535
536 sqp->sq_svp = svp;
537 svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
538 arg, svp->svp_vid);
539 }
540
541 static void
542 svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
543 {
544 svp_remote_t *srp = sqp->sq_arg;
545 uint16_t version;
546
547 /*
548 * Version in request is set in this sqp's read-data/sq_header by
549 * now.
550 */
551 assert(sqp->sq_header.svp_op == htons(SVP_R_LOG_REQ));
552 assert(sqp->sq_header.svp_ver != 0);
553 version = htons(sqp->sq_header.svp_ver);
554
555 assert(sqp->sq_wdata != NULL);
556 if (sqp->sq_status == SVP_S_OK)
557 svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
558 sqp->sq_size, version);
559 else
560 svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0, 0);
561 }
562
563 void
564 svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
565 size_t buflen)
566 {
567 svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
568 boolean_t queued;
569
570 sqp->sq_func = svp_remote_log_request_cb;
571 sqp->sq_state = SVP_QUERY_INIT;
572 sqp->sq_arg = srp;
573 sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
574 sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
575 sqp->sq_header.svp_id = id_alloc(svp_idspace);
576 if (sqp->sq_header.svp_id == (id_t)-1)
577 libvarpd_panic("failed to allcoate from svp_idspace: %d",
578 errno);
579 sqp->sq_header.svp_crc32 = 0;
580 sqp->sq_rdata = logr;
581 sqp->sq_rsize = sizeof (svp_log_req_t);
582 sqp->sq_wdata = buf;
583 sqp->sq_wsize = buflen;
584
585 logr->svlr_count = htonl(buflen);
586 bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));
587
588 /*
589 * If this fails, there isn't much that we can't do. Give the callback
590 * with a fatal status.
591 */
592 mutex_enter(&srp->sr_lock);
593 queued = svp_remote_conn_queue(srp, sqp);
594 mutex_exit(&srp->sr_lock);
595
596 if (queued == B_FALSE)
597 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0, 0);
598 }
599
600 static void
601 svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
602 {
603 svp_remote_t *srp = arg;
604
605 svp_shootdown_lrm_cb(srp, sqp->sq_status);
606 }
607
608 void
609 svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
610 size_t buflen)
611 {
612 boolean_t queued;
613 svp_lrm_req_t *svrr = buf;
614
615 sqp->sq_func = svp_remote_lrm_request_cb;
616 sqp->sq_state = SVP_QUERY_INIT;
617 sqp->sq_arg = srp;
618 sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
619 sqp->sq_header.svp_size = htonl(buflen);
620 sqp->sq_header.svp_id = id_alloc(svp_idspace);
621 if (sqp->sq_header.svp_id == (id_t)-1)
622 libvarpd_panic("failed to allcoate from svp_idspace: %d",
623 errno);
624 sqp->sq_header.svp_crc32 = 0;
625 sqp->sq_rdata = buf;
626 sqp->sq_rsize = buflen;
627 sqp->sq_wdata = NULL;
628 sqp->sq_wsize = 0;
629
630 /*
631 * We need to fix up the count to be in proper network order.
632 */
633 svrr->svrr_count = htonl(svrr->svrr_count);
634
635 /*
636 * If this fails, there isn't much that we can't do. Give the callback
637 * with a fatal status.
638 */
639 mutex_enter(&srp->sr_lock);
640 queued = svp_remote_conn_queue(srp, sqp);
641 mutex_exit(&srp->sr_lock);
642
643 if (queued == B_FALSE)
644 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0, 0);
645 }
646
647 /* ARGSUSED */
648 void
649 svp_remote_dns_timer(void *unused)
650 {
651 svp_remote_t *s;
652 mutex_enter(&svp_remote_lock);
653 for (s = avl_first(&svp_remote_tree); s != NULL;
654 s = AVL_NEXT(&svp_remote_tree, s)) {
655 svp_host_queue(s);
656 }
657 mutex_exit(&svp_remote_lock);
658 }
659
/*
 * Apply a fresh list of resolved addresses to a remote.
 *
 * The remote's generation number is bumped first. Every AF_INET or AF_INET6
 * entry in newaddrs is then matched against the remote's connections: a
 * connection with the same address is stamped with the new generation, and an
 * address without a connection gets one created. Finally, every connection
 * whose generation is older than the remote's is passed to
 * svp_conn_fallout().
 */
void
svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
{
	struct addrinfo *a;
	svp_conn_t *scp;
	int ngen;

	/* Start a new generation; ngen is what matching connections get. */
	mutex_enter(&srp->sr_lock);
	srp->sr_gen++;
	ngen = srp->sr_gen;
	mutex_exit(&srp->sr_lock);

	for (a = newaddrs; a != NULL; a = a->ai_next) {
		struct in6_addr in6;
		struct in6_addr *addrp;

		if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
			continue;

		/*
		 * Addresses are compared as struct in6_addr; IPv4 addresses
		 * are converted to their IPv4-mapped form first.
		 */
		if (a->ai_family == AF_INET) {
			struct sockaddr_in *v4;
			v4 = (struct sockaddr_in *)a->ai_addr;
			addrp = &in6;
			IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
		} else {
			struct sockaddr_in6 *v6;
			v6 = (struct sockaddr_in6 *)a->ai_addr;
			addrp = &v6->sin6_addr;
		}

		/*
		 * Look for an existing connection to this address. The loop
		 * leaves scp pointing at the match, or NULL if there is none.
		 */
		mutex_enter(&srp->sr_lock);
		for (scp = list_head(&srp->sr_conns); scp != NULL;
		    scp = list_next(&srp->sr_conns, scp)) {
			mutex_enter(&scp->sc_lock);
			if (bcmp(addrp, &scp->sc_addr,
			    sizeof (struct in6_addr)) == 0) {
				scp->sc_gen = ngen;
				mutex_exit(&scp->sc_lock);
				break;
			}
			mutex_exit(&scp->sc_lock);
		}

		/*
		 * We need to be careful in the assumptions that we make here,
		 * as there's a good chance that svp_conn_create will
		 * drop the svp_remote_t`sr_lock to kick off its effective event
		 * loop.
		 */
		if (scp == NULL)
			(void) svp_conn_create(srp, addrp);
		mutex_exit(&srp->sr_lock);
	}

	/*
	 * Now it's time to clean things up. We do not actively clean up the
	 * current connections that we have, instead allowing them to stay
	 * around assuming that they're still useful. Instead, we go through and
	 * call svp_conn_fallout() on anything that's from an older generation.
	 */
	mutex_enter(&srp->sr_lock);
	for (scp = list_head(&srp->sr_conns); scp != NULL;
	    scp = list_next(&srp->sr_conns, scp)) {
		boolean_t fall = B_FALSE;
		mutex_enter(&scp->sc_lock);
		if (scp->sc_gen < srp->sr_gen)
			fall = B_TRUE;
		mutex_exit(&scp->sc_lock);
		/* svp_conn_fallout() is called with sc_lock released. */
		if (fall == B_TRUE)
			svp_conn_fallout(scp);
	}
	mutex_exit(&srp->sr_lock);
}
733
734 /*
735 * This connection is in the process of being reset, we need to reassign all of
736 * its queries to other places or mark them as fatal. Note that the first
737 * connection was the one in flight when this failed. We always mark it as
738 * failed to avoid trying to reset its state.
739 */
740 void
741 svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
742 {
743 boolean_t first = B_TRUE;
744 assert(MUTEX_HELD(&srp->sr_lock));
745 assert(MUTEX_HELD(&srp->sr_lock));
746 svp_query_t *sqp;
747
748 /*
749 * As we try to reassigning all of its queries, remove it from the list.
750 */
751 list_remove(&srp->sr_conns, scp);
752
753 while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
754
755 if (first == B_TRUE) {
756 sqp->sq_status = SVP_S_FATAL;
757 sqp->sq_func(sqp, sqp->sq_arg);
758 continue;
759 }
760
761 sqp->sq_acttime = -1;
762
763 /*
764 * We may want to maintain a queue of these for some time rather
765 * than just failing them all.
766 */
767 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
768 sqp->sq_status = SVP_S_FATAL;
769 sqp->sq_func(sqp, sqp->sq_arg);
770 }
771 }
772
773 /*
774 * Now that we're done, go ahead and re-insert.
775 */
776 list_insert_tail(&srp->sr_conns, scp);
777 }
778
779 void
780 svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
781 {
782 int sf, nf;
783 char buf[256];
784
785 assert(MUTEX_HELD(&srp->sr_lock));
786
787 if (flag == SVP_RD_ALL || flag == 0)
788 libvarpd_panic("invalid flag passed to degrade");
789
790 if ((flag & srp->sr_degrade) != 0) {
791 return;
792 }
793
794 sf = ffs(srp->sr_degrade);
795 nf = ffs(flag);
796 srp->sr_degrade |= flag;
797 if (sf == 0 || sf > nf) {
798 svp_t *svp;
799 svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
800
801 for (svp = avl_first(&srp->sr_tree); svp != NULL;
802 svp = AVL_NEXT(&srp->sr_tree, svp)) {
803 libvarpd_fma_degrade(svp->svp_hdl, buf);
804 }
805 }
806 }
807
808 void
809 svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
810 {
811 int sf, nf;
812
813 assert(MUTEX_HELD(&srp->sr_lock));
814 sf = ffs(srp->sr_degrade);
815 if ((srp->sr_degrade & flag) != flag)
816 return;
817 srp->sr_degrade &= ~flag;
818 nf = ffs(srp->sr_degrade);
819
820 /*
821 * If we're now empty, restore the device. If we still are degraded, but
822 * we now have a higher base than we used to, change the message.
823 */
824 if (srp->sr_degrade == 0) {
825 svp_t *svp;
826 for (svp = avl_first(&srp->sr_tree); svp != NULL;
827 svp = AVL_NEXT(&srp->sr_tree, svp)) {
828 libvarpd_fma_restore(svp->svp_hdl);
829 }
830 } else if (nf != sf) {
831 svp_t *svp;
832 char buf[256];
833
834 svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
835 for (svp = avl_first(&srp->sr_tree); svp != NULL;
836 svp = AVL_NEXT(&srp->sr_tree, svp)) {
837 libvarpd_fma_degrade(svp->svp_hdl, buf);
838 }
839 }
840 }
841
842 void
843 svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
844 {
845 svp_shoot_vl3_t *squery = arg;
846 svp_log_vl3_t *svl3 = squery->ssv_vl3;
847 svp_sdlog_t *sdl = squery->ssv_log;
848
849 if (sqp->sq_status == SVP_S_OK) {
850 svp_t *svp, lookup;
851
852 svp_remote_t *srp = sdl->sdl_remote;
853 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
854
855 lookup.svp_vid = ntohl(svl3->svl3_vnetid);
856 mutex_enter(&srp->sr_lock);
857 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
858 svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
859 (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
860 NULL);
861 }
862 mutex_exit(&srp->sr_lock);
863
864 }
865
866 svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
867
868 umem_free(squery, sizeof (svp_shoot_vl3_t));
869 }
870
871 void
872 svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
873 svp_sdlog_t *sdl)
874 {
875 svp_shoot_vl3_t *squery;
876
877 squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
878 if (squery == NULL) {
879 svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
880 return;
881 }
882
883 squery->ssv_vl3 = svl3;
884 squery->ssv_log = sdl;
885 squery->ssv_sock.sin6_family = AF_INET6;
886 bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
887 sizeof (svl3->svl3_ip));
888 svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
889 (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
890 squery);
891 }
892
893 void
894 svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
895 {
896 svp_t *svp, lookup;
897
898 lookup.svp_vid = ntohl(svl2->svl2_vnetid);
899 mutex_enter(&srp->sr_lock);
900 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
901 svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
902 }
903 mutex_exit(&srp->sr_lock);
904 }
905
906 void
907 svp_remote_shootdown_route(svp_remote_t *srp, svp_log_route_t *svlr)
908 {
909 svp_t *svp, lookup;
910
911 lookup.svp_vid = ntohl(svlr->svlr_src_vnetid);
912 mutex_enter(&srp->sr_lock);
913 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
914 svp->svp_cb.scb_route_shootdown(svp, svlr->svlr_srcip,
915 svlr->svlr_dstip, svlr->svlr_src_prefixlen,
916 svlr->svlr_dst_prefixlen, htons(svlr->svlr_src_vlan));
917 }
918 mutex_exit(&srp->sr_lock);
919 }
920
921 int
922 svp_remote_init(void)
923 {
924 svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
925 if (svp_idspace == NULL)
926 return (errno);
927 avl_create(&svp_remote_tree, svp_remote_comparator,
928 sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
929 svp_dns_timer.st_func = svp_remote_dns_timer;
930 svp_dns_timer.st_arg = NULL;
931 svp_dns_timer.st_oneshot = B_FALSE;
932 svp_dns_timer.st_value = svp_dns_timer_rate;
933 svp_timer_add(&svp_dns_timer);
934 return (0);
935 }
936
937 void
938 svp_remote_fini(void)
939 {
940 svp_timer_remove(&svp_dns_timer);
941 avl_destroy(&svp_remote_tree);
942 if (svp_idspace == NULL)
943 id_space_destroy(svp_idspace);
944 }