1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 */
15
16 /*
17 * Remote backend management
18 *
19 * For more information, see the big theory statement in
20 * lib/varpd/svp/common/libvarpd_svp.c.
21 */
22
23 #include <umem.h>
24 #include <strings.h>
25 #include <string.h>
26 #include <stddef.h>
27 #include <thread.h>
28 #include <synch.h>
29 #include <assert.h>
30 #include <sys/socket.h>
31 #include <netdb.h>
32 #include <errno.h>
33 #include <libidspace.h>
34
35 #include <libvarpd_provider.h>
36 #include <libvarpd_svp.h>
37
/*
 * Tracking state for a single internally-generated VL3 shootdown lookup: the
 * underlying query plus the socket address being queried and the log entry /
 * shootdown log it came from, so the completion callback can tie the answer
 * back to its origin.
 */
typedef struct svp_shoot_vl3 {
	svp_query_t		ssv_query;	/* underlying SVP query */
	struct sockaddr_in6	ssv_sock;	/* address being looked up */
	svp_log_vl3_t		*ssv_vl3;	/* originating log entry */
	svp_sdlog_t		*ssv_log;	/* owning shootdown log */
} svp_shoot_vl3_t;
44
/* Protects svp_remote_tree and each remote's sr_count. */
static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
/* All active remotes, keyed by hostname, port, and underlay IP. */
static avl_tree_t svp_remote_tree;
/* Periodic timer that re-queues every remote for DNS resolution. */
static svp_timer_t svp_dns_timer;
/* Allocator for per-request SVP header ids. */
static id_space_t *svp_idspace;
static int svp_dns_timer_rate = 30;	/* seconds */
50
/*
 * Allocate a unique SVP request id from the global id space. Released via
 * svp_query_release().
 */
id_t
svp_id_alloc(void)
{
	return (id_alloc(svp_idspace));
}
56
57 static void
58 svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
59 size_t buflen)
60 {
61 switch (state) {
62 case SVP_RD_DNS_FAIL:
63 (void) snprintf(buf, buflen, "failed to resolve or find "
64 "entries for hostname %s", srp->sr_hostname);
65 break;
66 case SVP_RD_REMOTE_FAIL:
67 (void) snprintf(buf, buflen, "cannot reach any remote peers");
68 break;
69 default:
70 (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
71 }
72 }
73
74 static int
75 svp_remote_comparator(const void *l, const void *r)
76 {
77 int ret;
78 const svp_remote_t *lr = l, *rr = r;
79
80 ret = strcmp(lr->sr_hostname, rr->sr_hostname);
81 if (ret > 0)
82 return (1);
83 else if (ret < 0)
84 return (-1);
85
86 if (lr->sr_rport > rr->sr_rport)
87 return (1);
88 else if (lr->sr_rport < rr->sr_rport)
89 return (-1);
90
91 return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
92 }
93
/*
 * Release the SVP request id that was allocated into the query's header by
 * one of the svp_remote_*_lookup/request functions.
 */
void
svp_query_release(svp_query_t *sqp)
{
	id_free(svp_idspace, sqp->sq_header.svp_id);
}
99
/*
 * Tear down a remote whose reference count has dropped to zero and which has
 * already been removed from svp_remote_tree by the caller.
 */
static void
svp_remote_destroy(svp_remote_t *srp)
{
	size_t len;

	/*
	 * Clean up any unrelated DNS information. At this point we know that
	 * we're not in the remote tree. That means, that svp_remote_dns_timer
	 * cannot queue us. However, if any of our DNS related state flags are
	 * set, we have to hang out.
	 */
	mutex_enter(&srp->sr_lock);
	while (srp->sr_state &
	    (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
		(void) cond_wait(&srp->sr_cond, &srp->sr_lock);
	}
	mutex_exit(&srp->sr_lock);
	svp_shootdown_fini(srp);

	/* Destroying a cond/mutex that is still in use is a program bug. */
	if (cond_destroy(&srp->sr_cond) != 0)
		libvarpd_panic("failed to destroy cond sr_cond");

	if (mutex_destroy(&srp->sr_lock) != 0)
		libvarpd_panic("failed to destroy mutex sr_lock");

	if (srp->sr_addrinfo != NULL)
		freeaddrinfo(srp->sr_addrinfo);
	/* sr_hostname was allocated strlen + 1 bytes in svp_remote_create. */
	len = strlen(srp->sr_hostname) + 1;
	umem_free(srp->sr_hostname, len);
	umem_free(srp, sizeof (svp_remote_t));
}
131
132 static int
133 svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
134 svp_remote_t **outp)
135 {
136 size_t hlen;
137 svp_remote_t *remote;
138
139 assert(MUTEX_HELD(&svp_remote_lock));
140
141 remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
142 if (remote == NULL) {
143 mutex_exit(&svp_remote_lock);
144 return (ENOMEM);
145 }
146
147 if (svp_shootdown_init(remote) != 0) {
148 umem_free(remote, sizeof (svp_remote_t));
149 mutex_exit(&svp_remote_lock);
150 return (ENOMEM);
151 }
152
153 hlen = strlen(host) + 1;
154 remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
155 if (remote->sr_hostname == NULL) {
156 svp_shootdown_fini(remote);
157 umem_free(remote, sizeof (svp_remote_t));
158 mutex_exit(&svp_remote_lock);
159 return (ENOMEM);
160 }
161 remote->sr_rport = port;
162 if (mutex_init(&remote->sr_lock,
163 USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
164 libvarpd_panic("failed to create mutex sr_lock");
165 if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
166 libvarpd_panic("failed to create cond sr_cond");
167 list_create(&remote->sr_conns, sizeof (svp_conn_t),
168 offsetof(svp_conn_t, sc_rlist));
169 avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
170 offsetof(svp_t, svp_rlink));
171 (void) strlcpy(remote->sr_hostname, host, hlen);
172 remote->sr_count = 1;
173 remote->sr_uip = *uip;
174
175 svp_shootdown_start(remote);
176
177 *outp = remote;
178 return (0);
179 }
180
/*
 * Find the remote for the given hostname, port, and underlay IP, creating it
 * if it doesn't exist, and take a reference on it. On success, *outp must
 * eventually be released via svp_remote_release().
 */
int
svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
    svp_remote_t **outp)
{
	int ret;
	svp_remote_t lookup, *remote;

	/* The key fields mirror svp_remote_comparator()'s ordering. */
	lookup.sr_hostname = host;
	lookup.sr_rport = port;
	lookup.sr_uip = *uip;
	mutex_enter(&svp_remote_lock);
	remote = avl_find(&svp_remote_tree, &lookup, NULL);
	if (remote != NULL) {
		assert(remote->sr_count > 0);
		remote->sr_count++;
		*outp = remote;
		mutex_exit(&svp_remote_lock);
		return (0);
	}

	if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
		mutex_exit(&svp_remote_lock);
		return (ret);
	}

	avl_add(&svp_remote_tree, *outp);
	mutex_exit(&svp_remote_lock);

	/* Make sure DNS is up to date */
	svp_host_queue(*outp);

	return (0);
}
214
215 void
216 svp_remote_release(svp_remote_t *srp)
217 {
218 mutex_enter(&svp_remote_lock);
219 mutex_enter(&srp->sr_lock);
220 srp->sr_count--;
221 if (srp->sr_count != 0) {
222 mutex_exit(&srp->sr_lock);
223 mutex_exit(&svp_remote_lock);
224 return;
225 }
226 mutex_exit(&srp->sr_lock);
227
228 avl_remove(&svp_remote_tree, srp);
229 mutex_exit(&svp_remote_lock);
230 svp_remote_destroy(srp);
231 }
232
233 int
234 svp_remote_attach(svp_remote_t *srp, svp_t *svp)
235 {
236 svp_t check;
237 avl_index_t where;
238
239 mutex_enter(&srp->sr_lock);
240 if (svp->svp_remote != NULL)
241 libvarpd_panic("failed to create mutex sr_lock");
242
243 /*
244 * We require everything except shootdowns
245 */
246 if (svp->svp_cb.scb_vl2_lookup == NULL)
247 libvarpd_panic("missing callback scb_vl2_lookup");
248 if (svp->svp_cb.scb_vl3_lookup == NULL)
249 libvarpd_panic("missing callback scb_vl3_lookup");
250 if (svp->svp_cb.scb_vl2_invalidate == NULL)
251 libvarpd_panic("missing callback scb_vl2_invalidate");
252 if (svp->svp_cb.scb_vl3_inject == NULL)
253 libvarpd_panic("missing callback scb_vl3_inject");
254 if (svp->svp_cb.scb_route_lookup == NULL)
255 libvarpd_panic("missing callback scb_route_lookup");
256
257 check.svp_vid = svp->svp_vid;
258 if (avl_find(&srp->sr_tree, &check, &where) != NULL)
259 libvarpd_panic("found duplicate entry with vid %ld",
260 svp->svp_vid);
261 avl_insert(&srp->sr_tree, svp, where);
262 svp->svp_remote = srp;
263 mutex_exit(&srp->sr_lock);
264
265 return (0);
266 }
267
268 void
269 svp_remote_detach(svp_t *svp)
270 {
271 svp_t *lookup;
272 svp_remote_t *srp = svp->svp_remote;
273
274 if (srp == NULL)
275 libvarpd_panic("trying to detach remote when none exists");
276
277 mutex_enter(&srp->sr_lock);
278 lookup = avl_find(&srp->sr_tree, svp, NULL);
279 if (lookup == NULL || lookup != svp)
280 libvarpd_panic("inconsitent remote avl tree...");
281 avl_remove(&srp->sr_tree, svp);
282 svp->svp_remote = NULL;
283 mutex_exit(&srp->sr_lock);
284 svp_remote_release(srp);
285 }
286
287 /*
288 * See if the request can be sent over the connection's supported version.
289 * Scribble the version in the request itself. NOTE that we do not check the
290 * version that already exists in sqp->sq_header.svp_ver, as we may be called
291 * from svp_remote_reassign() (and change versions when arriving at a new
292 * connection).
293 */
294 static boolean_t
295 svp_outbound_version_check(int version, svp_query_t *sqp)
296 {
297 uint16_t op = htons(sqp->sq_header.svp_op);
298
299 /*
300 * As of v1 -> v2, we really only need to restrict SVP_R_ROUTE_REQ
301 * as v2-only. Reflect that here.
302 *
303 * NOTE that if any message semantics change between future versions,
304 * (e.g. "in v3 SVP_R_VL2_REQ takes on additional work"), we'll
305 * need to more-deeply inspect the query. It's possible that the
306 * svp_op space is big enough to just continue op-only inspections.
307 */
308
309 assert(version > 0 && version <= SVP_CURRENT_VERSION);
310
311 if (op != SVP_R_ROUTE_REQ || version >= SVP_VERSION_TWO) {
312 sqp->sq_header.svp_ver = htons(version);
313 return (B_TRUE);
314 }
315 return (B_FALSE);
316 }
317
318 /*
319 * Walk the list of connections and find the first one that's available AND
320 * version-appropriate for the message, then move the matched connection to
321 * the back of the list so it's less likely to be used again.
322 */
323 static boolean_t
324 svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
325 {
326 svp_conn_t *scp;
327
328 assert(MUTEX_HELD(&srp->sr_lock));
329 for (scp = list_head(&srp->sr_conns); scp != NULL;
330 scp = list_next(&srp->sr_conns, scp)) {
331 mutex_enter(&scp->sc_lock);
332 if (scp->sc_cstate != SVP_CS_ACTIVE ||
333 !svp_outbound_version_check(scp->sc_version, sqp)) {
334 mutex_exit(&scp->sc_lock);
335 continue;
336 }
337 svp_conn_queue(scp, sqp);
338 mutex_exit(&scp->sc_lock);
339 list_remove(&srp->sr_conns, scp);
340 list_insert_tail(&srp->sr_conns, scp);
341 return (B_TRUE);
342 }
343
344 return (B_FALSE);
345 }
346
347 static void
348 svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
349 {
350 svp_t *svp = sqp->sq_svp;
351 svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
352
353 if (sqp->sq_status == SVP_S_OK)
354 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
355 (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
356 arg);
357 else
358 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
359 }
360
361 void
362 svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
363 void *arg)
364 {
365 svp_remote_t *srp;
366 svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
367
368 srp = svp->svp_remote;
369 sqp->sq_func = svp_remote_vl2_lookup_cb;
370 sqp->sq_arg = arg;
371 sqp->sq_svp = svp;
372 sqp->sq_state = SVP_QUERY_INIT;
373 sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
374 sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
375 sqp->sq_header.svp_id = id_alloc(svp_idspace);
376 if (sqp->sq_header.svp_id == (id_t)-1)
377 libvarpd_panic("failed to allcoate from svp_idspace: %d",
378 errno);
379 sqp->sq_header.svp_crc32 = 0;
380 sqp->sq_rdata = vl2r;
381 sqp->sq_rsize = sizeof (svp_vl2_req_t);
382 sqp->sq_wdata = NULL;
383 sqp->sq_wsize = 0;
384
385 bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
386 vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
387
388 mutex_enter(&srp->sr_lock);
389 if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
390 svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
391 mutex_exit(&srp->sr_lock);
392 }
393
394 static void
395 svp_remote_route_lookup_cb(svp_query_t *sqp, void *arg)
396 {
397 svp_t *svp = sqp->sq_svp;
398 svp_route_ack_t *sra = (svp_route_ack_t *)sqp->sq_wdata;
399
400 /*
401 * Do the ntoh*()-ing here.
402 */
403 if (sqp->sq_status == SVP_S_OK) {
404 svp->svp_cb.scb_route_lookup(svp, ntohl(sqp->sq_status),
405 ntohl(sra->sra_dcid), ntohl(sra->sra_vnetid),
406 ntohs(sra->sra_vlan), sra->sra_srcmac, sra->sra_dstmac,
407 ntohs(sra->sra_port), sra->sra_ip, sra->sra_src_pfx,
408 sra->sra_dst_pfx, arg);
409 } else {
410 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
411 0, 0, 0, NULL, NULL, 0, NULL, 0, 0, arg);
412 }
413 }
414
415 void
416 svp_remote_route_lookup(svp_t *svp, svp_query_t *sqp,
417 const struct in6_addr *src, const struct in6_addr *dst, uint32_t vnetid,
418 uint16_t vlan, void *arg)
419 {
420 svp_remote_t *srp;
421 svp_route_req_t *srr = &sqp->sq_rdun.sqd_rr;
422
423 srp = svp->svp_remote;
424 sqp->sq_func = svp_remote_route_lookup_cb;
425 sqp->sq_arg = arg;
426 sqp->sq_svp = svp;
427 sqp->sq_state = SVP_QUERY_INIT;
428 sqp->sq_header.svp_op = htons(SVP_R_ROUTE_REQ);
429 sqp->sq_header.svp_size = htonl(sizeof (svp_route_req_t));
430 sqp->sq_header.svp_id = id_alloc(svp_idspace);
431 if (sqp->sq_header.svp_id == (id_t)-1)
432 libvarpd_panic("failed to allcoate from svp_idspace: %d",
433 errno);
434 sqp->sq_header.svp_crc32 = 0;
435 sqp->sq_rdata = srr;
436 sqp->sq_rsize = sizeof (svp_route_req_t);
437 sqp->sq_wdata = NULL;
438 sqp->sq_wsize = 0;
439
440 bcopy(src, srr->srr_srcip, sizeof (struct in6_addr));
441 bcopy(dst, srr->srr_dstip, sizeof (struct in6_addr));
442 /* Caller should've checked both are the same type... */
443 srr->srr_vnetid = htonl(vnetid);
444 srr->srr_vlan = htons(vlan);
445 srr->srr_pad = 0;
446
447 mutex_enter(&srp->sr_lock);
448 if (!svp_remote_conn_queue(srp, sqp)) {
449 sqp->sq_status = SVP_S_FATAL;
450 sqp->sq_func(sqp, arg);
451 }
452 mutex_exit(&srp->sr_lock);
453 }
454
455 static void
456 svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
457 {
458 svp_t *svp = sqp->sq_svp;
459 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
460
461 if (sqp->sq_status == SVP_S_OK)
462 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
463 (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
464 arg);
465 else
466 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
467 arg);
468 }
469
470 static void
471 svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
472 const struct sockaddr *addr, svp_query_f func, void *arg, uint32_t vid)
473 {
474 svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
475
476 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
477 libvarpd_panic("unexpected sa_family for the vl3 lookup");
478
479 sqp->sq_func = func;
480 sqp->sq_arg = arg;
481 sqp->sq_state = SVP_QUERY_INIT;
482 sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
483 sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
484 sqp->sq_header.svp_id = id_alloc(svp_idspace);
485 if (sqp->sq_header.svp_id == (id_t)-1)
486 libvarpd_panic("failed to allcoate from svp_idspace: %d",
487 errno);
488 sqp->sq_header.svp_crc32 = 0;
489 sqp->sq_rdata = vl3r;
490 sqp->sq_rsize = sizeof (svp_vl3_req_t);
491 sqp->sq_wdata = NULL;
492 sqp->sq_wsize = 0;
493
494 if (addr->sa_family == AF_INET6) {
495 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
496 vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
497 bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
498 sizeof (struct in6_addr));
499 } else {
500 struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
501 struct in6_addr v6;
502
503 vl3r->sl3r_type = htonl(SVP_VL3_IP);
504 IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
505 bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
506 }
507 vl3r->sl3r_vnetid = htonl(vid);
508
509 mutex_enter(&srp->sr_lock);
510 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
511 sqp->sq_status = SVP_S_FATAL;
512 sqp->sq_func(sqp, arg);
513 }
514 mutex_exit(&srp->sr_lock);
515 }
516
517 /*
518 * This is a request to do a VL3 look-up that originated internally as opposed
519 * to coming from varpd. As such we need a slightly different query callback
520 * function upon completion and don't go through the normal path with the svp_t.
521 */
522 void
523 svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
524 const struct sockaddr *addr, svp_query_f func, void *arg)
525 {
526 svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
527 }
528
529 void
530 svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
531 const struct sockaddr *addr, void *arg)
532 {
533 svp_remote_t *srp = svp->svp_remote;
534
535 sqp->sq_svp = svp;
536 svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
537 arg, svp->svp_vid);
538 }
539
540 static void
541 svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
542 {
543 svp_remote_t *srp = sqp->sq_arg;
544
545 assert(sqp->sq_wdata != NULL);
546 if (sqp->sq_status == SVP_S_OK)
547 svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
548 sqp->sq_size);
549 else
550 svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0);
551 }
552
/*
 * Issue an SVP_R_LOG_REQ asking the remote for outstanding shootdown log
 * entries, to be written into buf (at most buflen bytes). Completion is
 * reported through svp_shootdown_logr_cb().
 */
void
svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
    size_t buflen)
{
	svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
	boolean_t queued;

	sqp->sq_func = svp_remote_log_request_cb;
	sqp->sq_state = SVP_QUERY_INIT;
	sqp->sq_arg = srp;
	sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
	sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
	sqp->sq_header.svp_id = id_alloc(svp_idspace);
	if (sqp->sq_header.svp_id == (id_t)-1)
		libvarpd_panic("failed to allcoate from svp_idspace: %d",
		    errno);
	sqp->sq_header.svp_crc32 = 0;
	sqp->sq_rdata = logr;
	sqp->sq_rsize = sizeof (svp_log_req_t);
	sqp->sq_wdata = buf;
	sqp->sq_wsize = buflen;

	/*
	 * NOTE(review): svlr_count is set to the buffer size in bytes here —
	 * presumably the protocol's "count" is a byte count; confirm against
	 * the SVP protocol definition.
	 */
	logr->svlr_count = htonl(buflen);
	bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));

	/*
	 * If this fails, there isn't much that we can do. Give the callback
	 * a fatal status.
	 */
	mutex_enter(&srp->sr_lock);
	queued = svp_remote_conn_queue(srp, sqp);
	mutex_exit(&srp->sr_lock);

	if (queued == B_FALSE)
		svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
}
589
590 static void
591 svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
592 {
593 svp_remote_t *srp = arg;
594
595 svp_shootdown_lrm_cb(srp, sqp->sq_status);
596 }
597
598 void
599 svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
600 size_t buflen)
601 {
602 boolean_t queued;
603 svp_lrm_req_t *svrr = buf;
604
605 sqp->sq_func = svp_remote_lrm_request_cb;
606 sqp->sq_state = SVP_QUERY_INIT;
607 sqp->sq_arg = srp;
608 sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
609 sqp->sq_header.svp_size = htonl(buflen);
610 sqp->sq_header.svp_id = id_alloc(svp_idspace);
611 if (sqp->sq_header.svp_id == (id_t)-1)
612 libvarpd_panic("failed to allcoate from svp_idspace: %d",
613 errno);
614 sqp->sq_header.svp_crc32 = 0;
615 sqp->sq_rdata = buf;
616 sqp->sq_rsize = buflen;
617 sqp->sq_wdata = NULL;
618 sqp->sq_wsize = 0;
619
620 /*
621 * We need to fix up the count to be in proper network order.
622 */
623 svrr->svrr_count = htonl(svrr->svrr_count);
624
625 /*
626 * If this fails, there isn't much that we can't do. Give the callback
627 * with a fatal status.
628 */
629 mutex_enter(&srp->sr_lock);
630 queued = svp_remote_conn_queue(srp, sqp);
631 mutex_exit(&srp->sr_lock);
632
633 if (queued == B_FALSE)
634 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
635 }
636
637 /* ARGSUSED */
638 void
639 svp_remote_dns_timer(void *unused)
640 {
641 svp_remote_t *s;
642 mutex_enter(&svp_remote_lock);
643 for (s = avl_first(&svp_remote_tree); s != NULL;
644 s = AVL_NEXT(&svp_remote_tree, s)) {
645 svp_host_queue(s);
646 }
647 mutex_exit(&svp_remote_lock);
648 }
649
/*
 * DNS resolution for this remote has completed. Reconcile the remote's
 * connection list with the freshly-resolved address set: bump the remote's
 * generation, tag every connection whose address is still present with the
 * new generation, create connections for new addresses, and fail out any
 * connection left on an older generation.
 */
void
svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
{
	struct addrinfo *a;
	svp_conn_t *scp;
	int ngen;

	mutex_enter(&srp->sr_lock);
	srp->sr_gen++;
	ngen = srp->sr_gen;
	mutex_exit(&srp->sr_lock);

	for (a = newaddrs; a != NULL; a = a->ai_next) {
		struct in6_addr in6;
		struct in6_addr *addrp;

		if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
			continue;

		/* Normalize v4 results to v4-mapped IPv6 for comparison. */
		if (a->ai_family == AF_INET) {
			struct sockaddr_in *v4;
			v4 = (struct sockaddr_in *)a->ai_addr;
			addrp = &in6;
			IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
		} else {
			struct sockaddr_in6 *v6;
			v6 = (struct sockaddr_in6 *)a->ai_addr;
			addrp = &v6->sin6_addr;
		}

		mutex_enter(&srp->sr_lock);
		for (scp = list_head(&srp->sr_conns); scp != NULL;
		    scp = list_next(&srp->sr_conns, scp)) {
			mutex_enter(&scp->sc_lock);
			/* Existing connection for this address: keep it. */
			if (bcmp(addrp, &scp->sc_addr,
			    sizeof (struct in6_addr)) == 0) {
				scp->sc_gen = ngen;
				mutex_exit(&scp->sc_lock);
				break;
			}
			mutex_exit(&scp->sc_lock);
		}

		/*
		 * We need to be careful in the assumptions that we make here,
		 * as there's a good chance that svp_conn_create will
		 * drop the svp_remote_t`sr_lock to kick off its effective event
		 * loop.
		 */
		if (scp == NULL)
			(void) svp_conn_create(srp, addrp);
		mutex_exit(&srp->sr_lock);
	}

	/*
	 * Now it's time to clean things up. We do not actively clean up the
	 * current connections that we have, instead allowing them to stay
	 * around assuming that they're still useful. Instead, we go through and
	 * purge the degraded list for anything that's from an older generation.
	 */
	mutex_enter(&srp->sr_lock);
	for (scp = list_head(&srp->sr_conns); scp != NULL;
	    scp = list_next(&srp->sr_conns, scp)) {
		boolean_t fall = B_FALSE;
		mutex_enter(&scp->sc_lock);
		if (scp->sc_gen < srp->sr_gen)
			fall = B_TRUE;
		mutex_exit(&scp->sc_lock);
		if (fall == B_TRUE)
			svp_conn_fallout(scp);
	}
	mutex_exit(&srp->sr_lock);
}
723
724 /*
725 * This connection is in the process of being reset, we need to reassign all of
726 * its queries to other places or mark them as fatal. Note that the first
727 * connection was the one in flight when this failed. We always mark it as
728 * failed to avoid trying to reset its state.
729 */
730 void
731 svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
732 {
733 boolean_t first = B_TRUE;
734 assert(MUTEX_HELD(&srp->sr_lock));
735 assert(MUTEX_HELD(&srp->sr_lock));
736 svp_query_t *sqp;
737
738 /*
739 * As we try to reassigning all of its queries, remove it from the list.
740 */
741 list_remove(&srp->sr_conns, scp);
742
743 while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
744
745 if (first == B_TRUE) {
746 sqp->sq_status = SVP_S_FATAL;
747 sqp->sq_func(sqp, sqp->sq_arg);
748 continue;
749 }
750
751 sqp->sq_acttime = -1;
752
753 /*
754 * We may want to maintain a queue of these for some time rather
755 * than just failing them all.
756 */
757 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
758 sqp->sq_status = SVP_S_FATAL;
759 sqp->sq_func(sqp, sqp->sq_arg);
760 }
761 }
762
763 /*
764 * Now that we're done, go ahead and re-insert.
765 */
766 list_insert_tail(&srp->sr_conns, scp);
767 }
768
/*
 * Mark the remote as degraded for the given reason. A fresh FMA degrade
 * message is only emitted when the new flag is lower-ordered (via ffs) than
 * any flag already set, i.e. when it would change which message
 * svp_remote_mkfmamsg() reports. Caller holds sr_lock.
 */
void
svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
{
	int sf, nf;
	char buf[256];

	assert(MUTEX_HELD(&srp->sr_lock));

	if (flag == SVP_RD_ALL || flag == 0)
		libvarpd_panic("invalid flag passed to degrade");

	/* Already degraded for this reason; nothing new to report. */
	if ((flag & srp->sr_degrade) != 0) {
		return;
	}

	/* sf/nf: position of lowest set bit before and of the new flag. */
	sf = ffs(srp->sr_degrade);
	nf = ffs(flag);
	srp->sr_degrade |= flag;
	if (sf == 0 || sf > nf) {
		svp_t *svp;
		svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));

		for (svp = avl_first(&srp->sr_tree); svp != NULL;
		    svp = AVL_NEXT(&srp->sr_tree, svp)) {
			libvarpd_fma_degrade(svp->svp_hdl, buf);
		}
	}
}
797
/*
 * Clear a degradation reason. If no reasons remain, restore all attached
 * instances; if the lowest-ordered remaining reason changed, re-issue the
 * degrade message for that reason instead. Caller holds sr_lock.
 */
void
svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
{
	int sf, nf;

	assert(MUTEX_HELD(&srp->sr_lock));
	sf = ffs(srp->sr_degrade);
	/* Only act if every bit in flag is currently set. */
	if ((srp->sr_degrade & flag) != flag)
		return;
	srp->sr_degrade &= ~flag;
	nf = ffs(srp->sr_degrade);

	/*
	 * If we're now empty, restore the device. If we still are degraded, but
	 * we now have a higher base than we used to, change the message.
	 */
	if (srp->sr_degrade == 0) {
		svp_t *svp;
		for (svp = avl_first(&srp->sr_tree); svp != NULL;
		    svp = AVL_NEXT(&srp->sr_tree, svp)) {
			libvarpd_fma_restore(svp->svp_hdl);
		}
	} else if (nf != sf) {
		svp_t *svp;
		char buf[256];

		/* 1U << (nf - 1) reconstructs the lowest remaining flag. */
		svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
		for (svp = avl_first(&srp->sr_tree); svp != NULL;
		    svp = AVL_NEXT(&srp->sr_tree, svp)) {
			libvarpd_fma_degrade(svp->svp_hdl, buf);
		}
	}
}
831
832 void
833 svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
834 {
835 svp_shoot_vl3_t *squery = arg;
836 svp_log_vl3_t *svl3 = squery->ssv_vl3;
837 svp_sdlog_t *sdl = squery->ssv_log;
838
839 if (sqp->sq_status == SVP_S_OK) {
840 svp_t *svp, lookup;
841
842 svp_remote_t *srp = sdl->sdl_remote;
843 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
844
845 lookup.svp_vid = ntohl(svl3->svl3_vnetid);
846 mutex_enter(&srp->sr_lock);
847 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
848 svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
849 (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
850 NULL);
851 }
852 mutex_exit(&srp->sr_lock);
853
854 }
855
856 svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
857
858 umem_free(squery, sizeof (svp_shoot_vl3_t));
859 }
860
861 void
862 svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
863 svp_sdlog_t *sdl)
864 {
865 svp_shoot_vl3_t *squery;
866
867 squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
868 if (squery == NULL) {
869 svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
870 return;
871 }
872
873 squery->ssv_vl3 = svl3;
874 squery->ssv_log = sdl;
875 squery->ssv_sock.sin6_family = AF_INET6;
876 bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
877 sizeof (svl3->svl3_ip));
878 svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
879 (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
880 squery);
881 }
882
883 void
884 svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
885 {
886 svp_t *svp, lookup;
887
888 lookup.svp_vid = ntohl(svl2->svl2_vnetid);
889 mutex_enter(&srp->sr_lock);
890 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
891 svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
892 }
893 mutex_exit(&srp->sr_lock);
894 }
895
896 int
897 svp_remote_init(void)
898 {
899 svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
900 if (svp_idspace == NULL)
901 return (errno);
902 avl_create(&svp_remote_tree, svp_remote_comparator,
903 sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
904 svp_dns_timer.st_func = svp_remote_dns_timer;
905 svp_dns_timer.st_arg = NULL;
906 svp_dns_timer.st_oneshot = B_FALSE;
907 svp_dns_timer.st_value = svp_dns_timer_rate;
908 svp_timer_add(&svp_dns_timer);
909 return (0);
910 }
911
912 void
913 svp_remote_fini(void)
914 {
915 svp_timer_remove(&svp_dns_timer);
916 avl_destroy(&svp_remote_tree);
917 if (svp_idspace == NULL)
918 id_space_destroy(svp_idspace);
919 }